summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2020-02-14 21:24:03 +0000
committerDimitry Andric <dim@FreeBSD.org>2020-02-14 21:24:03 +0000
commitd75c7debad4509ece98792074e64b8a650a27bdb (patch)
treef8d77975739b43bf7ffef0612579168cb9ec9474
parent9c2f6c4bb805c7ac08c8925c96e429fcc322725e (diff)
Notes
-rw-r--r--clang/include/clang/AST/ASTConcept.h6
-rw-r--r--clang/include/clang/AST/ExprConcepts.h13
-rw-r--r--clang/include/clang/Basic/Cuda.h3
-rw-r--r--clang/include/clang/Basic/DiagnosticDriverKinds.td3
-rw-r--r--clang/include/clang/Basic/DiagnosticGroups.td11
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td18
-rw-r--r--clang/include/clang/Driver/CC1Options.td2
-rw-r--r--clang/include/clang/Driver/Job.h16
-rw-r--r--clang/include/clang/Lex/PreprocessorOptions.h3
-rw-r--r--clang/include/clang/Sema/Sema.h15
-rw-r--r--clang/include/clang/Sema/SemaConcept.h12
-rw-r--r--clang/lib/AST/ASTConcept.cpp4
-rw-r--r--clang/lib/AST/ASTContext.cpp8
-rw-r--r--clang/lib/AST/CXXInheritance.cpp2
-rw-r--r--clang/lib/AST/DeclCXX.cpp32
-rw-r--r--clang/lib/AST/Expr.cpp5
-rw-r--r--clang/lib/AST/ExprConcepts.cpp63
-rw-r--r--clang/lib/AST/StmtProfile.cpp4
-rw-r--r--clang/lib/Basic/Cuda.cpp8
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp7
-rw-r--r--clang/lib/Driver/Compilation.cpp13
-rw-r--r--clang/lib/Driver/Driver.cpp5
-rw-r--r--clang/lib/Driver/Job.cpp19
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp7
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp43
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp1
-rw-r--r--clang/lib/Headers/__clang_cuda_intrinsics.h4
-rw-r--r--clang/lib/Headers/__clang_cuda_runtime_wrapper.h2
-rw-r--r--clang/lib/Headers/xmmintrin.h4
-rw-r--r--clang/lib/Lex/Lexer.cpp4
-rw-r--r--clang/lib/Lex/Pragma.cpp33
-rw-r--r--clang/lib/Parse/ParseDecl.cpp11
-rw-r--r--clang/lib/Parse/ParseDeclCXX.cpp2
-rw-r--r--clang/lib/Parse/ParseExprCXX.cpp27
-rw-r--r--clang/lib/Sema/SemaCast.cpp18
-rw-r--r--clang/lib/Sema/SemaConcept.cpp70
-rw-r--r--clang/lib/Sema/SemaDecl.cpp1
-rw-r--r--clang/lib/Sema/SemaDeclCXX.cpp54
-rw-r--r--clang/lib/Sema/SemaExpr.cpp49
-rw-r--r--clang/lib/Sema/SemaExprCXX.cpp3
-rw-r--r--clang/lib/Sema/SemaOverload.cpp88
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp6
-rw-r--r--clang/lib/Sema/SemaTemplateDeduction.cpp40
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiate.cpp186
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiateDecl.cpp98
-rw-r--r--clang/lib/Serialization/ASTReaderDecl.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp11
-rw-r--r--clang/tools/driver/cc1_main.cpp3
-rw-r--r--clang/tools/driver/cc1as_main.cpp3
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp9
-rw-r--r--libcxx/include/__config4
-rw-r--r--lld/ELF/Arch/ARM.cpp8
-rw-r--r--lld/ELF/Arch/PPC.cpp37
-rw-r--r--lld/ELF/InputSection.cpp8
-rw-r--r--lld/ELF/Relocations.cpp60
-rw-r--r--lld/ELF/SyntheticSections.cpp16
-rw-r--r--lld/ELF/SyntheticSections.h8
-rw-r--r--lld/ELF/Thunks.cpp45
-rw-r--r--lld/docs/ReleaseNotes.rst36
-rw-r--r--lldb/source/DataFormatters/FormatCache.cpp4
-rw-r--r--lldb/source/DataFormatters/LanguageCategory.cpp4
-rw-r--r--lldb/source/Interpreter/CommandAlias.cpp3
-rw-r--r--lldb/source/Interpreter/Options.cpp4
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp166
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h35
-rw-r--r--llvm/include/llvm/ADT/StringRef.h3
-rw-r--r--llvm/include/llvm/CodeGen/AsmPrinter.h3
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h19
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Core.h13
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h29
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h4
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Layer.h89
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Speculation.h4
-rw-r--r--llvm/include/llvm/Support/CrashRecoveryContext.h3
-rw-r--r--llvm/include/llvm/Support/Process.h6
-rw-r--r--llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h28
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp8
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp24
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp9
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp12
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp16
-rw-r--r--llvm/lib/CodeGen/GlobalMerge.cpp2
-rw-r--r--llvm/lib/CodeGen/LiveDebugVariables.cpp77
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FastISel.cpp12
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp17
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp28
-rw-r--r--llvm/lib/CodeGen/StackColoring.cpp16
-rw-r--r--llvm/lib/CodeGen/TypePromotion.cpp12
-rw-r--r--llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp39
-rw-r--r--llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp33
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Core.cpp77
-rw-r--r--llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp8
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LLJIT.cpp10
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Layer.cpp51
-rw-r--r--llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp43
-rw-r--r--llvm/lib/IR/AsmWriter.cpp4
-rw-r--r--llvm/lib/Linker/IRMover.cpp70
-rw-r--r--llvm/lib/Support/CRC.cpp10
-rw-r--r--llvm/lib/Support/CrashRecoveryContext.cpp89
-rw-r--r--llvm/lib/Support/ErrorHandling.cpp3
-rw-r--r--llvm/lib/Support/Process.cpp10
-rw-r--r--llvm/lib/Support/Windows/Signals.inc8
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp20
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp7
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h16
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp79
-rw-r--r--llvm/lib/Target/AMDGPU/CaymanInstructions.td4
-rw-r--r--llvm/lib/Target/AMDGPU/EvergreenInstructions.td3
-rw-r--r--llvm/lib/Target/AMDGPU/R600Instructions.td7
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertSkips.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp158
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMConstantIslandPass.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp18
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp49
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp57
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h8
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td27
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb2.td9
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h21
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.h13
-rw-r--r--llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp7
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp5
-rw-r--r--llvm/lib/Target/RISCV/RISCV.td9
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td3
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td109
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoA.td64
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoC.td156
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoD.td69
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoF.td72
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoM.td39
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedRocket32.td213
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedRocket64.td214
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedule.td138
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp14
-rw-r--r--llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp24
-rw-r--r--llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp5
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp14
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp2
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp19
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp41
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp5
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp3
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp43
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h7
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp16
-rw-r--r--llvm/tools/lli/lli.cpp21
158 files changed, 2870 insertions, 1377 deletions
diff --git a/clang/include/clang/AST/ASTConcept.h b/clang/include/clang/AST/ASTConcept.h
index 30c4706d2a15..3ebaad4eafdd 100644
--- a/clang/include/clang/AST/ASTConcept.h
+++ b/clang/include/clang/AST/ASTConcept.h
@@ -29,14 +29,14 @@ class ConceptSpecializationExpr;
class ConstraintSatisfaction : public llvm::FoldingSetNode {
// The template-like entity that 'owns' the constraint checked here (can be a
// constrained entity or a concept).
- NamedDecl *ConstraintOwner = nullptr;
+ const NamedDecl *ConstraintOwner = nullptr;
llvm::SmallVector<TemplateArgument, 4> TemplateArgs;
public:
ConstraintSatisfaction() = default;
- ConstraintSatisfaction(NamedDecl *ConstraintOwner,
+ ConstraintSatisfaction(const NamedDecl *ConstraintOwner,
ArrayRef<TemplateArgument> TemplateArgs) :
ConstraintOwner(ConstraintOwner), TemplateArgs(TemplateArgs.begin(),
TemplateArgs.end()) { }
@@ -57,7 +57,7 @@ public:
}
static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &C,
- NamedDecl *ConstraintOwner,
+ const NamedDecl *ConstraintOwner,
ArrayRef<TemplateArgument> TemplateArgs);
};
diff --git a/clang/include/clang/AST/ExprConcepts.h b/clang/include/clang/AST/ExprConcepts.h
index 2a64326e8604..271d487e2fc9 100644
--- a/clang/include/clang/AST/ExprConcepts.h
+++ b/clang/include/clang/AST/ExprConcepts.h
@@ -63,6 +63,12 @@ protected:
ArrayRef<TemplateArgument> ConvertedArgs,
const ConstraintSatisfaction *Satisfaction);
+ ConceptSpecializationExpr(const ASTContext &C, ConceptDecl *NamedConcept,
+ ArrayRef<TemplateArgument> ConvertedArgs,
+ const ConstraintSatisfaction *Satisfaction,
+ bool Dependent,
+ bool ContainsUnexpandedParameterPack);
+
ConceptSpecializationExpr(EmptyShell Empty, unsigned NumTemplateArgs);
public:
@@ -76,6 +82,13 @@ public:
const ConstraintSatisfaction *Satisfaction);
static ConceptSpecializationExpr *
+ Create(const ASTContext &C, ConceptDecl *NamedConcept,
+ ArrayRef<TemplateArgument> ConvertedArgs,
+ const ConstraintSatisfaction *Satisfaction,
+ bool Dependent,
+ bool ContainsUnexpandedParameterPack);
+
+ static ConceptSpecializationExpr *
Create(ASTContext &C, EmptyShell Empty, unsigned NumTemplateArgs);
ArrayRef<TemplateArgument> getTemplateArguments() const {
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index ef5d24dcf888..da572957d10d 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -11,6 +11,7 @@
namespace llvm {
class StringRef;
+class Twine;
class VersionTuple;
} // namespace llvm
@@ -30,7 +31,7 @@ enum class CudaVersion {
};
const char *CudaVersionToString(CudaVersion V);
// Input is "Major.Minor"
-CudaVersion CudaStringToVersion(llvm::StringRef S);
+CudaVersion CudaStringToVersion(const llvm::Twine &S);
enum class CudaArch {
UNKNOWN,
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 2da41bef2669..ecd871e36ee8 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -60,6 +60,9 @@ def err_drv_cuda_version_unsupported : Error<
"but installation at %3 is %4. Use --cuda-path to specify a different CUDA "
"install, pass a different GPU arch with --cuda-gpu-arch, or pass "
"--no-cuda-version-check.">;
+def warn_drv_unknown_cuda_version: Warning<
+ "Unknown CUDA version %0. Assuming the latest supported version %1">,
+ InGroup<CudaUnknownVersion>;
def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
def err_drv_invalid_thread_model_for_target : Error<
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index a15fb908c537..5ad07915d2f5 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -384,7 +384,10 @@ def GNULabelsAsValue : DiagGroup<"gnu-label-as-value">;
def LiteralRange : DiagGroup<"literal-range">;
def LocalTypeTemplateArgs : DiagGroup<"local-type-template-args",
[CXX98CompatLocalTypeTemplateArgs]>;
-def RangeLoopAnalysis : DiagGroup<"range-loop-analysis">;
+def RangeLoopConstruct : DiagGroup<"range-loop-construct">;
+def RangeLoopBindReference : DiagGroup<"range-loop-bind-reference">;
+def RangeLoopAnalysis : DiagGroup<"range-loop-analysis",
+ [RangeLoopConstruct, RangeLoopBindReference]>;
def ForLoopAnalysis : DiagGroup<"for-loop-analysis">;
def LoopAnalysis : DiagGroup<"loop-analysis", [ForLoopAnalysis,
RangeLoopAnalysis]>;
@@ -858,14 +861,15 @@ def Most : DiagGroup<"most", [
Comment,
DeleteNonVirtualDtor,
Format,
+ ForLoopAnalysis,
Implicit,
InfiniteRecursion,
IntInBoolContext,
- LoopAnalysis,
MismatchedTags,
MissingBraces,
Move,
MultiChar,
+ RangeLoopConstruct,
Reorder,
ReturnType,
SelfAssignment,
@@ -1113,6 +1117,9 @@ def SerializedDiagnostics : DiagGroup<"serialized-diagnostics">;
// compiling CUDA C/C++ but which is not compatible with the CUDA spec.
def CudaCompat : DiagGroup<"cuda-compat">;
+// Warning about unknown CUDA SDK version.
+def CudaUnknownVersion: DiagGroup<"unknown-cuda-version">;
+
// A warning group for warnings about features supported by HIP but
// ignored by CUDA.
def HIPOnly : DiagGroup<"hip-only">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7636d04a34c3..2199dfbddc84 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2378,17 +2378,17 @@ def warn_for_range_const_reference_copy : Warning<
"loop variable %0 "
"%diff{has type $ but is initialized with type $"
"| is initialized with a value of a different type}1,2 resulting in a copy">,
- InGroup<RangeLoopAnalysis>, DefaultIgnore;
+ InGroup<RangeLoopConstruct>, DefaultIgnore;
def note_use_type_or_non_reference : Note<
"use non-reference type %0 to keep the copy or type %1 to prevent copying">;
def warn_for_range_variable_always_copy : Warning<
"loop variable %0 is always a copy because the range of type %1 does not "
"return a reference">,
- InGroup<RangeLoopAnalysis>, DefaultIgnore;
+ InGroup<RangeLoopBindReference>, DefaultIgnore;
def note_use_non_reference_type : Note<"use non-reference type %0">;
def warn_for_range_copy : Warning<
"loop variable %0 of type %1 creates a copy from type %2">,
- InGroup<RangeLoopAnalysis>, DefaultIgnore;
+ InGroup<RangeLoopConstruct>, DefaultIgnore;
def note_use_reference_type : Note<"use reference type %0 to prevent copying">;
def err_objc_for_range_init_stmt : Error<
"initialization statement is not supported when iterating over Objective-C "
@@ -4683,6 +4683,8 @@ def note_checking_constraints_for_var_spec_id_here : Note<
def note_checking_constraints_for_class_spec_id_here : Note<
"while checking constraint satisfaction for class template partial "
"specialization '%0' required here">;
+def note_checking_constraints_for_function_here : Note<
+ "while checking constraint satisfaction for function '%0' required here">;
def note_constraint_substitution_here : Note<
"while substituting template arguments into constraint expression here">;
def note_constraint_normalization_here : Note<
@@ -6746,6 +6748,10 @@ def err_bad_cxx_cast_scalar_to_vector_different_size : Error<
def err_bad_cxx_cast_vector_to_vector_different_size : Error<
"%select{||reinterpret_cast||C-style cast|}0 from vector %1 "
"to vector %2 of different size">;
+def warn_bad_cxx_cast_nested_pointer_addr_space : Warning<
+ "%select{reinterpret_cast|C-style cast}0 from %1 to %2 "
+ "changes address space of nested pointers">,
+ InGroup<IncompatiblePointerTypesDiscardsQualifiers>;
def err_bad_lvalue_to_rvalue_cast : Error<
"cannot cast from lvalue of type %1 to rvalue reference type %2; types are "
"not compatible">;
@@ -8390,6 +8396,12 @@ def note_defaulted_comparison_cannot_deduce : Note<
"return type of defaulted 'operator<=>' cannot be deduced because "
"return type %2 of three-way comparison for %select{|member|base class}0 %1 "
"is not a standard comparison category type">;
+def err_defaulted_comparison_cannot_deduce_undeduced_auto : Error<
+ "return type of defaulted 'operator<=>' cannot be deduced because "
+ "three-way comparison for %select{|member|base class}0 %1 "
+ "has a deduced return type and is not yet defined">;
+def note_defaulted_comparison_cannot_deduce_undeduced_auto : Note<
+ "%select{|member|base class}0 %1 declared here">;
def note_defaulted_comparison_cannot_deduce_callee : Note<
"selected 'operator<=>' for %select{|member|base class}0 %1 declared here">;
def err_incorrect_defaulted_comparison_constexpr : Error<
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index 9387285518de..d1f5ec5a3d4c 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -859,6 +859,8 @@ def detailed_preprocessing_record : Flag<["-"], "detailed-preprocessing-record">
HelpText<"include a detailed record of preprocessing actions">;
def setup_static_analyzer : Flag<["-"], "setup-static-analyzer">,
HelpText<"Set up preprocessor for static analyzer (done automatically when static analyzer is run).">;
+def disable_pragma_debug_crash : Flag<["-"], "disable-pragma-debug-crash">,
+ HelpText<"Disable any #pragma clang __debug that can lead to crashing behavior. This is meant for testing.">;
//===----------------------------------------------------------------------===//
// OpenCL Options
diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h
index 0765b3c67d4e..9a3cad23363b 100644
--- a/clang/include/clang/Driver/Job.h
+++ b/clang/include/clang/Driver/Job.h
@@ -55,9 +55,6 @@ class Command {
/// The list of program arguments which are inputs.
llvm::opt::ArgStringList InputFilenames;
- /// Whether to print the input filenames when executing.
- bool PrintInputFilenames = false;
-
/// Response file name, if this command is set to use one, or nullptr
/// otherwise
const char *ResponseFile = nullptr;
@@ -86,6 +83,12 @@ class Command {
void writeResponseFile(raw_ostream &OS) const;
public:
+ /// Whether to print the input filenames when executing.
+ bool PrintInputFilenames = false;
+
+ /// Whether the command will be executed in this process or not.
+ bool InProcess = false;
+
Command(const Action &Source, const Tool &Creator, const char *Executable,
const llvm::opt::ArgStringList &Arguments,
ArrayRef<InputInfo> Inputs);
@@ -128,9 +131,6 @@ public:
/// Print a command argument, and optionally quote it.
static void printArg(llvm::raw_ostream &OS, StringRef Arg, bool Quote);
- /// Set whether to print the input filenames when executing.
- void setPrintInputFilenames(bool P) { PrintInputFilenames = P; }
-
protected:
/// Optionally print the filenames to be compiled
void PrintFileNames() const;
@@ -139,7 +139,9 @@ protected:
/// Use the CC1 tool callback when available, to avoid creating a new process
class CC1Command : public Command {
public:
- using Command::Command;
+ CC1Command(const Action &Source, const Tool &Creator, const char *Executable,
+ const llvm::opt::ArgStringList &Arguments,
+ ArrayRef<InputInfo> Inputs);
void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote,
CrashReportInfo *CrashInfo = nullptr) const override;
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index abffbd03c3b4..8b2146059f85 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -189,6 +189,9 @@ public:
/// Set up preprocessor for RunAnalysis action.
bool SetUpStaticAnalyzer = false;
+ /// Prevents intended crashes when using #pragma clang __debug. For testing.
+ bool DisablePragmaDebugCrash = false;
+
public:
PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {}
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a88dd2814487..697d1911be8f 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6275,7 +6275,7 @@ public:
/// \returns true if an error occurred and satisfaction could not be checked,
/// false otherwise.
bool CheckConstraintSatisfaction(
- NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
+ const NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
ArrayRef<TemplateArgument> TemplateArgs,
SourceRange TemplateIDRange, ConstraintSatisfaction &Satisfaction);
@@ -6288,6 +6288,17 @@ public:
bool CheckConstraintSatisfaction(const Expr *ConstraintExpr,
ConstraintSatisfaction &Satisfaction);
+ /// Check whether the given function decl's trailing requires clause is
+ /// satisfied, if any. Returns false and updates Satisfaction with the
+ /// satisfaction verdict if successful, emits a diagnostic and returns true if
+ /// an error occurred and satisfaction could not be determined.
+ ///
+ /// \returns true if an error occurred, false otherwise.
+ bool CheckFunctionConstraints(const FunctionDecl *FD,
+ ConstraintSatisfaction &Satisfaction,
+ SourceLocation UsageLoc = SourceLocation());
+
+
/// \brief Ensure that the given template arguments satisfy the constraints
/// associated with the given template, emitting a diagnostic if they do not.
///
@@ -6986,7 +6997,7 @@ public:
/// Get a template argument mapping the given template parameter to itself,
/// e.g. for X in \c template<int X>, this would return an expression template
/// argument referencing X.
- TemplateArgumentLoc getIdentityTemplateArgumentLoc(Decl *Param,
+ TemplateArgumentLoc getIdentityTemplateArgumentLoc(NamedDecl *Param,
SourceLocation Location);
void translateTemplateArguments(const ASTTemplateArgsPtr &In,
diff --git a/clang/include/clang/Sema/SemaConcept.h b/clang/include/clang/Sema/SemaConcept.h
index 7fc42a4816ec..c5f9fc45612a 100644
--- a/clang/include/clang/Sema/SemaConcept.h
+++ b/clang/include/clang/Sema/SemaConcept.h
@@ -43,11 +43,15 @@ struct AtomicConstraint {
if (ParameterMapping->size() != Other.ParameterMapping->size())
return false;
- for (unsigned I = 0, S = ParameterMapping->size(); I < S; ++I)
- if (!C.getCanonicalTemplateArgument((*ParameterMapping)[I].getArgument())
- .structurallyEquals(C.getCanonicalTemplateArgument(
- (*Other.ParameterMapping)[I].getArgument())))
+ for (unsigned I = 0, S = ParameterMapping->size(); I < S; ++I) {
+ llvm::FoldingSetNodeID IDA, IDB;
+ C.getCanonicalTemplateArgument((*ParameterMapping)[I].getArgument())
+ .Profile(IDA, C);
+ C.getCanonicalTemplateArgument((*Other.ParameterMapping)[I].getArgument())
+ .Profile(IDB, C);
+ if (IDA != IDB)
return false;
+ }
return true;
}
diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp
index c28a06bdf0b2..549088ad4a8a 100644
--- a/clang/lib/AST/ASTConcept.cpp
+++ b/clang/lib/AST/ASTConcept.cpp
@@ -59,8 +59,8 @@ ASTConstraintSatisfaction::Create(const ASTContext &C,
}
void ConstraintSatisfaction::Profile(
- llvm::FoldingSetNodeID &ID, const ASTContext &C, NamedDecl *ConstraintOwner,
- ArrayRef<TemplateArgument> TemplateArgs) {
+ llvm::FoldingSetNodeID &ID, const ASTContext &C,
+ const NamedDecl *ConstraintOwner, ArrayRef<TemplateArgument> TemplateArgs) {
ID.AddPointer(ConstraintOwner);
ID.AddInteger(TemplateArgs.size());
for (auto &Arg : TemplateArgs)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 6d1db38e36cc..1be72efe4de8 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -756,12 +756,8 @@ canonicalizeImmediatelyDeclaredConstraint(const ASTContext &C, Expr *IDC,
NewConverted.push_back(Arg);
}
Expr *NewIDC = ConceptSpecializationExpr::Create(
- C, NestedNameSpecifierLoc(), /*TemplateKWLoc=*/SourceLocation(),
- CSE->getConceptNameInfo(), /*FoundDecl=*/CSE->getNamedConcept(),
- CSE->getNamedConcept(),
- // Actually canonicalizing a TemplateArgumentLoc is difficult so we
- // simply omit the ArgsAsWritten
- /*ArgsAsWritten=*/nullptr, NewConverted, nullptr);
+ C, CSE->getNamedConcept(), NewConverted, nullptr,
+ CSE->isInstantiationDependent(), CSE->containsUnexpandedParameterPack());
if (auto *OrigFold = dyn_cast<CXXFoldExpr>(IDC))
NewIDC = new (C) CXXFoldExpr(OrigFold->getType(), SourceLocation(), NewIDC,
diff --git a/clang/lib/AST/CXXInheritance.cpp b/clang/lib/AST/CXXInheritance.cpp
index a3a3794b2edd..0377bd324cb6 100644
--- a/clang/lib/AST/CXXInheritance.cpp
+++ b/clang/lib/AST/CXXInheritance.cpp
@@ -758,6 +758,8 @@ CXXRecordDecl::getFinalOverriders(CXXFinalOverriderMap &FinalOverriders) const {
return false;
};
+ // FIXME: IsHidden reads from Overriding from the middle of a remove_if
+ // over the same sequence! Is this guaranteed to work?
Overriding.erase(
std::remove_if(Overriding.begin(), Overriding.end(), IsHidden),
Overriding.end());
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 48e310e858b2..227fe80ccab4 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -2038,17 +2038,36 @@ CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD,
if (auto *MD = getCorrespondingMethodDeclaredInClass(RD, MayBeBase))
return MD;
+ llvm::SmallVector<CXXMethodDecl*, 4> FinalOverriders;
+ auto AddFinalOverrider = [&](CXXMethodDecl *D) {
+ // If this function is overridden by a candidate final overrider, it is not
+ // a final overrider.
+ for (CXXMethodDecl *OtherD : FinalOverriders) {
+ if (declaresSameEntity(D, OtherD) || recursivelyOverrides(OtherD, D))
+ return;
+ }
+
+ // Other candidate final overriders might be overridden by this function.
+ FinalOverriders.erase(
+ std::remove_if(FinalOverriders.begin(), FinalOverriders.end(),
+ [&](CXXMethodDecl *OtherD) {
+ return recursivelyOverrides(D, OtherD);
+ }),
+ FinalOverriders.end());
+
+ FinalOverriders.push_back(D);
+ };
+
for (const auto &I : RD->bases()) {
const RecordType *RT = I.getType()->getAs<RecordType>();
if (!RT)
continue;
const auto *Base = cast<CXXRecordDecl>(RT->getDecl());
- CXXMethodDecl *T = this->getCorrespondingMethodInClass(Base);
- if (T)
- return T;
+ if (CXXMethodDecl *D = this->getCorrespondingMethodInClass(Base))
+ AddFinalOverrider(D);
}
- return nullptr;
+ return FinalOverriders.size() == 1 ? FinalOverriders.front() : nullptr;
}
CXXMethodDecl *CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD,
@@ -2105,6 +2124,11 @@ CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base,
CXXMethodDecl *DevirtualizedMethod =
getCorrespondingMethodInClass(BestDynamicDecl);
+ // If the final overrider in the dynamic type is ambiguous, we can't
+ // devirtualize this call.
+ if (!DevirtualizedMethod)
+ return nullptr;
+
// If that method is pure virtual, we can't devirtualize. If this code is
// reached, the result would be UB, not a direct call to the derived class
// function, and we can't assume the derived class function is defined.
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 835198958766..fea7d606f261 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1685,6 +1685,11 @@ MemberExpr *MemberExpr::Create(
CXXRecordDecl *RD = dyn_cast_or_null<CXXRecordDecl>(DC);
if (RD && RD->isDependentContext() && RD->isCurrentInstantiation(DC))
E->setTypeDependent(T->isDependentType());
+
+ // Bitfield with value-dependent width is type-dependent.
+ FieldDecl *FD = dyn_cast<FieldDecl>(MemberDecl);
+ if (FD && FD->isBitField() && FD->getBitWidth()->isValueDependent())
+ E->setTypeDependent(true);
}
if (HasQualOrFound) {
diff --git a/clang/lib/AST/ExprConcepts.cpp b/clang/lib/AST/ExprConcepts.cpp
index 76d57ed5d5b1..b5a3686dc99a 100644
--- a/clang/lib/AST/ExprConcepts.cpp
+++ b/clang/lib/AST/ExprConcepts.cpp
@@ -46,24 +46,12 @@ ConceptSpecializationExpr::ConceptSpecializationExpr(const ASTContext &C,
ASTConstraintSatisfaction::Create(C, *Satisfaction) :
nullptr) {
setTemplateArguments(ConvertedArgs);
-}
-
-ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty,
- unsigned NumTemplateArgs)
- : Expr(ConceptSpecializationExprClass, Empty), ConceptReference(),
- NumTemplateArgs(NumTemplateArgs) { }
-
-void ConceptSpecializationExpr::setTemplateArguments(
- ArrayRef<TemplateArgument> Converted) {
- assert(Converted.size() == NumTemplateArgs);
- std::uninitialized_copy(Converted.begin(), Converted.end(),
- getTrailingObjects<TemplateArgument>());
bool IsInstantiationDependent = false;
bool ContainsUnexpandedParameterPack = false;
- for (const TemplateArgument& Arg : Converted) {
- if (Arg.isInstantiationDependent())
+ for (const TemplateArgumentLoc& ArgLoc : ArgsAsWritten->arguments()) {
+ if (ArgLoc.getArgument().isInstantiationDependent())
IsInstantiationDependent = true;
- if (Arg.containsUnexpandedParameterPack())
+ if (ArgLoc.getArgument().containsUnexpandedParameterPack())
ContainsUnexpandedParameterPack = true;
if (ContainsUnexpandedParameterPack && IsInstantiationDependent)
break;
@@ -80,6 +68,18 @@ void ConceptSpecializationExpr::setTemplateArguments(
"should not be value-dependent");
}
+ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty,
+ unsigned NumTemplateArgs)
+ : Expr(ConceptSpecializationExprClass, Empty), ConceptReference(),
+ NumTemplateArgs(NumTemplateArgs) { }
+
+void ConceptSpecializationExpr::setTemplateArguments(
+ ArrayRef<TemplateArgument> Converted) {
+ assert(Converted.size() == NumTemplateArgs);
+ std::uninitialized_copy(Converted.begin(), Converted.end(),
+ getTrailingObjects<TemplateArgument>());
+}
+
ConceptSpecializationExpr *
ConceptSpecializationExpr::Create(const ASTContext &C,
NestedNameSpecifierLoc NNS,
@@ -98,6 +98,39 @@ ConceptSpecializationExpr::Create(const ASTContext &C,
ConvertedArgs, Satisfaction);
}
+ConceptSpecializationExpr::ConceptSpecializationExpr(
+ const ASTContext &C, ConceptDecl *NamedConcept,
+ ArrayRef<TemplateArgument> ConvertedArgs,
+ const ConstraintSatisfaction *Satisfaction, bool Dependent,
+ bool ContainsUnexpandedParameterPack)
+ : Expr(ConceptSpecializationExprClass, C.BoolTy, VK_RValue, OK_Ordinary,
+ /*TypeDependent=*/false,
+ /*ValueDependent=*/!Satisfaction, Dependent,
+ ContainsUnexpandedParameterPack),
+ ConceptReference(NestedNameSpecifierLoc(), SourceLocation(),
+ DeclarationNameInfo(), NamedConcept,
+ NamedConcept, nullptr),
+ NumTemplateArgs(ConvertedArgs.size()),
+ Satisfaction(Satisfaction ?
+ ASTConstraintSatisfaction::Create(C, *Satisfaction) :
+ nullptr) {
+ setTemplateArguments(ConvertedArgs);
+}
+
+ConceptSpecializationExpr *
+ConceptSpecializationExpr::Create(const ASTContext &C,
+ ConceptDecl *NamedConcept,
+ ArrayRef<TemplateArgument> ConvertedArgs,
+ const ConstraintSatisfaction *Satisfaction,
+ bool Dependent,
+ bool ContainsUnexpandedParameterPack) {
+ void *Buffer = C.Allocate(totalSizeToAlloc<TemplateArgument>(
+ ConvertedArgs.size()));
+ return new (Buffer) ConceptSpecializationExpr(
+ C, NamedConcept, ConvertedArgs, Satisfaction, Dependent,
+ ContainsUnexpandedParameterPack);
+}
+
ConceptSpecializationExpr *
ConceptSpecializationExpr::Create(ASTContext &C, EmptyShell Empty,
unsigned NumTemplateArgs) {
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 382ea5c8d7ef..60dec50d53da 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1535,8 +1535,8 @@ static Stmt::StmtClass DecodeOperatorCall(const CXXOperatorCallExpr *S,
return Stmt::BinaryOperatorClass;
case OO_Spaceship:
- // FIXME: Update this once we support <=> expressions.
- llvm_unreachable("<=> expressions not supported yet");
+ BinaryOp = BO_Cmp;
+ return Stmt::BinaryOperatorClass;
case OO_AmpAmp:
BinaryOp = BO_LAnd;
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index f2b6c8cd3ee9..e06d120c58bf 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -2,6 +2,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/VersionTuple.h"
@@ -31,8 +32,8 @@ const char *CudaVersionToString(CudaVersion V) {
llvm_unreachable("invalid enum");
}
-CudaVersion CudaStringToVersion(llvm::StringRef S) {
- return llvm::StringSwitch<CudaVersion>(S)
+CudaVersion CudaStringToVersion(const llvm::Twine &S) {
+ return llvm::StringSwitch<CudaVersion>(S.str())
.Case("7.0", CudaVersion::CUDA_70)
.Case("7.5", CudaVersion::CUDA_75)
.Case("8.0", CudaVersion::CUDA_80)
@@ -40,7 +41,8 @@ CudaVersion CudaStringToVersion(llvm::StringRef S) {
.Case("9.1", CudaVersion::CUDA_91)
.Case("9.2", CudaVersion::CUDA_92)
.Case("10.0", CudaVersion::CUDA_100)
- .Case("10.1", CudaVersion::CUDA_101);
+ .Case("10.1", CudaVersion::CUDA_101)
+ .Default(CudaVersion::UNKNOWN);
}
const char *CudaArchToString(CudaArch A) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 57beda26677c..f8866ac4f7f6 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -537,6 +537,13 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth);
}
+ if (Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64) {
+ StringRef ABIStr = Target.getABI();
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+ getModule().addModuleFlag(llvm::Module::Error, "target-abi",
+ llvm::MDString::get(Ctx, ABIStr));
+ }
+
if (CodeGenOpts.SanitizeCfiCrossDso) {
// Indicate that we want cross-DSO control flow integrity checks.
getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 25aec3690f21..52477576b2eb 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -258,14 +258,23 @@ void Compilation::initCompilationForDiagnostics() {
// Remove any user specified output. Claim any unclaimed arguments, so as
// to avoid emitting warnings about unused args.
- OptSpecifier OutputOpts[] = { options::OPT_o, options::OPT_MD,
- options::OPT_MMD };
+ OptSpecifier OutputOpts[] = {
+ options::OPT_o, options::OPT_MD, options::OPT_MMD, options::OPT_M,
+ options::OPT_MM, options::OPT_MF, options::OPT_MG, options::OPT_MJ,
+ options::OPT_MQ, options::OPT_MT, options::OPT_MV};
for (unsigned i = 0, e = llvm::array_lengthof(OutputOpts); i != e; ++i) {
if (TranslatedArgs->hasArg(OutputOpts[i]))
TranslatedArgs->eraseArg(OutputOpts[i]);
}
TranslatedArgs->ClaimAllArgs();
+ // Force re-creation of the toolchain Args, otherwise our modifications just
+ // above will have no effect.
+ for (auto Arg : TCArgs)
+ if (Arg.second != TranslatedArgs)
+ delete Arg.second;
+ TCArgs.clear();
+
// Redirect stdout/stderr to /dev/null.
Redirects = {None, {""}, {""}};
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 7ee3caaa0bce..fb8335a3695d 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3757,6 +3757,11 @@ void Driver::BuildJobs(Compilation &C) const {
/*TargetDeviceOffloadKind*/ Action::OFK_None);
}
+ // If we have more than one job, then disable integrated-cc1 for now.
+ if (C.getJobs().size() > 1)
+ for (auto &J : C.getJobs())
+ J.InProcess = false;
+
// If the user passed -Qunused-arguments or there were errors, don't warn
// about any unused arguments.
if (Diags.hasErrorOccurred() ||
diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp
index 7dab2a022d92..6d1e7e61ba1d 100644
--- a/clang/lib/Driver/Job.cpp
+++ b/clang/lib/Driver/Job.cpp
@@ -371,14 +371,29 @@ int Command::Execute(ArrayRef<llvm::Optional<StringRef>> Redirects,
/*memoryLimit*/ 0, ErrMsg, ExecutionFailed);
}
+CC1Command::CC1Command(const Action &Source, const Tool &Creator,
+ const char *Executable,
+ const llvm::opt::ArgStringList &Arguments,
+ ArrayRef<InputInfo> Inputs)
+ : Command(Source, Creator, Executable, Arguments, Inputs) {
+ InProcess = true;
+}
+
void CC1Command::Print(raw_ostream &OS, const char *Terminator, bool Quote,
CrashReportInfo *CrashInfo) const {
- OS << " (in-process)\n";
+ if (InProcess)
+ OS << " (in-process)\n";
Command::Print(OS, Terminator, Quote, CrashInfo);
}
-int CC1Command::Execute(ArrayRef<llvm::Optional<StringRef>> /*Redirects*/,
+int CC1Command::Execute(ArrayRef<llvm::Optional<StringRef>> Redirects,
std::string *ErrMsg, bool *ExecutionFailed) const {
+ // FIXME: Currently, if there're more than one job, we disable
+ // -fintegrate-cc1. If we're no longer a integrated-cc1 job, fallback to
+ // out-of-process execution. See discussion in https://reviews.llvm.org/D74447
+ if (!InProcess)
+ return Command::Execute(Redirects, ErrMsg, ExecutionFailed);
+
PrintFileNames();
SmallVector<const char *, 128> Argv;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 647465863d3e..aec1971214cf 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4679,6 +4679,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
: "-");
}
+ // Give the gen diagnostics more chances to succeed, by avoiding intentional
+ // crashes.
+ if (D.CCGenDiagnostics)
+ CmdArgs.push_back("-disable-pragma-debug-crash");
+
bool UseSeparateSections = isUseSeparateSections(Triple);
if (Args.hasFlag(options::OPT_ffunction_sections,
@@ -6048,7 +6053,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (Output.getType() == types::TY_Object &&
Args.hasFlag(options::OPT__SLASH_showFilenames,
options::OPT__SLASH_showFilenames_, false)) {
- C.getJobs().getJobs().back()->setPrintInputFilenames(true);
+ C.getJobs().getJobs().back()->PrintInputFilenames = true;
}
if (Arg *A = Args.getLastArg(options::OPT_pg))
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 02871d2ce411..8a7da4f86b39 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -32,37 +32,24 @@ using namespace llvm::opt;
// Parses the contents of version.txt in an CUDA installation. It should
// contain one line of the from e.g. "CUDA Version 7.5.2".
-static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
+static CudaVersion ParseCudaVersionFile(const Driver &D, llvm::StringRef V) {
if (!V.startswith("CUDA Version "))
return CudaVersion::UNKNOWN;
V = V.substr(strlen("CUDA Version "));
- int Major = -1, Minor = -1;
- auto First = V.split('.');
- auto Second = First.second.split('.');
- if (First.first.getAsInteger(10, Major) ||
- Second.first.getAsInteger(10, Minor))
+ SmallVector<StringRef,4> VersionParts;
+ V.split(VersionParts, '.');
+ if (VersionParts.size() < 2)
return CudaVersion::UNKNOWN;
-
- if (Major == 7 && Minor == 0) {
- // This doesn't appear to ever happen -- version.txt doesn't exist in the
- // CUDA 7 installs I've seen. But no harm in checking.
- return CudaVersion::CUDA_70;
- }
- if (Major == 7 && Minor == 5)
- return CudaVersion::CUDA_75;
- if (Major == 8 && Minor == 0)
- return CudaVersion::CUDA_80;
- if (Major == 9 && Minor == 0)
- return CudaVersion::CUDA_90;
- if (Major == 9 && Minor == 1)
- return CudaVersion::CUDA_91;
- if (Major == 9 && Minor == 2)
- return CudaVersion::CUDA_92;
- if (Major == 10 && Minor == 0)
- return CudaVersion::CUDA_100;
- if (Major == 10 && Minor == 1)
- return CudaVersion::CUDA_101;
- return CudaVersion::UNKNOWN;
+ std::string MajorMinor = join_items(".", VersionParts[0], VersionParts[1]);
+ CudaVersion Version = CudaStringToVersion(MajorMinor);
+ if (Version != CudaVersion::UNKNOWN)
+ return Version;
+
+ // Issue a warning and assume that the version we've found is compatible with
+ // the latest version we support.
+ D.Diag(diag::warn_drv_unknown_cuda_version)
+ << MajorMinor << CudaVersionToString(CudaVersion::LATEST);
+ return CudaVersion::LATEST;
}
CudaInstallationDetector::CudaInstallationDetector(
@@ -160,7 +147,7 @@ CudaInstallationDetector::CudaInstallationDetector(
// version.txt isn't present.
Version = CudaVersion::CUDA_70;
} else {
- Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
+ Version = ParseCudaVersionFile(D, (*VersionFile)->getBuffer());
}
if (Version >= CudaVersion::CUDA_90) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 4e5babdbaa03..e98a407ac42f 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3440,6 +3440,7 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
Opts.LexEditorPlaceholders = false;
Opts.SetUpStaticAnalyzer = Args.hasArg(OPT_setup_static_analyzer);
+ Opts.DisablePragmaDebugCrash = Args.hasArg(OPT_disable_pragma_debug_crash);
}
static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index b67461a146fc..c7bff6a9d8fe 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -45,7 +45,7 @@
_Static_assert(sizeof(__val) == sizeof(__Bits)); \
_Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
__Bits __tmp; \
- memcpy(&__val, &__tmp, sizeof(__val)); \
+ memcpy(&__tmp, &__val, sizeof(__val)); \
__tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \
__tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \
long long __ret; \
@@ -129,7 +129,7 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
_Static_assert(sizeof(__val) == sizeof(__Bits)); \
_Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
__Bits __tmp; \
- memcpy(&__val, &__tmp, sizeof(__val)); \
+ memcpy(&__tmp, &__val, sizeof(__val)); \
__tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \
__tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \
long long __ret; \
diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index 3e362dd967db..e91de3c81dbd 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -48,7 +48,7 @@
#include "cuda.h"
#if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION"
-#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10010
+#elif CUDA_VERSION < 7000
#error "Unsupported CUDA version!"
#endif
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 0e61eab44aeb..9b8de63f04d5 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2181,7 +2181,7 @@ void _mm_sfence(void);
/// 3: Bits [63:48] are copied to the destination.
/// \returns A 16-bit integer containing the extracted 16 bits of packed data.
#define _mm_extract_pi16(a, n) \
- (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n)
+ (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2212,7 +2212,7 @@ void _mm_sfence(void);
/// \returns A 64-bit integer vector containing the copied packed data from the
/// operands.
#define _mm_insert_pi16(a, d, n) \
- (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n)
+ (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)
/// Compares each of the corresponding packed 16-bit integer values of
/// the 64-bit integer vectors, and writes the greater value to the
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 648bda270578..981111d03744 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2552,8 +2552,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
'/', '/', '/', '/', '/', '/', '/', '/',
'/', '/', '/', '/', '/', '/', '/', '/'
};
- while (CurPtr+16 <= BufferEnd &&
- !vec_any_eq(*(const vector unsigned char*)CurPtr, Slashes))
+ while (CurPtr + 16 <= BufferEnd &&
+ !vec_any_eq(*(const __vector unsigned char *)CurPtr, Slashes))
CurPtr += 16;
#else
// Scan for '/' quickly. Many block comments are very large.
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index e4636265a72b..a8cd18b123b0 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -30,6 +30,7 @@
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Lex/TokenLexer.h"
#include "llvm/ADT/ArrayRef.h"
@@ -39,7 +40,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
@@ -1035,15 +1035,19 @@ struct PragmaDebugHandler : public PragmaHandler {
IdentifierInfo *II = Tok.getIdentifierInfo();
if (II->isStr("assert")) {
- llvm_unreachable("This is an assertion!");
+ if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+ llvm_unreachable("This is an assertion!");
} else if (II->isStr("crash")) {
- LLVM_BUILTIN_TRAP;
+ if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+ LLVM_BUILTIN_TRAP;
} else if (II->isStr("parser_crash")) {
- Token Crasher;
- Crasher.startToken();
- Crasher.setKind(tok::annot_pragma_parser_crash);
- Crasher.setAnnotationRange(SourceRange(Tok.getLocation()));
- PP.EnterToken(Crasher, /*IsReinject*/false);
+ if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) {
+ Token Crasher;
+ Crasher.startToken();
+ Crasher.setKind(tok::annot_pragma_parser_crash);
+ Crasher.setAnnotationRange(SourceRange(Tok.getLocation()));
+ PP.EnterToken(Crasher, /*IsReinject*/ false);
+ }
} else if (II->isStr("dump")) {
Token Identifier;
PP.LexUnexpandedToken(Identifier);
@@ -1075,9 +1079,11 @@ struct PragmaDebugHandler : public PragmaHandler {
<< II->getName();
}
} else if (II->isStr("llvm_fatal_error")) {
- llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error");
+ if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+ llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error");
} else if (II->isStr("llvm_unreachable")) {
- llvm_unreachable("#pragma clang __debug llvm_unreachable");
+ if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+ llvm_unreachable("#pragma clang __debug llvm_unreachable");
} else if (II->isStr("macro")) {
Token MacroName;
PP.LexUnexpandedToken(MacroName);
@@ -1104,11 +1110,8 @@ struct PragmaDebugHandler : public PragmaHandler {
}
M->dump();
} else if (II->isStr("overflow_stack")) {
- DebugOverflowStack();
- } else if (II->isStr("handle_crash")) {
- llvm::CrashRecoveryContext *CRC =llvm::CrashRecoveryContext::GetCurrent();
- if (CRC)
- CRC->HandleCrash();
+ if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+ DebugOverflowStack();
} else if (II->isStr("captured")) {
HandleCaptured(PP);
} else {
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 4af993c4527f..cdc3506d5c68 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -5060,6 +5060,8 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
// recurse to handle whatever we get.
if (TryAnnotateTypeOrScopeToken())
return true;
+ if (TryAnnotateTypeConstraint())
+ return true;
if (Tok.is(tok::identifier))
return false;
@@ -5192,11 +5194,14 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
// placeholder-type-specifier
case tok::annot_template_id: {
- TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok);
- return TemplateId->Kind == TNK_Concept_template &&
+ return isTypeConstraintAnnotation() &&
(NextToken().is(tok::kw_auto) || NextToken().is(tok::kw_decltype));
}
-
+ case tok::annot_cxxscope:
+ if (NextToken().is(tok::identifier) && TryAnnotateTypeConstraint())
+ return true;
+ return isTypeConstraintAnnotation() &&
+ GetLookAheadToken(2).isOneOf(tok::kw_auto, tok::kw_decltype);
case tok::kw___declspec:
case tok::kw___cdecl:
case tok::kw___stdcall:
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index f872aa3a950c..09e5c7996fcd 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -2716,7 +2716,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
// C++11 [dcl.attr.grammar] p4: If an attribute-specifier-seq appertains
// to a friend declaration, that declaration shall be a definition.
if (DeclaratorInfo.isFunctionDeclarator() &&
- DefinitionKind != FDK_Definition && DS.isFriendSpecified()) {
+ DefinitionKind == FDK_Declaration && DS.isFriendSpecified()) {
// Diagnose attributes that appear before decl specifier:
// [[]] friend int foo();
ProhibitAttributes(FnAttrs);
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 036eabb94dd7..17f81ec96c1f 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -3374,25 +3374,6 @@ ExprResult Parser::ParseRequiresExpression() {
Diag(Tok, diag::err_requires_expr_missing_arrow)
<< FixItHint::CreateInsertion(Tok.getLocation(), "->");
// Try to parse a 'type-constraint'
- CXXScopeSpec SS;
- if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(),
- /*EnteringContext=*/false,
- /*MayBePseudoDestructor=*/nullptr,
- // If this is not a type-constraint,
- // then this scope-spec is part of
- // the typename of a non-type
- // template parameter
- /*IsTypename=*/true,
- /*LastII=*/nullptr,
- // We won't find concepts in
- // non-namespaces anyway, so might as
- // well parse this correctly for
- // possible type names.
- /*OnlyNamespace=*/false,
- /*SuppressDiagnostic=*/true)) {
- SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
- break;
- }
if (TryAnnotateTypeConstraint()) {
SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
break;
@@ -3402,8 +3383,13 @@ ExprResult Parser::ParseRequiresExpression() {
SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
break;
}
- if (Tok.is(tok::annot_cxxscope))
+ CXXScopeSpec SS;
+ if (Tok.is(tok::annot_cxxscope)) {
+ Actions.RestoreNestedNameSpecifierAnnotation(Tok.getAnnotationValue(),
+ Tok.getAnnotationRange(),
+ SS);
ConsumeAnnotationToken();
+ }
Req = Actions.ActOnCompoundRequirement(
Expression.get(), NoexceptLoc, SS, takeTemplateIdAnnotation(Tok),
@@ -3490,6 +3476,7 @@ ExprResult Parser::ParseRequiresExpression() {
// We need to consume the typename to allow 'requires { typename a; }'
SourceLocation TypenameKWLoc = ConsumeToken();
if (TryAnnotateCXXScopeToken()) {
+ TPA.Commit();
SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
break;
}
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index a905ebc67305..7a8cbca1e3f1 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2311,6 +2311,24 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
return SuccessResult;
}
+ // Diagnose address space conversion in nested pointers.
+ QualType DestPtee = DestType->getPointeeType().isNull()
+ ? DestType->getPointeeType()
+ : DestType->getPointeeType()->getPointeeType();
+ QualType SrcPtee = SrcType->getPointeeType().isNull()
+ ? SrcType->getPointeeType()
+ : SrcType->getPointeeType()->getPointeeType();
+ while (!DestPtee.isNull() && !SrcPtee.isNull()) {
+ if (DestPtee.getAddressSpace() != SrcPtee.getAddressSpace()) {
+ Self.Diag(OpRange.getBegin(),
+ diag::warn_bad_cxx_cast_nested_pointer_addr_space)
+ << CStyle << SrcType << DestType << SrcExpr.get()->getSourceRange();
+ break;
+ }
+ DestPtee = DestPtee->getPointeeType();
+ SrcPtee = SrcPtee->getPointeeType();
+ }
+
// C++ 5.2.10p7: A pointer to an object can be explicitly converted to
// a pointer to an object of different type.
// Void pointers are not specified, but supported by every compiler out there.
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 81601b09ce0d..290e4cbff4fd 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -167,9 +167,8 @@ calculateConstraintSatisfaction(Sema &S, const Expr *ConstraintExpr,
return false;
}
-template <typename TemplateDeclT>
static bool calculateConstraintSatisfaction(
- Sema &S, TemplateDeclT *Template, ArrayRef<TemplateArgument> TemplateArgs,
+ Sema &S, const NamedDecl *Template, ArrayRef<TemplateArgument> TemplateArgs,
SourceLocation TemplateNameLoc, MultiLevelTemplateArgumentList &MLTAL,
const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction) {
return calculateConstraintSatisfaction(
@@ -182,8 +181,9 @@ static bool calculateConstraintSatisfaction(
{
TemplateDeductionInfo Info(TemplateNameLoc);
Sema::InstantiatingTemplate Inst(S, AtomicExpr->getBeginLoc(),
- Sema::InstantiatingTemplate::ConstraintSubstitution{}, Template,
- Info, AtomicExpr->getSourceRange());
+ Sema::InstantiatingTemplate::ConstraintSubstitution{},
+ const_cast<NamedDecl *>(Template), Info,
+ AtomicExpr->getSourceRange());
if (Inst.isInvalid())
return ExprError();
// We do not want error diagnostics escaping here.
@@ -230,8 +230,7 @@ static bool calculateConstraintSatisfaction(
});
}
-template<typename TemplateDeclT>
-static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
+static bool CheckConstraintSatisfaction(Sema &S, const NamedDecl *Template,
ArrayRef<const Expr *> ConstraintExprs,
ArrayRef<TemplateArgument> TemplateArgs,
SourceRange TemplateIDRange,
@@ -249,8 +248,8 @@ static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
}
Sema::InstantiatingTemplate Inst(S, TemplateIDRange.getBegin(),
- Sema::InstantiatingTemplate::ConstraintsCheck{}, Template, TemplateArgs,
- TemplateIDRange);
+ Sema::InstantiatingTemplate::ConstraintsCheck{},
+ const_cast<NamedDecl *>(Template), TemplateArgs, TemplateIDRange);
if (Inst.isInvalid())
return true;
@@ -273,7 +272,7 @@ static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
}
bool Sema::CheckConstraintSatisfaction(
- NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
+ const NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
ArrayRef<TemplateArgument> TemplateArgs, SourceRange TemplateIDRange,
ConstraintSatisfaction &OutSatisfaction) {
if (ConstraintExprs.empty()) {
@@ -284,7 +283,8 @@ bool Sema::CheckConstraintSatisfaction(
llvm::FoldingSetNodeID ID;
void *InsertPos;
ConstraintSatisfaction *Satisfaction = nullptr;
- if (LangOpts.ConceptSatisfactionCaching) {
+ bool ShouldCache = LangOpts.ConceptSatisfactionCaching && Template;
+ if (ShouldCache) {
ConstraintSatisfaction::Profile(ID, Context, Template, TemplateArgs);
Satisfaction = SatisfactionCache.FindNodeOrInsertPos(ID, InsertPos);
if (Satisfaction) {
@@ -295,27 +295,15 @@ bool Sema::CheckConstraintSatisfaction(
} else {
Satisfaction = &OutSatisfaction;
}
- bool Failed;
- if (auto *T = dyn_cast<TemplateDecl>(Template))
- Failed = ::CheckConstraintSatisfaction(*this, T, ConstraintExprs,
- TemplateArgs, TemplateIDRange,
- *Satisfaction);
- else if (auto *P =
- dyn_cast<ClassTemplatePartialSpecializationDecl>(Template))
- Failed = ::CheckConstraintSatisfaction(*this, P, ConstraintExprs,
- TemplateArgs, TemplateIDRange,
- *Satisfaction);
- else
- Failed = ::CheckConstraintSatisfaction(
- *this, cast<VarTemplatePartialSpecializationDecl>(Template),
- ConstraintExprs, TemplateArgs, TemplateIDRange, *Satisfaction);
- if (Failed) {
- if (LangOpts.ConceptSatisfactionCaching)
+ if (::CheckConstraintSatisfaction(*this, Template, ConstraintExprs,
+ TemplateArgs, TemplateIDRange,
+ *Satisfaction)) {
+ if (ShouldCache)
delete Satisfaction;
return true;
}
- if (LangOpts.ConceptSatisfactionCaching) {
+ if (ShouldCache) {
// We cannot use InsertNode here because CheckConstraintSatisfaction might
// have invalidated it.
SatisfactionCache.InsertNode(Satisfaction);
@@ -333,6 +321,30 @@ bool Sema::CheckConstraintSatisfaction(const Expr *ConstraintExpr,
});
}
+bool Sema::CheckFunctionConstraints(const FunctionDecl *FD,
+ ConstraintSatisfaction &Satisfaction,
+ SourceLocation UsageLoc) {
+ const Expr *RC = FD->getTrailingRequiresClause();
+ if (RC->isInstantiationDependent()) {
+ Satisfaction.IsSatisfied = true;
+ return false;
+ }
+ Qualifiers ThisQuals;
+ CXXRecordDecl *Record = nullptr;
+ if (auto *Method = dyn_cast<CXXMethodDecl>(FD)) {
+ ThisQuals = Method->getMethodQualifiers();
+ Record = const_cast<CXXRecordDecl *>(Method->getParent());
+ }
+ CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr);
+ // We substitute with empty arguments in order to rebuild the atomic
+ // constraint in a constant-evaluated context.
+ // FIXME: Should this be a dedicated TreeTransform?
+ return CheckConstraintSatisfaction(
+ FD, {RC}, /*TemplateArgs=*/{},
+ SourceRange(UsageLoc.isValid() ? UsageLoc : FD->getLocation()),
+ Satisfaction);
+}
+
bool Sema::EnsureTemplateArgumentListConstraints(
TemplateDecl *TD, ArrayRef<TemplateArgument> TemplateArgs,
SourceRange TemplateIDRange) {
@@ -671,6 +683,10 @@ static bool substituteParameterMappings(Sema &S, NormalizedConstraint &N,
ArgsAsWritten->arguments().back().getSourceRange().getEnd()));
if (S.SubstTemplateArguments(*Atomic.ParameterMapping, MLTAL, SubstArgs))
return true;
+ Atomic.ParameterMapping.emplace(
+ MutableArrayRef<TemplateArgumentLoc>(
+ new (S.Context) TemplateArgumentLoc[SubstArgs.size()],
+ SubstArgs.size()));
std::copy(SubstArgs.arguments().begin(), SubstArgs.arguments().end(),
N.getAtomicConstraint()->ParameterMapping->begin());
return false;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 0bf490336537..64146f4a912f 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12526,6 +12526,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
var->getDeclContext()->getRedeclContext()->isFileContext() &&
var->isExternallyVisible() && var->hasLinkage() &&
!var->isInline() && !var->getDescribedVarTemplate() &&
+ !isa<VarTemplatePartialSpecializationDecl>(var) &&
!isTemplateInstantiation(var->getTemplateSpecializationKind()) &&
!getDiagnostics().isIgnored(diag::warn_missing_variable_declarations,
var->getLocation())) {
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 9fa5691983a1..831e55046e80 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -7373,7 +7373,14 @@ private:
/// resolution [...]
CandidateSet.exclude(FD);
- S.LookupOverloadedBinOp(CandidateSet, OO, Fns, Args);
+ if (Args[0]->getType()->isOverloadableType())
+ S.LookupOverloadedBinOp(CandidateSet, OO, Fns, Args);
+ else {
+ // FIXME: We determine whether this is a valid expression by checking to
+ // see if there's a viable builtin operator candidate for it. That isn't
+ // really what the rules ask us to do, but should give the right results.
+ S.AddBuiltinOperatorCandidates(OO, FD->getLocation(), Args, CandidateSet);
+ }
Result R;
@@ -7438,6 +7445,31 @@ private:
if (OO == OO_Spaceship && FD->getReturnType()->isUndeducedAutoType()) {
if (auto *BestFD = Best->Function) {
+ // If any callee has an undeduced return type, deduce it now.
+ // FIXME: It's not clear how a failure here should be handled. For
+ // now, we produce an eager diagnostic, because that is forward
+ // compatible with most (all?) other reasonable options.
+ if (BestFD->getReturnType()->isUndeducedType() &&
+ S.DeduceReturnType(BestFD, FD->getLocation(),
+ /*Diagnose=*/false)) {
+ // Don't produce a duplicate error when asked to explain why the
+ // comparison is deleted: we diagnosed that when initially checking
+ // the defaulted operator.
+ if (Diagnose == NoDiagnostics) {
+ S.Diag(
+ FD->getLocation(),
+ diag::err_defaulted_comparison_cannot_deduce_undeduced_auto)
+ << Subobj.Kind << Subobj.Decl;
+ S.Diag(
+ Subobj.Loc,
+ diag::note_defaulted_comparison_cannot_deduce_undeduced_auto)
+ << Subobj.Kind << Subobj.Decl;
+ S.Diag(BestFD->getLocation(),
+ diag::note_defaulted_comparison_cannot_deduce_callee)
+ << Subobj.Kind << Subobj.Decl;
+ }
+ return Result::deleted();
+ }
if (auto *Info = S.Context.CompCategories.lookupInfoForType(
BestFD->getCallResultType())) {
R.Category = Info->Kind;
@@ -7826,10 +7858,14 @@ private:
return StmtError();
OverloadedOperatorKind OO = FD->getOverloadedOperator();
- ExprResult Op = S.CreateOverloadedBinOp(
- Loc, BinaryOperator::getOverloadedOpcode(OO), Fns,
- Obj.first.get(), Obj.second.get(), /*PerformADL=*/true,
- /*AllowRewrittenCandidates=*/true, FD);
+ BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(OO);
+ ExprResult Op;
+ if (Type->isOverloadableType())
+ Op = S.CreateOverloadedBinOp(Loc, Opc, Fns, Obj.first.get(),
+ Obj.second.get(), /*PerformADL=*/true,
+ /*AllowRewrittenCandidates=*/true, FD);
+ else
+ Op = S.CreateBuiltinBinOp(Loc, Opc, Obj.first.get(), Obj.second.get());
if (Op.isInvalid())
return StmtError();
@@ -7869,8 +7905,12 @@ private:
llvm::APInt ZeroVal(S.Context.getIntWidth(S.Context.IntTy), 0);
Expr *Zero =
IntegerLiteral::Create(S.Context, ZeroVal, S.Context.IntTy, Loc);
- ExprResult Comp = S.CreateOverloadedBinOp(Loc, BO_NE, Fns, VDRef.get(),
- Zero, true, true, FD);
+ ExprResult Comp;
+ if (VDRef.get()->getType()->isOverloadableType())
+ Comp = S.CreateOverloadedBinOp(Loc, BO_NE, Fns, VDRef.get(), Zero, true,
+ true, FD);
+ else
+ Comp = S.CreateBuiltinBinOp(Loc, BO_NE, VDRef.get(), Zero);
if (Comp.isInvalid())
return StmtError();
Sema::ConditionResult Cond = S.ActOnCondition(
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index ea4b93ee6a5a..29562615e588 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -245,8 +245,8 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
return true;
}
- // See if this is a deleted function.
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+ // See if this is a deleted function.
if (FD->isDeleted()) {
auto *Ctor = dyn_cast<CXXConstructorDecl>(FD);
if (Ctor && Ctor->isInheritingConstructor())
@@ -259,6 +259,29 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
return true;
}
+ // [expr.prim.id]p4
+ // A program that refers explicitly or implicitly to a function with a
+ // trailing requires-clause whose constraint-expression is not satisfied,
+ // other than to declare it, is ill-formed. [...]
+ //
+ // See if this is a function with constraints that need to be satisfied.
+ // Check this before deducing the return type, as it might instantiate the
+ // definition.
+ if (FD->getTrailingRequiresClause()) {
+ ConstraintSatisfaction Satisfaction;
+ if (CheckFunctionConstraints(FD, Satisfaction, Loc))
+ // A diagnostic will have already been generated (non-constant
+ // constraint expression, for example)
+ return true;
+ if (!Satisfaction.IsSatisfied) {
+ Diag(Loc,
+ diag::err_reference_to_function_with_unsatisfied_constraints)
+ << D;
+ DiagnoseUnsatisfiedConstraint(Satisfaction);
+ return true;
+ }
+ }
+
// If the function has a deduced return type, and we can't deduce it,
// then we can't use it either.
if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() &&
@@ -326,30 +349,6 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc);
- // [expr.prim.id]p4
- // A program that refers explicitly or implicitly to a function with a
- // trailing requires-clause whose constraint-expression is not satisfied,
- // other than to declare it, is ill-formed. [...]
- //
- // See if this is a function with constraints that need to be satisfied.
- if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
- if (Expr *RC = FD->getTrailingRequiresClause()) {
- ConstraintSatisfaction Satisfaction;
- bool Failed = CheckConstraintSatisfaction(RC, Satisfaction);
- if (Failed)
- // A diagnostic will have already been generated (non-constant
- // constraint expression, for example)
- return true;
- if (!Satisfaction.IsSatisfied) {
- Diag(Loc,
- diag::err_reference_to_function_with_unsatisfied_constraints)
- << D;
- DiagnoseUnsatisfiedConstraint(Satisfaction);
- return true;
- }
- }
- }
-
if (isa<ParmVarDecl>(D) && isa<RequiresExprBodyDecl>(D->getDeclContext()) &&
!isUnevaluatedContext()) {
// C++ [expr.prim.req.nested] p3
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 192c237b6c1c..98af7fb73eca 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -8487,7 +8487,8 @@ concepts::NestedRequirement *
Sema::BuildNestedRequirement(Expr *Constraint) {
ConstraintSatisfaction Satisfaction;
if (!Constraint->isInstantiationDependent() &&
- CheckConstraintSatisfaction(Constraint, Satisfaction))
+ CheckConstraintSatisfaction(nullptr, {Constraint}, /*TemplateArgs=*/{},
+ Constraint->getSourceRange(), Satisfaction))
return nullptr;
return new (Context) concepts::NestedRequirement(Context, Constraint,
Satisfaction);
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 0fd932fac970..db1884acd349 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -3176,7 +3176,7 @@ static bool isNonTrivialObjCLifetimeConversion(Qualifiers FromQuals,
/// FromType and \p ToType is permissible, given knowledge about whether every
/// outer layer is const-qualified.
static bool isQualificationConversionStep(QualType FromType, QualType ToType,
- bool CStyle,
+ bool CStyle, bool IsTopLevel,
bool &PreviousToQualsIncludeConst,
bool &ObjCLifetimeConversion) {
Qualifiers FromQuals = FromType.getQualifiers();
@@ -3213,11 +3213,15 @@ static bool isQualificationConversionStep(QualType FromType, QualType ToType,
if (!CStyle && !ToQuals.compatiblyIncludes(FromQuals))
return false;
- // For a C-style cast, just require the address spaces to overlap.
- // FIXME: Does "superset" also imply the representation of a pointer is the
- // same? We're assuming that it does here and in compatiblyIncludes.
- if (CStyle && !ToQuals.isAddressSpaceSupersetOf(FromQuals) &&
- !FromQuals.isAddressSpaceSupersetOf(ToQuals))
+ // If address spaces mismatch:
+ // - in top level it is only valid to convert to addr space that is a
+ // superset in all cases apart from C-style casts where we allow
+ // conversions between overlapping address spaces.
+ // - in non-top levels it is not a valid conversion.
+ if (ToQuals.getAddressSpace() != FromQuals.getAddressSpace() &&
+ (!IsTopLevel ||
+ !(ToQuals.isAddressSpaceSupersetOf(FromQuals) ||
+ (CStyle && FromQuals.isAddressSpaceSupersetOf(ToQuals)))))
return false;
// -- if the cv 1,j and cv 2,j are different, then const is in
@@ -3258,9 +3262,9 @@ Sema::IsQualificationConversion(QualType FromType, QualType ToType,
bool PreviousToQualsIncludeConst = true;
bool UnwrappedAnyPointer = false;
while (Context.UnwrapSimilarTypes(FromType, ToType)) {
- if (!isQualificationConversionStep(FromType, ToType, CStyle,
- PreviousToQualsIncludeConst,
- ObjCLifetimeConversion))
+ if (!isQualificationConversionStep(
+ FromType, ToType, CStyle, !UnwrappedAnyPointer,
+ PreviousToQualsIncludeConst, ObjCLifetimeConversion))
return false;
UnwrappedAnyPointer = true;
}
@@ -4499,7 +4503,7 @@ Sema::CompareReferenceRelationship(SourceLocation Loc,
// If we find a qualifier mismatch, the types are not reference-compatible,
// but are still be reference-related if they're similar.
bool ObjCLifetimeConversion = false;
- if (!isQualificationConversionStep(T2, T1, /*CStyle=*/false,
+ if (!isQualificationConversionStep(T2, T1, /*CStyle=*/false, TopLevel,
PreviousToQualsIncludeConst,
ObjCLifetimeConversion))
return (ConvertedReferent || Context.hasSimilarType(T1, T2))
@@ -6291,9 +6295,9 @@ void Sema::AddOverloadCandidate(
return;
}
- if (Expr *RequiresClause = Function->getTrailingRequiresClause()) {
+ if (Function->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
- if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) ||
+ if (CheckFunctionConstraints(Function, Satisfaction) ||
!Satisfaction.IsSatisfied) {
Candidate.Viable = false;
Candidate.FailureKind = ovl_fail_constraints_not_satisfied;
@@ -6808,9 +6812,9 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
return;
}
- if (Expr *RequiresClause = Method->getTrailingRequiresClause()) {
+ if (Method->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
- if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) ||
+ if (CheckFunctionConstraints(Method, Satisfaction) ||
!Satisfaction.IsSatisfied) {
Candidate.Viable = false;
Candidate.FailureKind = ovl_fail_constraints_not_satisfied;
@@ -7204,10 +7208,9 @@ void Sema::AddConversionCandidate(
return;
}
- Expr *RequiresClause = Conversion->getTrailingRequiresClause();
- if (RequiresClause) {
+ if (Conversion->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
- if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) ||
+ if (CheckFunctionConstraints(Conversion, Satisfaction) ||
!Satisfaction.IsSatisfied) {
Candidate.Viable = false;
Candidate.FailureKind = ovl_fail_constraints_not_satisfied;
@@ -9270,17 +9273,31 @@ Sema::AddArgumentDependentLookupCandidates(DeclarationName Name,
if (ExplicitTemplateArgs)
continue;
- AddOverloadCandidate(FD, FoundDecl, Args, CandidateSet,
- /*SuppressUserConversions=*/false, PartialOverloading,
- /*AllowExplicit*/ true,
- /*AllowExplicitConversions*/ false,
- ADLCallKind::UsesADL);
+ AddOverloadCandidate(
+ FD, FoundDecl, Args, CandidateSet, /*SuppressUserConversions=*/false,
+ PartialOverloading, /*AllowExplicit=*/true,
+ /*AllowExplicitConversions=*/false, ADLCallKind::UsesADL);
+ if (CandidateSet.getRewriteInfo().shouldAddReversed(Context, FD)) {
+ AddOverloadCandidate(
+ FD, FoundDecl, {Args[1], Args[0]}, CandidateSet,
+ /*SuppressUserConversions=*/false, PartialOverloading,
+ /*AllowExplicit=*/true, /*AllowExplicitConversions=*/false,
+ ADLCallKind::UsesADL, None, OverloadCandidateParamOrder::Reversed);
+ }
} else {
+ auto *FTD = cast<FunctionTemplateDecl>(*I);
AddTemplateOverloadCandidate(
- cast<FunctionTemplateDecl>(*I), FoundDecl, ExplicitTemplateArgs, Args,
- CandidateSet,
+ FTD, FoundDecl, ExplicitTemplateArgs, Args, CandidateSet,
/*SuppressUserConversions=*/false, PartialOverloading,
- /*AllowExplicit*/true, ADLCallKind::UsesADL);
+ /*AllowExplicit=*/true, ADLCallKind::UsesADL);
+ if (CandidateSet.getRewriteInfo().shouldAddReversed(
+ Context, FTD->getTemplatedDecl())) {
+ AddTemplateOverloadCandidate(
+ FTD, FoundDecl, ExplicitTemplateArgs, {Args[1], Args[0]},
+ CandidateSet, /*SuppressUserConversions=*/false, PartialOverloading,
+ /*AllowExplicit=*/true, ADLCallKind::UsesADL,
+ OverloadCandidateParamOrder::Reversed);
+ }
}
}
}
@@ -9566,17 +9583,15 @@ bool clang::isBetterOverloadCandidate(
if (RC1 && RC2) {
bool AtLeastAsConstrained1, AtLeastAsConstrained2;
if (S.IsAtLeastAsConstrained(Cand1.Function, {RC1}, Cand2.Function,
- {RC2}, AtLeastAsConstrained1))
- return false;
- if (!AtLeastAsConstrained1)
- return false;
- if (S.IsAtLeastAsConstrained(Cand2.Function, {RC2}, Cand1.Function,
+ {RC2}, AtLeastAsConstrained1) ||
+ S.IsAtLeastAsConstrained(Cand2.Function, {RC2}, Cand1.Function,
{RC1}, AtLeastAsConstrained2))
return false;
- if (!AtLeastAsConstrained2)
- return true;
- } else if (RC1 || RC2)
+ if (AtLeastAsConstrained1 != AtLeastAsConstrained2)
+ return AtLeastAsConstrained1;
+ } else if (RC1 || RC2) {
return RC1 != nullptr;
+ }
}
}
@@ -9947,9 +9962,9 @@ static bool checkAddressOfFunctionIsAvailable(Sema &S, const FunctionDecl *FD,
return false;
}
- if (const Expr *RC = FD->getTrailingRequiresClause()) {
+ if (FD->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
- if (S.CheckConstraintSatisfaction(RC, Satisfaction))
+ if (S.CheckFunctionConstraints(FD, Satisfaction, Loc))
return false;
if (!Satisfaction.IsSatisfied) {
if (Complain) {
@@ -10974,8 +10989,7 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
<< (unsigned)FnKindPair.first << (unsigned)ocs_non_template
<< FnDesc /* Ignored */;
ConstraintSatisfaction Satisfaction;
- if (S.CheckConstraintSatisfaction(Fn->getTrailingRequiresClause(),
- Satisfaction))
+ if (S.CheckFunctionConstraints(Fn, Satisfaction))
break;
S.DiagnoseUnsatisfiedConstraint(Satisfaction);
}
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index f961244da072..ad4ea2d2593d 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -2047,12 +2047,14 @@ private:
if (const auto *TC = TTP->getTypeConstraint()) {
TemplateArgumentListInfo TransformedArgs;
const auto *ArgsAsWritten = TC->getTemplateArgsAsWritten();
- if (SemaRef.Subst(ArgsAsWritten->getTemplateArgs(),
+ if (!ArgsAsWritten ||
+ SemaRef.Subst(ArgsAsWritten->getTemplateArgs(),
ArgsAsWritten->NumTemplateArgs, TransformedArgs,
Args))
SemaRef.AttachTypeConstraint(
TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
- TC->getNamedConcept(), &TransformedArgs, NewTTP,
+ TC->getNamedConcept(), ArgsAsWritten ? &TransformedArgs : nullptr,
+ NewTTP,
NewTTP->isParameterPack()
? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
->getEllipsisLoc()
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 394c81c82794..6b865a601f9d 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -2488,7 +2488,7 @@ Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg,
case TemplateArgument::Template:
case TemplateArgument::TemplateExpansion: {
NestedNameSpecifierLocBuilder Builder;
- TemplateName Template = Arg.getAsTemplate();
+ TemplateName Template = Arg.getAsTemplateOrTemplatePattern();
if (DependentTemplateName *DTN = Template.getAsDependentTemplateName())
Builder.MakeTrivial(Context, DTN->getQualifier(), Loc);
else if (QualifiedTemplateName *QTN =
@@ -2514,27 +2514,10 @@ Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg,
}
TemplateArgumentLoc
-Sema::getIdentityTemplateArgumentLoc(Decl *TemplateParm,
+Sema::getIdentityTemplateArgumentLoc(NamedDecl *TemplateParm,
SourceLocation Location) {
- if (auto *TTP = dyn_cast<TemplateTypeParmDecl>(TemplateParm))
- return getTrivialTemplateArgumentLoc(
- TemplateArgument(
- Context.getTemplateTypeParmType(TTP->getDepth(), TTP->getIndex(),
- TTP->isParameterPack(), TTP)),
- QualType(), Location.isValid() ? Location : TTP->getLocation());
- else if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(TemplateParm))
- return getTrivialTemplateArgumentLoc(TemplateArgument(TemplateName(TTP)),
- QualType(),
- Location.isValid() ? Location :
- TTP->getLocation());
- auto *NTTP = cast<NonTypeTemplateParmDecl>(TemplateParm);
- CXXScopeSpec SS;
- DeclarationNameInfo Info(NTTP->getDeclName(),
- Location.isValid() ? Location : NTTP->getLocation());
- Expr *E = BuildDeclarationNameExpr(SS, Info, NTTP).get();
- return getTrivialTemplateArgumentLoc(TemplateArgument(E), NTTP->getType(),
- Location.isValid() ? Location :
- NTTP->getLocation());
+ return getTrivialTemplateArgumentLoc(
+ Context.getInjectedTemplateArg(TemplateParm), QualType(), Location);
}
/// Convert the given deduced template argument and add it to the set of
@@ -3456,13 +3439,16 @@ Sema::TemplateDeductionResult Sema::FinishTemplateArgumentDeduction(
// ([temp.constr.decl]), those constraints are checked for satisfaction
// ([temp.constr.constr]). If the constraints are not satisfied, type
// deduction fails.
- if (CheckInstantiatedFunctionTemplateConstraints(Info.getLocation(),
- Specialization, Builder, Info.AssociatedConstraintsSatisfaction))
- return TDK_MiscellaneousDeductionFailure;
+ if (!PartialOverloading ||
+ (Builder.size() == FunctionTemplate->getTemplateParameters()->size())) {
+ if (CheckInstantiatedFunctionTemplateConstraints(Info.getLocation(),
+ Specialization, Builder, Info.AssociatedConstraintsSatisfaction))
+ return TDK_MiscellaneousDeductionFailure;
- if (!Info.AssociatedConstraintsSatisfaction.IsSatisfied) {
- Info.reset(TemplateArgumentList::CreateCopy(Context, Builder));
- return TDK_ConstraintsNotSatisfied;
+ if (!Info.AssociatedConstraintsSatisfaction.IsSatisfied) {
+ Info.reset(TemplateArgumentList::CreateCopy(Context, Builder));
+ return TDK_ConstraintsNotSatisfied;
+ }
}
if (OriginalCallArgs) {
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 39bc28d62305..568f5404dc0b 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -18,6 +18,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/PrettyDeclStackTrace.h"
+#include "clang/AST/TypeVisitor.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Stack.h"
#include "clang/Sema/DeclSpec.h"
@@ -763,21 +764,30 @@ void Sema::PrintInstantiationStack() {
case CodeSynthesisContext::ConstraintsCheck: {
unsigned DiagID = 0;
+ if (!Active->Entity) {
+ Diags.Report(Active->PointOfInstantiation,
+ diag::note_nested_requirement_here)
+ << Active->InstantiationRange;
+ break;
+ }
if (isa<ConceptDecl>(Active->Entity))
DiagID = diag::note_concept_specialization_here;
else if (isa<TemplateDecl>(Active->Entity))
DiagID = diag::note_checking_constraints_for_template_id_here;
else if (isa<VarTemplatePartialSpecializationDecl>(Active->Entity))
DiagID = diag::note_checking_constraints_for_var_spec_id_here;
- else {
- assert(isa<ClassTemplatePartialSpecializationDecl>(Active->Entity));
+ else if (isa<ClassTemplatePartialSpecializationDecl>(Active->Entity))
DiagID = diag::note_checking_constraints_for_class_spec_id_here;
+ else {
+ assert(isa<FunctionDecl>(Active->Entity));
+ DiagID = diag::note_checking_constraints_for_function_here;
}
SmallVector<char, 128> TemplateArgsStr;
llvm::raw_svector_ostream OS(TemplateArgsStr);
cast<NamedDecl>(Active->Entity)->printName(OS);
- printTemplateArgumentList(OS, Active->template_arguments(),
- getPrintingPolicy());
+ if (!isa<FunctionDecl>(Active->Entity))
+ printTemplateArgumentList(OS, Active->template_arguments(),
+ getPrintingPolicy());
Diags.Report(Active->PointOfInstantiation, DiagID) << OS.str()
<< Active->InstantiationRange;
break;
@@ -1048,6 +1058,8 @@ namespace {
NonTypeTemplateParmDecl *D);
ExprResult TransformSubstNonTypeTemplateParmPackExpr(
SubstNonTypeTemplateParmPackExpr *E);
+ ExprResult TransformSubstNonTypeTemplateParmExpr(
+ SubstNonTypeTemplateParmExpr *E);
/// Rebuild a DeclRefExpr for a VarDecl reference.
ExprResult RebuildVarDeclRefExpr(VarDecl *PD, SourceLocation Loc);
@@ -1526,6 +1538,44 @@ TemplateInstantiator::TransformSubstNonTypeTemplateParmPackExpr(
Arg);
}
+ExprResult
+TemplateInstantiator::TransformSubstNonTypeTemplateParmExpr(
+ SubstNonTypeTemplateParmExpr *E) {
+ ExprResult SubstReplacement = TransformExpr(E->getReplacement());
+ if (SubstReplacement.isInvalid())
+ return true;
+ QualType SubstType = TransformType(E->getType());
+ if (SubstType.isNull())
+ return true;
+ // The type may have been previously dependent and not now, which means we
+ // might have to implicit cast the argument to the new type, for example:
+ // template<auto T, decltype(T) U>
+ // concept C = sizeof(U) == 4;
+ // void foo() requires C<2, 'a'> { }
+ // When normalizing foo(), we first form the normalized constraints of C:
+ // AtomicExpr(sizeof(U) == 4,
+ // U=SubstNonTypeTemplateParmExpr(Param=U,
+ // Expr=DeclRef(U),
+ // Type=decltype(T)))
+ // Then we substitute T = 2, U = 'a' into the parameter mapping, and need to
+ // produce:
+ // AtomicExpr(sizeof(U) == 4,
+ // U=SubstNonTypeTemplateParmExpr(Param=U,
+ // Expr=ImpCast(
+ // decltype(2),
+ // SubstNTTPE(Param=U, Expr='a',
+ // Type=char)),
+ // Type=decltype(2)))
+ // The call to CheckTemplateArgument here produces the ImpCast.
+ TemplateArgument Converted;
+ if (SemaRef.CheckTemplateArgument(E->getParameter(), SubstType,
+ SubstReplacement.get(),
+ Converted).isInvalid())
+ return true;
+ return transformNonTypeTemplateParmRef(E->getParameter(),
+ E->getExprLoc(), Converted);
+}
+
ExprResult TemplateInstantiator::RebuildVarDeclRefExpr(VarDecl *PD,
SourceLocation Loc) {
DeclarationNameInfo NameInfo(PD->getDeclName(), Loc);
@@ -2096,6 +2146,94 @@ void Sema::SubstExceptionSpec(FunctionDecl *New, const FunctionProtoType *Proto,
UpdateExceptionSpec(New, ESI);
}
+namespace {
+
+ struct GetContainedInventedTypeParmVisitor :
+ public TypeVisitor<GetContainedInventedTypeParmVisitor,
+ TemplateTypeParmDecl *> {
+ using TypeVisitor<GetContainedInventedTypeParmVisitor,
+ TemplateTypeParmDecl *>::Visit;
+
+ TemplateTypeParmDecl *Visit(QualType T) {
+ if (T.isNull())
+ return nullptr;
+ return Visit(T.getTypePtr());
+ }
+ // The deduced type itself.
+ TemplateTypeParmDecl *VisitTemplateTypeParmType(
+ const TemplateTypeParmType *T) {
+ if (!T->getDecl()->isImplicit())
+ return nullptr;
+ return T->getDecl();
+ }
+
+ // Only these types can contain 'auto' types, and subsequently be replaced
+ // by references to invented parameters.
+
+ TemplateTypeParmDecl *VisitElaboratedType(const ElaboratedType *T) {
+ return Visit(T->getNamedType());
+ }
+
+ TemplateTypeParmDecl *VisitPointerType(const PointerType *T) {
+ return Visit(T->getPointeeType());
+ }
+
+ TemplateTypeParmDecl *VisitBlockPointerType(const BlockPointerType *T) {
+ return Visit(T->getPointeeType());
+ }
+
+ TemplateTypeParmDecl *VisitReferenceType(const ReferenceType *T) {
+ return Visit(T->getPointeeTypeAsWritten());
+ }
+
+ TemplateTypeParmDecl *VisitMemberPointerType(const MemberPointerType *T) {
+ return Visit(T->getPointeeType());
+ }
+
+ TemplateTypeParmDecl *VisitArrayType(const ArrayType *T) {
+ return Visit(T->getElementType());
+ }
+
+ TemplateTypeParmDecl *VisitDependentSizedExtVectorType(
+ const DependentSizedExtVectorType *T) {
+ return Visit(T->getElementType());
+ }
+
+ TemplateTypeParmDecl *VisitVectorType(const VectorType *T) {
+ return Visit(T->getElementType());
+ }
+
+ TemplateTypeParmDecl *VisitFunctionProtoType(const FunctionProtoType *T) {
+ return VisitFunctionType(T);
+ }
+
+ TemplateTypeParmDecl *VisitFunctionType(const FunctionType *T) {
+ return Visit(T->getReturnType());
+ }
+
+ TemplateTypeParmDecl *VisitParenType(const ParenType *T) {
+ return Visit(T->getInnerType());
+ }
+
+ TemplateTypeParmDecl *VisitAttributedType(const AttributedType *T) {
+ return Visit(T->getModifiedType());
+ }
+
+ TemplateTypeParmDecl *VisitMacroQualifiedType(const MacroQualifiedType *T) {
+ return Visit(T->getUnderlyingType());
+ }
+
+ TemplateTypeParmDecl *VisitAdjustedType(const AdjustedType *T) {
+ return Visit(T->getOriginalType());
+ }
+
+ TemplateTypeParmDecl *VisitPackExpansionType(const PackExpansionType *T) {
+ return Visit(T->getPattern());
+ }
+ };
+
+} // namespace
+
ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
const MultiLevelTemplateArgumentList &TemplateArgs,
int indexAdjustment,
@@ -2143,6 +2281,46 @@ ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
return nullptr;
}
+ // In abbreviated templates, TemplateTypeParmDecls with possible
+ // TypeConstraints are created when the parameter list is originally parsed.
+ // The TypeConstraints can therefore reference other functions parameters in
+ // the abbreviated function template, which is why we must instantiate them
+ // here, when the instantiated versions of those referenced parameters are in
+ // scope.
+ if (TemplateTypeParmDecl *TTP =
+ GetContainedInventedTypeParmVisitor().Visit(OldDI->getType())) {
+ if (const TypeConstraint *TC = TTP->getTypeConstraint()) {
+ auto *Inst = cast_or_null<TemplateTypeParmDecl>(
+ FindInstantiatedDecl(TTP->getLocation(), TTP, TemplateArgs));
+ // We will first get here when instantiating the abbreviated function
+ // template's described function, but we might also get here later.
+ // Make sure we do not instantiate the TypeConstraint more than once.
+ if (Inst && !Inst->getTypeConstraint()) {
+ // TODO: Concepts: do not instantiate the constraint (delayed constraint
+ // substitution)
+ const ASTTemplateArgumentListInfo *TemplArgInfo
+ = TC->getTemplateArgsAsWritten();
+ TemplateArgumentListInfo InstArgs;
+
+ if (TemplArgInfo) {
+ InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
+ InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
+ if (Subst(TemplArgInfo->getTemplateArgs(),
+ TemplArgInfo->NumTemplateArgs, InstArgs, TemplateArgs))
+ return nullptr;
+ }
+ if (AttachTypeConstraint(
+ TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
+ TC->getNamedConcept(), &InstArgs, Inst,
+ TTP->isParameterPack()
+ ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
+ ->getEllipsisLoc()
+ : SourceLocation()))
+ return nullptr;
+ }
+ }
+ }
+
ParmVarDecl *NewParm = CheckParameter(Context.getTranslationUnitDecl(),
OldParm->getInnerLocStart(),
OldParm->getLocation(),
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index fbbab8f00703..37dace3bee7f 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1837,6 +1837,23 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
return nullptr;
QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);
+ if (TemplateParams && TemplateParams->size()) {
+ auto *LastParam =
+ dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
+ if (LastParam && LastParam->isImplicit() &&
+ LastParam->hasTypeConstraint()) {
+ // In abbreviated templates, the type-constraints of invented template
+ // type parameters are instantiated with the function type, invalidating
+ // the TemplateParameterList which relied on the template type parameter
+ // not having a type constraint. Recreate the TemplateParameterList with
+ // the updated parameter list.
+ TemplateParams = TemplateParameterList::Create(
+ SemaRef.Context, TemplateParams->getTemplateLoc(),
+ TemplateParams->getLAngleLoc(), TemplateParams->asArray(),
+ TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause());
+ }
+ }
+
NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
@@ -2177,6 +2194,23 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
return nullptr;
QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);
+ if (TemplateParams && TemplateParams->size()) {
+ auto *LastParam =
+ dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
+ if (LastParam && LastParam->isImplicit() &&
+ LastParam->hasTypeConstraint()) {
+ // In abbreviated templates, the type-constraints of invented template
+ // type parameters are instantiated with the function type, invalidating
+ // the TemplateParameterList which relied on the template type parameter
+ // not having a type constraint. Recreate the TemplateParameterList with
+ // the updated parameter list.
+ TemplateParams = TemplateParameterList::Create(
+ SemaRef.Context, TemplateParams->getTemplateLoc(),
+ TemplateParams->getLAngleLoc(), TemplateParams->asArray(),
+ TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause());
+ }
+ }
+
NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
@@ -2190,6 +2224,9 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
if (TrailingRequiresClause) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
+ auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(Owner);
+ Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext,
+ D->getMethodQualifiers(), ThisContext);
ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause,
TemplateArgs);
if (SubstRC.isInvalid())
@@ -2522,28 +2559,34 @@ Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl(
Inst->setAccess(AS_public);
Inst->setImplicit(D->isImplicit());
if (auto *TC = D->getTypeConstraint()) {
- // TODO: Concepts: do not instantiate the constraint (delayed constraint
- // substitution)
- const ASTTemplateArgumentListInfo *TemplArgInfo
- = TC->getTemplateArgsAsWritten();
- TemplateArgumentListInfo InstArgs;
-
- if (TemplArgInfo) {
- InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
- InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
- if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
- TemplArgInfo->NumTemplateArgs,
- InstArgs, TemplateArgs))
+ if (!D->isImplicit()) {
+ // Invented template parameter type constraints will be instantiated with
+ // the corresponding auto-typed parameter as it might reference other
+ // parameters.
+
+ // TODO: Concepts: do not instantiate the constraint (delayed constraint
+ // substitution)
+ const ASTTemplateArgumentListInfo *TemplArgInfo
+ = TC->getTemplateArgsAsWritten();
+ TemplateArgumentListInfo InstArgs;
+
+ if (TemplArgInfo) {
+ InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
+ InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
+ if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
+ TemplArgInfo->NumTemplateArgs,
+ InstArgs, TemplateArgs))
+ return nullptr;
+ }
+ if (SemaRef.AttachTypeConstraint(
+ TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
+ TC->getNamedConcept(), &InstArgs, Inst,
+ D->isParameterPack()
+ ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
+ ->getEllipsisLoc()
+ : SourceLocation()))
return nullptr;
}
- if (SemaRef.AttachTypeConstraint(
- TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
- TC->getNamedConcept(), &InstArgs, Inst,
- D->isParameterPack()
- ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
- ->getEllipsisLoc()
- : SourceLocation()))
- return nullptr;
}
if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
TypeSourceInfo *InstantiatedDefaultArg =
@@ -4246,24 +4289,29 @@ bool Sema::CheckInstantiatedFunctionTemplateConstraints(
Sema::ContextRAII savedContext(*this, Decl);
LocalInstantiationScope Scope(*this);
- MultiLevelTemplateArgumentList MLTAL =
- getTemplateInstantiationArgs(Decl, nullptr, /*RelativeToPrimary*/true);
-
// If this is not an explicit specialization - we need to get the instantiated
// version of the template arguments and add them to scope for the
// substitution.
if (Decl->isTemplateInstantiation()) {
InstantiatingTemplate Inst(*this, Decl->getPointOfInstantiation(),
InstantiatingTemplate::ConstraintsCheck{}, Decl->getPrimaryTemplate(),
- MLTAL.getInnermost(), SourceRange());
+ TemplateArgs, SourceRange());
if (Inst.isInvalid())
return true;
+ MultiLevelTemplateArgumentList MLTAL(
+ *Decl->getTemplateSpecializationArgs());
if (addInstantiatedParametersToScope(
*this, Decl, Decl->getPrimaryTemplate()->getTemplatedDecl(),
Scope, MLTAL))
return true;
}
-
+ Qualifiers ThisQuals;
+ CXXRecordDecl *Record = nullptr;
+ if (auto *Method = dyn_cast<CXXMethodDecl>(Decl)) {
+ ThisQuals = Method->getMethodQualifiers();
+ Record = Method->getParent();
+ }
+ CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr);
return CheckConstraintSatisfaction(Template, TemplateAC, TemplateArgs,
PointOfInstantiation, Satisfaction);
}
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 093b69ab19d0..362b5a564ab9 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -555,7 +555,7 @@ void ASTDeclReader::Visit(Decl *D) {
void ASTDeclReader::VisitDecl(Decl *D) {
if (D->isTemplateParameter() || D->isTemplateParameterPack() ||
- isa<ParmVarDecl>(D)) {
+ isa<ParmVarDecl>(D) || isa<ObjCTypeParamDecl>(D)) {
// We don't want to deserialize the DeclContext of a template
// parameter or of a parameter of a function template immediately. These
// entities might be used in the formulation of its DeclContext (for
diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index a4918d7179ff..002b6070ddcd 100644
--- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -607,10 +607,17 @@ window.addEventListener("keydown", function (event) {
)<<<";
}
+static bool shouldDisplayPopUpRange(const SourceRange &Range) {
+ return !(Range.getBegin().isMacroID() || Range.getEnd().isMacroID());
+}
+
static void
HandlePopUpPieceStartTag(Rewriter &R,
const std::vector<SourceRange> &PopUpRanges) {
for (const auto &Range : PopUpRanges) {
+ if (!shouldDisplayPopUpRange(Range))
+ continue;
+
html::HighlightRange(R, Range.getBegin(), Range.getEnd(), "",
"<table class='variable_popup'><tbody>",
/*IsTokenRange=*/true);
@@ -626,6 +633,8 @@ static void HandlePopUpPieceEndTag(Rewriter &R,
llvm::raw_svector_ostream Out(Buf);
SourceRange Range(Piece.getLocation().asRange());
+ if (!shouldDisplayPopUpRange(Range))
+ return;
// Write out the path indices with a right arrow and the message as a row.
Out << "<tr><td valign='top'><div class='PathIndex PathIndexPopUp'>"
@@ -870,7 +879,7 @@ void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID,
<< (num - 1)
<< "\" title=\"Previous event ("
<< (num - 1)
- << ")\">&#x2190;</a></div></td>";
+ << ")\">&#x2190;</a></div>";
}
os << "</td><td>";
diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp
index b551e9f4cf82..6d1a67f2a4fa 100644
--- a/clang/tools/driver/cc1_main.cpp
+++ b/clang/tools/driver/cc1_main.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
@@ -69,7 +70,7 @@ static void LLVMErrorHandler(void *UserData, const std::string &Message,
// We cannot recover from llvm errors. When reporting a fatal error, exit
// with status 70 to generate crash diagnostics. For BSD systems this is
// defined as an internal software error. Otherwise, exit with status 1.
- exit(GenCrashDiag ? 70 : 1);
+ llvm::sys::Process::Exit(GenCrashDiag ? 70 : 1);
}
#ifdef CLANG_HAVE_RLIMITS
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp
index 53c8a9d642dc..e1041f91bfd5 100644
--- a/clang/tools/driver/cc1as_main.cpp
+++ b/clang/tools/driver/cc1as_main.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
@@ -547,7 +548,7 @@ static void LLVMErrorHandler(void *UserData, const std::string &Message,
Diags.Report(diag::err_fe_error_backend) << Message;
// We cannot recover from llvm errors.
- exit(1);
+ sys::Process::Exit(1);
}
int cc1as_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
index aa29536d8616..91584914d868 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
@@ -23,9 +23,12 @@
#include <errno.h>
#include <libkern/OSAtomic.h>
#include <objc/objc-sync.h>
-#include <os/lock.h>
#include <sys/ucontext.h>
+#if defined(__has_include) && __has_include(<os/lock.h>)
+#include <os/lock.h>
+#endif
+
#if defined(__has_include) && __has_include(<xpc/xpc.h>)
#include <xpc/xpc.h>
#endif // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
@@ -247,6 +250,8 @@ TSAN_INTERCEPTOR(void, os_lock_unlock, void *lock) {
REAL(os_lock_unlock)(lock);
}
+#if defined(__has_include) && __has_include(<os/lock.h>)
+
TSAN_INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) {
if (!cur_thread()->is_inited || cur_thread()->is_dead) {
return REAL(os_unfair_lock_lock)(lock);
@@ -286,6 +291,8 @@ TSAN_INTERCEPTOR(void, os_unfair_lock_unlock, os_unfair_lock_t lock) {
REAL(os_unfair_lock_unlock)(lock);
}
+#endif // #if defined(__has_include) && __has_include(<os/lock.h>)
+
#if defined(__has_include) && __has_include(<xpc/xpc.h>)
TSAN_INTERCEPTOR(void, xpc_connection_set_event_handler,
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 8f48f16c2364..ccce227f4d6b 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -342,6 +342,10 @@
# define _LIBCPP_HAS_ALIGNED_ALLOC
# define _LIBCPP_HAS_QUICK_EXIT
# define _LIBCPP_HAS_C11_FEATURES
+# if __FreeBSD_version >= 1300064 || \
+ (__FreeBSD_version >= 1201504 && __FreeBSD_version < 1300000)
+# define _LIBCPP_HAS_TIMESPEC_GET
+# endif
# elif defined(__BIONIC__)
# define _LIBCPP_HAS_C11_FEATURES
# if __ANDROID_API__ >= 21
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index de1023346aa5..08cae59b294b 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -275,8 +275,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
case R_ARM_PLT32:
case R_ARM_JUMP24:
// Source is ARM, all PLT entries are ARM so no interworking required.
- // Otherwise we need to interwork if Symbol has bit 0 set (Thumb).
- if (expr == R_PC && ((s.getVA() & 1) == 1))
+ // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
+ if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
return true;
LLVM_FALLTHROUGH;
case R_ARM_CALL: {
@@ -286,8 +286,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
case R_ARM_THM_JUMP19:
case R_ARM_THM_JUMP24:
// Source is Thumb, all PLT entries are ARM so interworking is required.
- // Otherwise we need to interwork if Symbol has bit 0 clear (ARM).
- if (expr == R_PLT_PC || ((s.getVA() & 1) == 0))
+ // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
+ if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
return true;
LLVM_FALLTHROUGH;
case R_ARM_THM_CALL: {
diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp
index 1d4e80184dcd..b3cc78710e9a 100644
--- a/lld/ELF/Arch/PPC.cpp
+++ b/lld/ELF/Arch/PPC.cpp
@@ -67,6 +67,18 @@ static void writeFromHalf16(uint8_t *loc, uint32_t insn) {
}
void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
+ // Create canonical PLT entries for non-PIE code. Compilers don't generate
+ // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE.
+ uint32_t glink = in.plt->getVA(); // VA of .glink
+ if (!config->isPic) {
+ for (const Symbol *sym : in.plt->entries)
+ if (sym->needsPltAddr) {
+ writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0);
+ buf += 16;
+ glink += 16;
+ }
+ }
+
// On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an
// absolute address from a specific .plt slot (usually called .got.plt on
// other targets) and jumps there.
@@ -85,15 +97,14 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
// computes the PLT index (by computing the distance from the landing b to
// itself) and calls _dl_runtime_resolve() (in glibc).
uint32_t got = in.got->getVA();
- uint32_t glink = in.plt->getVA(); // VA of .glink
const uint8_t *end = buf + 64;
if (config->isPic) {
- uint32_t afterBcl = in.plt->getSize() - target->pltHeaderSize + 12;
+ uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12;
uint32_t gotBcl = got + 4 - (glink + afterBcl);
write32(buf + 0, 0x3d6b0000 | ha(afterBcl)); // addis r11,r11,1f-glink@ha
write32(buf + 4, 0x7c0802a6); // mflr r0
write32(buf + 8, 0x429f0005); // bcl 20,30,.+4
- write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-.glink@l
+ write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l
write32(buf + 16, 0x7d8802a6); // mflr r12
write32(buf + 20, 0x7c0803a6); // mtlr r0
write32(buf + 24, 0x7d6c5850); // sub r11,r11,r12
@@ -113,16 +124,16 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
buf += 56;
} else {
write32(buf + 0, 0x3d800000 | ha(got + 4)); // lis r12,GOT+4@ha
- write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-Glink@ha
+ write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-glink@ha
if (ha(got + 4) == ha(got + 8))
write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12)
else
write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12)
- write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-Glink@l
+ write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-glink@l
write32(buf + 16, 0x7c0903a6); // mtctr r0
write32(buf + 20, 0x7c0b5a14); // add r0,r11,r11
if (ha(got + 4) == ha(got + 8))
- write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@ha(r12)
+ write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12)
else
write32(buf + 24, 0x818c0000 | 4); // lwz r12,4(r12)
write32(buf + 28, 0x7d605a14); // add r11,r0,r11
@@ -146,7 +157,7 @@ PPC::PPC() {
gotBaseSymInGotPlt = false;
gotHeaderEntriesNum = 3;
gotPltHeaderEntriesNum = 0;
- pltHeaderSize = 64; // size of PLTresolve in .glink
+ pltHeaderSize = 0;
pltEntrySize = 4;
ipltEntrySize = 16;
@@ -178,25 +189,25 @@ void PPC::writeGotHeader(uint8_t *buf) const {
void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const {
// Address of the symbol resolver stub in .glink .
- write32(buf, in.plt->getVA() + 4 * s.pltIndex);
+ write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex);
}
bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file,
- uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const {
- if (type != R_PPC_REL24 && type != R_PPC_PLTREL24)
+ uint64_t branchAddr, const Symbol &s, int64_t a) const {
+ if (type != R_PPC_LOCAL24PC && type != R_PPC_REL24 && type != R_PPC_PLTREL24)
return false;
if (s.isInPlt())
return true;
if (s.isUndefWeak())
return false;
- return !(expr == R_PC && PPC::inBranchRange(type, branchAddr, s.getVA()));
+ return !PPC::inBranchRange(type, branchAddr, s.getVA(a));
}
uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; }
bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
uint64_t offset = dst - src;
- if (type == R_PPC_REL24 || type == R_PPC_PLTREL24)
+ if (type == R_PPC_LOCAL24PC || type == R_PPC_REL24 || type == R_PPC_PLTREL24)
return isInt<26>(offset);
llvm_unreachable("unsupported relocation type used in branch");
}
@@ -219,13 +230,13 @@ RelExpr PPC::getRelExpr(RelType type, const Symbol &s,
return R_DTPREL;
case R_PPC_REL14:
case R_PPC_REL32:
- case R_PPC_LOCAL24PC:
case R_PPC_REL16_LO:
case R_PPC_REL16_HI:
case R_PPC_REL16_HA:
return R_PC;
case R_PPC_GOT16:
return R_GOT_OFF;
+ case R_PPC_LOCAL24PC:
case R_PPC_REL24:
return R_PLT_PC;
case R_PPC_PLTREL24:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index aab272f53a73..147c51ab285e 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -485,6 +485,14 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) {
p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr;
else if (config->relocatable && type != target->noneRel)
sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym});
+ } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 &&
+ p->r_addend >= 0x8000) {
+ // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24
+ // indicates that r30 is relative to the input section .got2
+ // (r_addend>=0x8000), after linking, r30 should be relative to the output
+ // section .got2 . To compensate for the shift, adjust r_addend by
+ // ppc32Got2OutSecOff.
+ p->r_addend += sec->file->ppc32Got2OutSecOff;
}
}
}
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index ced9991f2003..93ec06610716 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1198,10 +1198,16 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type,
getLocation(sec, sym, offset));
if (!sym.isInPlt())
addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym);
- if (!sym.isDefined())
+ if (!sym.isDefined()) {
replaceWithDefined(
sym, in.plt,
target->pltHeaderSize + target->pltEntrySize * sym.pltIndex, 0);
+ if (config->emachine == EM_PPC) {
+ // PPC32 canonical PLT entries are at the beginning of .glink
+ cast<Defined>(sym).value = in.plt->headerSize;
+ in.plt->headerSize += 16;
+ }
+ }
sym.needsPltAddr = true;
sec.relocations.push_back({expr, type, offset, addend, &sym});
return;
@@ -1298,10 +1304,10 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
if (expr == R_GOT_PC && !isAbsoluteValue(sym)) {
expr = target->adjustRelaxExpr(type, relocatedAddr, expr);
} else {
- // Addend of R_PPC_PLTREL24 is used to choose call stub type. It should be
- // ignored if optimized to R_PC.
+ // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call
+ // stub type. It should be ignored if optimized to R_PC.
if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL)
- addend = 0;
+ addend &= ~0x8000;
expr = fromPlt(expr);
}
}
@@ -1752,6 +1758,37 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *os,
uint64_t off) {
auto *ts = make<ThunkSection>(os, off);
ts->partition = os->partition;
+ if ((config->fixCortexA53Errata843419 || config->fixCortexA8) &&
+ !isd->sections.empty()) {
+ // The errata fixes are sensitive to addresses modulo 4 KiB. When we add
+ // thunks we disturb the base addresses of sections placed after the thunks;
+ // this makes patches we have generated redundant, and may cause us to
+ // generate more patches as different instructions are now in sensitive
+ // locations. When we generate more patches we may force more branches to
+ // go out of range, causing more thunks to be generated. In pathological
+ // cases this can cause the address dependent content pass not to converge.
+ // We fix this by rounding up the size of the ThunkSection to 4KiB, this
+ // limits the insertion of a ThunkSection on the addresses modulo 4 KiB,
+ // which means that adding Thunks to the section does not invalidate
+ // errata patches for following code.
+ // Rounding up the size to 4KiB has consequences for code-size and can
+ // trip up linker script defined assertions. For example the linux kernel
+ // has an assertion that what LLD represents as an InputSectionDescription
+ // does not exceed 4 KiB even if the overall OutputSection is > 128 MiB.
+ // We use the heuristic of rounding up the size when both of the following
+ // conditions are true:
+ // 1.) The OutputSection is larger than the ThunkSectionSpacing. This
+ // accounts for the case where no single InputSectionDescription is
+ // larger than the OutputSection size. This is conservative but simple.
+ // 2.) The InputSectionDescription is larger than 4 KiB. This will prevent
+ // any assertion failures that an InputSectionDescription is < 4 KiB
+ // in size.
+ uint64_t isdSize = isd->sections.back()->outSecOff +
+ isd->sections.back()->getSize() -
+ isd->sections.front()->outSecOff;
+ if (os->size > target->getThunkSectionSpacing() && isdSize > 4096)
+ ts->roundUpSizeForErrata = true;
+ }
isd->thunkSections.push_back({ts, pass});
return ts;
}
@@ -1820,9 +1857,7 @@ bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) {
rel.sym->getVA(rel.addend) + getPCBias(rel.type)))
return true;
rel.sym = &t->destination;
- // TODO Restore addend on all targets.
- if (config->emachine == EM_AARCH64 || config->emachine == EM_PPC64)
- rel.addend = t->addend;
+ rel.addend = t->addend;
if (rel.sym->isInPlt())
rel.expr = toPlt(rel.expr);
}
@@ -1900,16 +1935,11 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) {
rel.sym = t->getThunkTargetSym();
rel.expr = fromPlt(rel.expr);
- // On AArch64 and PPC64, a jump/call relocation may be encoded as
+ // On AArch64 and PPC, a jump/call relocation may be encoded as
// STT_SECTION + non-zero addend, clear the addend after
// redirection.
- //
- // The addend of R_PPC_PLTREL24 should be ignored after changing to
- // R_PC.
- if (config->emachine == EM_AARCH64 ||
- config->emachine == EM_PPC64 ||
- (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24))
- rel.addend = 0;
+ if (config->emachine != EM_MIPS)
+ rel.addend = -getPCBias(rel.type);
}
for (auto &p : isd->thunkSections)
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 550a5b38b89b..ea6eab4b47ad 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2449,6 +2449,9 @@ PltSection::PltSection()
if (config->emachine == EM_PPC || config->emachine == EM_PPC64) {
name = ".glink";
alignment = 4;
+ // PLTresolve is at the end.
+ if (config->emachine == EM_PPC)
+ footerSize = 64;
}
// On x86 when IBT is enabled, this section contains the second PLT (lazy
@@ -2486,7 +2489,7 @@ void PltSection::addEntry(Symbol &sym) {
}
size_t PltSection::getSize() const {
- return headerSize + entries.size() * target->pltEntrySize;
+ return headerSize + entries.size() * target->pltEntrySize + footerSize;
}
bool PltSection::isNeeded() const {
@@ -3451,19 +3454,14 @@ bool ARMExidxSyntheticSection::classof(const SectionBase *d) {
}
ThunkSection::ThunkSection(OutputSection *os, uint64_t off)
- : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS,
- config->wordsize, ".text.thunk") {
+ : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4,
+ ".text.thunk") {
this->parent = os;
this->outSecOff = off;
}
-// When the errata patching is on, we round the size up to a 4 KiB
-// boundary. This limits the effect that adding Thunks has on the addresses
-// of the program modulo 4 KiB. As the errata patching is sensitive to address
-// modulo 4 KiB this can prevent further patches from being needed due to
-// Thunk insertion.
size_t ThunkSection::getSize() const {
- if (config->fixCortexA53Errata843419 || config->fixCortexA8)
+ if (roundUpSizeForErrata)
return alignTo(size, 4096);
return size;
}
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index f0a598dda51d..5f59178fb541 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -683,9 +683,9 @@ public:
void addEntry(Symbol &sym);
size_t getNumEntries() const { return entries.size(); }
- size_t headerSize = 0;
+ size_t headerSize;
+ size_t footerSize = 0;
-private:
std::vector<const Symbol *> entries;
};
@@ -1069,6 +1069,10 @@ public:
InputSection *getTargetInputSection() const;
bool assignOffsets();
+ // When true, round up reported size of section to 4 KiB. See comment
+ // in addThunkSection() for more details.
+ bool roundUpSizeForErrata = false;
+
private:
std::vector<Thunk *> thunks;
size_t size = 0;
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 7b927a434e36..f9c2e2d74e0a 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -245,8 +245,7 @@ public:
// decide the offsets in the call stub.
PPC32PltCallStub(const InputSection &isec, const Relocation &rel,
Symbol &dest)
- : Thunk(dest, rel.type == R_PPC_PLTREL24 ? rel.addend : 0),
- file(isec.file) {}
+ : Thunk(dest, rel.addend), file(isec.file) {}
uint32_t size() override { return 16; }
void writeTo(uint8_t *buf) override;
void addSymbols(ThunkSection &isec) override;
@@ -257,6 +256,14 @@ private:
const InputFile *file;
};
+class PPC32LongThunk final : public Thunk { // PPC32 thunk that addThunkPPC32() creates when the target is not in the PLT.
+public:
+ PPC32LongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
+ uint32_t size() override { return config->isPic ? 32 : 16; } // bytes emitted by writeTo(): 8 instructions if PIC, else 4
+ void writeTo(uint8_t *buf) override;
+ void addSymbols(ThunkSection &isec) override;
+};
+
// PPC64 Plt call stubs.
// Any call site that needs to call through a plt entry needs a call stub in
// the .text section. The call stub is responsible for:
@@ -765,6 +772,33 @@ bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec,
return !config->isPic || (isec.file == file && rel.addend == addend);
}
+void PPC32LongThunk::addSymbols(ThunkSection &isec) { // Adds an STT_FUNC symbol named "__LongThunk_<destination name>".
+ addSymbol(saver.save("__LongThunk_" + destination.getName()), STT_FUNC, 0, // NOTE(review): 0 is presumably the offset within the thunk - confirm
+ isec);
+}
+
+void PPC32LongThunk::writeTo(uint8_t *buf) { // Emit code that loads the destination address into r12 and branches via CTR.
+ auto ha = [](uint32_t v) -> uint16_t { return (v + 0x8000) >> 16; }; // high half, adjusted because addi sign-extends the low half
+ auto lo = [](uint32_t v) -> uint16_t { return v; }; // low 16 bits
+ uint32_t d = destination.getVA(addend);
+ if (config->isPic) {
+ uint32_t off = d - (getThunkTargetSym()->getVA() + 8); // the bcl at +4 leaves thunk+8 in LR
+ write32(buf + 0, 0x7c0802a6); // mflr r0 (save the caller's LR)
+ write32(buf + 4, 0x429f0005); // bcl 20,31,.+4
+ write32(buf + 8, 0x7d8802a6); // mflr r12 (r12 = address of this instruction)
+ write32(buf + 12, 0x3d8c0000 | ha(off)); // addis r12,r12,off@ha
+ write32(buf + 16, 0x398c0000 | lo(off)); // addi r12,r12,off@l
+ write32(buf + 20, 0x7c0803a6); // mtlr r0 (restore the caller's LR)
+ buf += 24;
+ } else {
+ write32(buf + 0, 0x3d800000 | ha(d)); // lis r12,d@ha
+ write32(buf + 4, 0x398c0000 | lo(d)); // addi r12,r12,d@l
+ buf += 8;
+ }
+ write32(buf + 0, 0x7d8903a6); // mtctr r12
+ write32(buf + 4, 0x4e800420); // bctr
+}
+
void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset) {
uint16_t offHa = (offset + 0x8000) >> 16;
uint16_t offLo = offset & 0xffff;
@@ -902,9 +936,12 @@ static Thunk *addThunkMips(RelType type, Symbol &s) {
static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel,
Symbol &s) {
- assert((rel.type == R_PPC_REL24 || rel.type == R_PPC_PLTREL24) &&
+ assert((rel.type == R_PPC_LOCAL24PC || rel.type == R_PPC_REL24 ||
+ rel.type == R_PPC_PLTREL24) &&
"unexpected relocation type for thunk");
- return make<PPC32PltCallStub>(isec, rel, s);
+ if (s.isInPlt())
+ return make<PPC32PltCallStub>(isec, rel, s);
+ return make<PPC32LongThunk>(s, rel.addend);
}
static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index bc16417646c3..4e55f93882f1 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -30,6 +30,14 @@ ELF Improvements
with GNU now. (`r375051
<https://github.com/llvm/llvm-project/commit/48993d5ab9413f0e5b94dfa292a233ce55b09e3e>`_)
+* New ``elf32btsmipn32_fbsd`` and ``elf32ltsmipn32_fbsd`` emulations
+ are supported.
+
+* Relax MIPS ``jalr`` and ``jr`` instructions marked by the ``R_MIPS_JALR``
+ relocation.
+
+* Reduced size of linked MIPS binaries.
+
COFF Improvements
-----------------
@@ -38,7 +46,33 @@ COFF Improvements
MinGW Improvements
------------------
-* ...
+* Allow using custom .edata sections from input object files (for use
+ by Wine)
+ (`dadc6f248868 <https://reviews.llvm.org/rGdadc6f248868>`_)
+
+* Don't implicitly create import libraries unless requested
+ (`6540e55067e3 <https://reviews.llvm.org/rG6540e55067e3>`_)
+
+* Support merging multiple resource object files
+ (`3d3a9b3b413d <https://reviews.llvm.org/rG3d3a9b3b413d>`_)
+ and properly handle the default manifest object files that GCC can pass
+ (`d581dd501381 <https://reviews.llvm.org/rGd581dd501381>`_)
+
+* Demangle itanium symbol names in warnings/error messages
+ (`a66fc1c99f3e <https://reviews.llvm.org/rGa66fc1c99f3e>`_)
+
+* Print source locations for undefined references and duplicate symbols,
+ if possible
+ (`1d06d48bb346 <https://reviews.llvm.org/rG1d06d48bb346>`_)
+ and
+ (`b38f577c015c <https://reviews.llvm.org/rGb38f577c015c>`_)
+
+* Look for more filename patterns when resolving ``-l`` options
+ (`0226c35262df <https://reviews.llvm.org/rG0226c35262df>`_)
+
+* Don't error out on duplicate absolute symbols with the same value
+ (which can happen for the default-null symbol for weak symbols)
+ (`1737cc750c46 <https://reviews.llvm.org/rG1737cc750c46>`_)
MachO Improvements
------------------
diff --git a/lldb/source/DataFormatters/FormatCache.cpp b/lldb/source/DataFormatters/FormatCache.cpp
index 231e7ed0c0a0..99f140705446 100644
--- a/lldb/source/DataFormatters/FormatCache.cpp
+++ b/lldb/source/DataFormatters/FormatCache.cpp
@@ -69,6 +69,8 @@ FormatCache::Entry &FormatCache::GetEntry(ConstString type) {
return m_map[type];
}
+namespace lldb_private {
+
template<> bool FormatCache::Entry::IsCached<lldb::TypeFormatImplSP>() {
return IsFormatCached();
}
@@ -79,6 +81,8 @@ template<> bool FormatCache::Entry::IsCached<lldb::SyntheticChildrenSP>() {
return IsSyntheticCached();
}
+} // namespace lldb_private
+
template <typename ImplSP>
bool FormatCache::Get(ConstString type, ImplSP &format_impl_sp) {
std::lock_guard<std::recursive_mutex> guard(m_mutex);
diff --git a/lldb/source/DataFormatters/LanguageCategory.cpp b/lldb/source/DataFormatters/LanguageCategory.cpp
index e18ec0feaa8b..daf8c7af7d1a 100644
--- a/lldb/source/DataFormatters/LanguageCategory.cpp
+++ b/lldb/source/DataFormatters/LanguageCategory.cpp
@@ -55,6 +55,8 @@ bool LanguageCategory::Get(FormattersMatchData &match_data,
return result;
}
+namespace lldb_private {
+
/// Explicit instantiations for the three types.
/// \{
template bool
@@ -83,6 +85,8 @@ auto &LanguageCategory::GetHardcodedFinder<lldb::SyntheticChildrenSP>() {
return m_hardcoded_synthetics;
}
+} // namespace lldb_private
+
template <typename ImplSP>
bool LanguageCategory::GetHardcoded(FormatManager &fmt_mgr,
FormattersMatchData &match_data,
diff --git a/lldb/source/Interpreter/CommandAlias.cpp b/lldb/source/Interpreter/CommandAlias.cpp
index 5139c53a47b3..5209a7bcbc4e 100644
--- a/lldb/source/Interpreter/CommandAlias.cpp
+++ b/lldb/source/Interpreter/CommandAlias.cpp
@@ -65,7 +65,8 @@ static bool ProcessAliasOptionsArgs(lldb::CommandObjectSP &cmd_obj_sp,
else {
for (auto &entry : args.entries()) {
if (!entry.ref().empty())
- option_arg_vector->emplace_back("<argument>", -1, entry.ref());
+ option_arg_vector->emplace_back(std::string("<argument>"), -1,
+ std::string(entry.ref()));
}
}
}
diff --git a/lldb/source/Interpreter/Options.cpp b/lldb/source/Interpreter/Options.cpp
index 0bceea14269d..80e9d3a6fc15 100644
--- a/lldb/source/Interpreter/Options.cpp
+++ b/lldb/source/Interpreter/Options.cpp
@@ -1061,8 +1061,8 @@ llvm::Expected<Args> Options::ParseAlias(const Args &args,
}
if (!option_arg)
option_arg = "<no-argument>";
- option_arg_vector->emplace_back(option_str.GetString(), has_arg,
- option_arg);
+ option_arg_vector->emplace_back(std::string(option_str.GetString()),
+ has_arg, std::string(option_arg));
// Find option in the argument list; also see if it was supposed to take an
// argument and if one was supplied. Remove option (and argument, if
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 232063a6f339..6166aa77bda4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -85,35 +85,6 @@ static bool DeclKindIsCXXClass(clang::Decl::Kind decl_kind) {
return false;
}
-struct BitfieldInfo {
- uint64_t bit_size;
- uint64_t bit_offset;
-
- BitfieldInfo()
- : bit_size(LLDB_INVALID_ADDRESS), bit_offset(LLDB_INVALID_ADDRESS) {}
-
- void Clear() {
- bit_size = LLDB_INVALID_ADDRESS;
- bit_offset = LLDB_INVALID_ADDRESS;
- }
-
- bool IsValid() const {
- return (bit_size != LLDB_INVALID_ADDRESS) &&
- (bit_offset != LLDB_INVALID_ADDRESS);
- }
-
- bool NextBitfieldOffsetIsValid(const uint64_t next_bit_offset) const {
- if (IsValid()) {
- // This bitfield info is valid, so any subsequent bitfields must not
- // overlap and must be at a higher bit offset than any previous bitfield
- // + size.
- return (bit_size + bit_offset) <= next_bit_offset;
- } else {
- // If the this BitfieldInfo is not valid, then any offset isOK
- return true;
- }
- }
-};
ClangASTImporter &DWARFASTParserClang::GetClangASTImporter() {
if (!m_clang_ast_importer_up) {
@@ -2419,7 +2390,7 @@ void DWARFASTParserClang::ParseSingleMember(
lldb::AccessType &default_accessibility,
DelayedPropertyList &delayed_properties,
lldb_private::ClangASTImporter::LayoutInfo &layout_info,
- BitfieldInfo &last_field_info) {
+ FieldInfo &last_field_info) {
ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule();
const dw_tag_t tag = die.Tag();
// Get the parent byte size so we can verify any members will fit
@@ -2453,6 +2424,14 @@ void DWARFASTParserClang::ParseSingleMember(
const dw_attr_t attr = attributes.AttributeAtIndex(i);
DWARFFormValue form_value;
if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+ // DW_AT_data_member_location indicates the byte offset of the
+ // word from the base address of the structure.
+ //
+ // DW_AT_bit_offset indicates how many bits into the word
+ // (according to the host endianness) the low-order bit of the
+ // field starts. AT_bit_offset can be negative.
+ //
+ // DW_AT_bit_size indicates the size of the field in bits.
switch (attr) {
case DW_AT_name:
name = form_value.AsCString();
@@ -2603,36 +2582,24 @@ void DWARFASTParserClang::ParseSingleMember(
Type *member_type = die.ResolveTypeUID(encoding_form.Reference());
clang::FieldDecl *field_decl = nullptr;
+ const uint64_t character_width = 8;
+ const uint64_t word_width = 32;
if (tag == DW_TAG_member) {
if (member_type) {
+ CompilerType member_clang_type = member_type->GetLayoutCompilerType();
+
if (accessibility == eAccessNone)
accessibility = default_accessibility;
member_accessibilities.push_back(accessibility);
uint64_t field_bit_offset =
(member_byte_offset == UINT32_MAX ? 0 : (member_byte_offset * 8));
- if (bit_size > 0) {
- BitfieldInfo this_field_info;
+ if (bit_size > 0) {
+ FieldInfo this_field_info;
this_field_info.bit_offset = field_bit_offset;
this_field_info.bit_size = bit_size;
- /////////////////////////////////////////////////////////////
- // How to locate a field given the DWARF debug information
- //
- // AT_byte_size indicates the size of the word in which the bit
- // offset must be interpreted.
- //
- // AT_data_member_location indicates the byte offset of the
- // word from the base address of the structure.
- //
- // AT_bit_offset indicates how many bits into the word
- // (according to the host endianness) the low-order bit of the
- // field starts. AT_bit_offset can be negative.
- //
- // AT_bit_size indicates the size of the field in bits.
- /////////////////////////////////////////////////////////////
-
if (data_bit_offset != UINT64_MAX) {
this_field_info.bit_offset = data_bit_offset;
} else {
@@ -2649,8 +2616,9 @@ void DWARFASTParserClang::ParseSingleMember(
}
if ((this_field_info.bit_offset >= parent_bit_size) ||
- !last_field_info.NextBitfieldOffsetIsValid(
- this_field_info.bit_offset)) {
+ (last_field_info.IsBitfield() &&
+ !last_field_info.NextBitfieldOffsetIsValid(
+ this_field_info.bit_offset))) {
ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
objfile->GetModule()->ReportWarning(
"0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid "
@@ -2659,40 +2627,12 @@ void DWARFASTParserClang::ParseSingleMember(
"compiler and include the preprocessed output for %s\n",
die.GetID(), DW_TAG_value_to_name(tag), name,
this_field_info.bit_offset, GetUnitName(parent_die).c_str());
- this_field_info.Clear();
return;
}
// Update the field bit offset we will report for layout
field_bit_offset = this_field_info.bit_offset;
- // If the member to be emitted did not start on a character
- // boundary and there is empty space between the last field and
- // this one, then we need to emit an anonymous member filling
- // up the space up to its start. There are three cases here:
- //
- // 1 If the previous member ended on a character boundary, then
- // we can emit an
- // anonymous member starting at the most recent character
- // boundary.
- //
- // 2 If the previous member did not end on a character boundary
- // and the distance
- // from the end of the previous member to the current member
- // is less than a
- // word width, then we can emit an anonymous member starting
- // right after the
- // previous member and right before this member.
- //
- // 3 If the previous member did not end on a character boundary
- // and the distance
- // from the end of the previous member to the current member
- // is greater than
- // or equal a word width, then we act as in Case 1.
-
- const uint64_t character_width = 8;
- const uint64_t word_width = 32;
-
// Objective-C has invalid DW_AT_bit_offset values in older
// versions of clang, so we have to be careful and only insert
// unnamed bitfields if we have a new enough clang.
@@ -2704,53 +2644,57 @@ void DWARFASTParserClang::ParseSingleMember(
die.GetCU()->Supports_unnamed_objc_bitfields();
if (detect_unnamed_bitfields) {
- BitfieldInfo anon_field_info;
-
- if ((this_field_info.bit_offset % character_width) !=
- 0) // not char aligned
- {
- uint64_t last_field_end = 0;
-
- if (last_field_info.IsValid())
- last_field_end =
- last_field_info.bit_offset + last_field_info.bit_size;
-
- if (this_field_info.bit_offset != last_field_end) {
- if (((last_field_end % character_width) == 0) || // case 1
- (this_field_info.bit_offset - last_field_end >=
- word_width)) // case 3
- {
- anon_field_info.bit_size =
- this_field_info.bit_offset % character_width;
- anon_field_info.bit_offset =
- this_field_info.bit_offset - anon_field_info.bit_size;
- } else // case 2
- {
- anon_field_info.bit_size =
- this_field_info.bit_offset - last_field_end;
- anon_field_info.bit_offset = last_field_end;
- }
- }
+ clang::Optional<FieldInfo> unnamed_field_info;
+ uint64_t last_field_end = 0;
+
+ last_field_end =
+ last_field_info.bit_offset + last_field_info.bit_size;
+
+ if (!last_field_info.IsBitfield()) {
+ // The last field was not a bit-field...
+ // but if it did not end on a word boundary then we need to round
+ // last_field_end up to the next word so the bit-field does not step
+ // into the last field's padding.
+ if (last_field_end != 0 && ((last_field_end % word_width) != 0))
+ last_field_end += word_width - (last_field_end % word_width);
}
- if (anon_field_info.IsValid()) {
+ // If we have a gap between the last_field_end and the current
+ // field we have an unnamed bit-field
+ if (this_field_info.bit_offset != last_field_end &&
+ !(this_field_info.bit_offset < last_field_end)) {
+ unnamed_field_info = FieldInfo{};
+ unnamed_field_info->bit_size =
+ this_field_info.bit_offset - last_field_end;
+ unnamed_field_info->bit_offset = last_field_end;
+ }
+
+ if (unnamed_field_info) {
clang::FieldDecl *unnamed_bitfield_decl =
ClangASTContext::AddFieldToRecordType(
class_clang_type, llvm::StringRef(),
m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint,
word_width),
- accessibility, anon_field_info.bit_size);
+ accessibility, unnamed_field_info->bit_size);
layout_info.field_offsets.insert(std::make_pair(
- unnamed_bitfield_decl, anon_field_info.bit_offset));
+ unnamed_bitfield_decl, unnamed_field_info->bit_offset));
}
}
+
last_field_info = this_field_info;
+ last_field_info.SetIsBitfield(true);
} else {
- last_field_info.Clear();
+ last_field_info.bit_offset = field_bit_offset;
+
+ if (llvm::Optional<uint64_t> clang_type_size =
+ member_clang_type.GetByteSize(nullptr)) {
+ last_field_info.bit_size = *clang_type_size * character_width;
+ }
+
+ last_field_info.SetIsBitfield(false);
}
- CompilerType member_clang_type = member_type->GetLayoutCompilerType();
if (!member_clang_type.IsCompleteType())
member_clang_type.GetCompleteType();
@@ -2885,7 +2829,7 @@ bool DWARFASTParserClang::ParseChildMembers(
if (!parent_die)
return false;
- BitfieldInfo last_field_info;
+ FieldInfo last_field_info;
ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule();
ClangASTContext *ast =
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index 4ad757247c3e..8a78299c8b10 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -170,33 +170,20 @@ protected:
lldb::ModuleSP GetModuleForType(const DWARFDIE &die);
private:
- struct BitfieldInfo {
- uint64_t bit_size;
- uint64_t bit_offset;
+ struct FieldInfo {
+ uint64_t bit_size = 0;
+ uint64_t bit_offset = 0;
+ bool is_bitfield = false;
- BitfieldInfo()
- : bit_size(LLDB_INVALID_ADDRESS), bit_offset(LLDB_INVALID_ADDRESS) {}
+ FieldInfo() = default;
- void Clear() {
- bit_size = LLDB_INVALID_ADDRESS;
- bit_offset = LLDB_INVALID_ADDRESS;
- }
-
- bool IsValid() const {
- return (bit_size != LLDB_INVALID_ADDRESS) &&
- (bit_offset != LLDB_INVALID_ADDRESS);
- }
+ void SetIsBitfield(bool flag) { is_bitfield = flag; }
+ bool IsBitfield() { return is_bitfield; }
bool NextBitfieldOffsetIsValid(const uint64_t next_bit_offset) const {
- if (IsValid()) {
- // This bitfield info is valid, so any subsequent bitfields must not
- // overlap and must be at a higher bit offset than any previous bitfield
- // + size.
- return (bit_size + bit_offset) <= next_bit_offset;
- } else {
- // If the this BitfieldInfo is not valid, then any offset isOK
- return true;
- }
+ // Any subsequent bitfields must not overlap and must be at a higher
+ // bit offset than any previous bitfield + size.
+ return (bit_size + bit_offset) <= next_bit_offset;
}
};
@@ -208,7 +195,7 @@ private:
lldb::AccessType &default_accessibility,
DelayedPropertyList &delayed_properties,
lldb_private::ClangASTImporter::LayoutInfo &layout_info,
- BitfieldInfo &last_field_info);
+ FieldInfo &last_field_info);
bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type,
lldb_private::CompilerType &clang_type);
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 9bfaaccd953e..f06d18720c3a 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -77,7 +77,8 @@ namespace llvm {
static constexpr size_t strLen(const char *Str) {
#if __cplusplus > 201402L
return std::char_traits<char>::length(Str);
-#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || defined(_MSC_VER)
+#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
+ (defined(_MSC_VER) && _MSC_VER >= 1916)
return __builtin_strlen(Str);
#else
const char *Begin = Str;
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index a860ce2773e1..c710c5d7055c 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -135,7 +135,6 @@ public:
MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
private:
- MCSymbol *CurrentFnBegin = nullptr;
MCSymbol *CurrentFnEnd = nullptr;
MCSymbol *CurExceptionSym = nullptr;
@@ -148,6 +147,8 @@ private:
static char ID;
protected:
+ MCSymbol *CurrentFnBegin = nullptr;
+
/// Protected struct HandlerInfo and Handlers permit target extended
/// AsmPrinter adds their own handlers.
struct HandlerInfo {
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
index eb6d84e8cbb4..218afda1b546 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
@@ -13,7 +13,9 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H
#define LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/ExecutionEngine/Orc/Layer.h"
#include <memory>
namespace llvm {
@@ -28,24 +30,31 @@ namespace orc {
class JITTargetMachineBuilder;
+IRMaterializationUnit::ManglingOptions
+irManglingOptionsFromTargetOptions(const TargetOptions &Opts);
+
/// Simple compile functor: Takes a single IR module and returns an ObjectFile.
/// This compiler supports a single compilation thread and LLVMContext only.
/// For multithreaded compilation, use ConcurrentIRCompiler below.
-class SimpleCompiler {
+class SimpleCompiler : public IRCompileLayer::IRCompiler {
public:
using CompileResult = std::unique_ptr<MemoryBuffer>;
/// Construct a simple compile functor with the given target.
SimpleCompiler(TargetMachine &TM, ObjectCache *ObjCache = nullptr)
- : TM(TM), ObjCache(ObjCache) {}
+ : IRCompiler(irManglingOptionsFromTargetOptions(TM.Options)), TM(TM),
+ ObjCache(ObjCache) {}
/// Set an ObjectCache to query before compiling.
void setObjectCache(ObjectCache *NewCache) { ObjCache = NewCache; }
/// Compile a Module to an ObjectFile.
- CompileResult operator()(Module &M);
+ Expected<CompileResult> operator()(Module &M) override;
private:
+ IRMaterializationUnit::ManglingOptions
+ manglingOptionsForTargetMachine(const TargetMachine &TM);
+
CompileResult tryToLoadFromObjectCache(const Module &M);
void notifyObjectCompiled(const Module &M, const MemoryBuffer &ObjBuffer);
@@ -73,14 +82,14 @@ private:
///
/// This class creates a new TargetMachine and SimpleCompiler instance for each
/// compile.
-class ConcurrentIRCompiler {
+class ConcurrentIRCompiler : public IRCompileLayer::IRCompiler {
public:
ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
ObjectCache *ObjCache = nullptr);
void setObjectCache(ObjectCache *ObjCache) { this->ObjCache = ObjCache; }
- std::unique_ptr<MemoryBuffer> operator()(Module &M);
+ Expected<std::unique_ptr<MemoryBuffer>> operator()(Module &M) override;
private:
JITTargetMachineBuilder JTMB;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index d0a9ca5c0580..ecba454887b3 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -489,13 +489,18 @@ public:
/// is guaranteed to return Error::success() and can be wrapped with cantFail.
Error notifyEmitted();
- /// Adds new symbols to the JITDylib and this responsibility instance.
- /// JITDylib entries start out in the materializing state.
+ /// Attempt to claim responsibility for new definitions. This method can be
+ /// used to claim responsibility for symbols that are added to a
+ /// materialization unit during the compilation process (e.g. literal pool
+ /// symbols). Symbol linkage rules are the same as for symbols that are
+ /// defined up front: duplicate strong definitions will result in errors.
+ /// Duplicate weak definitions will be discarded (in which case they will
+ /// not be added to this responsibility instance).
///
/// This method can be used by materialization units that want to add
/// additional symbols at materialization time (e.g. stubs, compile
/// callbacks, metadata).
- Error defineMaterializing(const SymbolFlagsMap &SymbolFlags);
+ Error defineMaterializing(SymbolFlagsMap SymbolFlags);
/// Notify all not-yet-emitted covered by this MaterializationResponsibility
/// instance that an error has occurred.
@@ -1023,7 +1028,7 @@ private:
const SymbolStringPtr &DependantName,
MaterializingInfo &EmittedMI);
- Error defineMaterializing(const SymbolFlagsMap &SymbolFlags);
+ Expected<SymbolFlagsMap> defineMaterializing(SymbolFlagsMap SymbolFlags);
void replace(std::unique_ptr<MaterializationUnit> MU);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index 52223a83ad42..bb8270fe80a3 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -29,14 +29,29 @@ namespace orc {
class IRCompileLayer : public IRLayer {
public:
- using CompileFunction =
- std::function<Expected<std::unique_ptr<MemoryBuffer>>(Module &)>;
+ class IRCompiler {
+ public:
+ IRCompiler(IRMaterializationUnit::ManglingOptions MO) : MO(std::move(MO)) {}
+ virtual ~IRCompiler();
+ const IRMaterializationUnit::ManglingOptions &getManglingOptions() const {
+ return MO;
+ }
+ virtual Expected<std::unique_ptr<MemoryBuffer>> operator()(Module &M) = 0;
+
+ protected:
+ IRMaterializationUnit::ManglingOptions &manglingOptions() { return MO; }
+
+ private:
+ IRMaterializationUnit::ManglingOptions MO;
+ };
using NotifyCompiledFunction =
std::function<void(VModuleKey K, ThreadSafeModule TSM)>;
IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
- CompileFunction Compile);
+ std::unique_ptr<IRCompiler> Compile);
+
+ IRCompiler &getCompiler() { return *Compile; }
void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled);
@@ -45,7 +60,8 @@ public:
private:
mutable std::mutex IRLayerMutex;
ObjectLayer &BaseLayer;
- CompileFunction Compile;
+ std::unique_ptr<IRCompiler> Compile;
+ const IRMaterializationUnit::ManglingOptions *ManglingOpts;
NotifyCompiledFunction NotifyCompiled = NotifyCompiledFunction();
};
@@ -90,7 +106,10 @@ public:
/// Compile the module, and add the resulting object to the base layer
/// along with the given memory manager and symbol resolver.
Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
- if (auto Err = BaseLayer.addObject(std::move(K), Compile(*M)))
+ auto Obj = Compile(*M);
+ if (!Obj)
+ return Obj.takeError();
+ if (auto Err = BaseLayer.addObject(std::move(K), std::move(*Obj)))
return Err;
if (NotifyCompiled)
NotifyCompiled(std::move(K), std::move(M));
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index c048ff3d5522..8e4760024aa8 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -124,7 +124,7 @@ protected:
static std::unique_ptr<ObjectLayer>
createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES);
- static Expected<IRCompileLayer::CompileFunction>
+ static Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>
createCompileFunction(LLJITBuilderState &S, JITTargetMachineBuilder JTMB);
/// Create an LLJIT instance with a single compile thread.
@@ -192,7 +192,7 @@ public:
ExecutionSession &, const Triple &TT)>;
using CompileFunctionCreator =
- std::function<Expected<IRCompileLayer::CompileFunction>(
+ std::function<Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>(
JITTargetMachineBuilder JTMB)>;
std::unique_ptr<ExecutionSession> ES;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
index 8f9bd704395e..95e32b2431a0 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
@@ -21,15 +21,62 @@
namespace llvm {
namespace orc {
+/// IRMaterializationUnit is a convenient base class for MaterializationUnits
+/// wrapping LLVM IR. Represents materialization responsibility for all symbols
+/// in the given module. If symbols are overridden by other definitions, then
+/// their linkage is changed to available-externally.
+class IRMaterializationUnit : public MaterializationUnit {
+public:
+ struct ManglingOptions {
+ bool EmulatedTLS = false;
+ };
+
+ using SymbolNameToDefinitionMap = std::map<SymbolStringPtr, GlobalValue *>;
+
+ /// Create an IRMaterializationLayer. Scans the module to build the
+ /// SymbolFlags and SymbolToDefinition maps.
+ IRMaterializationUnit(ExecutionSession &ES, const ManglingOptions &MO,
+ ThreadSafeModule TSM, VModuleKey K);
+
+ /// Create an IRMaterializationLayer from a module, and pre-existing
+ /// SymbolFlags and SymbolToDefinition maps. The maps must provide
+ /// entries for each definition in M.
+ /// This constructor is useful for delegating work from one
+ /// IRMaterializationUnit to another.
+ IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K,
+ SymbolFlagsMap SymbolFlags,
+ SymbolNameToDefinitionMap SymbolToDefinition);
+
+ /// Return the ModuleIdentifier as the name for this MaterializationUnit.
+ StringRef getName() const override;
+
+ const ThreadSafeModule &getModule() const { return TSM; }
+
+protected:
+ ThreadSafeModule TSM;
+ SymbolNameToDefinitionMap SymbolToDefinition;
+
+private:
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
+};
+
/// Interface for layers that accept LLVM IR.
class IRLayer {
public:
- IRLayer(ExecutionSession &ES);
+ IRLayer(ExecutionSession &ES,
+ const IRMaterializationUnit::ManglingOptions *&MO)
+ : ES(ES), MO(MO) {}
+
virtual ~IRLayer();
/// Returns the ExecutionSession for this layer.
ExecutionSession &getExecutionSession() { return ES; }
+ /// Get the mangling options for this layer.
+ const IRMaterializationUnit::ManglingOptions *&getManglingOptions() const {
+ return MO;
+ }
+
/// Sets the CloneToNewContextOnEmit flag (false by default).
///
/// When set, IR modules added to this layer will be cloned on to a new
@@ -57,49 +104,15 @@ public:
private:
bool CloneToNewContextOnEmit = false;
ExecutionSession &ES;
-};
-
-/// IRMaterializationUnit is a convenient base class for MaterializationUnits
-/// wrapping LLVM IR. Represents materialization responsibility for all symbols
-/// in the given module. If symbols are overridden by other definitions, then
-/// their linkage is changed to available-externally.
-class IRMaterializationUnit : public MaterializationUnit {
-public:
- using SymbolNameToDefinitionMap = std::map<SymbolStringPtr, GlobalValue *>;
-
- /// Create an IRMaterializationLayer. Scans the module to build the
- /// SymbolFlags and SymbolToDefinition maps.
- IRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM,
- VModuleKey K);
-
- /// Create an IRMaterializationLayer from a module, and pre-existing
- /// SymbolFlags and SymbolToDefinition maps. The maps must provide
- /// entries for each definition in M.
- /// This constructor is useful for delegating work from one
- /// IRMaterializationUnit to another.
- IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K,
- SymbolFlagsMap SymbolFlags,
- SymbolNameToDefinitionMap SymbolToDefinition);
-
- /// Return the ModuleIdentifier as the name for this MaterializationUnit.
- StringRef getName() const override;
-
- const ThreadSafeModule &getModule() const { return TSM; }
-
-protected:
- ThreadSafeModule TSM;
- SymbolNameToDefinitionMap SymbolToDefinition;
-
-private:
- void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
+ const IRMaterializationUnit::ManglingOptions *&MO;
};
/// MaterializationUnit that materializes modules by calling the 'emit' method
/// on the given IRLayer.
class BasicIRLayerMaterializationUnit : public IRMaterializationUnit {
public:
- BasicIRLayerMaterializationUnit(IRLayer &L, VModuleKey K,
- ThreadSafeModule TSM);
+ BasicIRLayerMaterializationUnit(IRLayer &L, const ManglingOptions &MO,
+ ThreadSafeModule TSM, VModuleKey K);
private:
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
index f6b86bb23167..97a3dc365457 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
@@ -182,8 +182,8 @@ public:
IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer,
Speculator &Spec, MangleAndInterner &Mangle,
ResultEval Interpreter)
- : IRLayer(ES), NextLayer(BaseLayer), S(Spec), Mangle(Mangle),
- QueryAnalysis(Interpreter) {}
+ : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer),
+ S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {}
void emit(MaterializationResponsibility R, ThreadSafeModule TSM);
diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h
index 9522c4742244..61a1bd405a4d 100644
--- a/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -99,7 +99,8 @@ public:
/// Explicitly trigger a crash recovery in the current process, and
/// return failure from RunSafely(). This function does not return.
- void HandleCrash();
+ LLVM_ATTRIBUTE_NORETURN
+ void HandleExit(int RetCode);
/// In case of a crash, this is the crash identifier.
int RetCode = 0;
diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h
index 67e37912519b..e934b7413c17 100644
--- a/llvm/include/llvm/Support/Process.h
+++ b/llvm/include/llvm/Support/Process.h
@@ -201,6 +201,12 @@ public:
/// Get the result of a process wide random number generator. The
/// generator will be automatically seeded in non-deterministic fashion.
static unsigned GetRandomNumber();
+
+ /// Equivalent to ::exit(), except when running inside a CrashRecoveryContext.
+ /// In that case, the control flow will resume after RunSafely(), like for a
+ /// crash, rather than exiting the current process.
+ LLVM_ATTRIBUTE_NORETURN
+ static void Exit(int RetCode);
};
}
diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 63ff00afc2ae..ababa1d61f66 100644
--- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -62,6 +62,8 @@ public:
typedef std::function<void(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM)>
ExtensionFn;
+ typedef int GlobalExtensionID;
+
enum ExtensionPointTy {
/// EP_EarlyAsPossible - This extension point allows adding passes before
/// any other transformations, allowing them to see the code as it is coming
@@ -193,7 +195,17 @@ public:
/// Adds an extension that will be used by all PassManagerBuilder instances.
/// This is intended to be used by plugins, to register a set of
/// optimisations to run automatically.
- static void addGlobalExtension(ExtensionPointTy Ty, ExtensionFn Fn);
+ ///
+ /// \returns A global extension identifier that can be used to remove the
+ /// extension.
+ static GlobalExtensionID addGlobalExtension(ExtensionPointTy Ty,
+ ExtensionFn Fn);
+ /// Removes an extension that was previously added using addGlobalExtension.
+ /// This is also intended to be used by plugins, to remove any extension that
+ /// was previously registered before being unloaded.
+ ///
+ /// \param ExtensionID Identifier of the extension to be removed.
+ static void removeGlobalExtension(GlobalExtensionID ExtensionID);
void addExtension(ExtensionPointTy Ty, ExtensionFn Fn);
private:
@@ -222,10 +234,20 @@ public:
/// used by optimizer plugins to allow all front ends to transparently use
/// them. Create a static instance of this class in your plugin, providing a
/// private function that the PassManagerBuilder can use to add your passes.
-struct RegisterStandardPasses {
+class RegisterStandardPasses {
+ PassManagerBuilder::GlobalExtensionID ExtensionID;
+
+public:
RegisterStandardPasses(PassManagerBuilder::ExtensionPointTy Ty,
PassManagerBuilder::ExtensionFn Fn) {
- PassManagerBuilder::addGlobalExtension(Ty, std::move(Fn));
+ ExtensionID = PassManagerBuilder::addGlobalExtension(Ty, std::move(Fn));
+ }
+
+ ~RegisterStandardPasses() {
+ // If the collection holding the global extensions is destroyed after the
+ // plugin is unloaded, the extension has to be removed here. Indeed, the
+ // destructor of the ExtensionFn may reference code in the plugin.
+ PassManagerBuilder::removeGlobalExtension(ExtensionID);
}
};
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3516f4a7b370..20cd9da31fbd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -709,15 +709,21 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit M NOPs for -fpatchable-function-entry=N,M where M>0. We arbitrarily
// place prefix data before NOPs.
unsigned PatchableFunctionPrefix = 0;
+ unsigned PatchableFunctionEntry = 0;
(void)F.getFnAttribute("patchable-function-prefix")
.getValueAsString()
.getAsInteger(10, PatchableFunctionPrefix);
+ (void)F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, PatchableFunctionEntry);
if (PatchableFunctionPrefix) {
CurrentPatchableFunctionEntrySym =
OutContext.createLinkerPrivateTempSymbol();
OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
emitNops(PatchableFunctionPrefix);
- } else {
+ } else if (PatchableFunctionEntry) {
+ // May be reassigned when emitting the body, to reference the label after
+ // the initial BTI (AArch64) or endbr32/endbr64 (x86).
CurrentPatchableFunctionEntrySym = CurrentFnBegin;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 38011102c7b3..e97bcd62e8c7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -968,8 +968,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else {
- DIE *CalleeDIE = getDIE(CalleeSP);
- assert(CalleeDIE && "Could not find DIE for call site entry origin");
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index fa6800de7955..6e643ad26410 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -540,14 +540,6 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
-DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
- DICompileUnit *Unit = SP->getUnit();
- assert(SP->isDefinition() && "Subprogram not a definition");
- assert(Unit && "Subprogram definition without parent unit");
- auto &CU = getOrCreateDwarfCompileUnit(Unit);
- return *CU.getOrCreateSubprogramDIE(SP);
-}
-
/// Try to interpret values loaded into registers that forward parameters
/// for \p CallMI. Store parameters with interpreted value into \p Params.
static void collectCallSiteParameters(const MachineInstr *CallMI,
@@ -758,17 +750,6 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
if (!CalleeDecl || !CalleeDecl->getSubprogram())
continue;
CalleeSP = CalleeDecl->getSubprogram();
-
- if (CalleeSP->isDefinition()) {
- // Ensure that a subprogram DIE for the callee is available in the
- // appropriate CU.
- constructSubprogramDefinitionDIE(CalleeSP);
- } else {
- // Create the declaration DIE if it is missing. This is required to
- // support compilation of old bitcode with an incomplete list of
- // retained metadata.
- CU.getOrCreateSubprogramDIE(CalleeSP);
- }
}
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
@@ -924,6 +905,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
+ // Create DIEs for function declarations used for call site debug info.
+ for (auto Scope : DIUnit->getRetainedTypes())
+ if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
+ NewCU.getOrCreateSubprogramDIE(SP);
+
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index fd82b1f98055..f90dd48458ea 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -442,9 +442,6 @@ class DwarfDebug : public DebugHandlerBase {
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
- /// Construct a DIE for the subprogram definition \p SP and return it.
- DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
-
/// Construct DIEs for call site entries describing the calls in \p MF.
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
DIE &ScopeDIE, const MachineFunction &MF);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 1aba956c48de..53747aef77fd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -188,9 +188,8 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
/// Check whether the DIE for this MDNode can be shared across CUs.
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
- // When the MDNode can be part of the type system (this includes subprogram
- // declarations *and* subprogram definitions, even local definitions), the
- // DIE must be shared across CUs.
+ // When the MDNode can be part of the type system, the DIE can be shared
+ // across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
// level already) but may be implementable for some value in projects
@@ -198,7 +197,9 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
// together.
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return false;
- return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
+ return (isa<DIType>(D) ||
+ (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
+ !DD->generateTypeUnits();
}
DIE *DwarfUnit::getDIE(const DINode *D) const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 003db39fe5f9..7d77664fbf69 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6857,12 +6857,20 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
Value *Addr = Builder.CreateBitCast(
SI.getOperand(1),
SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
- if ((IsLE && Upper) || (!IsLE && !Upper))
+ const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
+ if (IsOffsetStore)
Addr = Builder.CreateGEP(
SplitStoreType, Addr,
ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
+ MaybeAlign Alignment(SI.getAlignment());
+ if (IsOffsetStore && Alignment) {
+ // When splitting the store in half, naturally one half will retain the
+ // alignment of the original wider store, regardless of whether it was
+ // over-aligned or not, while the other will require adjustment.
+ Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
+ }
Builder.CreateAlignedStore(
- V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
+ V, Addr, Alignment.hasValue() ? Alignment.getValue().value() : 0);
};
CreateSplitStore(LValue, false);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 17eca2b0301c..96e794b15a44 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1385,7 +1385,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression());
+ MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
} else if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
} else {
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 67d9dacda61b..3f6622723bdc 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -107,13 +107,9 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
- // DBG_VALUE insts now carry IR-level indirection in their DIExpression
- // rather than encoding it in the instruction itself.
- const DIExpression *DIExpr = cast<DIExpression>(Expr);
- DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref});
return insertInstr(BuildMI(getMF(), getDL(),
getTII().get(TargetOpcode::DBG_VALUE),
- /*IsIndirect*/ false, Reg, Variable, DIExpr));
+ /*IsIndirect*/ true, Reg, Variable, Expr));
}
MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
@@ -124,15 +120,11 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
- // DBG_VALUE insts now carry IR-level indirection in their DIExpression
- // rather than encoding it in the instruction itself.
- const DIExpression *DIExpr = cast<DIExpression>(Expr);
- DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref});
return buildInstr(TargetOpcode::DBG_VALUE)
.addFrameIndex(FI)
- .addReg(0)
+ .addImm(0)
.addMetadata(Variable)
- .addMetadata(DIExpr);
+ .addMetadata(Expr);
}
MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
@@ -156,7 +148,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
MIB.addReg(0U);
}
- return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr);
+ return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
}
MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 5870e20d4227..6e5593abb43e 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -524,6 +524,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
std::string Name = Globals[k]->getName();
+ GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
GlobalValue::DLLStorageClassTypes DLLStorage =
Globals[k]->getDLLStorageClass();
@@ -549,6 +550,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
Linkage, Name, GEP, &M);
+ GA->setVisibility(Visibility);
GA->setDLLStorageClass(DLLStorage);
}
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 2cc547a6b741..5b20a2482b7b 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -100,27 +100,28 @@ enum : unsigned { UndefLocNo = ~0U };
/// usage of the location.
class DbgValueLocation {
public:
- DbgValueLocation(unsigned LocNo)
- : LocNo(LocNo) {
+ DbgValueLocation(unsigned LocNo, bool WasIndirect)
+ : LocNo(LocNo), WasIndirect(WasIndirect) {
static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing");
assert(locNo() == LocNo && "location truncation");
}
- DbgValueLocation() : LocNo(0) {}
+ DbgValueLocation() : LocNo(0), WasIndirect(0) {}
unsigned locNo() const {
// Fix up the undef location number, which gets truncated.
return LocNo == INT_MAX ? UndefLocNo : LocNo;
}
+ bool wasIndirect() const { return WasIndirect; }
bool isUndef() const { return locNo() == UndefLocNo; }
DbgValueLocation changeLocNo(unsigned NewLocNo) const {
- return DbgValueLocation(NewLocNo);
+ return DbgValueLocation(NewLocNo, WasIndirect);
}
friend inline bool operator==(const DbgValueLocation &LHS,
const DbgValueLocation &RHS) {
- return LHS.LocNo == RHS.LocNo;
+ return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect;
}
friend inline bool operator!=(const DbgValueLocation &LHS,
@@ -129,7 +130,8 @@ public:
}
private:
- unsigned LocNo;
+ unsigned LocNo : 31;
+ unsigned WasIndirect : 1;
};
/// Map of where a user value is live, and its location.
@@ -166,6 +168,10 @@ class UserValue {
/// Map of slot indices where this value is live.
LocMap locInts;
+ /// Set of interval start indexes that have been trimmed to the
+ /// lexical scope.
+ SmallSet<SlotIndex, 2> trimmedDefs;
+
/// Insert a DBG_VALUE into MBB at Idx for LocNo.
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
@@ -279,8 +285,8 @@ public:
void mapVirtRegs(LDVImpl *LDV);
/// Add a definition point to this value.
- void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
- DbgValueLocation Loc(getLocationNo(LocMO));
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
+ DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
// Add a singular (Idx,Idx) -> Loc mapping.
LocMap::iterator I = locInts.find(Idx);
if (!I.valid() || I.start() != Idx)
@@ -315,10 +321,11 @@ public:
///
/// \param LI Scan for copies of the value in LI->reg.
/// \param LocNo Location number of LI->reg.
+ /// \param WasIndirect Indicates if the original use of LI->reg was indirect
/// \param Kills Points where the range of LocNo could be extended.
/// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
void addDefsFromCopies(
- LiveInterval *LI, unsigned LocNo,
+ LiveInterval *LI, unsigned LocNo, bool WasIndirect,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS);
@@ -538,6 +545,8 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
OS << "undef";
else {
OS << I.value().locNo();
+ if (I.value().wasIndirect())
+ OS << " ind";
}
}
for (unsigned i = 0, e = locations.size(); i != e; ++i) {
@@ -646,18 +655,19 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
}
// Get or create the UserValue for (variable,offset) here.
- assert(!MI.getOperand(1).isImm() && "DBG_VALUE with indirect flag before "
- "LiveDebugVariables");
+ bool IsIndirect = MI.getOperand(1).isImm();
+ if (IsIndirect)
+ assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
const DILocalVariable *Var = MI.getDebugVariable();
const DIExpression *Expr = MI.getDebugExpression();
UserValue *UV =
getUserValue(Var, Expr, MI.getDebugLoc());
if (!Discard)
- UV->addDef(Idx, MI.getOperand(0));
+ UV->addDef(Idx, MI.getOperand(0), IsIndirect);
else {
MachineOperand MO = MachineOperand::CreateReg(0U, false);
MO.setIsDebug();
- UV->addDef(Idx, MO);
+ UV->addDef(Idx, MO, false);
}
return true;
}
@@ -765,7 +775,7 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
}
void UserValue::addDefsFromCopies(
- LiveInterval *LI, unsigned LocNo,
+ LiveInterval *LI, unsigned LocNo, bool WasIndirect,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS) {
@@ -829,7 +839,7 @@ void UserValue::addDefsFromCopies(
MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
- DbgValueLocation NewLoc(LocNo);
+ DbgValueLocation NewLoc(LocNo, WasIndirect);
I.insert(Idx, Idx.getNextSlot(), NewLoc);
NewDefs.push_back(std::make_pair(Idx, NewLoc));
break;
@@ -877,7 +887,8 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// sub-register in that regclass). For now, simply skip handling copies if
// a sub-register is involved.
if (LI && !LocMO.getSubReg())
- addDefsFromCopies(LI, Loc.locNo(), Kills, Defs, MRI, LIS);
+ addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
+ LIS);
continue;
}
@@ -910,6 +921,11 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
SlotIndex RStart = LIS.getInstructionIndex(*Range.first);
SlotIndex REnd = LIS.getInstructionIndex(*Range.second);
+ // Variable locations at the first instruction of a block should be
+ // based on the block's SlotIndex, not the first instruction's index.
+ if (Range.first == Range.first->getParent()->begin())
+ RStart = LIS.getSlotIndexes()->getIndexBefore(*Range.first);
+
// At the start of each iteration I has been advanced so that
// I.stop() >= PrevEnd. Check for overlap.
if (PrevEnd && I.start() < PrevEnd) {
@@ -922,7 +938,8 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
++I;
// If the interval also overlaps the start of the "next" (i.e.
- // current) range create a new interval for the remainder
+ // current) range create a new interval for the remainder (which
+ // may be further trimmed).
if (RStart < IStop)
I.insert(RStart, IStop, Loc);
}
@@ -932,6 +949,13 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
if (!I.valid())
return;
+ if (I.start() < RStart) {
+ // Interval start overlaps range - trim to the scope range.
+ I.setStartUnchecked(RStart);
+ // Remember that this interval was trimmed.
+ trimmedDefs.insert(RStart);
+ }
+
// The end of a lexical scope range is the last instruction in the
// range. To convert to an interval we need the index of the
// instruction after it.
@@ -1306,14 +1330,21 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// that the original virtual register was a pointer. Also, add the stack slot
// offset for the spilled register to the expression.
const DIExpression *Expr = Expression;
- if (Spilled)
- Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, SpillOffset);
+ uint8_t DIExprFlags = DIExpression::ApplyOffset;
+ bool IsIndirect = Loc.wasIndirect();
+ if (Spilled) {
+ if (IsIndirect)
+ DIExprFlags |= DIExpression::DerefAfter;
+ Expr =
+ DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
+ IsIndirect = true;
+ }
assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
do {
BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
- Spilled, MO, Variable, Expr);
+ IsIndirect, MO, Variable, Expr);
// Continue and insert DBG_VALUES after every redefinition of register
// associated with the debug value within the range
@@ -1345,6 +1376,12 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
bool Spilled = SpillIt != SpillOffsets.end();
unsigned SpillOffset = Spilled ? SpillIt->second : 0;
+ // If the interval start was trimmed to the lexical scope insert the
+ // DBG_VALUE at the previous index (otherwise it appears after the
+ // first instruction in the range).
+ if (trimmedDefs.count(Start))
+ Start = Start.getPrevIndex();
+
LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 2bec8613e79c..8294591b7326 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1393,11 +1393,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
"Expected inlined-at fields to agree");
// A dbg.declare describes the address of a source variable, so lower it
// into an indirect DBG_VALUE.
- auto *Expr = DI->getExpression();
- Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false,
- *Op, DI->getVariable(), Expr);
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
+ *Op, DI->getVariable(), DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
@@ -1421,19 +1419,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
- .addReg(0U)
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
- .addReg(0U)
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
- .addReg(0U)
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (unsigned Reg = lookUpRegForValue(V)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c613c2540628..176d71643e1a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -677,7 +677,7 @@ MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- const DIExpression *Expr = SD->getExpression();
+ MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -701,11 +701,12 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SD->getFrameIx());
-
if (SD->isIndirect())
- Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
- FrameMI.addReg(0);
+ // Push [fi + 0] onto the DIExpression stack.
+ FrameMI.addImm(0);
+ else
+ // Push fi onto the DIExpression stack.
+ FrameMI.addReg(0);
return FrameMI.addMetadata(Var).addMetadata(Expr);
}
// Otherwise, we're going to create an instruction here.
@@ -751,9 +752,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// Indirect addressing is indicated by an Imm as the second parameter.
if (SD->isIndirect())
- Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
- MIB.addReg(0U, RegState::Debug);
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U, RegState::Debug);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 974914d00d05..d809139d3807 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4716,11 +4716,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
break;
case ISD::VECREDUCE_FMAX:
NeutralElem = DAG.getConstantFP(
- std::numeric_limits<double>::infinity(), dl, ElemVT);
+ -std::numeric_limits<double>::infinity(), dl, ElemVT);
break;
case ISD::VECREDUCE_FMIN:
NeutralElem = DAG.getConstantFP(
- -std::numeric_limits<double>::infinity(), dl, ElemVT);
+ std::numeric_limits<double>::infinity(), dl, ElemVT);
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 728d963a916f..421ff3e7d472 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5622,6 +5622,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+ bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
@@ -5643,6 +5644,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
+ IsIndirect = IsDbgDeclare;
}
}
@@ -5691,7 +5693,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
RegAndSize.first, Variable, *FragmentExpr));
}
};
@@ -5709,6 +5711,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
Op = MachineOperand::CreateReg(VMI->second, false);
+ IsIndirect = IsDbgDeclare;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
@@ -5722,28 +5725,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
-
- // If the argument arrives in a stack slot, then what the IR thought was a
- // normal Value is actually in memory, and we must add a deref to load it.
- if (Op->isFI()) {
- int FI = Op->getIndex();
- unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI);
- if (Expr->isImplicit()) {
- SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
- Expr = DIExpression::prependOpcodes(Expr, Ops);
- } else {
- Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
- }
- }
-
- // If this location was specified with a dbg.declare, then it and its
- // expression calculate the address of the variable. Append a deref to
- // force it to be a memory location.
- if (IsDbgDeclare)
- Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
+ IsIndirect = (Op->isReg()) ? IsIndirect : true;
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
*Op, Variable, Expr));
return true;
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 40bc36c3030b..9d4fdc6b624c 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -960,7 +960,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// Remap all instructions to the new stack slots.
- std::vector<std::vector<MachineMemOperand *>> SSRefs(MFI->getObjectIndexEnd());
+ std::vector<std::vector<MachineMemOperand *>> SSRefs(
+ MFI->getObjectIndexEnd());
for (MachineBasicBlock &BB : *MF)
for (MachineInstr &I : BB) {
// Skip lifetime markers. We'll remove them soon.
@@ -1074,12 +1075,13 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// Rewrite MachineMemOperands that reference old frame indices.
- for (auto E : enumerate(SSRefs)) {
- const PseudoSourceValue *NewSV =
- MF->getPSVManager().getFixedStack(SlotRemap[E.index()]);
- for (MachineMemOperand *Ref : E.value())
- Ref->setValue(NewSV);
- }
+ for (auto E : enumerate(SSRefs))
+ if (!E.value().empty()) {
+ const PseudoSourceValue *NewSV =
+ MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
+ for (MachineMemOperand *Ref : E.value())
+ Ref->setValue(NewSV);
+ }
// Update the location of C++ catch objects for the MSVC personality routine.
if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 4522484222f5..e8b39c037693 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -847,8 +847,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
// Iterate through, and add to, a tree of operands and users in the use-def.
while (!WorkList.empty()) {
- Value *V = WorkList.back();
- WorkList.pop_back();
+ Value *V = WorkList.pop_back_val();
if (CurrentVisited.count(V))
continue;
@@ -917,7 +916,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
++ToPromote;
}
- // DAG optimisations should be able to handle these cases better, especially
+ // DAG optimizations should be able to handle these cases better, especially
// for function arguments.
if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
return false;
@@ -941,6 +940,9 @@ bool TypePromotion::runOnFunction(Function &F) {
if (!TPC)
return false;
+ AllVisited.clear();
+ SafeToPromote.clear();
+ SafeWrap.clear();
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
const TargetMachine &TM = TPC->getTM<TargetMachine>();
@@ -998,6 +1000,10 @@ bool TypePromotion::runOnFunction(Function &F) {
if (MadeChange)
LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n");
+ AllVisited.clear();
+ SafeToPromote.clear();
+ SafeWrap.clear();
+
return MadeChange;
}
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index f26835ff8a08..9c504da611e0 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -67,9 +67,11 @@ namespace orc {
class PartitioningIRMaterializationUnit : public IRMaterializationUnit {
public:
- PartitioningIRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM,
- VModuleKey K, CompileOnDemandLayer &Parent)
- : IRMaterializationUnit(ES, std::move(TSM), std::move(K)),
+ PartitioningIRMaterializationUnit(ExecutionSession &ES,
+ const ManglingOptions &MO,
+ ThreadSafeModule TSM, VModuleKey K,
+ CompileOnDemandLayer &Parent)
+ : IRMaterializationUnit(ES, MO, std::move(TSM), std::move(K)),
Parent(Parent) {}
PartitioningIRMaterializationUnit(
@@ -111,7 +113,8 @@ CompileOnDemandLayer::compileWholeModule(GlobalValueSet Requested) {
CompileOnDemandLayer::CompileOnDemandLayer(
ExecutionSession &ES, IRLayer &BaseLayer, LazyCallThroughManager &LCTMgr,
IndirectStubsManagerBuilder BuildIndirectStubsManager)
- : IRLayer(ES), BaseLayer(BaseLayer), LCTMgr(LCTMgr),
+ : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
+ LCTMgr(LCTMgr),
BuildIndirectStubsManager(std::move(BuildIndirectStubsManager)) {}
void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) {
@@ -136,27 +139,23 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R,
TSM.withModuleDo([&](Module &M) {
// First, do some cleanup on the module:
cleanUpModule(M);
-
- MangleAndInterner Mangle(ES, M.getDataLayout());
- for (auto &GV : M.global_values()) {
- if (GV.isDeclaration() || GV.hasLocalLinkage() ||
- GV.hasAppendingLinkage())
- continue;
-
- auto Name = Mangle(GV.getName());
- auto Flags = JITSymbolFlags::fromGlobalValue(GV);
- if (Flags.isCallable())
- Callables[Name] = SymbolAliasMapEntry(Name, Flags);
- else
- NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
- }
});
+ for (auto &KV : R.getSymbols()) {
+ auto &Name = KV.first;
+ auto &Flags = KV.second;
+ if (Flags.isCallable())
+ Callables[Name] = SymbolAliasMapEntry(Name, Flags);
+ else
+ NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
+ }
+
// Create a partitioning materialization unit and lodge it with the
// implementation dylib.
if (auto Err = PDR.getImplDylib().define(
std::make_unique<PartitioningIRMaterializationUnit>(
- ES, std::move(TSM), R.getVModuleKey(), *this))) {
+ ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(),
+ *this))) {
ES.reportError(std::move(Err));
R.failMaterialization();
return;
@@ -316,7 +315,7 @@ void CompileOnDemandLayer::emitPartition(
}
R.replace(std::make_unique<PartitioningIRMaterializationUnit>(
- ES, std::move(TSM), R.getVModuleKey(), *this));
+ ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), *this));
BaseLayer.emit(std::move(R), std::move(*ExtractedTSM));
}
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
index f5671d90420a..160e5ba50311 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -24,11 +24,20 @@
namespace llvm {
namespace orc {
+IRMaterializationUnit::ManglingOptions
+irManglingOptionsFromTargetOptions(const TargetOptions &Opts) {
+ IRMaterializationUnit::ManglingOptions MO;
+
+ MO.EmulatedTLS = Opts.EmulatedTLS;
+
+ return MO;
+}
+
/// Compile a Module to an ObjectFile.
-SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
+Expected<SimpleCompiler::CompileResult> SimpleCompiler::operator()(Module &M) {
CompileResult CachedObject = tryToLoadFromObjectCache(M);
if (CachedObject)
- return CachedObject;
+ return std::move(CachedObject);
SmallVector<char, 0> ObjBufferSV;
@@ -38,7 +47,8 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
legacy::PassManager PM;
MCContext *Ctx;
if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
- llvm_unreachable("Target does not support MC emission.");
+ return make_error<StringError>("Target does not support MC emission",
+ inconvertibleErrorCode());
PM.run(M);
}
@@ -47,14 +57,11 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
- if (Obj) {
- notifyObjectCompiled(M, *ObjBuffer);
- return std::move(ObjBuffer);
- }
+ if (!Obj)
+ return Obj.takeError();
- // TODO: Actually report errors helpfully.
- consumeError(Obj.takeError());
- return nullptr;
+ notifyObjectCompiled(M, *ObjBuffer);
+ return std::move(ObjBuffer);
}
SimpleCompiler::CompileResult
@@ -73,9 +80,11 @@ void SimpleCompiler::notifyObjectCompiled(const Module &M,
ConcurrentIRCompiler::ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
ObjectCache *ObjCache)
- : JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
+ : IRCompiler(irManglingOptionsFromTargetOptions(JTMB.getOptions())),
+ JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
-std::unique_ptr<MemoryBuffer> ConcurrentIRCompiler::operator()(Module &M) {
+Expected<std::unique_ptr<MemoryBuffer>>
+ConcurrentIRCompiler::operator()(Module &M) {
auto TM = cantFail(JTMB.createTargetMachine());
SimpleCompiler C(*TM, ObjCache);
return C(M);
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 63ef889dae46..ec706cf63d35 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -468,15 +468,19 @@ Error MaterializationResponsibility::notifyEmitted() {
}
Error MaterializationResponsibility::defineMaterializing(
- const SymbolFlagsMap &NewSymbolFlags) {
- // Add the given symbols to this responsibility object.
- // It's ok if we hit a duplicate here: In that case the new version will be
- // discarded, and the JITDylib::defineMaterializing method will return a
- // duplicate symbol error.
- for (auto &KV : NewSymbolFlags)
- SymbolFlags.insert(KV);
+ SymbolFlagsMap NewSymbolFlags) {
- return JD.defineMaterializing(NewSymbolFlags);
+ LLVM_DEBUG({
+ dbgs() << "In " << JD.getName() << " defining materializing symbols "
+ << NewSymbolFlags << "\n";
+ });
+ if (auto AcceptedDefs = JD.defineMaterializing(std::move(NewSymbolFlags))) {
+ // Add all newly accepted symbols to this responsibility object.
+ for (auto &KV : *AcceptedDefs)
+ SymbolFlags.insert(KV);
+ return Error::success();
+ } else
+ return AcceptedDefs.takeError();
}
void MaterializationResponsibility::failMaterialization() {
@@ -809,31 +813,52 @@ void JITDylib::removeGenerator(DefinitionGenerator &G) {
});
}
-Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
- return ES.runSessionLocked([&]() -> Error {
+Expected<SymbolFlagsMap>
+JITDylib::defineMaterializing(SymbolFlagsMap SymbolFlags) {
+
+ return ES.runSessionLocked([&]() -> Expected<SymbolFlagsMap> {
std::vector<SymbolTable::iterator> AddedSyms;
+ std::vector<SymbolFlagsMap::iterator> RejectedWeakDefs;
- for (auto &KV : SymbolFlags) {
- SymbolTable::iterator EntryItr;
- bool Added;
+ for (auto SFItr = SymbolFlags.begin(), SFEnd = SymbolFlags.end();
+ SFItr != SFEnd; ++SFItr) {
- std::tie(EntryItr, Added) =
- Symbols.insert(std::make_pair(KV.first, SymbolTableEntry(KV.second)));
+ auto &Name = SFItr->first;
+ auto &Flags = SFItr->second;
- if (Added) {
- AddedSyms.push_back(EntryItr);
- EntryItr->second.setState(SymbolState::Materializing);
- } else {
- // Remove any symbols already added.
- for (auto &SI : AddedSyms)
- Symbols.erase(SI);
+ auto EntryItr = Symbols.find(Name);
- // FIXME: Return all duplicates.
- return make_error<DuplicateDefinition>(*KV.first);
- }
+ // If the entry already exists...
+ if (EntryItr != Symbols.end()) {
+
+ // If this is a strong definition then error out.
+ if (!Flags.isWeak()) {
+ // Remove any symbols already added.
+ for (auto &SI : AddedSyms)
+ Symbols.erase(SI);
+
+ // FIXME: Return all duplicates.
+ return make_error<DuplicateDefinition>(*Name);
+ }
+
+ // Otherwise just make a note to discard this symbol after the loop.
+ RejectedWeakDefs.push_back(SFItr);
+ continue;
+ } else
+ EntryItr =
+ Symbols.insert(std::make_pair(Name, SymbolTableEntry(Flags))).first;
+
+ AddedSyms.push_back(EntryItr);
+ EntryItr->second.setState(SymbolState::Materializing);
}
- return Error::success();
+ // Remove any rejected weak definitions from the SymbolFlags map.
+ while (!RejectedWeakDefs.empty()) {
+ SymbolFlags.erase(RejectedWeakDefs.back());
+ RejectedWeakDefs.pop_back();
+ }
+
+ return SymbolFlags;
});
}
diff --git a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index d311f34179c7..023940dc8298 100644
--- a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -11,9 +11,14 @@
namespace llvm {
namespace orc {
+IRCompileLayer::IRCompiler::~IRCompiler() {}
+
IRCompileLayer::IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
- CompileFunction Compile)
- : IRLayer(ES), BaseLayer(BaseLayer), Compile(std::move(Compile)) {}
+ std::unique_ptr<IRCompiler> Compile)
+ : IRLayer(ES, ManglingOpts), BaseLayer(BaseLayer),
+ Compile(std::move(Compile)) {
+ ManglingOpts = &this->Compile->getManglingOptions();
+}
void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
std::lock_guard<std::mutex> Lock(IRLayerMutex);
@@ -24,7 +29,7 @@ void IRCompileLayer::emit(MaterializationResponsibility R,
ThreadSafeModule TSM) {
assert(TSM && "Module must not be null");
- if (auto Obj = TSM.withModuleDo(Compile)) {
+ if (auto Obj = TSM.withModuleDo(*Compile)) {
{
std::lock_guard<std::mutex> Lock(IRLayerMutex);
if (NotifyCompiled)
diff --git a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index 845ecc71eb87..511248f83b25 100644
--- a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -12,10 +12,10 @@
namespace llvm {
namespace orc {
-IRTransformLayer::IRTransformLayer(ExecutionSession &ES,
- IRLayer &BaseLayer,
- TransformFunction Transform)
- : IRLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+IRTransformLayer::IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
+ TransformFunction Transform)
+ : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
+ Transform(std::move(Transform)) {}
void IRTransformLayer::emit(MaterializationResponsibility R,
ThreadSafeModule TSM) {
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 54473ab46423..6189056b3d9f 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -96,8 +96,10 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
auto ObjLinkingLayer =
std::make_unique<RTDyldObjectLinkingLayer>(ES, std::move(GetMemMgr));
- if (S.JTMB->getTargetTriple().isOSBinFormatCOFF())
+ if (S.JTMB->getTargetTriple().isOSBinFormatCOFF()) {
ObjLinkingLayer->setOverrideObjectFlagsWithResponsibilityFlags(true);
+ ObjLinkingLayer->setAutoClaimResponsibilityForObjectSymbols(true);
+ }
// FIXME: Explicit conversion to std::unique_ptr<ObjectLayer> added to silence
// errors from some GCC / libstdc++ bots. Remove this conversion (i.e.
@@ -105,7 +107,7 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
return std::unique_ptr<ObjectLayer>(std::move(ObjLinkingLayer));
}
-Expected<IRCompileLayer::CompileFunction>
+Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>
LLJIT::createCompileFunction(LLJITBuilderState &S,
JITTargetMachineBuilder JTMB) {
@@ -116,13 +118,13 @@ LLJIT::createCompileFunction(LLJITBuilderState &S,
// Otherwise default to creating a SimpleCompiler, or ConcurrentIRCompiler,
// depending on the number of threads requested.
if (S.NumCompileThreads > 0)
- return ConcurrentIRCompiler(std::move(JTMB));
+ return std::make_unique<ConcurrentIRCompiler>(std::move(JTMB));
auto TM = JTMB.createTargetMachine();
if (!TM)
return TM.takeError();
- return TMOwningSimpleCompiler(std::move(*TM));
+ return std::make_unique<TMOwningSimpleCompiler>(std::move(*TM));
}
LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index 580e2682ec8c..ebc7801f11ff 100644
--- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/Layer.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
@@ -15,15 +16,15 @@
namespace llvm {
namespace orc {
-IRLayer::IRLayer(ExecutionSession &ES) : ES(ES) {}
IRLayer::~IRLayer() {}
Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) {
return JD.define(std::make_unique<BasicIRLayerMaterializationUnit>(
- *this, std::move(K), std::move(TSM)));
+ *this, *getManglingOptions(), std::move(TSM), std::move(K)));
}
IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
+ const ManglingOptions &MO,
ThreadSafeModule TSM, VModuleKey K)
: MaterializationUnit(SymbolFlagsMap(), std::move(K)), TSM(std::move(TSM)) {
@@ -32,12 +33,44 @@ IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
MangleAndInterner Mangle(ES, this->TSM.getModuleUnlocked()->getDataLayout());
this->TSM.withModuleDo([&](Module &M) {
for (auto &G : M.global_values()) {
- if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() &&
- !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) {
- auto MangledName = Mangle(G.getName());
- SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G);
- SymbolToDefinition[MangledName] = &G;
+ // Skip globals that don't generate symbols.
+ if (!G.hasName() || G.isDeclaration() || G.hasLocalLinkage() ||
+ G.hasAvailableExternallyLinkage() || G.hasAppendingLinkage())
+ continue;
+
+ // thread locals generate different symbols depending on whether or not
+ // emulated TLS is enabled.
+ if (G.isThreadLocal() && MO.EmulatedTLS) {
+ auto &GV = cast<GlobalVariable>(G);
+
+ auto Flags = JITSymbolFlags::fromGlobalValue(GV);
+
+ auto EmuTLSV = Mangle(("__emutls_v." + GV.getName()).str());
+ SymbolFlags[EmuTLSV] = Flags;
+ SymbolToDefinition[EmuTLSV] = &GV;
+
+ // If this GV has a non-zero initializer we'll need to emit an
+ // __emutls_t symbol too.
+ if (GV.hasInitializer()) {
+ const auto *InitVal = GV.getInitializer();
+
+ // Skip zero-initializers.
+ if (isa<ConstantAggregateZero>(InitVal))
+ continue;
+ const auto *InitIntValue = dyn_cast<ConstantInt>(InitVal);
+ if (InitIntValue && InitIntValue->isZero())
+ continue;
+
+ auto EmuTLST = Mangle(("__emutls_t." + GV.getName()).str());
+ SymbolFlags[EmuTLST] = Flags;
+ }
+ continue;
}
+
+ // Otherwise we just need a normal linker mangling.
+ auto MangledName = Mangle(G.getName());
+ SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G);
+ SymbolToDefinition[MangledName] = &G;
}
});
}
@@ -72,8 +105,8 @@ void IRMaterializationUnit::discard(const JITDylib &JD,
}
BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit(
- IRLayer &L, VModuleKey K, ThreadSafeModule TSM)
- : IRMaterializationUnit(L.getExecutionSession(), std::move(TSM),
+ IRLayer &L, const ManglingOptions &MO, ThreadSafeModule TSM, VModuleKey K)
+ : IRMaterializationUnit(L.getExecutionSession(), MO, std::move(TSM),
std::move(K)),
L(L), K(std::move(K)) {}
diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index a92264c0be14..ff8289a264c8 100644
--- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/Object/COFF.h"
namespace {
@@ -160,6 +161,39 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
std::set<StringRef> &InternalSymbols) {
SymbolFlagsMap ExtraSymbolsToClaim;
SymbolMap Symbols;
+
+ // Hack to support COFF constant pool comdats introduced during compilation:
+ // (See http://llvm.org/PR40074)
+ if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(&Obj)) {
+ auto &ES = getExecutionSession();
+
+ // For all resolved symbols that are not already in the responsibility set:
+ // check whether the symbol is in a comdat section and if so mark it as
+ // weak.
+ for (auto &Sym : COFFObj->symbols()) {
+ if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined)
+ continue;
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto I = Resolved.find(*Name);
+
+ // Skip unresolved symbols, internal symbols, and symbols that are
+ // already in the responsibility set.
+ if (I == Resolved.end() || InternalSymbols.count(*Name) ||
+ R.getSymbols().count(ES.intern(*Name)))
+ continue;
+ auto Sec = Sym.getSection();
+ if (!Sec)
+ return Sec.takeError();
+ if (*Sec == COFFObj->section_end())
+ continue;
+ auto &COFFSec = *COFFObj->getCOFFSection(**Sec);
+ if (COFFSec.Characteristics & COFF::IMAGE_SCN_LNK_COMDAT)
+ I->second.setFlags(I->second.getFlags() | JITSymbolFlags::Weak);
+ }
+ }
+
for (auto &KV : Resolved) {
// Scan the symbols and add them to the Symbols map for resolution.
@@ -184,10 +218,17 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
Symbols[InternedName] = JITEvaluatedSymbol(KV.second.getAddress(), Flags);
}
- if (!ExtraSymbolsToClaim.empty())
+ if (!ExtraSymbolsToClaim.empty()) {
if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim))
return Err;
+ // If we claimed responsibility for any weak symbols but were rejected then
+ // we need to remove them from the resolved set.
+ for (auto &KV : ExtraSymbolsToClaim)
+ if (KV.second.isWeak() && !R.getSymbols().count(KV.first))
+ Symbols.erase(KV.first);
+ }
+
if (auto Err = R.notifyResolved(Symbols)) {
R.failMaterialization();
return Err;
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index acf0e4afef27..1f978d136049 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2651,8 +2651,10 @@ void AssemblyWriter::printModule(const Module *M) {
printUseLists(nullptr);
// Output all of the functions.
- for (const Function &F : *M)
+ for (const Function &F : *M) {
+ Out << '\n';
printFunction(&F);
+ }
assert(UseListOrders.empty() && "All use-lists should have been consumed");
// Output all attribute groups.
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index e13656ed1c10..af934cc8b9be 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -1277,11 +1277,17 @@ Error IRLinker::linkModuleFlagsMetadata() {
}
// Diagnose inconsistent merge behavior types.
- if (SrcBehaviorValue != DstBehaviorValue)
- return stringErr("linking module flags '" + ID->getString() +
- "': IDs have conflicting behaviors in '" +
- SrcM->getModuleIdentifier() + "' and '" +
- DstM.getModuleIdentifier() + "'");
+ if (SrcBehaviorValue != DstBehaviorValue) {
+ bool MaxAndWarn = (SrcBehaviorValue == Module::Max &&
+ DstBehaviorValue == Module::Warning) ||
+ (DstBehaviorValue == Module::Max &&
+ SrcBehaviorValue == Module::Warning);
+ if (!MaxAndWarn)
+ return stringErr("linking module flags '" + ID->getString() +
+ "': IDs have conflicting behaviors in '" +
+ SrcM->getModuleIdentifier() + "' and '" +
+ DstM.getModuleIdentifier() + "'");
+ }
auto replaceDstValue = [&](MDNode *New) {
Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
@@ -1290,6 +1296,40 @@ Error IRLinker::linkModuleFlagsMetadata() {
Flags[ID].first = Flag;
};
+ // Emit a warning if the values differ and either source or destination
+ // request Warning behavior.
+ if ((DstBehaviorValue == Module::Warning ||
+ SrcBehaviorValue == Module::Warning) &&
+ SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ std::string Str;
+ raw_string_ostream(Str)
+ << "linking module flags '" << ID->getString()
+ << "': IDs have conflicting values ('" << *SrcOp->getOperand(2)
+ << "' from " << SrcM->getModuleIdentifier() << " with '"
+ << *DstOp->getOperand(2) << "' from " << DstM.getModuleIdentifier()
+ << ')';
+ emitWarning(Str);
+ }
+
+ // Choose the maximum if either source or destination request Max behavior.
+ if (DstBehaviorValue == Module::Max || SrcBehaviorValue == Module::Max) {
+ ConstantInt *DstValue =
+ mdconst::extract<ConstantInt>(DstOp->getOperand(2));
+ ConstantInt *SrcValue =
+ mdconst::extract<ConstantInt>(SrcOp->getOperand(2));
+
+ // The resulting flag should have a Max behavior, and contain the maximum
+ // value from between the source and destination values.
+ Metadata *FlagOps[] = {
+ (DstBehaviorValue != Module::Max ? SrcOp : DstOp)->getOperand(0), ID,
+ (SrcValue->getZExtValue() > DstValue->getZExtValue() ? SrcOp : DstOp)
+ ->getOperand(2)};
+ MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
+ DstModFlags->setOperand(DstIndex, Flag);
+ Flags[ID].first = Flag;
+ continue;
+ }
+
// Perform the merge for standard behavior types.
switch (SrcBehaviorValue) {
case Module::Require:
@@ -1305,26 +1345,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
continue;
}
case Module::Warning: {
- // Emit a warning if the values differ.
- if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
- std::string str;
- raw_string_ostream(str)
- << "linking module flags '" << ID->getString()
- << "': IDs have conflicting values ('" << *SrcOp->getOperand(2)
- << "' from " << SrcM->getModuleIdentifier() << " with '"
- << *DstOp->getOperand(2) << "' from " << DstM.getModuleIdentifier()
- << ')';
- emitWarning(str);
- }
- continue;
+ break;
}
case Module::Max: {
- ConstantInt *DstValue =
- mdconst::extract<ConstantInt>(DstOp->getOperand(2));
- ConstantInt *SrcValue =
- mdconst::extract<ConstantInt>(SrcOp->getOperand(2));
- if (SrcValue->getZExtValue() > DstValue->getZExtValue())
- overrideDstValue();
break;
}
case Module::Append: {
@@ -1350,6 +1373,7 @@ Error IRLinker::linkModuleFlagsMetadata() {
break;
}
}
+
}
// Check all of the requirements.
diff --git a/llvm/lib/Support/CRC.cpp b/llvm/lib/Support/CRC.cpp
index a3dba1a3aa10..2bc668beed32 100644
--- a/llvm/lib/Support/CRC.cpp
+++ b/llvm/lib/Support/CRC.cpp
@@ -85,7 +85,15 @@ uint32_t llvm::crc32(uint32_t CRC, ArrayRef<uint8_t> Data) {
#include <zlib.h>
uint32_t llvm::crc32(uint32_t CRC, ArrayRef<uint8_t> Data) {
- return ::crc32(CRC, (const Bytef *)Data.data(), Data.size());
+ // Zlib's crc32() only takes a 32-bit length, so we have to iterate for larger
+ // sizes. One could use crc32_z() instead, but that's a recent (2017) addition
+ // and may not be available on all systems.
+ do {
+ ArrayRef<uint8_t> Slice = Data.take_front(UINT32_MAX);
+ CRC = ::crc32(CRC, (const Bytef *)Slice.data(), (uInt)Slice.size());
+ Data = Data.drop_front(Slice.size());
+ } while (Data.size() > 0);
+ return CRC;
}
#endif
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index b9031f52375c..356835609830 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -14,9 +14,6 @@
#include "llvm/Support/ThreadLocal.h"
#include <mutex>
#include <setjmp.h>
-#ifdef _WIN32
-#include <excpt.h> // for GetExceptionInformation
-#endif
#if LLVM_ON_UNIX
#include <sysexits.h> // EX_IOERR
#endif
@@ -41,11 +38,11 @@ struct CrashRecoveryContextImpl {
::jmp_buf JumpBuffer;
volatile unsigned Failed : 1;
unsigned SwitchedThread : 1;
+ unsigned ValidJumpBuffer : 1;
public:
- CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
- Failed(false),
- SwitchedThread(false) {
+ CrashRecoveryContextImpl(CrashRecoveryContext *CRC) noexcept
+ : CRC(CRC), Failed(false), SwitchedThread(false), ValidJumpBuffer(false) {
Next = CurrentContext->get();
CurrentContext->set(this);
}
@@ -80,10 +77,13 @@ public:
CRC->RetCode = RetCode;
// Jump back to the RunSafely we were called under.
- longjmp(JumpBuffer, 1);
+ if (ValidJumpBuffer)
+ longjmp(JumpBuffer, 1);
+
+ // Otherwise let the caller decide the outcome of the crash. Currently
+ // this occurs when using SEH on Windows with MSVC or clang-cl.
}
};
-
}
static ManagedStatic<std::mutex> gCrashRecoveryContextMutex;
@@ -175,6 +175,9 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
}
#if defined(_MSC_VER)
+
+#include <windows.h> // for GetExceptionInformation
+
// If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way
// better than VEH. Vectored exception handling catches all exceptions happening
// on the thread with installed exception handlers, so it can interfere with
@@ -188,30 +191,45 @@ static void uninstallExceptionOrSignalHandlers() {}
// We need this function because the call to GetExceptionInformation() can only
// occur inside the __except evaluation block
-static int ExceptionFilter(bool DumpStackAndCleanup,
- _EXCEPTION_POINTERS *Except) {
- if (DumpStackAndCleanup)
- sys::CleanupOnSignal((uintptr_t)Except);
- return EXCEPTION_EXECUTE_HANDLER;
-}
+static int ExceptionFilter(_EXCEPTION_POINTERS *Except) {
+ // Lookup the current thread local recovery object.
+ const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
-static bool InvokeFunctionCall(function_ref<void()> Fn,
- bool DumpStackAndCleanup, int &RetCode) {
- __try {
- Fn();
- } __except (ExceptionFilter(DumpStackAndCleanup, GetExceptionInformation())) {
- RetCode = GetExceptionCode();
- return false;
+ if (!CRCI) {
+ // Something has gone horribly wrong, so let's just tell everyone
+ // to keep searching
+ CrashRecoveryContext::Disable();
+ return EXCEPTION_CONTINUE_SEARCH;
}
- return true;
+
+ int RetCode = (int)Except->ExceptionRecord->ExceptionCode;
+ if ((RetCode & 0xF0000000) == 0xE0000000)
+ RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit
+
+ // Handle the crash
+ const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(
+ RetCode, reinterpret_cast<uintptr_t>(Except));
+
+ return EXCEPTION_EXECUTE_HANDLER;
}
+#if defined(__clang__) && defined(_M_IX86)
+// Work around PR44697.
+__attribute__((optnone))
+#endif
bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
if (!gCrashRecoveryEnabled) {
Fn();
return true;
}
- return InvokeFunctionCall(Fn, DumpStackAndCleanupOnFailure, RetCode);
+ assert(!Impl && "Crash recovery context already initialized!");
+ Impl = new CrashRecoveryContextImpl(this);
+ __try {
+ Fn();
+ } __except (ExceptionFilter(GetExceptionInformation())) {
+ return false;
+ }
+ return true;
}
#else // !_MSC_VER
@@ -264,10 +282,13 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
// TODO: We can capture the stack backtrace here and store it on the
// implementation if we so choose.
+ int RetCode = (int)ExceptionInfo->ExceptionRecord->ExceptionCode;
+ if ((RetCode & 0xF0000000) == 0xE0000000)
+ RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit
+
// Handle the crash
const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(
- (int)ExceptionInfo->ExceptionRecord->ExceptionCode,
- reinterpret_cast<uintptr_t>(ExceptionInfo));
+ RetCode, reinterpret_cast<uintptr_t>(ExceptionInfo));
// Note that we don't actually get here because HandleCrash calls
// longjmp, which means the HandleCrash function never returns.
@@ -388,6 +409,7 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
Impl = CRCI;
+ CRCI->ValidJumpBuffer = true;
if (setjmp(CRCI->JumpBuffer) != 0) {
return false;
}
@@ -399,12 +421,19 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
#endif // !_MSC_VER
-void CrashRecoveryContext::HandleCrash() {
- CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+LLVM_ATTRIBUTE_NORETURN
+void CrashRecoveryContext::HandleExit(int RetCode) {
+#if defined(_WIN32)
+ // SEH and VEH
+ ::RaiseException(0xE0000000 | RetCode, 0, 0, NULL);
+#else
+ // On Unix we don't need to raise an exception, we go directly to
+ // HandleCrash(), then longjmp will unwind the stack for us.
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *)Impl;
assert(CRCI && "Crash recovery context never initialized!");
- // As per convention, -2 indicates a crash or timeout as opposed to failure to
- // execute (see llvm/include/llvm/Support/Program.h)
- CRCI->HandleCrash(-2, 0);
+ CRCI->HandleCrash(RetCode, 0 /*no sig num*/);
+#endif
+ llvm_unreachable("Most likely setjmp wasn't called!");
}
// FIXME: Portability.
diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp
index 0f13f7a536f1..a9463024c420 100644
--- a/llvm/lib/Support/ErrorHandling.cpp
+++ b/llvm/lib/Support/ErrorHandling.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/WindowsError.h"
@@ -122,7 +123,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
// files registered with RemoveFileOnSignal.
sys::RunInterruptHandlers();
- exit(1);
+ sys::Process::Exit(1);
}
void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler,
diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp
index 5b6471008159..509512f643d3 100644
--- a/llvm/lib/Support/Process.cpp
+++ b/llvm/lib/Support/Process.cpp
@@ -13,8 +13,9 @@
#include "llvm/Support/Process.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/llvm-config.h"
#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
@@ -88,6 +89,13 @@ static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS;
bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; }
+LLVM_ATTRIBUTE_NORETURN
+void Process::Exit(int RetCode) {
+ if (CrashRecoveryContext *CRC = CrashRecoveryContext::GetCurrent())
+ CRC->HandleExit(RetCode);
+ ::exit(RetCode);
+}
+
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Process.inc"
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index 8b525f1bd4ac..09e19ae41f1a 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -820,7 +820,13 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
<< "\n";
}
- LocalPrintStackTrace(llvm::errs(), ep ? ep->ContextRecord : nullptr);
+ // Stack unwinding appears to modify the context. Copy it to preserve the
+ // caller's context.
+ CONTEXT ContextCopy;
+ if (ep)
+ memcpy(&ContextCopy, ep->ContextRecord, sizeof(ContextCopy));
+
+ LocalPrintStackTrace(llvm::errs(), ep ? &ContextCopy : nullptr);
return EXCEPTION_EXECUTE_HANDLER;
}
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b8953583a310..6da089d1859a 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1000,6 +1000,26 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
break;
+ case AArch64::HINT: {
+ // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
+ // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
+ // non-empty. If MI is the initial BTI, place the
+ // __patchable_function_entries label after BTI.
+ if (CurrentPatchableFunctionEntrySym &&
+ CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
+ MI == &MF->front().front()) {
+ int64_t Imm = MI->getOperand(0).getImm();
+ if ((Imm & 32) && (Imm & 6)) {
+ MCInst Inst;
+ MCInstLowering.Lower(MI, Inst);
+ EmitToStreamer(*OutStreamer, Inst);
+ CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
+ OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
+ return;
+ }
+ }
+ break;
+ }
case AArch64::MOVMCSym: {
Register DestReg = MI->getOperand(0).getReg();
const MachineOperand &MO_Sym = MI->getOperand(1);
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index bc91d628f0b4..cbca29b63b70 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -66,6 +66,10 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
cl::Hidden);
+// Enable register renaming to find additional store pairing opportunities.
+static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
+ cl::init(false), cl::Hidden);
+
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
namespace {
@@ -1446,6 +1450,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
Optional<bool> MaybeCanRename = None;
+ if (!EnableRenaming)
+ MaybeCanRename = {false};
+
SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
LiveRegUnits UsedInBetween;
UsedInBetween.init(*TRI);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 6f4569a49783..131219ca6944 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -183,7 +183,21 @@ public:
bool &AllowPromotionWithoutCommonHeader);
bool shouldExpandReduction(const IntrinsicInst *II) const {
- return false;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ // We don't have legalization support for ordered FP reductions.
+ return !II->getFastMathFlags().allowReassoc();
+
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ // Lowering asserts that there are no NaNs.
+ return !II->getFastMathFlags().noNaNs();
+
+ default:
+ // Don't expand anything else, let legalization deal with it.
+ return false;
+ }
}
unsigned getGISelRematGlobalCost() const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index fbed51de0ea4..a55a1747cafe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -156,9 +156,6 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;
-void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
-extern char &SIRemoveShortExecBranchesID;
-
void initializeSIInsertSkipsPass(PassRegistry &);
extern char &SIInsertSkipsPassID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index eb30d659bf0b..c8dc6f6e3bf4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -228,7 +228,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
- initializeSIRemoveShortExecBranchesPass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
@@ -994,7 +993,6 @@ void GCNPassConfig::addPreEmitPass() {
// be better for it to emit S_NOP <N> when possible.
addPass(&PostRAHazardRecognizerID);
- addPass(&SIRemoveShortExecBranchesID);
addPass(&SIInsertSkipsPassID);
addPass(&BranchRelaxationPassID);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 191f603a66d6..01bb60f07f2e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -34,6 +34,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -117,24 +118,58 @@ static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
return true;
}
+static void removeDoneExport(Function &F) {
+ ConstantInt *BoolFalse = ConstantInt::getFalse(F.getContext());
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (IntrinsicInst *Intrin = llvm::dyn_cast<IntrinsicInst>(&I)) {
+ if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) {
+ Intrin->setArgOperand(6, BoolFalse); // done
+ } else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) {
+ Intrin->setArgOperand(4, BoolFalse); // done
+ }
+ }
+ }
+ }
+}
+
static BasicBlock *unifyReturnBlockSet(Function &F,
ArrayRef<BasicBlock *> ReturningBlocks,
+ bool InsertExport,
const TargetTransformInfo &TTI,
StringRef Name) {
// Otherwise, we need to insert a new basic block into the function, add a PHI
// nodes (if the function returns values), and convert all of the return
// instructions into unconditional branches.
BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
+ IRBuilder<> B(NewRetBlock);
+
+ if (InsertExport) {
+ // Ensure that there's only one "done" export in the shader by removing the
+ // "done" bit set on the original final export. More than one "done" export
+ // can lead to undefined behavior.
+ removeDoneExport(F);
+
+ Value *Undef = UndefValue::get(B.getFloatTy());
+ B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() },
+ {
+ B.getInt32(9), // target, SQ_EXP_NULL
+ B.getInt32(0), // enabled channels
+ Undef, Undef, Undef, Undef, // values
+ B.getTrue(), // done
+ B.getTrue(), // valid mask
+ });
+ }
PHINode *PN = nullptr;
if (F.getReturnType()->isVoidTy()) {
- ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
+ B.CreateRetVoid();
} else {
// If the function doesn't return void... add a PHI node to the block...
- PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
- "UnifiedRetVal");
- NewRetBlock->getInstList().push_back(PN);
- ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
+ assert(!InsertExport);
+ B.CreateRet(PN);
}
// Loop over all of the blocks, replacing the return instruction with an
@@ -173,6 +208,8 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
// Dummy return block for infinite loop.
BasicBlock *DummyReturnBB = nullptr;
+ bool InsertExport = false;
+
for (BasicBlock *BB : PDT.getRoots()) {
if (isa<ReturnInst>(BB->getTerminator())) {
if (!isUniformlyReached(DA, *BB))
@@ -188,6 +225,36 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
"DummyReturnBlock", &F);
Type *RetTy = F.getReturnType();
Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
+
+ // For pixel shaders, the producer guarantees that an export is
+ // executed before each return instruction. However, if there is an
+ // infinite loop and we insert a return ourselves, we need to uphold
+ // that guarantee by inserting a null export. This can happen e.g. in
+ // an infinite loop with kill instructions, which is supposed to
+ // terminate. However, we don't need to do this if there is a non-void
+ // return value, since then there is an epilog afterwards which will
+ // still export.
+ //
+ // Note: In the case where only some threads enter the infinite loop,
+ // this can result in the null export happening redundantly after the
+ // original exports. However, the last "real" export happens after all
+ // the threads that didn't enter an infinite loop converged, which
+ // means that the only extra threads to execute the null export are
+ // threads that entered the infinite loop, and they only could've
+ // exited through being killed which sets their exec bit to 0.
+ // Therefore, unless there's an actual infinite loop, which can have
+ // invalid results, or there's a kill after the last export, which we
+ // assume the frontend won't do, this export will have the same exec
+ // mask as the last "real" export, and therefore the valid mask will be
+ // overwritten with the same value and will still be correct. Also,
+ // even though this forces an extra unnecessary export wait, we assume
+ // that this happens rarely enough in practice that we don't have to
+ // worry about performance.
+ if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
+ RetTy->isVoidTy()) {
+ InsertExport = true;
+ }
+
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
ReturningBlocks.push_back(DummyReturnBB);
}
@@ -260,6 +327,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
const TargetTransformInfo &TTI
= getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- unifyReturnBlockSet(F, ReturningBlocks, TTI, "UnifiedReturnBlock");
+ unifyReturnBlockSet(F, ReturningBlocks, InsertExport, TTI, "UnifiedReturnBlock");
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index 1a526675164a..e2978624811d 100644
--- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -50,6 +50,8 @@ def COS_cm : COS_Common<0x8E>;
def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
+def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>;
+
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
@@ -70,8 +72,6 @@ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
-def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
-
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
CF_MEM_RAT_CACHELESS <0x14, 0, mask,
(ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 792e26d21f98..88e554ae0bcc 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -118,11 +118,12 @@ def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
+def : SqrtPat<RECIPSQRT_IEEE_eg, RECIP_IEEE_eg>;
+
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
-def : EGPat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
} // End SubtargetPredicate = isEG
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index cbdf0de44f87..869c183e2245 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1233,6 +1233,11 @@ def : R600Pat<
def : RcpPat<recip_ieee, f32>;
}
+class SqrtPat<Instruction RsqInst, Instruction RecipInst> : R600Pat <
+ (fsqrt f32:$src),
+ (RecipInst (RsqInst $src))
+>;
+
//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//
@@ -1272,8 +1277,8 @@ let Predicates = [isR600] in {
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
- def : R600Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
+ def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>;
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 80c044ec00cb..87e63fcc4a04 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
static cl::opt<unsigned> SkipThresholdFlag(
- "amdgpu-skip-threshold-legacy",
+ "amdgpu-skip-threshold",
cl::desc("Number of instructions before jumping over divergent control flow"),
cl::init(12), cl::Hidden);
@@ -466,9 +466,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
- case AMDGPU::S_CBRANCH_EXECZ:
- ExecBranchStack.push_back(MI.getOperand(0).getMBB());
- break;
case AMDGPU::SI_MASK_BRANCH:
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
MadeChange |= skipMaskBranch(MI, MBB);
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 61d2719a3aad..bf052dc3c930 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
- // Insert the S_CBRANCH_EXECZ instruction which will be optimized later
- // during SIRemoveShortExecBranches.
- MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ // Insert a pseudo terminator to help keep the verifier happy. This will also
+ // be used later when inserting skips.
+ MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
.add(MI.getOperand(2));
if (!LIS) {
@@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
.addReg(DstReg);
MachineInstr *Branch =
- BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addMBB(DestBB);
+ BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ .addMBB(DestBB);
if (!LIS) {
MI.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
deleted file mode 100644
index 51779e97ac62..000000000000
--- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass optmizes the s_cbranch_execz instructions.
-/// The pass removes this skip instruction for short branches,
-/// if there is no unwanted sideeffect in the fallthrough code sequence.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-remove-short-exec-branches"
-
-static unsigned SkipThreshold;
-
-static cl::opt<unsigned, true> SkipThresholdFlag(
- "amdgpu-skip-threshold", cl::Hidden,
- cl::desc(
- "Number of instructions before jumping over divergent control flow"),
- cl::location(SkipThreshold), cl::init(12));
-
-namespace {
-
-class SIRemoveShortExecBranches : public MachineFunctionPass {
-private:
- const SIInstrInfo *TII = nullptr;
- bool getBlockDestinations(MachineBasicBlock &SrcMBB,
- MachineBasicBlock *&TrueMBB,
- MachineBasicBlock *&FalseMBB,
- SmallVectorImpl<MachineOperand> &Cond);
- bool mustRetainExeczBranch(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const;
- bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
-
-public:
- static char ID;
-
- SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
- initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
- "SI remove short exec branches", false, false)
-
-char SIRemoveShortExecBranches::ID = 0;
-
-char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
-
-bool SIRemoveShortExecBranches::getBlockDestinations(
- MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
- MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
- if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
- return false;
-
- if (!FalseMBB)
- FalseMBB = SrcMBB.getNextNode();
-
- return true;
-}
-
-bool SIRemoveShortExecBranches::mustRetainExeczBranch(
- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
- unsigned NumInstr = 0;
- const MachineFunction *MF = From.getParent();
-
- for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
- MBBI != End && MBBI != ToI; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- // When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
- // when EXEC = 0. We should skip the loop lest it becomes infinite.
- if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
- I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
- return true;
-
- if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
- return true;
-
- // These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
- I->getOpcode() == AMDGPU::S_WAITCNT)
- return true;
-
- ++NumInstr;
- if (NumInstr >= SkipThreshold)
- return true;
- }
- }
-
- return false;
-}
-
-// Returns true if the skip branch instruction is removed.
-bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
- MachineBasicBlock &SrcMBB) {
- MachineBasicBlock *TrueMBB = nullptr;
- MachineBasicBlock *FalseMBB = nullptr;
- SmallVector<MachineOperand, 1> Cond;
-
- if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
- return false;
-
- // Consider only the forward branches.
- if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
- mustRetainExeczBranch(*FalseMBB, *TrueMBB))
- return false;
-
- LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
- MI.eraseFromParent();
- SrcMBB.removeSuccessor(TrueMBB);
-
- return true;
-}
-
-bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- TII = ST.getInstrInfo();
- MF.RenumberBlocks();
- bool Changed = false;
-
- for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
- if (MBBI == MBB.end())
- continue;
-
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- case AMDGPU::S_CBRANCH_EXECZ:
- Changed = removeExeczBranch(MI, MBB);
- break;
- default:
- break;
- }
- }
-
- return Changed;
-}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5271bc3aacc6..8b21b9346987 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -559,7 +559,7 @@ bool isReadOnlySegment(const GlobalValue *GV) {
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
- return TT.getOS() == Triple::AMDPAL;
+ return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
}
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 634fb89b8e89..66ad120a111f 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -330,8 +330,8 @@ void ARMConstantIslands::verify() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
- BBInfoVector &BBInfo = BBUtils->getBBInfo();
LLVM_DEBUG({
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
const BasicBlockInfo &BBI = BBInfo[J];
dbgs() << format("%08x %bb.%u\t", BBI.Offset, J)
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2c3ac816219f..de4377ec5a47 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1952,24 +1952,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::LOADDUAL:
- case ARM::STOREDUAL: {
- Register PairReg = MI.getOperand(0).getReg();
-
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
- .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
- Opcode == ARM::LOADDUAL ? RegState::Define : 0)
- .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
- Opcode == ARM::LOADDUAL ? RegState::Define : 0);
- for (unsigned i = 1; i < MI.getNumOperands(); i++)
- MIB.add(MI.getOperand(i));
- MIB.add(predOps(ARMCC::AL));
- MIB.cloneMemRefs(MI);
- MI.eraseFromParent();
- return true;
- }
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 76a9ac12062d..9b06987178d8 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -145,8 +145,6 @@ public:
// Thumb 2 Addressing Modes:
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
- template <unsigned Shift>
- bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
@@ -1296,33 +1294,6 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
return true;
}
-template <unsigned Shift>
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
- SDValue &OffImm) {
- if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
- int RHSC;
- if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(
- FI, TLI->getPointerTy(CurDAG->getDataLayout()));
- }
-
- if (N.getOpcode() == ISD::SUB)
- RHSC = -RHSC;
- OffImm =
- CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
- return true;
- }
- }
-
- // Base only.
- Base = N;
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
- return true;
-}
-
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
@@ -3515,26 +3486,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N);
return;
}
- case ARMISD::LDRD: {
- if (Subtarget->isThumb2())
- break; // TableGen handles isel in this case.
- SDValue Base, RegOffset, ImmOffset;
- const SDValue &Chain = N->getOperand(0);
- const SDValue &Addr = N->getOperand(1);
- SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
- SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
- SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
- {MVT::Untyped, MVT::Other}, Ops);
- SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
- SDValue(New, 0));
- SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
- SDValue(New, 0));
- ReplaceUses(SDValue(N, 0), Lo);
- ReplaceUses(SDValue(N, 1), Hi);
- ReplaceUses(SDValue(N, 2), SDValue(New, 1));
- CurDAG->RemoveDeadNode(N);
- return;
- }
case ARMISD::LOOP_DEC: {
SDValue Ops[] = { N->getOperand(1),
N->getOperand(2),
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index cf738cd66434..1e6f7d889201 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1073,8 +1073,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
- setOperationAction(ISD::LOAD, MVT::i64, Custom);
- setOperationAction(ISD::STORE, MVT::i64, Custom);
// MVE lowers 64 bit shifts to lsll and lsrl
// assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -1598,9 +1596,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
- case ARMISD::LDRD: return "ARMISD::LDRD";
- case ARMISD::STRD: return "ARMISD::STRD";
-
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
@@ -9088,24 +9083,6 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
}
-void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- EVT MemVT = LD->getMemoryVT();
- assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
-
- if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
- !Subtarget->isThumb1Only() && LD->isVolatile()) {
- SDLoc dl(N);
- SDValue Result = DAG.getMemIntrinsicNode(
- ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
- {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
- Result.getValue(0), Result.getValue(1));
- Results.append({Pair, Result.getValue(2)});
- }
-}
-
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
@@ -9135,34 +9112,6 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
ST->getMemOperand());
}
-static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
- StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
- EVT MemVT = ST->getMemoryVT();
- assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
-
- if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
- !Subtarget->isThumb1Only() && ST->isVolatile()) {
- SDNode *N = Op.getNode();
- SDLoc dl(N);
-
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
- DAG.getTargetConstant(0, dl, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
- DAG.getTargetConstant(1, dl, MVT::i32));
-
- return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
- {ST->getChain(), Lo, Hi, ST->getBasePtr()},
- MemVT, ST->getMemOperand());
- } else if (Subtarget->hasMVEIntegerOps() &&
- ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
- MemVT == MVT::v16i1))) {
- return LowerPredicateStore(Op, DAG);
- }
-
- return SDValue();
-}
-
static bool isZeroVector(SDValue N) {
return (ISD::isBuildVectorAllZeros(N.getNode()) ||
(N->getOpcode() == ARMISD::VMOVIMM &&
@@ -9350,7 +9299,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::LOAD:
return LowerPredicateLoad(Op, DAG);
case ISD::STORE:
- return LowerSTORE(Op, DAG, Subtarget);
+ return LowerPredicateStore(Op, DAG);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::ATOMIC_LOAD:
@@ -9452,9 +9401,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ABS:
lowerABS(N, Results, DAG);
return ;
- case ISD::LOAD:
- LowerLOAD(N, Results, DAG);
- break;
+
}
if (Res.getNode())
Results.push_back(Res);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 1baa22a4fa56..cc74e5d875d8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -278,11 +278,7 @@ class VectorType;
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD,
-
- // Load/Store of dual registers
- LDRD,
- STRD
+ VST4LN_UPD
};
} // end namespace ARMISD
@@ -735,8 +731,6 @@ class VectorType;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
- void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index ce67af6f1b49..3efe85a7d45c 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -243,12 +243,6 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
-def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
-def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
// Vector operations shared between NEON and MVE
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -2701,14 +2695,6 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
Requires<[IsARM, HasV5TE]>;
}
-let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
-def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr),
- 64, IIC_iLoad_d_r, []>,
- Requires<[IsARM, HasV5TE]> {
- let AM = AddrMode3;
-}
-}
-
def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "lda", "\t$Rt, $addr", []>;
def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -2984,19 +2970,6 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
}
}
-let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
-def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
- 64, IIC_iStore_d_r, []>,
- Requires<[IsARM, HasV5TE]> {
- let AM = AddrMode3;
-}
-}
-
-let Predicates = [IsARM, HasV5TE] in {
-def : Pat<(ARMstrd GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
- (STOREDUAL (REG_SEQUENCE GPRPair, GPR:$Rt, gsub_0, GPR:$Rt2, gsub_1), addrmode3:$addr)>;
-}
-
// Indexed stores
multiclass AI2_stridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 4193e8147f47..c5aae235f25d 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -270,8 +270,7 @@ def t2am_imm8_offset : MemOperand,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : MemOperand,
- ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> {
+class T2AddrMode_Imm8s4 : MemOperand {
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -1449,8 +1448,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "",
- [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>,
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1631,8 +1629,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "",
- [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>,
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
Sched<[WriteST]>;
// Indexed stores
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 880588adfdfd..f66083eaf187 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -171,7 +171,26 @@ public:
TTI::ReductionFlags Flags) const;
bool shouldExpandReduction(const IntrinsicInst *II) const {
- return false;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ // We don't have legalization support for ordered FP reductions.
+ if (!II->getFastMathFlags().allowReassoc())
+ return true;
+ // Can't legalize reductions with soft floats.
+ return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
+
+ case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ // Can't legalize reductions with soft floats; without the no-NaNs flag they
+ // would become fminimum/fmaximum, which we do not know how to lower.
+ return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
+ !II->getFastMathFlags().noNaNs();
+
+ default:
+ // Don't expand anything else, let legalization deal with it.
+ return false;
+ }
}
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index b81bf4e1320d..cbae4675cb14 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -110,6 +110,19 @@ private:
return true;
}
+ // Prevent reducing load width during the SelectionDAG phase.
+ // Otherwise, we may transform the following
+ // ctx = ctx + reloc_offset
+ // ... (*(u32 *)ctx) & 0x8000...
+ // to
+ // ctx = ctx + reloc_offset
+ // ... (*(u8 *)(ctx + 1)) & 0x80 ...
+ // which will be rejected by the verifier.
+ bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
+ EVT NewVT) const override {
+ return false;
+ }
+
unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg,
bool isSigned) const;
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 5310f0f07b65..29abc9303a62 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -70,9 +70,10 @@ private:
public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
- if (!skipFunction(MF.getFunction())) {
- initialize(MF);
- }
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ initialize(MF);
return removeLD();
}
};
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 53562f42a184..c7efdf42a7c6 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -195,12 +195,13 @@ public:
Parser.addAliasForDirective(".dword", ".8byte");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
- if (Options.ABIName.back() == 'f' &&
+ auto ABIName = StringRef(Options.ABIName);
+ if (ABIName.endswith("f") &&
!getSTI().getFeatureBits()[RISCV::FeatureStdExtF]) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
- } else if (Options.ABIName.back() == 'd' &&
+ } else if (ABIName.endswith("d") &&
!getSTI().getFeatureBits()[RISCV::FeatureStdExtD]) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 82afa13aece3..770e883221d1 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -92,10 +92,13 @@ include "RISCVSystemOperands.td"
// Registers, calling conventions, instruction descriptions.
//===----------------------------------------------------------------------===//
+include "RISCVSchedule.td"
include "RISCVRegisterInfo.td"
include "RISCVCallingConv.td"
include "RISCVInstrInfo.td"
include "RISCVRegisterBanks.td"
+include "RISCVSchedRocket32.td"
+include "RISCVSchedRocket64.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.
@@ -106,6 +109,12 @@ def : ProcessorModel<"generic-rv32", NoSchedModel, [FeatureRVCHints]>;
def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit,
FeatureRVCHints]>;
+def : ProcessorModel<"rocket-rv32", Rocket32Model, [FeatureRVCHints]>;
+
+def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit,
+ FeatureRVCHints]>;
+
+
//===----------------------------------------------------------------------===//
// Define the RISC-V target.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 7229ebfe1db0..3ed10cca5377 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -103,7 +103,8 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
// Pseudo instructions
class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string argstr = "">
- : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo> {
+ : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo>,
+ Sched<[]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 8e9ad4965583..81f1abe8337e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -298,7 +298,8 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class BranchCC_rri<bits<3> funct3, string opcodestr>
: RVInstB<funct3, OPC_BRANCH, (outs),
(ins GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12),
- opcodestr, "$rs1, $rs2, $imm12"> {
+ opcodestr, "$rs1, $rs2, $imm12">,
+ Sched<[WriteJmp]> {
let isBranch = 1;
let isTerminator = 1;
}
@@ -320,13 +321,15 @@ class Store_rri<bits<3> funct3, string opcodestr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class ALU_ri<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12),
- opcodestr, "$rd, $rs1, $imm12">;
+ opcodestr, "$rd, $rs1, $imm12">,
+ Sched<[WriteIALU, ReadIALU]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class Shift_ri<bit arithshift, bits<3> funct3, string opcodestr>
: RVInstIShift<arithshift, funct3, OPC_OP_IMM, (outs GPR:$rd),
(ins GPR:$rs1, uimmlog2xlen:$shamt), opcodestr,
- "$rd, $rs1, $shamt">;
+ "$rd, $rs1, $shamt">,
+ Sched<[WriteShift, ReadShift]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
@@ -336,19 +339,20 @@ class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ir<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, GPR:$rs1),
- opcodestr, "$rd, $imm12, $rs1">;
+ opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR, ReadCSR]>;
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ii<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd),
(ins csr_sysreg:$imm12, uimm5:$rs1),
- opcodestr, "$rd, $imm12, $rs1">;
+ opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class ShiftW_ri<bit arithshift, bits<3> funct3, string opcodestr>
: RVInstIShiftW<arithshift, funct3, OPC_OP_IMM_32, (outs GPR:$rd),
(ins GPR:$rs1, uimm5:$shamt), opcodestr,
- "$rd, $rs1, $shamt">;
+ "$rd, $rs1, $shamt">,
+ Sched<[WriteShift32, ReadShift32]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class ALUW_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
@@ -367,19 +371,20 @@ class Priv<string opcodestr, bits<7> funct7>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20),
- "lui", "$rd, $imm20">;
+ "lui", "$rd, $imm20">, Sched<[WriteIALU]>;
def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20_auipc:$imm20),
- "auipc", "$rd, $imm20">;
+ "auipc", "$rd, $imm20">, Sched<[WriteIALU]>;
let isCall = 1 in
def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
- "jal", "$rd, $imm20">;
+ "jal", "$rd, $imm20">, Sched<[WriteJal]>;
let isCall = 1 in
def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd),
(ins GPR:$rs1, simm12:$imm12),
- "jalr", "$rd, ${imm12}(${rs1})">;
+ "jalr", "$rd, ${imm12}(${rs1})">,
+ Sched<[WriteJalr, ReadJalr]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
def BEQ : BranchCC_rri<0b000, "beq">;
@@ -389,15 +394,15 @@ def BGE : BranchCC_rri<0b101, "bge">;
def BLTU : BranchCC_rri<0b110, "bltu">;
def BGEU : BranchCC_rri<0b111, "bgeu">;
-def LB : Load_ri<0b000, "lb">;
-def LH : Load_ri<0b001, "lh">;
-def LW : Load_ri<0b010, "lw">;
-def LBU : Load_ri<0b100, "lbu">;
-def LHU : Load_ri<0b101, "lhu">;
+def LB : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>;
+def LH : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>;
+def LW : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>;
+def LBU : Load_ri<0b100, "lbu">, Sched<[WriteLDB, ReadMemBase]>;
+def LHU : Load_ri<0b101, "lhu">, Sched<[WriteLDH, ReadMemBase]>;
-def SB : Store_rri<0b000, "sb">;
-def SH : Store_rri<0b001, "sh">;
-def SW : Store_rri<0b010, "sw">;
+def SB : Store_rri<0b000, "sb">, Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+def SH : Store_rri<0b001, "sh">, Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+def SW : Store_rri<0b010, "sw">, Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
// ADDI isn't always rematerializable, but isReMaterializable will be used as
// a hint which is verified in isReallyTriviallyReMaterializable.
@@ -418,21 +423,21 @@ def SLLI : Shift_ri<0, 0b001, "slli">;
def SRLI : Shift_ri<0, 0b101, "srli">;
def SRAI : Shift_ri<1, 0b101, "srai">;
-def ADD : ALU_rr<0b0000000, 0b000, "add">;
-def SUB : ALU_rr<0b0100000, 0b000, "sub">;
-def SLL : ALU_rr<0b0000000, 0b001, "sll">;
-def SLT : ALU_rr<0b0000000, 0b010, "slt">;
-def SLTU : ALU_rr<0b0000000, 0b011, "sltu">;
-def XOR : ALU_rr<0b0000000, 0b100, "xor">;
-def SRL : ALU_rr<0b0000000, 0b101, "srl">;
-def SRA : ALU_rr<0b0100000, 0b101, "sra">;
-def OR : ALU_rr<0b0000000, 0b110, "or">;
-def AND : ALU_rr<0b0000000, 0b111, "and">;
+def ADD : ALU_rr<0b0000000, 0b000, "add">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SUB : ALU_rr<0b0100000, 0b000, "sub">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SLL : ALU_rr<0b0000000, 0b001, "sll">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SLT : ALU_rr<0b0000000, 0b010, "slt">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SLTU : ALU_rr<0b0000000, 0b011, "sltu">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def XOR : ALU_rr<0b0000000, 0b100, "xor">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SRL : ALU_rr<0b0000000, 0b101, "srl">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SRA : ALU_rr<0b0100000, 0b101, "sra">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def OR : ALU_rr<0b0000000, 0b110, "or">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def AND : ALU_rr<0b0000000, 0b111, "and">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs),
(ins fencearg:$pred, fencearg:$succ),
- "fence", "$pred, $succ"> {
+ "fence", "$pred, $succ">, Sched<[]> {
bits<4> pred;
bits<4> succ;
@@ -441,25 +446,26 @@ def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs),
let imm12 = {0b0000,pred,succ};
}
-def FENCE_TSO : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins), "fence.tso", ""> {
+def FENCE_TSO : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins), "fence.tso", "">, Sched<[]> {
let rs1 = 0;
let rd = 0;
let imm12 = {0b1000,0b0011,0b0011};
}
-def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", ""> {
+def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", "">, Sched<[]> {
let rs1 = 0;
let rd = 0;
let imm12 = 0;
}
-def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", ""> {
+def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", "">, Sched<[WriteJmp]> {
let rs1 = 0;
let rd = 0;
let imm12 = 0;
}
-def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> {
+def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", "">,
+ Sched<[]> {
let rs1 = 0;
let rd = 0;
let imm12 = 1;
@@ -468,7 +474,8 @@ def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> {
// This is a de facto standard (as set by GNU binutils) 32-bit unimplemented
// instruction (i.e., it should always trap, if your implementation has invalid
// instruction traps).
-def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", ""> {
+def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", "">,
+ Sched<[]> {
let rs1 = 0;
let rd = 0;
let imm12 = 0b110000000000;
@@ -486,24 +493,30 @@ def CSRRCI : CSR_ii<0b111, "csrrci">;
/// RV64I instructions
let Predicates = [IsRV64] in {
-def LWU : Load_ri<0b110, "lwu">;
-def LD : Load_ri<0b011, "ld">;
-def SD : Store_rri<0b011, "sd">;
+def LWU : Load_ri<0b110, "lwu">, Sched<[WriteLDWU, ReadMemBase]>;
+def LD : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>;
+def SD : Store_rri<0b011, "sd">, Sched<[WriteSTD, ReadStoreData, ReadMemBase]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def ADDIW : RVInstI<0b000, OPC_OP_IMM_32, (outs GPR:$rd),
(ins GPR:$rs1, simm12:$imm12),
- "addiw", "$rd, $rs1, $imm12">;
+ "addiw", "$rd, $rs1, $imm12">,
+ Sched<[WriteIALU32, ReadIALU32]>;
def SLLIW : ShiftW_ri<0, 0b001, "slliw">;
def SRLIW : ShiftW_ri<0, 0b101, "srliw">;
def SRAIW : ShiftW_ri<1, 0b101, "sraiw">;
-def ADDW : ALUW_rr<0b0000000, 0b000, "addw">;
-def SUBW : ALUW_rr<0b0100000, 0b000, "subw">;
-def SLLW : ALUW_rr<0b0000000, 0b001, "sllw">;
-def SRLW : ALUW_rr<0b0000000, 0b101, "srlw">;
-def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">;
+def ADDW : ALUW_rr<0b0000000, 0b000, "addw">,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SUBW : ALUW_rr<0b0100000, 0b000, "subw">,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SLLW : ALUW_rr<0b0000000, 0b001, "sllw">,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SRLW : ALUW_rr<0b0000000, 0b101, "srlw">,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
} // Predicates = [IsRV64]
//===----------------------------------------------------------------------===//
@@ -511,26 +524,26 @@ def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">;
//===----------------------------------------------------------------------===//
let isBarrier = 1, isReturn = 1, isTerminator = 1 in {
-def URET : Priv<"uret", 0b0000000> {
+def URET : Priv<"uret", 0b0000000>, Sched<[]> {
let rd = 0;
let rs1 = 0;
let rs2 = 0b00010;
}
-def SRET : Priv<"sret", 0b0001000> {
+def SRET : Priv<"sret", 0b0001000>, Sched<[]> {
let rd = 0;
let rs1 = 0;
let rs2 = 0b00010;
}
-def MRET : Priv<"mret", 0b0011000> {
+def MRET : Priv<"mret", 0b0011000>, Sched<[]> {
let rd = 0;
let rs1 = 0;
let rs2 = 0b00010;
}
} // isBarrier = 1, isReturn = 1, isTerminator = 1
-def WFI : Priv<"wfi", 0b0001000> {
+def WFI : Priv<"wfi", 0b0001000>, Sched<[]> {
let rd = 0;
let rs1 = 0;
let rs2 = 0b00101;
@@ -539,7 +552,7 @@ def WFI : Priv<"wfi", 0b0001000> {
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs),
(ins GPR:$rs1, GPR:$rs2),
- "sfence.vma", "$rs1, $rs2"> {
+ "sfence.vma", "$rs1, $rs2">, Sched<[]> {
let rd = 0;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 7321f4bd9d2f..de73c8df9367 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -77,31 +77,51 @@ multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtA] in {
-defm LR_W : LR_r_aq_rl<0b010, "lr.w">;
-defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">;
-defm AMOSWAP_W : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">;
-defm AMOADD_W : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">;
-defm AMOXOR_W : AMO_rr_aq_rl<0b00100, 0b010, "amoxor.w">;
-defm AMOAND_W : AMO_rr_aq_rl<0b01100, 0b010, "amoand.w">;
-defm AMOOR_W : AMO_rr_aq_rl<0b01000, 0b010, "amoor.w">;
-defm AMOMIN_W : AMO_rr_aq_rl<0b10000, 0b010, "amomin.w">;
-defm AMOMAX_W : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">;
-defm AMOMINU_W : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">;
-defm AMOMAXU_W : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">;
+defm LR_W : LR_r_aq_rl<0b010, "lr.w">, Sched<[WriteAtomicLDW, ReadAtomicLDW]>;
+defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">,
+ Sched<[WriteAtomicSTW, ReadAtomicSTW, ReadAtomicSTW]>;
+defm AMOSWAP_W : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOADD_W : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOXOR_W : AMO_rr_aq_rl<0b00100, 0b010, "amoxor.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOAND_W : AMO_rr_aq_rl<0b01100, 0b010, "amoand.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOOR_W : AMO_rr_aq_rl<0b01000, 0b010, "amoor.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOMIN_W : AMO_rr_aq_rl<0b10000, 0b010, "amomin.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOMAX_W : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOMINU_W : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOMAXU_W : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">,
+ Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
} // Predicates = [HasStdExtA]
let Predicates = [HasStdExtA, IsRV64] in {
-defm LR_D : LR_r_aq_rl<0b011, "lr.d">;
-defm SC_D : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">;
-defm AMOSWAP_D : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">;
-defm AMOADD_D : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">;
-defm AMOXOR_D : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">;
-defm AMOAND_D : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">;
-defm AMOOR_D : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">;
-defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">;
-defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">;
-defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">;
-defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">;
+defm LR_D : LR_r_aq_rl<0b011, "lr.d">, Sched<[WriteAtomicLDD, ReadAtomicLDD]>;
+defm SC_D : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">,
+ Sched<[WriteAtomicSTD, ReadAtomicSTD, ReadAtomicSTD]>;
+defm AMOSWAP_D : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOADD_D : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOXOR_D : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOAND_D : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOOR_D : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">,
+ Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
} // Predicates = [HasStdExtA, IsRV64]
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index fa0050f107b2..f68767847ade 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -282,7 +282,8 @@ let Predicates = [HasStdExtC] in {
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in
def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
(ins SP:$rs1, uimm10_lsb00nonzero:$imm),
- "c.addi4spn", "$rd, $rs1, $imm"> {
+ "c.addi4spn", "$rd, $rs1, $imm">,
+ Sched<[WriteIALU, ReadIALU]> {
bits<5> rs1;
let Inst{12-11} = imm{5-4};
let Inst{10-7} = imm{9-6};
@@ -291,13 +292,15 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
}
let Predicates = [HasStdExtC, HasStdExtD] in
-def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000> {
+def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>,
+ Sched<[WriteFLD64, ReadMemBase]> {
bits<8> imm;
let Inst{12-10} = imm{5-3};
let Inst{6-5} = imm{7-6};
}
-def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> {
+def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>,
+ Sched<[WriteLDW, ReadMemBase]> {
bits<7> imm;
let Inst{12-10} = imm{5-3};
let Inst{6} = imm{2};
@@ -306,7 +309,8 @@ def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> {
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> {
+def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
+ Sched<[WriteFLD32, ReadMemBase]> {
bits<7> imm;
let Inst{12-10} = imm{5-3};
let Inst{6} = imm{2};
@@ -314,20 +318,23 @@ def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> {
}
let Predicates = [HasStdExtC, IsRV64] in
-def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000> {
+def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>,
+ Sched<[WriteLDD, ReadMemBase]> {
bits<8> imm;
let Inst{12-10} = imm{5-3};
let Inst{6-5} = imm{7-6};
}
let Predicates = [HasStdExtC, HasStdExtD] in
-def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000> {
+def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>,
+ Sched<[WriteFST64, ReadStoreData, ReadMemBase]> {
bits<8> imm;
let Inst{12-10} = imm{5-3};
let Inst{6-5} = imm{7-6};
}
-def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> {
+def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
bits<7> imm;
let Inst{12-10} = imm{5-3};
let Inst{6} = imm{2};
@@ -336,7 +343,8 @@ def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> {
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> {
+def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
+ Sched<[WriteFST32, ReadStoreData, ReadMemBase]> {
bits<7> imm;
let Inst{12-10} = imm{5-3};
let Inst{6} = imm{2};
@@ -344,14 +352,16 @@ def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> {
}
let Predicates = [HasStdExtC, IsRV64] in
-def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> {
+def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>,
+ Sched<[WriteSTD, ReadStoreData, ReadMemBase]> {
bits<8> imm;
let Inst{12-10} = imm{5-3};
let Inst{6-5} = imm{7-6};
}
let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">
+def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">,
+ Sched<[WriteNop]>
{
let Inst{6-2} = 0;
}
@@ -359,7 +369,8 @@ def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
(ins GPRNoX0:$rd, simm6nonzero:$imm),
- "c.addi", "$rd, $imm"> {
+ "c.addi", "$rd, $imm">,
+ Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = imm{4-0};
}
@@ -367,7 +378,8 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_ADDI_NOP : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
(ins GPRX0:$rd, immzero:$imm),
- "c.addi", "$rd, $imm"> {
+ "c.addi", "$rd, $imm">,
+ Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = 0;
let isAsmParserOnly = 1;
@@ -377,27 +389,30 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
DecoderNamespace = "RISCV32Only_", Defs = [X1],
Predicates = [HasStdExtC, IsRV32] in
def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset),
- "c.jal", "$offset">;
+ "c.jal", "$offset">, Sched<[WriteJal]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
Predicates = [HasStdExtC, IsRV64] in
def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb),
(ins GPRNoX0:$rd, simm6:$imm),
- "c.addiw", "$rd, $imm"> {
+ "c.addiw", "$rd, $imm">,
+ Sched<[WriteIALU32, ReadIALU32]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = imm{4-0};
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm),
- "c.li", "$rd, $imm"> {
+ "c.li", "$rd, $imm">,
+ Sched<[WriteIALU]> {
let Inst{6-2} = imm{4-0};
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb),
(ins SP:$rd, simm10_lsb0000nonzero:$imm),
- "c.addi16sp", "$rd, $imm"> {
+ "c.addi16sp", "$rd, $imm">,
+ Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rd = $rd_wb";
let Inst{12} = imm{9};
let Inst{11-7} = 2;
@@ -410,78 +425,93 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb),
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd),
(ins c_lui_imm:$imm),
- "c.lui", "$rd, $imm"> {
+ "c.lui", "$rd, $imm">,
+ Sched<[WriteIALU]> {
let Inst{6-2} = imm{4-0};
}
-def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>;
-def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>;
+def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>,
+ Sched<[WriteShift, ReadShift]>;
+def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>,
+ Sched<[WriteShift, ReadShift]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:$imm),
- "c.andi", "$rs1, $imm"> {
+ "c.andi", "$rs1, $imm">,
+ Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rs1 = $rs1_wb";
let Inst{12} = imm{5};
let Inst{11-10} = 0b10;
let Inst{6-2} = imm{4-0};
}
-def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>;
-def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>;
-def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>;
-def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>;
+def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
let Predicates = [HasStdExtC, IsRV64] in {
-def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>;
-def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>;
+def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset),
- "c.j", "$offset"> {
+ "c.j", "$offset">, Sched<[WriteJmp]> {
let isBranch = 1;
let isTerminator=1;
let isBarrier=1;
}
-def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>;
-def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>;
+def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>, Sched<[WriteJmp]>;
+def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>, Sched<[WriteJmp]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb),
(ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm),
- "c.slli" ,"$rd, $imm"> {
+ "c.slli" ,"$rd, $imm">,
+ Sched<[WriteShift, ReadShift]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = imm{4-0};
}
let Predicates = [HasStdExtC, HasStdExtD] in
-def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000> {
+def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>,
+ Sched<[WriteFLD64, ReadMemBase]> {
let Inst{6-5} = imm{4-3};
let Inst{4-2} = imm{8-6};
}
-def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00> {
+def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>,
+ Sched<[WriteLDW, ReadMemBase]> {
let Inst{6-4} = imm{4-2};
let Inst{3-2} = imm{7-6};
}
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00> {
+def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
+ Sched<[WriteFLD32, ReadMemBase]> {
let Inst{6-4} = imm{4-2};
let Inst{3-2} = imm{7-6};
}
let Predicates = [HasStdExtC, IsRV64] in
-def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000> {
+def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>,
+ Sched<[WriteLDD, ReadMemBase]> {
let Inst{6-5} = imm{4-3};
let Inst{4-2} = imm{8-6};
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1),
- "c.jr", "$rs1"> {
+ "c.jr", "$rs1">, Sched<[WriteJmpReg]> {
let isBranch = 1;
let isBarrier = 1;
let isTerminator = 1;
@@ -491,43 +521,49 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1),
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2),
- "c.mv", "$rs1, $rs2">;
+ "c.mv", "$rs1, $rs2">,
+ Sched<[WriteIALU, ReadIALU]>;
let rs1 = 0, rs2 = 0, hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
-def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">;
+def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">, Sched<[]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
isCall=1, Defs=[X1], rs2 = 0 in
def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1),
- "c.jalr", "$rs1">;
+ "c.jalr", "$rs1">, Sched<[WriteJalr, ReadJalr]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rs1_wb),
(ins GPRNoX0:$rs1, GPRNoX0:$rs2),
- "c.add", "$rs1, $rs2"> {
+ "c.add", "$rs1, $rs2">,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]> {
let Constraints = "$rs1 = $rs1_wb";
}
let Predicates = [HasStdExtC, HasStdExtD] in
-def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000> {
+def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>,
+ Sched<[WriteFST64, ReadStoreData, ReadMemBase]> {
let Inst{12-10} = imm{5-3};
let Inst{9-7} = imm{8-6};
}
-def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00> {
+def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
let Inst{12-9} = imm{5-2};
let Inst{8-7} = imm{7-6};
}
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00> {
+def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
+ Sched<[WriteFST32, ReadStoreData, ReadMemBase]> {
let Inst{12-9} = imm{5-2};
let Inst{8-7} = imm{7-6};
}
let Predicates = [HasStdExtC, IsRV64] in
-def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> {
+def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>,
+ Sched<[WriteSTD, ReadStoreData, ReadMemBase]> {
let Inst{12-10} = imm{5-3};
let Inst{9-7} = imm{8-6};
}
@@ -535,7 +571,8 @@ def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> {
// The all zeros pattern isn't a valid RISC-V instruction. It's used by GNU
// binutils as 16-bit instruction known to be unimplemented (i.e., trapping).
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
-def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> {
+def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>,
+ Sched<[]> {
let Inst{15-0} = 0;
}
@@ -551,7 +588,7 @@ let Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0,
let rd = 0 in
def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm),
- "c.nop", "$imm"> {
+ "c.nop", "$imm">, Sched<[WriteNop]> {
let Inst{6-2} = imm{4-0};
let DecoderMethod = "decodeRVCInstrSImm";
}
@@ -559,7 +596,8 @@ def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm),
// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6.
def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
(ins GPRX0:$rd, simm6nonzero:$imm),
- "c.addi", "$rd, $imm"> {
+ "c.addi", "$rd, $imm">,
+ Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = imm{4-0};
let isAsmParserOnly = 1;
@@ -567,14 +605,16 @@ def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
(ins GPRNoX0:$rd, immzero:$imm),
- "c.addi", "$rd, $imm"> {
+ "c.addi", "$rd, $imm">,
+ Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = 0;
let isAsmParserOnly = 1;
}
def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm),
- "c.li", "$rd, $imm"> {
+ "c.li", "$rd, $imm">,
+ Sched<[WriteIALU]> {
let Inst{6-2} = imm{4-0};
let Inst{11-7} = 0;
let DecoderMethod = "decodeRVCInstrRdSImm";
@@ -582,14 +622,15 @@ def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm),
def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd),
(ins c_lui_imm:$imm),
- "c.lui", "$rd, $imm"> {
+ "c.lui", "$rd, $imm">,
+ Sched<[WriteIALU]> {
let Inst{6-2} = imm{4-0};
let Inst{11-7} = 0;
let DecoderMethod = "decodeRVCInstrRdSImm";
}
def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2),
- "c.mv", "$rs1, $rs2">
+ "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]>
{
let Inst{11-7} = 0;
let DecoderMethod = "decodeRVCInstrRdRs2";
@@ -597,7 +638,8 @@ def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2),
def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb),
(ins GPRX0:$rs1, GPRNoX0:$rs2),
- "c.add", "$rs1, $rs2"> {
+ "c.add", "$rs1, $rs2">,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]> {
let Constraints = "$rs1 = $rs1_wb";
let Inst{11-7} = 0;
let DecoderMethod = "decodeRVCInstrRdRs1Rs2";
@@ -605,7 +647,8 @@ def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb),
def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb),
(ins GPRX0:$rd, uimmlog2xlennonzero:$imm),
- "c.slli" ,"$rd, $imm"> {
+ "c.slli" ,"$rd, $imm">,
+ Sched<[WriteShift, ReadShift]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = imm{4-0};
let Inst{11-7} = 0;
@@ -613,7 +656,8 @@ def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb),
}
def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd),
- "c.slli64" ,"$rd"> {
+ "c.slli64" ,"$rd">,
+ Sched<[WriteShift, ReadShift]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = 0;
let Inst{12} = 0;
@@ -621,7 +665,8 @@ def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd),
def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
(ins GPRC:$rd),
- "c.srli64", "$rd"> {
+ "c.srli64", "$rd">,
+ Sched<[WriteShift, ReadShift]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = 0;
let Inst{11-10} = 0;
@@ -630,7 +675,8 @@ def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
def C_SRAI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
(ins GPRC:$rd),
- "c.srai64", "$rd"> {
+ "c.srai64", "$rd">,
+ Sched<[WriteShift, ReadShift]> {
let Constraints = "$rd = $rd_wb";
let Inst{6-2} = 0;
let Inst{11-10} = 1;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index b5343e8a8309..4a036eb52bb8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -42,13 +42,15 @@ class FPFMADDynFrmAlias<FPFMAD_rrr_frm Inst, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUD_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR64:$rd),
- (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+ (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+ Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUD_rr_frm<bits<7> funct7, string opcodestr>
: RVInstRFrm<funct7, OPC_OP_FP, (outs FPR64:$rd),
(ins FPR64:$rs1, FPR64:$rs2, frmarg:$funct3), opcodestr,
- "$rd, $rs1, $rs2, $funct3">;
+ "$rd, $rs1, $rs2, $funct3">,
+ Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr>
: InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
@@ -57,7 +59,8 @@ class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPCmpD_rr<bits<3> funct3, string opcodestr>
: RVInstR<0b1010001, funct3, OPC_OP_FP, (outs GPR:$rd),
- (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+ (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+ Sched<[WriteFCmp64, ReadFCmp64, ReadFCmp64]>;
//===----------------------------------------------------------------------===//
// Instructions
@@ -68,7 +71,8 @@ let Predicates = [HasStdExtD] in {
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd),
(ins GPR:$rs1, simm12:$imm12),
- "fld", "$rd, ${imm12}(${rs1})">;
+ "fld", "$rd, ${imm12}(${rs1})">,
+ Sched<[WriteFLD64, ReadMemBase]>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
@@ -76,15 +80,20 @@ def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd),
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def FSD : RVInstS<0b011, OPC_STORE_FP, (outs),
(ins FPR64:$rs2, GPR:$rs1, simm12:$imm12),
- "fsd", "$rs2, ${imm12}(${rs1})">;
+ "fsd", "$rs2, ${imm12}(${rs1})">,
+ Sched<[WriteFST64, ReadStoreData, ReadMemBase]>;
-def FMADD_D : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">;
+def FMADD_D : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">,
+ Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>;
def : FPFMADDynFrmAlias<FMADD_D, "fmadd.d">;
-def FMSUB_D : FPFMAD_rrr_frm<OPC_MSUB, "fmsub.d">;
+def FMSUB_D : FPFMAD_rrr_frm<OPC_MSUB, "fmsub.d">,
+ Sched<[WriteFMulSub64, ReadFMulSub64, ReadFMulSub64, ReadFMulSub64]>;
def : FPFMADDynFrmAlias<FMSUB_D, "fmsub.d">;
-def FNMSUB_D : FPFMAD_rrr_frm<OPC_NMSUB, "fnmsub.d">;
+def FNMSUB_D : FPFMAD_rrr_frm<OPC_NMSUB, "fnmsub.d">,
+ Sched<[WriteFMulSub64, ReadFMulSub64, ReadFMulSub64, ReadFMulSub64]>;
def : FPFMADDynFrmAlias<FNMSUB_D, "fnmsub.d">;
-def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">;
+def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">,
+ Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>;
def : FPFMADDynFrmAlias<FNMADD_D, "fnmadd.d">;
def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">;
@@ -96,7 +105,8 @@ def : FPALUDDynFrmAlias<FMUL_D, "fmul.d">;
def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">;
def : FPALUDDynFrmAlias<FDIV_D, "fdiv.d">;
-def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d"> {
+def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d">,
+ Sched<[WriteFSqrt64, ReadFSqrt64]> {
let rs2 = 0b00000;
}
def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>;
@@ -107,12 +117,14 @@ def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">;
def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">;
def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">;
-def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d"> {
+def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d">,
+ Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]> {
let rs2 = 0b00001;
}
def : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>;
-def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s"> {
+def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s">,
+ Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]> {
let rs2 = 0b00000;
}
@@ -120,55 +132,66 @@ def FEQ_D : FPCmpD_rr<0b010, "feq.d">;
def FLT_D : FPCmpD_rr<0b001, "flt.d">;
def FLE_D : FPCmpD_rr<0b000, "fle.d">;
-def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d"> {
+def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d">,
+ Sched<[WriteFClass64, ReadFClass64]> {
let rs2 = 0b00000;
}
-def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d"> {
+def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d">,
+ Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> {
let rs2 = 0b00000;
}
def : FPUnaryOpDynFrmAlias<FCVT_W_D, "fcvt.w.d", GPR, FPR64>;
-def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d"> {
+def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d">,
+ Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> {
let rs2 = 0b00001;
}
def : FPUnaryOpDynFrmAlias<FCVT_WU_D, "fcvt.wu.d", GPR, FPR64>;
-def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w"> {
+def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w">,
+ Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> {
let rs2 = 0b00000;
}
-def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu"> {
+def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu">,
+ Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> {
let rs2 = 0b00001;
}
} // Predicates = [HasStdExtD]
let Predicates = [HasStdExtD, IsRV64] in {
-def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d"> {
+def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d">,
+ Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> {
let rs2 = 0b00010;
}
def : FPUnaryOpDynFrmAlias<FCVT_L_D, "fcvt.l.d", GPR, FPR64>;
-def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d"> {
+def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d">,
+ Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> {
let rs2 = 0b00011;
}
def : FPUnaryOpDynFrmAlias<FCVT_LU_D, "fcvt.lu.d", GPR, FPR64>;
-def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d"> {
+def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d">,
+ Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]> {
let rs2 = 0b00000;
}
-def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l"> {
+def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l">,
+ Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> {
let rs2 = 0b00010;
}
def : FPUnaryOpDynFrmAlias<FCVT_D_L, "fcvt.d.l", FPR64, GPR>;
-def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu"> {
+def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu">,
+ Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> {
let rs2 = 0b00011;
}
def : FPUnaryOpDynFrmAlias<FCVT_D_LU, "fcvt.d.lu", FPR64, GPR>;
-def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x"> {
+def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x">,
+ Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]> {
let rs2 = 0b00000;
}
} // Predicates = [HasStdExtD, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 3b73c865ea17..782c3f65af14 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -60,7 +60,8 @@ class FPFMASDynFrmAlias<FPFMAS_rrr_frm Inst, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUS_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR32:$rd),
- (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+ (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+ Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUS_rr_frm<bits<7> funct7, string opcodestr>
@@ -93,7 +94,8 @@ class FPUnaryOpDynFrmAlias<FPUnaryOp_r_frm Inst, string OpcodeStr,
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPCmpS_rr<bits<3> funct3, string opcodestr>
: RVInstR<0b1010000, funct3, OPC_OP_FP, (outs GPR:$rd),
- (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+ (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+ Sched<[WriteFCmp32, ReadFCmp32, ReadFCmp32]>;
//===----------------------------------------------------------------------===//
// Instructions
@@ -103,7 +105,8 @@ let Predicates = [HasStdExtF] in {
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd),
(ins GPR:$rs1, simm12:$imm12),
- "flw", "$rd, ${imm12}(${rs1})">;
+ "flw", "$rd, ${imm12}(${rs1})">,
+ Sched<[WriteFLD32, ReadMemBase]>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
@@ -111,27 +114,37 @@ def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd),
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def FSW : RVInstS<0b010, OPC_STORE_FP, (outs),
(ins FPR32:$rs2, GPR:$rs1, simm12:$imm12),
- "fsw", "$rs2, ${imm12}(${rs1})">;
+ "fsw", "$rs2, ${imm12}(${rs1})">,
+ Sched<[WriteFST32, ReadStoreData, ReadMemBase]>;
-def FMADD_S : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">;
+def FMADD_S : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">,
+ Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>;
def : FPFMASDynFrmAlias<FMADD_S, "fmadd.s">;
-def FMSUB_S : FPFMAS_rrr_frm<OPC_MSUB, "fmsub.s">;
+def FMSUB_S : FPFMAS_rrr_frm<OPC_MSUB, "fmsub.s">,
+ Sched<[WriteFMulSub32, ReadFMulSub32, ReadFMulSub32, ReadFMulSub32]>;
def : FPFMASDynFrmAlias<FMSUB_S, "fmsub.s">;
-def FNMSUB_S : FPFMAS_rrr_frm<OPC_NMSUB, "fnmsub.s">;
+def FNMSUB_S : FPFMAS_rrr_frm<OPC_NMSUB, "fnmsub.s">,
+ Sched<[WriteFMulSub32, ReadFMulSub32, ReadFMulSub32, ReadFMulSub32]>;
def : FPFMASDynFrmAlias<FNMSUB_S, "fnmsub.s">;
-def FNMADD_S : FPFMAS_rrr_frm<OPC_NMADD, "fnmadd.s">;
+def FNMADD_S : FPFMAS_rrr_frm<OPC_NMADD, "fnmadd.s">,
+ Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>;
def : FPFMASDynFrmAlias<FNMADD_S, "fnmadd.s">;
-def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">;
+def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">,
+ Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
def : FPALUSDynFrmAlias<FADD_S, "fadd.s">;
-def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">;
+def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">,
+ Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
def : FPALUSDynFrmAlias<FSUB_S, "fsub.s">;
-def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">;
+def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">,
+ Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>;
def : FPALUSDynFrmAlias<FMUL_S, "fmul.s">;
-def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">;
+def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">,
+ Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>;
def : FPALUSDynFrmAlias<FDIV_S, "fdiv.s">;
-def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s"> {
+def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s">,
+ Sched<[WriteFSqrt32, ReadFSqrt32]> {
let rs2 = 0b00000;
}
def : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>;
@@ -142,17 +155,20 @@ def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">;
def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">;
def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">;
-def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s"> {
+def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s">,
+ Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> {
let rs2 = 0b00000;
}
def : FPUnaryOpDynFrmAlias<FCVT_W_S, "fcvt.w.s", GPR, FPR32>;
-def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s"> {
+def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s">,
+ Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> {
let rs2 = 0b00001;
}
def : FPUnaryOpDynFrmAlias<FCVT_WU_S, "fcvt.wu.s", GPR, FPR32>;
-def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w"> {
+def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w">,
+ Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]> {
let rs2 = 0b00000;
}
@@ -160,42 +176,50 @@ def FEQ_S : FPCmpS_rr<0b010, "feq.s">;
def FLT_S : FPCmpS_rr<0b001, "flt.s">;
def FLE_S : FPCmpS_rr<0b000, "fle.s">;
-def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s"> {
+def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s">,
+ Sched<[WriteFClass32, ReadFClass32]> {
let rs2 = 0b00000;
}
-def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w"> {
+def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w">,
+ Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> {
let rs2 = 0b00000;
}
def : FPUnaryOpDynFrmAlias<FCVT_S_W, "fcvt.s.w", FPR32, GPR>;
-def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu"> {
+def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu">,
+ Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> {
let rs2 = 0b00001;
}
def : FPUnaryOpDynFrmAlias<FCVT_S_WU, "fcvt.s.wu", FPR32, GPR>;
-def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x"> {
+def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x">,
+ Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]> {
let rs2 = 0b00000;
}
} // Predicates = [HasStdExtF]
let Predicates = [HasStdExtF, IsRV64] in {
-def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s"> {
+def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s">,
+ Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> {
let rs2 = 0b00010;
}
def : FPUnaryOpDynFrmAlias<FCVT_L_S, "fcvt.l.s", GPR, FPR32>;
-def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s"> {
+def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s">,
+ Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> {
let rs2 = 0b00011;
}
def : FPUnaryOpDynFrmAlias<FCVT_LU_S, "fcvt.lu.s", GPR, FPR32>;
-def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l"> {
+def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l">,
+ Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> {
let rs2 = 0b00010;
}
def : FPUnaryOpDynFrmAlias<FCVT_S_L, "fcvt.s.l", FPR32, GPR>;
-def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu"> {
+def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu">,
+ Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> {
let rs2 = 0b00011;
}
def : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index e75151ba99c7..987534aadd79 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -24,22 +24,35 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtM] in {
-def MUL : ALU_rr<0b0000001, 0b000, "mul">;
-def MULH : ALU_rr<0b0000001, 0b001, "mulh">;
-def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">;
-def MULHU : ALU_rr<0b0000001, 0b011, "mulhu">;
-def DIV : ALU_rr<0b0000001, 0b100, "div">;
-def DIVU : ALU_rr<0b0000001, 0b101, "divu">;
-def REM : ALU_rr<0b0000001, 0b110, "rem">;
-def REMU : ALU_rr<0b0000001, 0b111, "remu">;
+def MUL : ALU_rr<0b0000001, 0b000, "mul">,
+ Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+def MULH : ALU_rr<0b0000001, 0b001, "mulh">,
+ Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">,
+ Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+def MULHU : ALU_rr<0b0000001, 0b011, "mulhu">,
+ Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+def DIV : ALU_rr<0b0000001, 0b100, "div">,
+ Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
+def DIVU : ALU_rr<0b0000001, 0b101, "divu">,
+ Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
+def REM : ALU_rr<0b0000001, 0b110, "rem">,
+ Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
+def REMU : ALU_rr<0b0000001, 0b111, "remu">,
+ Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
} // Predicates = [HasStdExtM]
let Predicates = [HasStdExtM, IsRV64] in {
-def MULW : ALUW_rr<0b0000001, 0b000, "mulw">;
-def DIVW : ALUW_rr<0b0000001, 0b100, "divw">;
-def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">;
-def REMW : ALUW_rr<0b0000001, 0b110, "remw">;
-def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">;
+def MULW : ALUW_rr<0b0000001, 0b000, "mulw">,
+ Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>;
+def DIVW : ALUW_rr<0b0000001, 0b100, "divw">,
+ Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
+def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">,
+ Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
+def REMW : ALUW_rr<0b0000001, 0b110, "remw">,
+ Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
+def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">,
+ Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
} // Predicates = [HasStdExtM, IsRV64]
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket32.td b/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
new file mode 100644
index 000000000000..8a91a70b61c7
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
@@ -0,0 +1,213 @@
+//==- RISCVSchedRocket32.td - Rocket Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedule.h for details.
+
+// Rocket machine model for scheduling and other instruction cost heuristics.
+def Rocket32Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order.
+ let IssueWidth = 1; // 1 micro-op is dispatched per cycle.
+ let LoadLatency = 3;
+ let MispredictPenalty = 3;
+ let CompleteModel = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
+// Rocket is in-order.
+
+let BufferSize = 0 in {
+def Rocket32UnitALU : ProcResource<1>; // Int ALU
+def Rocket32UnitIMul : ProcResource<1>; // Int Multiply
+def Rocket32UnitMem : ProcResource<1>; // Load/Store
+def Rocket32UnitB : ProcResource<1>; // Branch
+
+def Rocket32UnitFPALU : ProcResource<1>; // FP ALU
+}
+
+let BufferSize = 1 in {
+def Rocket32UnitIDiv : ProcResource<1>; // Int Division
+def Rocket32UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which both map the ProcResources and
+// set the latency.
+
+let SchedModel = Rocket32Model in {
+
+def : WriteRes<WriteJmp, [Rocket32UnitB]>;
+def : WriteRes<WriteJal, [Rocket32UnitB]>;
+def : WriteRes<WriteJalr, [Rocket32UnitB]>;
+def : WriteRes<WriteJmpReg, [Rocket32UnitB]>;
+
+def : WriteRes<WriteIALU, [Rocket32UnitALU]>;
+def : WriteRes<WriteShift, [Rocket32UnitALU]>;
+
+// Multiplies on Rocket differ by implementation; placeholder until
+// we can determine how to read from command line
+def : WriteRes<WriteIMul, [Rocket32UnitIMul]> { let Latency = 4; }
+
+// 32-bit divides have a worst-case latency of 34 cycles
+def : WriteRes<WriteIDiv, [Rocket32UnitIDiv]> {
+ let Latency = 34;
+ let ResourceCycles = [34];
+}
+
+// Memory
+def : WriteRes<WriteSTB, [Rocket32UnitMem]>;
+def : WriteRes<WriteSTH, [Rocket32UnitMem]>;
+def : WriteRes<WriteSTW, [Rocket32UnitMem]>;
+def : WriteRes<WriteFST32, [Rocket32UnitMem]>;
+def : WriteRes<WriteFST64, [Rocket32UnitMem]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteLDB, [Rocket32UnitMem]>;
+def : WriteRes<WriteLDH, [Rocket32UnitMem]>;
+def : WriteRes<WriteCSR, [Rocket32UnitALU]>;
+}
+
+let Latency = 2 in {
+def : WriteRes<WriteLDW, [Rocket32UnitMem]>;
+def : WriteRes<WriteFLD32, [Rocket32UnitMem]>;
+def : WriteRes<WriteFLD64, [Rocket32UnitMem]>;
+
+def : WriteRes<WriteAtomicW, [Rocket32UnitMem]>;
+def : WriteRes<WriteAtomicLDW, [Rocket32UnitMem]>;
+}
+
+def : WriteRes<WriteAtomicSTW, [Rocket32UnitMem]>;
+
+// Most FP single precision operations are 4 cycles
+def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]> { let Latency = 4; }
+
+// Most FP double precision operations are 6 cycles
+def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]> { let Latency = 6; }
+
+let Latency = 2 in {
+def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtI32ToF64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToI32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToI32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToF64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToF32, [Rocket32UnitFPALU]>;
+
+def : WriteRes<WriteFClass32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFClass64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCmp32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCmp64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMovF32ToI32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMovI32ToF32, [Rocket32UnitFPALU]>;
+}
+
+let Latency = 5 in {
+def : WriteRes<WriteFMul32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMulAdd32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMulSub32, [Rocket32UnitFPALU]>;
+}
+
+let Latency = 7 in {
+def : WriteRes<WriteFMul64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMulAdd64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMulSub64, [Rocket32UnitFPALU]>;
+}
+
+// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency
+let Latency = 20, ResourceCycles = [20] in {
+def : WriteRes<WriteFDiv32, [Rocket32UnitFPDivSqrt]>;
+def : WriteRes<WriteFDiv64, [Rocket32UnitFPDivSqrt]>;
+}
+
+// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency
+def : WriteRes<WriteFSqrt32, [Rocket32UnitFPDivSqrt]> { let Latency = 20;
+ let ResourceCycles = [20];}
+def : WriteRes<WriteFSqrt64, [Rocket32UnitFPDivSqrt]> { let Latency = 25;
+ let ResourceCycles = [25];}
+
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+let Unsupported = 1 in {
+def : WriteRes<WriteIALU32, []>;
+def : WriteRes<WriteShift32, []>;
+def : WriteRes<WriteIMul32, []>;
+def : WriteRes<WriteIDiv32, []>;
+def : WriteRes<WriteSTD, []>;
+def : WriteRes<WriteLDWU, []>;
+def : WriteRes<WriteLDD, []>;
+def : WriteRes<WriteAtomicD, []>;
+def : WriteRes<WriteAtomicLDD, []>;
+def : WriteRes<WriteAtomicSTD, []>;
+def : WriteRes<WriteFCvtI64ToF32, []>;
+def : WriteRes<WriteFCvtI64ToF64, []>;
+def : WriteRes<WriteFCvtF64ToI64, []>;
+def : WriteRes<WriteFCvtF32ToI64, []>;
+def : WriteRes<WriteFMovI64ToF64, []>;
+def : WriteRes<WriteFMovF64ToI64, []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types with cycles.
+// Dummy definitions for RocketCore.
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShift, 0>;
+def : ReadAdvance<ReadShift32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFALU32, 0>;
+def : ReadAdvance<ReadFALU64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMulAdd32, 0>;
+def : ReadAdvance<ReadFMulSub32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMulAdd64, 0>;
+def : ReadAdvance<ReadFMulSub64, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket64.td b/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
new file mode 100644
index 000000000000..79e79f90f2f0
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
@@ -0,0 +1,214 @@
+//==- RISCVSchedRocket64.td - Rocket Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedule.h for details.
+
+// Rocket machine model for scheduling and other instruction cost heuristics.
+def Rocket64Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order.
+ let IssueWidth = 1; // 1 micro-op is dispatched per cycle.
+ let LoadLatency = 3;
+ let MispredictPenalty = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
+// Rocket is in-order.
+
+let BufferSize = 0 in {
+def Rocket64UnitALU : ProcResource<1>; // Int ALU
+def Rocket64UnitIMul : ProcResource<1>; // Int Multiply
+def Rocket64UnitMem : ProcResource<1>; // Load/Store
+def Rocket64UnitB : ProcResource<1>; // Branch
+
+def Rocket64UnitFPALU : ProcResource<1>; // FP ALU
+}
+
+let BufferSize = 1 in {
+def Rocket64UnitIDiv : ProcResource<1>; // Int Division
+def Rocket64UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which both map the ProcResources and
+// set the latency.
+
+let SchedModel = Rocket64Model in {
+
+def : WriteRes<WriteJmp, [Rocket64UnitB]>;
+def : WriteRes<WriteJal, [Rocket64UnitB]>;
+def : WriteRes<WriteJalr, [Rocket64UnitB]>;
+def : WriteRes<WriteJmpReg, [Rocket64UnitB]>;
+
+def : WriteRes<WriteIALU32, [Rocket64UnitALU]>;
+def : WriteRes<WriteIALU, [Rocket64UnitALU]>;
+def : WriteRes<WriteShift32, [Rocket64UnitALU]>;
+def : WriteRes<WriteShift, [Rocket64UnitALU]>;
+
+let Latency = 4 in {
+def : WriteRes<WriteIMul, [Rocket64UnitIMul]>;
+def : WriteRes<WriteIMul32, [Rocket64UnitIMul]>;
+}
+
+// Integer divide varies based on operand magnitude and sign; worst-case latency is 34.
+def : WriteRes<WriteIDiv32, [Rocket64UnitIDiv]> {
+ let Latency = 34;
+ let ResourceCycles = [34];
+}
+def : WriteRes<WriteIDiv, [Rocket64UnitIDiv]> {
+ let Latency = 33;
+ let ResourceCycles = [33];
+}
+
+// Memory
+def : WriteRes<WriteSTB, [Rocket64UnitMem]>;
+def : WriteRes<WriteSTH, [Rocket64UnitMem]>;
+def : WriteRes<WriteSTW, [Rocket64UnitMem]>;
+def : WriteRes<WriteSTD, [Rocket64UnitMem]>;
+def : WriteRes<WriteFST32, [Rocket64UnitMem]>;
+def : WriteRes<WriteFST64, [Rocket64UnitMem]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteLDB, [Rocket64UnitMem]>;
+def : WriteRes<WriteLDH, [Rocket64UnitMem]>;
+def : WriteRes<WriteCSR, [Rocket64UnitALU]>;
+}
+
+let Latency = 2 in {
+def : WriteRes<WriteLDW, [Rocket64UnitMem]>;
+def : WriteRes<WriteLDWU, [Rocket64UnitMem]>;
+def : WriteRes<WriteLDD, [Rocket64UnitMem]>;
+def : WriteRes<WriteFLD32, [Rocket64UnitMem]>;
+def : WriteRes<WriteFLD64, [Rocket64UnitMem]>;
+
+def : WriteRes<WriteAtomicW, [Rocket64UnitMem]>;
+def : WriteRes<WriteAtomicD, [Rocket64UnitMem]>;
+
+def : WriteRes<WriteAtomicLDW, [Rocket64UnitMem]>;
+def : WriteRes<WriteAtomicLDD, [Rocket64UnitMem]>;
+}
+
+def : WriteRes<WriteAtomicSTW, [Rocket64UnitMem]>;
+def : WriteRes<WriteAtomicSTD, [Rocket64UnitMem]>;
+
+// Most FP single precision operations are 4 cycles
+def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]> { let Latency = 4; }
+
+// Most FP double precision operations are 6 cycles
+def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]> { let Latency = 6; }
+
+// Conversion, classify, compare and move instructions (all on the Rocket64 FP ALU)
+let Latency = 2 in {
+def : WriteRes<WriteFCvtI32ToF32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtI32ToF64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtI64ToF32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtI64ToF64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToI32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToI64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToI32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToI64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToF64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToF32, [Rocket64UnitFPALU]>;
+
+def : WriteRes<WriteFClass32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFClass64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCmp32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCmp64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMovF32ToI32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMovI32ToF32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMovF64ToI64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMovI64ToF64, [Rocket64UnitFPALU]>;
+}
+
+let Latency = 5 in {
+
+let Latency = 5 in {
+def : WriteRes<WriteFMul32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMulAdd32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMulSub32, [Rocket64UnitFPALU]>;
+}
+
+let Latency = 7 in {
+def : WriteRes<WriteFMul64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMulAdd64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMulSub64, [Rocket64UnitFPALU]>;
+}
+
+// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency
+let Latency = 20, ResourceCycles = [20] in {
+def : WriteRes<WriteFDiv32, [Rocket64UnitFPDivSqrt]>;
+def : WriteRes<WriteFDiv64, [Rocket64UnitFPDivSqrt]>;
+}
+
+// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency
+def : WriteRes<WriteFSqrt32, [Rocket64UnitFPDivSqrt]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def : WriteRes<WriteFSqrt64, [Rocket64UnitFPDivSqrt]> { let Latency = 25;
+ let ResourceCycles = [25]; }
+
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types with cycles.
+// Dummy definitions for RocketCore.
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShift, 0>;
+def : ReadAdvance<ReadShift32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFALU32, 0>;
+def : ReadAdvance<ReadFALU64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMulAdd32, 0>;
+def : ReadAdvance<ReadFMulSub32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMulAdd64, 0>;
+def : ReadAdvance<ReadFMulSub64, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
new file mode 100644
index 000000000000..9e2762a5d171
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -0,0 +1,138 @@
+//===-- RISCVSchedule.td - RISCV Scheduling Definitions -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// Define scheduler resources associated with def operands.
+def WriteIALU : SchedWrite; // 32 or 64-bit integer ALU operations
+def WriteIALU32 : SchedWrite; // 32-bit integer ALU operations on RV64I
+def WriteShift32 : SchedWrite; // 32-bit shift operations on RV64Ix
+def WriteShift : SchedWrite; // 32 or 64-bit shift operations
+def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide and remainder
+def WriteIDiv32 : SchedWrite; // 32-bit divide and remainder on RV64I
+def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply
+def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I
+def WriteJmp : SchedWrite; // Jump
+def WriteJal : SchedWrite; // Jump and link
+def WriteJalr : SchedWrite; // Jump and link register
+def WriteJmpReg : SchedWrite; // Jump register
+def WriteNop : SchedWrite;
+def WriteLDB : SchedWrite; // Load byte
+def WriteLDH : SchedWrite; // Load half-word
+def WriteLDW : SchedWrite; // Load word
+def WriteLDWU : SchedWrite; // Load word unsigned
+def WriteLDD : SchedWrite; // Load double-word
+def WriteCSR : SchedWrite; // CSR instructions
+def WriteSTB : SchedWrite; // Store byte
+def WriteSTH : SchedWrite; // Store half-word
+def WriteSTW : SchedWrite; // Store word
+def WriteSTD : SchedWrite; // Store double-word
+def WriteAtomicW : SchedWrite; // Atomic memory operation word size
+def WriteAtomicD : SchedWrite; // Atomic memory operation double word size
+def WriteAtomicLDW : SchedWrite; // Atomic load word
+def WriteAtomicLDD : SchedWrite; // Atomic load double word
+def WriteAtomicSTW : SchedWrite; // Atomic store word
+def WriteAtomicSTD : SchedWrite; // Atomic store double word
+def WriteFALU32 : SchedWrite; // FP 32-bit computation
+def WriteFALU64 : SchedWrite; // FP 64-bit computation
+def WriteFMul32 : SchedWrite; // 32-bit floating point multiply
+def WriteFMulAdd32 : SchedWrite; // 32-bit floating point multiply add
+def WriteFMulSub32 : SchedWrite; // 32-bit floating point multiply sub
+def WriteFMul64 : SchedWrite; // 64-bit floating point multiply
+def WriteFMulAdd64 : SchedWrite; // 64-bit floating point multiply add
+def WriteFMulSub64 : SchedWrite; // 64-bit floating point multiply sub
+def WriteFDiv32 : SchedWrite; // 32-bit floating point divide
+def WriteFDiv64 : SchedWrite; // 64-bit floating point divide
+def WriteFSqrt32 : SchedWrite; // 32-bit floating point sqrt
+def WriteFSqrt64 : SchedWrite; // 64-bit floating point sqrt
+
+// Integer to float conversions
+def WriteFCvtI32ToF32 : SchedWrite;
+def WriteFCvtI32ToF64 : SchedWrite;
+def WriteFCvtI64ToF32 : SchedWrite; // RV64I only
+def WriteFCvtI64ToF64 : SchedWrite; // RV64I only
+
+// Float to integer conversions
+def WriteFCvtF32ToI32 : SchedWrite;
+def WriteFCvtF32ToI64 : SchedWrite; // RV64I only
+def WriteFCvtF64ToI32 : SchedWrite;
+def WriteFCvtF64ToI64 : SchedWrite; // RV64I only
+
+// Float to float conversions
+def WriteFCvtF32ToF64 : SchedWrite;
+def WriteFCvtF64ToF32 : SchedWrite;
+
+def WriteFConv32 : SchedWrite; // 32-bit floating point convert
+def WriteFConv64 : SchedWrite; // 64-bit floating point convert
+def WriteFClass32 : SchedWrite; // 32-bit floating point classify
+def WriteFClass64 : SchedWrite; // 64-bit floating point classify
+def WriteFCmp32 : SchedWrite; // 32-bit floating point compare
+def WriteFCmp64 : SchedWrite; // 64-bit floating point compare
+
+def WriteFMovF32ToI32 : SchedWrite;
+def WriteFMovI32ToF32 : SchedWrite;
+def WriteFMovF64ToI64 : SchedWrite; // RV64I only
+def WriteFMovI64ToF64 : SchedWrite; // RV64I only
+
+def WriteFMov32 : SchedWrite; // 32-bit floating point move
+def WriteFMov64 : SchedWrite; // 64-bit floating point move
+def WriteFLD32 : SchedWrite; // Floating point sp load
+def WriteFLD64 : SchedWrite; // Floating point dp load
+def WriteFST32 : SchedWrite; // Floating point sp store
+def WriteFST64 : SchedWrite; // Floating point dp store
+
+/// Define scheduler resources associated with use operands.
+def ReadJmp : SchedRead;
+def ReadJalr : SchedRead;
+def ReadCSR : SchedRead;
+def ReadMemBase : SchedRead;
+def ReadStoreData : SchedRead;
+def ReadIALU : SchedRead;
+def ReadIALU32 : SchedRead; // 32-bit integer ALU operations on RV64I
+def ReadShift : SchedRead;
+def ReadShift32 : SchedRead; // 32-bit shift operations on RV64Ix
+def ReadIDiv : SchedRead;
+def ReadIDiv32 : SchedRead;
+def ReadIMul : SchedRead;
+def ReadIMul32 : SchedRead;
+def ReadAtomicWA : SchedRead;
+def ReadAtomicWD : SchedRead;
+def ReadAtomicDA : SchedRead;
+def ReadAtomicDD : SchedRead;
+def ReadAtomicLDW : SchedRead; // Atomic load word
+def ReadAtomicLDD : SchedRead; // Atomic load double word
+def ReadAtomicSTW : SchedRead; // Atomic store word
+def ReadAtomicSTD : SchedRead; // Atomic store double word
+def ReadFALU32 : SchedRead; // FP 32-bit computation
+def ReadFALU64 : SchedRead; // FP 64-bit computation
+def ReadFMul32 : SchedRead; // 32-bit floating point multiply
+def ReadFMulAdd32 : SchedRead; // 32-bit floating point multiply add
+def ReadFMulSub32 : SchedRead; // 32-bit floating point multiply sub
+def ReadFMul64 : SchedRead; // 64-bit floating point multiply
+def ReadFMulAdd64 : SchedRead; // 64-bit floating point multiply add
+def ReadFMulSub64 : SchedRead; // 64-bit floating point multiply sub
+def ReadFDiv32 : SchedRead; // 32-bit floating point divide
+def ReadFDiv64 : SchedRead; // 64-bit floating point divide
+def ReadFSqrt32 : SchedRead; // 32-bit floating point sqrt
+def ReadFSqrt64 : SchedRead; // 64-bit floating point sqrt
+def ReadFCmp32 : SchedRead;
+def ReadFCmp64 : SchedRead;
+def ReadFCvtF32ToI32 : SchedRead;
+def ReadFCvtF32ToI64 : SchedRead;
+def ReadFCvtF64ToI32 : SchedRead;
+def ReadFCvtF64ToI64 : SchedRead;
+def ReadFCvtI32ToF32 : SchedRead;
+def ReadFCvtI32ToF64 : SchedRead;
+def ReadFCvtI64ToF32 : SchedRead;
+def ReadFCvtI64ToF64 : SchedRead;
+def ReadFMovF32ToI32 : SchedRead;
+def ReadFMovI32ToF32 : SchedRead;
+def ReadFMovF64ToI64 : SchedRead;
+def ReadFMovI64ToF64 : SchedRead;
+def ReadFCvtF32ToF64 : SchedRead;
+def ReadFCvtF64ToF32 : SchedRead;
+def ReadFClass32 : SchedRead;
+def ReadFClass64 : SchedRead;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 2bb26988c7da..de71c01753de 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -15,6 +15,7 @@
#include "RISCVTargetObjectFile.h"
#include "RISCVTargetTransformInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
@@ -89,8 +90,17 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS,
- Options.MCOptions.getABIName(), *this);
+ auto ABIName = Options.MCOptions.getABIName();
+ if (const MDString *ModuleTargetABI = dyn_cast_or_null<MDString>(
+ F.getParent()->getModuleFlag("target-abi"))) {
+ auto TargetABI = RISCVABI::getTargetABI(ABIName);
+ if (TargetABI != RISCVABI::ABI_Unknown &&
+ ModuleTargetABI->getString() != ABIName) {
+ report_fatal_error("-target-abi option != target-abi module flag");
+ }
+ ABIName = ModuleTargetABI->getString();
+ }
+ I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS, ABIName, *this);
}
return I.get();
}
diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
index 432ebb294d46..43b1f8b80c5f 100644
--- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
@@ -12,16 +12,7 @@ namespace RISCVSysReg {
namespace RISCVABI {
ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
StringRef ABIName) {
- auto TargetABI = StringSwitch<ABI>(ABIName)
- .Case("ilp32", ABI_ILP32)
- .Case("ilp32f", ABI_ILP32F)
- .Case("ilp32d", ABI_ILP32D)
- .Case("ilp32e", ABI_ILP32E)
- .Case("lp64", ABI_LP64)
- .Case("lp64f", ABI_LP64F)
- .Case("lp64d", ABI_LP64D)
- .Default(ABI_Unknown);
-
+ auto TargetABI = getTargetABI(ABIName);
bool IsRV64 = TT.isArch64Bit();
bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
@@ -58,6 +49,19 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
return ABI_ILP32;
}
+ABI getTargetABI(StringRef ABIName) {
+ auto TargetABI = StringSwitch<ABI>(ABIName)
+ .Case("ilp32", ABI_ILP32)
+ .Case("ilp32f", ABI_ILP32F)
+ .Case("ilp32d", ABI_ILP32D)
+ .Case("ilp32e", ABI_ILP32E)
+ .Case("lp64", ABI_LP64)
+ .Case("lp64f", ABI_LP64F)
+ .Case("lp64d", ABI_LP64D)
+ .Default(ABI_Unknown);
+ return TargetABI;
+}
+
// To avoid the BP value clobbered by a function call, we need to choose a
// callee saved register to save the value. RV32E only has X8 and X9 as callee
// saved registers and X8 will be used as fp. So we choose X9 as bp.
diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index cf078df9609a..d36c528bba1e 100644
--- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -202,6 +202,8 @@ enum ABI {
ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
StringRef ABIName);
+ABI getTargetABI(StringRef ABIName);
+
// Returns the register used to hold the stack pointer after realignment.
Register getBPReg();
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c73905d3357a..ab00069497af 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -6859,8 +6859,6 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
for (MachineBasicBlock::iterator NextMIIt =
std::next(MachineBasicBlock::iterator(MI));
NextMIIt != MBB->end(); ++NextMIIt) {
- if (NextMIIt->definesRegister(SystemZ::CC))
- break;
if (isSelectPseudo(*NextMIIt)) {
assert(NextMIIt->getOperand(3).getImm() == CCValid &&
"Bad CCValid operands since CC was not redefined.");
@@ -6871,6 +6869,9 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
}
break;
}
+ if (NextMIIt->definesRegister(SystemZ::CC) ||
+ NextMIIt->usesCustomInsertionHook())
+ break;
bool User = false;
for (auto SelMI : Selects)
if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index d1f3acbd221e..3e905c18fa3b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -751,6 +751,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
if (!II)
continue;
+ Changed = true;
LandingPads.insert(II->getLandingPadInst());
IRB.SetInsertPoint(II);
@@ -791,6 +792,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
auto *RI = dyn_cast<ResumeInst>(&I);
if (!RI)
continue;
+ Changed = true;
// Split the input into legal values
Value *Input = RI->getValue();
@@ -815,6 +817,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
continue;
if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for)
continue;
+ Changed = true;
IRB.SetInsertPoint(CI);
CallInst *NewCI =
@@ -830,7 +833,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
if (auto *LPI = dyn_cast<LandingPadInst>(I))
LandingPads.insert(LPI);
}
- Changed = !LandingPads.empty();
+ Changed |= !LandingPads.empty();
// Handle all the landingpad for this function together, as multiple invokes
// may share a single lp
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index dffda5217675..2284cd7a70b8 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -85,13 +85,13 @@ cl::opt<unsigned> X86AlignBranchBoundary(
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
"x86-align-branch",
- cl::desc("Specify types of branches to align (plus separated list of "
- "types). The branches's types are combination of jcc, fused, "
- "jmp, call, ret, indirect."),
- cl::value_desc("jcc indicates conditional jumps, fused indicates fused "
- "conditional jumps, jmp indicates unconditional jumps, call "
- "indicates direct and indirect calls, ret indicates rets, "
- "indirect indicates indirect jumps."),
+ cl::desc(
+ "Specify types of branches to align. The branches's types are "
+ "combination of jcc, fused, jmp, call, ret, indirect. jcc indicates "
+ "conditional jumps, fused indicates fused conditional jumps, jmp "
+ "indicates unconditional jumps, call indicates direct and indirect "
+ "calls, ret indicates rets, indirect indicates indirect jumps."),
+ cl::value_desc("(plus separated list of types)"),
cl::location(X86AlignBranchKindLoc));
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0f152968ddfd..cbdd7135de43 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21056,7 +21056,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
// Divide by pow2.
SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i64));
+ DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 2fc9a2af01d7..7f49c6e861d4 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2002,6 +2002,25 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
break;
}
+ case X86::ENDBR32:
+ case X86::ENDBR64: {
+ // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
+ // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
+ // non-empty. If MI is the initial ENDBR, place the
+ // __patchable_function_entries label after ENDBR.
+ if (CurrentPatchableFunctionEntrySym &&
+ CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
+ MI == &MF->front().front()) {
+ MCInst Inst;
+ MCInstLowering.Lower(MI, Inst);
+ EmitAndCountInstruction(Inst);
+ CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
+ OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
+ return;
+ }
+ break;
+ }
+
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9c992830879a..7cfc29f7bf7a 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm-c/Transforms/PassManagerBuilder.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
@@ -187,8 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() {
}
/// Set of global extensions, automatically added as part of the standard set.
-static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
- PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+static ManagedStatic<
+ SmallVector<std::tuple<PassManagerBuilder::ExtensionPointTy,
+ PassManagerBuilder::ExtensionFn,
+ PassManagerBuilder::GlobalExtensionID>,
+ 8>>
+ GlobalExtensions;
+static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter;
/// Check if GlobalExtensions is constructed and not empty.
/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
@@ -197,10 +203,29 @@ static bool GlobalExtensionsNotEmpty() {
return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
}
-void PassManagerBuilder::addGlobalExtension(
- PassManagerBuilder::ExtensionPointTy Ty,
- PassManagerBuilder::ExtensionFn Fn) {
- GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn)));
+PassManagerBuilder::GlobalExtensionID
+PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty,
+ PassManagerBuilder::ExtensionFn Fn) {
+ auto ExtensionID = GlobalExtensionsCounter++;
+ GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID));
+ return ExtensionID;
+}
+
+void PassManagerBuilder::removeGlobalExtension(
+ PassManagerBuilder::GlobalExtensionID ExtensionID) {
+ // RegisterStandardPasses may try to call this function after GlobalExtensions
+ // has already been destroyed; doing so should not generate an error.
+ if (!GlobalExtensions.isConstructed())
+ return;
+
+ auto GlobalExtension =
+ llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) {
+ return std::get<2>(elem) == ExtensionID;
+ });
+ assert(GlobalExtension != GlobalExtensions->end() &&
+ "The extension ID to be removed should always be valid.");
+
+ GlobalExtensions->erase(GlobalExtension);
}
void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
@@ -211,8 +236,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
legacy::PassManagerBase &PM) const {
if (GlobalExtensionsNotEmpty()) {
for (auto &Ext : *GlobalExtensions) {
- if (Ext.first == ETy)
- Ext.second(*this, PM);
+ if (std::get<0>(Ext) == ETy)
+ std::get<1>(Ext)(*this, PM);
}
}
for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c288a7d8d403..74654f7ef51d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1336,6 +1336,11 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy))
return false;
+ // Make sure the type would actually change.
+ // This condition can be hit with chains of bitcasts.
+ if (LI->getType() == CmpLoadTy)
+ return false;
+
// Make sure we're not changing the size of the load/store.
const auto &DL = IC.getDataLayout();
if (DL.getTypeStoreSizeInBits(LI->getType()) !=
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 05a624fde86b..49645e9460cd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1013,6 +1013,12 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
Cmp.getPredicate() == CanonicalPred)
return nullptr;
+ // Bail out on unsimplified X-0 operand (due to some worklist management bug),
+ // as this may cause an infinite combine loop. Let the sub be folded first.
+ if (match(LHS, m_Sub(m_Value(), m_Zero())) ||
+ match(RHS, m_Sub(m_Value(), m_Zero())))
+ return nullptr;
+
// Create the canonical compare and plug it into the select.
Sel.setCondition(Builder.CreateICmp(CanonicalPred, LHS, RHS));
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 801c09a317a7..bf32996d96e2 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3568,7 +3568,8 @@ static bool combineInstructionsOverFunction(
ProfileSummaryInfo *PSI, bool ExpensiveCombines, unsigned MaxIterations,
LoopInfo *LI) {
auto &DL = F.getParent()->getDataLayout();
- ExpensiveCombines |= EnableExpensiveCombines;
+ if (EnableExpensiveCombines.getNumOccurrences())
+ ExpensiveCombines = EnableExpensiveCombines;
MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
/// Builder - This is an IRBuilder that automatically inserts new
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 80acab307578..f581142df8f7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3005,6 +3005,43 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ Constant *getPclmulMask(IRBuilder<> &IRB, unsigned Width, bool OddElements) {
+ SmallVector<Constant *, 8> Mask;
+ for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
+ Constant *C = ConstantInt::get(IRB.getInt32Ty(), X);
+ Mask.push_back(C);
+ Mask.push_back(C);
+ }
+ return ConstantVector::get(Mask);
+ }
+
+ // Instrument pclmul intrinsics.
+ // These intrinsics operate either on odd or on even elements of the input
+ // vectors, depending on the constant in the 3rd argument, ignoring the rest.
+ // Replace the unused elements with copies of the used ones, ex:
+ // (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
+ // or
+ // (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
+ // and then apply the usual shadow combining logic.
+ void handlePclmulIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Type *ShadowTy = getShadowTy(&I);
+ unsigned Width = I.getArgOperand(0)->getType()->getVectorNumElements();
+ assert(isa<ConstantInt>(I.getArgOperand(2)) &&
+ "pclmul 3rd operand must be a constant");
+ unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Value *Shuf0 =
+ IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy),
+ getPclmulMask(IRB, Width, Imm & 0x01));
+ Value *Shuf1 =
+ IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy),
+ getPclmulMask(IRB, Width, Imm & 0x10));
+ ShadowAndOriginCombiner SOC(this, IRB);
+ SOC.Add(Shuf0, getOrigin(&I, 0));
+ SOC.Add(Shuf1, getOrigin(&I, 1));
+ SOC.Done(&I);
+ }
+
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case Intrinsic::lifetime_start:
@@ -3238,6 +3275,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleBmiIntrinsic(I);
break;
+ case Intrinsic::x86_pclmulqdq:
+ case Intrinsic::x86_pclmulqdq_256:
+ case Intrinsic::x86_pclmulqdq_512:
+ handlePclmulIntrinsic(I);
+ break;
+
case Intrinsic::is_constant:
// The result of llvm.is.constant() is always defined.
setShadow(&I, getCleanShadow(&I));
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index c3ca43fcd492..e5edd305d3d5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -279,9 +279,10 @@ private:
/// Build a VPlan using VPRecipes according to the information gather by
/// Legal. This method is only used for the legacy inner loop vectorizer.
- VPlanPtr
- buildVPlanWithVPRecipes(VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
- SmallPtrSetImpl<Instruction *> &DeadInstructions);
+ VPlanPtr buildVPlanWithVPRecipes(
+ VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
+ SmallPtrSetImpl<Instruction *> &DeadInstructions,
+ const DenseMap<Instruction *, Instruction *> &SinkAfter);
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 684a3098e564..ebfd5fe8b762 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6716,7 +6716,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
assert(BI && "Unexpected terminator found");
- if (!BI->isConditional())
+ if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1))
return EdgeMaskCache[Edge] = SrcMask;
VPValue *EdgeMask = Plan->getVPValue(BI->getCondition());
@@ -7118,25 +7118,29 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
SmallPtrSet<Instruction *, 4> DeadInstructions;
collectTriviallyDeadInstructions(DeadInstructions);
+ DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
+ // Dead instructions do not need sinking. Remove them from SinkAfter.
+ for (Instruction *I : DeadInstructions)
+ SinkAfter.erase(I);
+
for (unsigned VF = MinVF; VF < MaxVF + 1;) {
VFRange SubRange = {VF, MaxVF + 1};
- VPlans.push_back(
- buildVPlanWithVPRecipes(SubRange, NeedDef, DeadInstructions));
+ VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef,
+ DeadInstructions, SinkAfter));
VF = SubRange.End;
}
}
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
- SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+ SmallPtrSetImpl<Instruction *> &DeadInstructions,
+ const DenseMap<Instruction *, Instruction *> &SinkAfter) {
// Hold a mapping from predicated instructions to their recipes, in order to
// fix their AlsoPack behavior if a user is determined to replicate and use a
// scalar instead of vector value.
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
- DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
-
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index bfe7e8f04303..0efd0df2c12b 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -197,6 +197,11 @@ namespace {
cl::desc("Generate software floating point library calls"),
cl::init(false));
+ cl::opt<bool> NoProcessSymbols(
+ "no-process-syms",
+ cl::desc("Do not resolve lli process symbols in JIT'd code"),
+ cl::init(false));
+
enum class DumpKind {
NoDump,
DumpFuncsToStdOut,
@@ -794,12 +799,16 @@ int runOrcLazyJIT(const char *ProgName) {
});
orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout());
- J->getMainJITDylib().addGenerator(
- ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
- J->getDataLayout().getGlobalPrefix(),
- [MainName = Mangle("main")](const orc::SymbolStringPtr &Name) {
- return Name != MainName;
- })));
+
+ // Unless they've been explicitly disabled, make process symbols available to
+ // JIT'd code.
+ if (!NoProcessSymbols)
+ J->getMainJITDylib().addGenerator(
+ ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
+ J->getDataLayout().getGlobalPrefix(),
+ [MainName = Mangle("main")](const orc::SymbolStringPtr &Name) {
+ return Name != MainName;
+ })));
orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));