author     Dimitry Andric <dim@FreeBSD.org>  2024-02-07 14:58:02 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2024-04-06 20:14:07 +0000
commit     b3edf4467982447620505a28fc82e38a414c07dc (patch)
tree       3bbe0ab71c23f020abf58f17f1f43e1dcdaca7c5 /contrib
parent     868ee3f2855615016ba87c14b9444c18a2e1ba30 (diff)
parent     4fdf604ba667503ae582304cebdd3df426778a6b (diff)
Merge llvm-project release/18.x llvmorg-18.1.0-rc2-0-gc6c86965d967
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvm-project release/18.x llvmorg-18.1.0-rc2-0-gc6c86965d967.

PR:             276104
MFC after:      1 month
Diffstat (limited to 'contrib')
-rw-r--r--contrib/llvm-project/clang/include/clang/AST/Type.h3
-rw-r--r--contrib/llvm-project/clang/include/clang/Basic/AttrDocs.td5
-rw-r--r--contrib/llvm-project/clang/include/clang/Basic/BuiltinsAMDGPU.def62
-rw-r--r--contrib/llvm-project/clang/include/clang/Basic/CodeGenOptions.def3
-rw-r--r--contrib/llvm-project/clang/include/clang/Basic/DiagnosticCommonKinds.td2
-rw-r--r--contrib/llvm-project/clang/include/clang/Basic/DiagnosticDocs.td9
-rw-r--r--contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td6
-rw-r--r--contrib/llvm-project/clang/include/clang/Basic/LangOptions.def1
-rw-r--r--contrib/llvm-project/clang/include/clang/Driver/Options.td13
-rwxr-xr-xcontrib/llvm-project/clang/include/clang/Format/Format.h43
-rw-r--r--contrib/llvm-project/clang/include/clang/Lex/Preprocessor.h11
-rw-r--r--contrib/llvm-project/clang/include/clang/Sema/Lookup.h3
-rw-r--r--contrib/llvm-project/clang/include/clang/Serialization/ASTReader.h6
-rw-r--r--contrib/llvm-project/clang/lib/AST/ASTContext.cpp20
-rw-r--r--contrib/llvm-project/clang/lib/AST/ExprConstant.cpp9
-rw-r--r--contrib/llvm-project/clang/lib/AST/ItaniumMangle.cpp25
-rw-r--r--contrib/llvm-project/clang/lib/AST/JSONNodeDumper.cpp3
-rw-r--r--contrib/llvm-project/clang/lib/AST/ODRHash.cpp49
-rw-r--r--contrib/llvm-project/clang/lib/AST/TemplateBase.cpp3
-rw-r--r--contrib/llvm-project/clang/lib/AST/TextNodeDumper.cpp3
-rw-r--r--contrib/llvm-project/clang/lib/AST/Type.cpp15
-rw-r--r--contrib/llvm-project/clang/lib/AST/TypePrinter.cpp2
-rw-r--r--contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp23
-rw-r--r--contrib/llvm-project/clang/lib/Basic/Targets/AArch64.h1
-rw-r--r--contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp1
-rw-r--r--contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp177
-rw-r--r--contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp17
-rw-r--r--contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp6
-rw-r--r--contrib/llvm-project/clang/lib/CodeGen/Targets/RISCV.cpp21
-rw-r--r--contrib/llvm-project/clang/lib/Driver/Driver.cpp6
-rw-r--r--contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp15
-rw-r--r--contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp52
-rw-r--r--contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h3
-rw-r--r--contrib/llvm-project/clang/lib/Format/Format.cpp23
-rw-r--r--contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp11
-rw-r--r--contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp2
-rw-r--r--contrib/llvm-project/clang/lib/Lex/PPExpressions.cpp4
-rw-r--r--contrib/llvm-project/clang/lib/Sema/SemaChecking.cpp55
-rwxr-xr-xcontrib/llvm-project/clang/lib/Sema/SemaConcept.cpp8
-rw-r--r--contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp3
-rw-r--r--contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp6
-rw-r--r--contrib/llvm-project/clang/lib/Sema/SemaOverload.cpp44
-rw-r--r--contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp44
-rw-r--r--contrib/llvm-project/clang/lib/Sema/SemaType.cpp21
-rw-r--r--contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp3
-rw-r--r--contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp38
-rw-r--r--contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp8
-rw-r--r--contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp13
-rw-r--r--contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp8
-rw-r--r--contrib/llvm-project/compiler-rt/lib/builtins/i386/chkstk.S2
-rw-r--r--contrib/llvm-project/compiler-rt/lib/builtins/x86_64/chkstk.S2
-rw-r--r--contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c2
-rw-r--r--contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp4
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/copy_move_common.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/equal.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/equal_range.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/fold.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/in_found_result.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/in_fun_result.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/in_in_out_result.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/in_in_result.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/in_out_out_result.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/includes.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/next_permutation.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/nth_element.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/partial_sort.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/partial_sort_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/partition.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/prev_permutation.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_any_all_none_of.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_count.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_equal.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_fill.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_find.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_for_each.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_generate.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_is_partitioned.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_merge.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_move.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_replace.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_rotate_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_sort.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_stable_sort.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/pstl_transform.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_all_of.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_any_of.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_binary_search.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_clamp.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_contains.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_backward.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_if.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_n.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_count.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_count_if.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_ends_with.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_equal.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_equal_range.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_fill.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_fill_n.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_find.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_find_end.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_find_first_of.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if_not.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each_n.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_generate.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_generate_n.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_includes.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_inplace_merge.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap_until.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_is_partitioned.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_is_permutation.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted_until.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_iterator_concept.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_lexicographical_compare.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_lower_bound.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_make_heap.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_max_element.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_merge.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_min_element.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_minmax_element.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_mismatch.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_move.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_move_backward.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_next_permutation.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_none_of.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_nth_element.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_partition.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_point.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_pop_heap.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_prev_permutation.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_push_heap.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_remove.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy_if.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_if.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_replace.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy_if.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_if.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_reverse_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_sample.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_search_n.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_set_difference.h6
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_set_intersection.h6
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_set_symmetric_difference.h6
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_set_union.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_shuffle.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_sort.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_sort_heap.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_partition.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_sort.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_starts_with.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_swap_ranges.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_transform.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_unique.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/ranges_unique_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/remove.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/remove_if.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/reverse.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/rotate.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/set_difference.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/set_intersection.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/set_symmetric_difference.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/set_union.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/shift_left.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/shift_right.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/sort.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/sort_heap.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/stable_partition.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/stable_sort.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/swap_ranges.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/unique.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/unique_copy.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/unwrap_iter.h2
-rw-r--r--contrib/llvm-project/libcxx/include/__algorithm/unwrap_range.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__config9
-rw-r--r--contrib/llvm-project/libcxx/include/__filesystem/directory_iterator.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__filesystem/path.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__filesystem/recursive_directory_iterator.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__format/format_arg.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__format/format_context.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__format/format_functions.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__format/formatter_output.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__format/write_escaped.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__functional/function.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__iterator/cpp17_iterator_concepts.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__iterator/iterator_with_data.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__memory/ranges_uninitialized_algorithms.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__memory/raw_storage_iterator.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__memory/shared_ptr.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__memory/uninitialized_algorithms.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__mutex/once_flag.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__numeric/pstl_reduce.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__numeric/pstl_transform_reduce.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__numeric/reduce.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__numeric/saturation_arithmetic.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__numeric/transform_reduce.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/counted.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/drop_while_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/elements_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/filter_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/iota_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/join_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/lazy_split_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/repeat_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/reverse_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/single_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/split_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/take_while_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__ranges/transform_view.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__thread/jthread.h5
-rw-r--r--contrib/llvm-project/libcxx/include/__thread/thread.h5
-rw-r--r--contrib/llvm-project/libcxx/include/array5
-rw-r--r--contrib/llvm-project/libcxx/include/condition_variable5
-rw-r--r--contrib/llvm-project/libcxx/include/experimental/iterator5
-rw-r--r--contrib/llvm-project/libcxx/include/future5
-rw-r--r--contrib/llvm-project/libcxx/include/ios5
-rw-r--r--contrib/llvm-project/libcxx/include/map5
-rw-r--r--contrib/llvm-project/libcxx/include/ostream5
-rw-r--r--contrib/llvm-project/libcxx/include/queue5
-rw-r--r--contrib/llvm-project/libcxx/include/set5
-rw-r--r--contrib/llvm-project/libcxx/include/stack5
-rw-r--r--contrib/llvm-project/libcxx/include/string7
-rw-r--r--contrib/llvm-project/libcxx/include/strstream5
-rw-r--r--contrib/llvm-project/libcxx/include/unordered_map5
-rw-r--r--contrib/llvm-project/libcxx/include/unordered_set5
-rw-r--r--contrib/llvm-project/libcxx/include/version14
-rw-r--r--contrib/llvm-project/libcxx/modules/std/atomic.inc2
-rw-r--r--contrib/llvm-project/libcxx/modules/std/iosfwd.inc2
-rw-r--r--contrib/llvm-project/libcxx/modules/std/string.inc4
-rw-r--r--contrib/llvm-project/libcxx/modules/std/string_view.inc2
-rw-r--r--contrib/llvm-project/lld/ELF/Arch/RISCV.cpp199
-rw-r--r--contrib/llvm-project/lld/ELF/InputFiles.cpp7
-rw-r--r--contrib/llvm-project/lld/ELF/InputSection.cpp13
-rw-r--r--contrib/llvm-project/lld/ELF/Relocations.cpp50
-rw-r--r--contrib/llvm-project/lld/ELF/Writer.cpp12
-rw-r--r--contrib/llvm-project/lld/docs/ReleaseNotes.rst62
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td119
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/Lint.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/Loads.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp240
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp229
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp330
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp213
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp142
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp111
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td500
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp210
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.h28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.td93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp12
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp3
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp34
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h4
-rw-r--r--contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp58
345 files changed, 4521 insertions, 1278 deletions
diff --git a/contrib/llvm-project/clang/include/clang/AST/Type.h b/contrib/llvm-project/clang/include/clang/AST/Type.h
index ea425791fc97..6384cf9420b8 100644
--- a/contrib/llvm-project/clang/include/clang/AST/Type.h
+++ b/contrib/llvm-project/clang/include/clang/AST/Type.h
@@ -3495,6 +3495,9 @@ enum class VectorKind {
/// is RISC-V RVV fixed-length data vector
RVVFixedLengthData,
+
+ /// is RISC-V RVV fixed-length mask vector
+ RVVFixedLengthMask,
};
/// Represents a GCC generic vector type. This type is created using
diff --git a/contrib/llvm-project/clang/include/clang/Basic/AttrDocs.td b/contrib/llvm-project/clang/include/clang/Basic/AttrDocs.td
index 7e633f8e2635..e02a1201e2ad 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/AttrDocs.td
+++ b/contrib/llvm-project/clang/include/clang/Basic/AttrDocs.td
@@ -2424,7 +2424,10 @@ only be a power of 2 between 64 and 65536.
For types where LMUL!=1, ``__riscv_v_fixed_vlen`` needs to be scaled by the LMUL
of the type before passing to the attribute.
-``vbool*_t`` types are not supported at this time.
+For ``vbool*_t`` types, ``__riscv_v_fixed_vlen`` needs to be divided by the
+number from the type name. For example, ``vbool8_t`` needs to use
+``__riscv_v_fixed_vlen`` / 8. If the resulting value is not a multiple of 8,
+the type is not supported for that value of ``__riscv_v_fixed_vlen``.
}];
}
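For illustration only (not part of the patch), a minimal sketch of the rule described in the AttrDocs.td text above, assuming a hypothetical build where __riscv_v_fixed_vlen is 256 (e.g. -mrvv-vector-bits=256), so a fixed-length vbool8_t is 256 / 8 = 32 bits:

    // Sketch: vbool8_t divides the fixed VLEN by the number in the type name.
    // 256 / 8 = 32 is a multiple of 8, so this combination is accepted.
    #include <riscv_vector.h>
    #if defined(__riscv_v_fixed_vlen)
    typedef vbool8_t fixed_bool8_t
        __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8)));
    #endif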
diff --git a/contrib/llvm-project/clang/include/clang/Basic/BuiltinsAMDGPU.def b/contrib/llvm-project/clang/include/clang/Basic/BuiltinsAMDGPU.def
index d208342d9c51..74dfd1d214e8 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/contrib/llvm-project/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -436,5 +436,67 @@ TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_i32, "ii*1", "nc", "gfx12-insts,w
TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_v4i16, "V4sV4s*1", "nc", "gfx12-insts,wavefrontsize64")
TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_v4f16, "V4hV4h*1", "nc", "gfx12-insts,wavefrontsize64")
+//===----------------------------------------------------------------------===//
+// WMMA builtins.
+// Postfix w32 indicates the builtin requires wavefront size of 32.
+// Postfix w64 indicates the builtin requires wavefront size of 64.
+//
+// Some of these are very similar to their GFX11 counterparts, but they don't
+// require replication of the A,B matrices, so they use fewer vector elements.
+// Therefore, we add an "_gfx12" suffix to distinguish them from the existing
+// builtins.
+//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12, "V8fV8hV8hV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12, "V8fV8sV8sV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12, "V8hV8hV8hV8h", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12, "V8sV8sV8sV8s", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12, "V8iIbV2iIbV2iV8iIb", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12, "V8iIbiIbiV8iIb", "nc", "gfx12-insts,wavefrontsize32")
+// These are gfx12-only, but for consistency with the other WMMA variants we're
+// keeping the "_gfx12" suffix.
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12, "V8iIbV2iIbV2iV8iIb", "nc", "gfx12-insts,wavefrontsize32")
+
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12, "V4fV4hV4hV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12, "V4fV4sV4sV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12, "V4hV4hV4hV4h", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12, "V4sV4sV4sV4s", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
+// These are gfx12-only, but for consistency with the other WMMA variants we're
+// keeping the "_gfx12" suffix.
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
+
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, "V8fV8hV16hV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, "V8fV8sV16sV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, "V8hV8hV16hV8hs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, "V8sV8sV16sV8ss", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, "V8iIbV2iIbV4iV8isIb", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, "V8iIbiIbV2iV8isIb", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, "V8iIbV2iIbV4iV8isIb", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32")
+
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, "V4fV4hV8hV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, "V4fV4sV8sV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, "V4hV4hV8hV4hs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, "V4sV4sV8sV4ss", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, "V4iIbiIbV2iV4isIb", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, "V4iIbiIbiV4isIb", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, "V4iIbiIbV2iV4isIb", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
+
#undef BUILTIN
#undef TARGET_BUILTIN
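For illustration only (not part of the patch), a rough usage sketch of one of the new gfx12 WMMA builtins, assuming the prototype string "V8fV8hV8hV8f" maps to an 8 x float result with 8 x half, 8 x half, and 8 x float operands in wave32 mode:

    // Sketch, assuming compilation for a gfx12 target in wave32 mode.
    typedef _Float16 half8  __attribute__((ext_vector_type(8)));
    typedef float    float8 __attribute__((ext_vector_type(8)));

    float8 wmma_f32_f16(half8 a, half8 b, float8 acc) {
      // 16x16x16 matrix multiply-accumulate; the A/B matrices are not
      // replicated on gfx12, so each lane carries fewer vector elements
      // than in the gfx11 variants.
      return __builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12(a, b, acc);
    }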
diff --git a/contrib/llvm-project/clang/include/clang/Basic/CodeGenOptions.def b/contrib/llvm-project/clang/include/clang/Basic/CodeGenOptions.def
index 2f2e45d5cf63..7c0bfe328496 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/CodeGenOptions.def
+++ b/contrib/llvm-project/clang/include/clang/Basic/CodeGenOptions.def
@@ -369,6 +369,9 @@ ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 3, llvm::driver::VectorLibr
/// The default TLS model to use.
ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel)
+/// Whether to enable TLSDESC. AArch64 enables TLSDESC regardless of this value.
+CODEGENOPT(EnableTLSDESC, 1, 0)
+
/// Bit size of immediate TLS offsets (0 == use the default).
VALUE_CODEGENOPT(TLSSize, 8, 0)
diff --git a/contrib/llvm-project/clang/include/clang/Basic/DiagnosticCommonKinds.td b/contrib/llvm-project/clang/include/clang/Basic/DiagnosticCommonKinds.td
index b1bada65cb6b..08bb1d81ba29 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/contrib/llvm-project/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -73,7 +73,7 @@ def warn_pragma_debug_unexpected_argument : Warning<
def warn_fp_nan_inf_when_disabled : Warning<
"use of %select{infinity|NaN}0%select{| via a macro}1 is undefined behavior "
"due to the currently enabled floating-point options">,
- InGroup<DiagGroup<"nan-infinity-disabled">>;
+ InGroup<DiagGroup<"nan-infinity-disabled", [], NanInfDisabledDocs>>;
}
// Parse && Sema
diff --git a/contrib/llvm-project/clang/include/clang/Basic/DiagnosticDocs.td b/contrib/llvm-project/clang/include/clang/Basic/DiagnosticDocs.td
index e9862422b499..8c024b5cad74 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/DiagnosticDocs.td
+++ b/contrib/llvm-project/clang/include/clang/Basic/DiagnosticDocs.td
@@ -87,3 +87,12 @@ program by treating all string literals as having type ``const char *``
instead of ``char *``. This can cause unexpected behaviors with type-sensitive
constructs like ``_Generic``.
}];
+
+defvar NanInfDisabledDocs = [{
+This warning is enabled when source code using the macros ``INFINITY`` or ``NAN``
+is compiled with floating-point options preventing these two values. This can
+lead to undefined behavior. Check the order of command line arguments that modify
+this behavior, such as ``-ffast-math``, ``-fhonor-infinities``, and
+``-fhonor-nans`` (etc), as well as ``#pragma`` directives if this diagnostic is
+generated unexpectedly.
+}];
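For illustration only (not part of the patch), a minimal case that would trigger the documented warning, assuming a hypothetical translation unit compiled with -ffast-math (which implies -fno-honor-infinities and -fno-honor-nans):

    // Sketch: under -ffast-math both uses below warn via
    // -Wnan-infinity-disabled, because the values are undefined when
    // infinities/NaNs are not honored.
    #include <math.h>
    float f_inf = INFINITY;  // use of infinity via a macro is undefined ...
    float f_nan = NAN;       // use of NaN via a macro is undefined ...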
diff --git a/contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c5f1b67c4aa1..07ba4ecf7e12 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/contrib/llvm-project/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3711,6 +3711,12 @@ def err_sme_za_call_no_za_state : Error<
"call to a shared ZA function requires the caller to have ZA state">;
def err_sme_zt0_call_no_zt0_state : Error<
"call to a shared ZT0 function requires the caller to have ZT0 state">;
+def err_sme_unimplemented_za_save_restore : Error<
+ "call to a function that shares state other than 'za' from a "
+ "function that has live 'za' state requires a spill/fill of ZA, which is not yet "
+ "implemented">;
+def note_sme_use_preserves_za : Note<
+ "add '__arm_preserves(\"za\")' to the callee if it preserves ZA">;
def err_sme_definition_using_sm_in_non_sme_target : Error<
"function executed in streaming-SVE mode requires 'sme'">;
def err_sme_definition_using_za_in_non_sme_target : Error<
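For illustration only (not part of the patch), a hypothetical sketch of the fix suggested by the new note: marking the callee as preserving ZA avoids the not-yet-implemented ZA spill/fill around the call.

    // Sketch, assuming an SME2-enabled target (e.g. -march=armv9-a+sme2).
    // Without __arm_preserves("za"), calling this from a function with live
    // ZA state would require a ZA spill/fill, which the new diagnostic
    // reports as not yet implemented.
    void uses_zt0(void) __arm_inout("zt0") __arm_preserves("za");

    void caller(void) __arm_inout("za") {  // caller has live ZA state
      uses_zt0();
    }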
diff --git a/contrib/llvm-project/clang/include/clang/Basic/LangOptions.def b/contrib/llvm-project/clang/include/clang/Basic/LangOptions.def
index 8fc75e1cca03..4942dcaa086e 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/LangOptions.def
+++ b/contrib/llvm-project/clang/include/clang/Basic/LangOptions.def
@@ -174,6 +174,7 @@ LANGOPT(MathErrno , 1, 1, "errno in math functions")
BENIGN_LANGOPT(HeinousExtensions , 1, 0, "extensions that we really don't like and may be ripped out at any time")
LANGOPT(Modules , 1, 0, "modules semantics")
COMPATIBLE_LANGOPT(CPlusPlusModules, 1, 0, "C++ modules syntax")
+LANGOPT(SkipODRCheckInGMF, 1, 0, "Skip ODR checks for decls in the global module fragment")
LANGOPT(BuiltinHeadersInSystemModules, 1, 0, "builtin headers belong to system modules, and _Builtin_ modules are ignored for cstdlib headers")
BENIGN_ENUM_LANGOPT(CompilingModule, CompilingModuleKind, 3, CMK_None,
"compiling a module interface")
diff --git a/contrib/llvm-project/clang/include/clang/Driver/Options.td b/contrib/llvm-project/clang/include/clang/Driver/Options.td
index 7f4fa33748fa..e8d03fc26902 100644
--- a/contrib/llvm-project/clang/include/clang/Driver/Options.td
+++ b/contrib/llvm-project/clang/include/clang/Driver/Options.td
@@ -2985,6 +2985,14 @@ def fmodule_output : Flag<["-"], "fmodule-output">, Flags<[NoXarchOption]>,
Visibility<[ClangOption, CC1Option]>,
HelpText<"Save intermediate module file results when compiling a standard C++ module unit.">;
+defm skip_odr_check_in_gmf : BoolOption<"f", "skip-odr-check-in-gmf",
+ LangOpts<"SkipODRCheckInGMF">, DefaultFalse,
+ PosFlag<SetTrue, [], [CC1Option],
+ "Skip ODR checks for decls in the global module fragment.">,
+ NegFlag<SetFalse, [], [CC1Option],
+ "Perform ODR checks for decls in the global module fragment.">>,
+ Group<f_Group>;
+
def fmodules_prune_interval : Joined<["-"], "fmodules-prune-interval=">, Group<i_Group>,
Visibility<[ClangOption, CC1Option]>, MetaVarName<"<seconds>">,
HelpText<"Specify the interval (in seconds) between attempts to prune the module cache">,
@@ -4419,6 +4427,8 @@ def mtls_size_EQ : Joined<["-"], "mtls-size=">, Group<m_Group>,
HelpText<"Specify bit size of immediate TLS offsets (AArch64 ELF only): "
"12 (for 4KB) | 24 (for 16MB, default) | 32 (for 4GB) | 48 (for 256TB, needs -mcmodel=large)">,
MarshallingInfoInt<CodeGenOpts<"TLSSize">>;
+def mtls_dialect_EQ : Joined<["-"], "mtls-dialect=">, Group<m_Group>,
+ Flags<[TargetSpecific]>, HelpText<"Which thread-local storage dialect to use for dynamic accesses of TLS variables">;
def mimplicit_it_EQ : Joined<["-"], "mimplicit-it=">, Group<m_Group>;
def mdefault_build_attributes : Joined<["-"], "mdefault-build-attributes">, Group<m_Group>;
def mno_default_build_attributes : Joined<["-"], "mno-default-build-attributes">, Group<m_Group>;
@@ -7066,6 +7076,9 @@ def fexperimental_assignment_tracking_EQ : Joined<["-"], "fexperimental-assignme
Values<"disabled,enabled,forced">, NormalizedValues<["Disabled","Enabled","Forced"]>,
MarshallingInfoEnum<CodeGenOpts<"AssignmentTrackingMode">, "Enabled">;
+def enable_tlsdesc : Flag<["-"], "enable-tlsdesc">,
+ MarshallingInfoFlag<CodeGenOpts<"EnableTLSDESC">>;
+
} // let Visibility = [CC1Option]
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/clang/include/clang/Format/Format.h b/contrib/llvm-project/clang/include/clang/Format/Format.h
index bc9eecd42f9e..efcb4e1d87ea 100755
--- a/contrib/llvm-project/clang/include/clang/Format/Format.h
+++ b/contrib/llvm-project/clang/include/clang/Format/Format.h
@@ -4157,14 +4157,9 @@ struct FormatStyle {
/// Different ways to put a space before opening parentheses.
enum SpaceBeforeParensStyle : int8_t {
- /// Never put a space before opening parentheses.
- /// \code
- /// void f() {
- /// if(true) {
- /// f();
- /// }
- /// }
- /// \endcode
+ /// This is **deprecated** and replaced by ``Custom`` below, with all
+ /// ``SpaceBeforeParensOptions`` but ``AfterPlacementOperator`` set to
+ /// ``false``.
SBPO_Never,
/// Put a space before opening parentheses only after control statement
/// keywords (``for/if/while...``).
@@ -4273,28 +4268,14 @@ struct FormatStyle {
/// object.operator++ (10); object.operator++(10);
/// \endcode
bool AfterOverloadedOperator;
- /// Styles for adding spacing between ``new/delete`` operators and opening
- /// parentheses.
- enum AfterPlacementOperatorStyle : int8_t {
- /// Remove space after ``new/delete`` operators and before ``(``.
- /// \code
- /// new(buf) T;
- /// delete(buf) T;
- /// \endcode
- APO_Never,
- /// Always add space after ``new/delete`` operators and before ``(``.
- /// \code
- /// new (buf) T;
- /// delete (buf) T;
- /// \endcode
- APO_Always,
- /// Leave placement ``new/delete`` expressions as they are.
- APO_Leave,
- };
- /// Defines in which cases to put a space between ``new/delete`` operators
- /// and opening parentheses.
- /// \version 18
- AfterPlacementOperatorStyle AfterPlacementOperator;
+ /// If ``true``, put a space between operator ``new``/``delete`` and opening
+ /// parenthesis.
+ /// \code
+ /// true: false:
+ /// new (buf) T; vs. new(buf) T;
+ /// delete (buf) T; delete(buf) T;
+ /// \endcode
+ bool AfterPlacementOperator;
/// If ``true``, put space between requires keyword in a requires clause and
/// opening parentheses, if there is one.
/// \code
@@ -4327,7 +4308,7 @@ struct FormatStyle {
: AfterControlStatements(false), AfterForeachMacros(false),
AfterFunctionDeclarationName(false),
AfterFunctionDefinitionName(false), AfterIfMacros(false),
- AfterOverloadedOperator(false), AfterPlacementOperator(APO_Leave),
+ AfterOverloadedOperator(false), AfterPlacementOperator(true),
AfterRequiresInClause(false), AfterRequiresInExpression(false),
BeforeNonEmptyParentheses(false) {}
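For illustration only (not part of the patch), a hypothetical sketch of configuring the option in its new boolean form through the libFormat API:

    // Sketch using the FormatStyle fields shown above; getLLVMStyle() is the
    // usual libFormat entry point for a baseline style.
    clang::format::FormatStyle Style = clang::format::getLLVMStyle();
    Style.SpaceBeforeParens = clang::format::FormatStyle::SBPO_Custom;
    Style.SpaceBeforeParensOptions.AfterPlacementOperator = true;  // "new (buf) T;"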
diff --git a/contrib/llvm-project/clang/include/clang/Lex/Preprocessor.h b/contrib/llvm-project/clang/include/clang/Lex/Preprocessor.h
index 2d9c53cdf5bd..b0a8ec0fec5e 100644
--- a/contrib/llvm-project/clang/include/clang/Lex/Preprocessor.h
+++ b/contrib/llvm-project/clang/include/clang/Lex/Preprocessor.h
@@ -2828,7 +2828,8 @@ public:
return AnnotationInfos.find(II)->second;
}
- void emitMacroExpansionWarnings(const Token &Identifier) const {
+ void emitMacroExpansionWarnings(const Token &Identifier,
+ bool IsIfnDef = false) const {
IdentifierInfo *Info = Identifier.getIdentifierInfo();
if (Info->isDeprecatedMacro())
emitMacroDeprecationWarning(Identifier);
@@ -2837,12 +2838,12 @@ public:
!SourceMgr.isInMainFile(Identifier.getLocation()))
emitRestrictExpansionWarning(Identifier);
- if (Info->getName() == "INFINITY")
- if (getLangOpts().NoHonorInfs)
+ if (!IsIfnDef) {
+ if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
emitRestrictInfNaNWarning(Identifier, 0);
- if (Info->getName() == "NAN")
- if (getLangOpts().NoHonorNaNs)
+ if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
emitRestrictInfNaNWarning(Identifier, 1);
+ }
}
static void processPathForFileMacro(SmallVectorImpl<char> &Path,
diff --git a/contrib/llvm-project/clang/include/clang/Sema/Lookup.h b/contrib/llvm-project/clang/include/clang/Sema/Lookup.h
index 9c93bf1e6fb4..2f2f2607a937 100644
--- a/contrib/llvm-project/clang/include/clang/Sema/Lookup.h
+++ b/contrib/llvm-project/clang/include/clang/Sema/Lookup.h
@@ -754,7 +754,8 @@ public:
private:
void diagnoseAccess() {
- if (isClassLookup() && getSema().getLangOpts().AccessControl)
+ if (!isAmbiguous() && isClassLookup() &&
+ getSema().getLangOpts().AccessControl)
getSema().CheckLookupAccess(*this);
}
diff --git a/contrib/llvm-project/clang/include/clang/Serialization/ASTReader.h b/contrib/llvm-project/clang/include/clang/Serialization/ASTReader.h
index dd1451bbf2d2..cd28226c295b 100644
--- a/contrib/llvm-project/clang/include/clang/Serialization/ASTReader.h
+++ b/contrib/llvm-project/clang/include/clang/Serialization/ASTReader.h
@@ -2452,6 +2452,12 @@ private:
uint32_t CurrentBitsIndex = ~0;
};
+inline bool shouldSkipCheckingODR(const Decl *D) {
+ return D->getOwningModule() &&
+ D->getASTContext().getLangOpts().SkipODRCheckInGMF &&
+ D->getOwningModule()->isExplicitGlobalModule();
+}
+
} // namespace clang
#endif // LLVM_CLANG_SERIALIZATION_ASTREADER_H
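For illustration only (not part of the patch), a hypothetical module unit showing which declarations the new -fskip-odr-check-in-gmf flag affects: only those owned by the explicit global module fragment, as tested by shouldSkipCheckingODR above.

    // Sketch of a C++20 module unit; declarations pulled in between
    // 'module;' and 'export module' live in the global module fragment,
    // so ODR checks on them may be skipped when the flag is enabled.
    module;
    #include <cstdint>         // global module fragment: candidates for skipping
    export module demo;        // from here on, ODR checks still apply
    export std::uint64_t id() { return 42; }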
diff --git a/contrib/llvm-project/clang/lib/AST/ASTContext.cpp b/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
index 9a0ede201059..cc5de9a6295e 100644
--- a/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
@@ -1949,7 +1949,8 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
else if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
// Adjust the alignment for fixed-length SVE predicates.
Align = 16;
- else if (VT->getVectorKind() == VectorKind::RVVFixedLengthData)
+ else if (VT->getVectorKind() == VectorKind::RVVFixedLengthData ||
+ VT->getVectorKind() == VectorKind::RVVFixedLengthMask)
// Adjust the alignment for fixed-length RVV vectors.
Align = std::min<unsigned>(64, Width);
break;
@@ -9420,7 +9421,9 @@ bool ASTContext::areCompatibleVectorTypes(QualType FirstVec,
Second->getVectorKind() != VectorKind::SveFixedLengthData &&
Second->getVectorKind() != VectorKind::SveFixedLengthPredicate &&
First->getVectorKind() != VectorKind::RVVFixedLengthData &&
- Second->getVectorKind() != VectorKind::RVVFixedLengthData)
+ Second->getVectorKind() != VectorKind::RVVFixedLengthData &&
+ First->getVectorKind() != VectorKind::RVVFixedLengthMask &&
+ Second->getVectorKind() != VectorKind::RVVFixedLengthMask)
return true;
return false;
@@ -9526,8 +9529,11 @@ static uint64_t getRVVTypeSize(ASTContext &Context, const BuiltinType *Ty) {
ASTContext::BuiltinVectorTypeInfo Info = Context.getBuiltinVectorTypeInfo(Ty);
- uint64_t EltSize = Context.getTypeSize(Info.ElementType);
- uint64_t MinElts = Info.EC.getKnownMinValue();
+ unsigned EltSize = Context.getTypeSize(Info.ElementType);
+ if (Info.ElementType == Context.BoolTy)
+ EltSize = 1;
+
+ unsigned MinElts = Info.EC.getKnownMinValue();
return VScale->first * MinElts * EltSize;
}
@@ -9541,6 +9547,12 @@ bool ASTContext::areCompatibleRVVTypes(QualType FirstType,
auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
if (const auto *BT = FirstType->getAs<BuiltinType>()) {
if (const auto *VT = SecondType->getAs<VectorType>()) {
+ if (VT->getVectorKind() == VectorKind::RVVFixedLengthMask) {
+ BuiltinVectorTypeInfo Info = getBuiltinVectorTypeInfo(BT);
+ return FirstType->isRVVVLSBuiltinType() &&
+ Info.ElementType == BoolTy &&
+ getTypeSize(SecondType) == getRVVTypeSize(*this, BT);
+ }
if (VT->getVectorKind() == VectorKind::RVVFixedLengthData ||
VT->getVectorKind() == VectorKind::Generic)
return FirstType->isRVVVLSBuiltinType() &&
diff --git a/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp b/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp
index f1d07d022b25..edf9b5e2d52b 100644
--- a/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ExprConstant.cpp
@@ -7951,7 +7951,8 @@ public:
// Overloaded operator calls to member functions are represented as normal
// calls with '*this' as the first argument.
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
- if (MD && MD->isImplicitObjectMemberFunction()) {
+ if (MD &&
+ (MD->isImplicitObjectMemberFunction() || (OCE && MD->isStatic()))) {
// FIXME: When selecting an implicit conversion for an overloaded
// operator delete, we sometimes try to evaluate calls to conversion
// operators without a 'this' parameter!
@@ -7960,7 +7961,11 @@ public:
if (!EvaluateObjectArgument(Info, Args[0], ThisVal))
return false;
- This = &ThisVal;
+
+ // If we are calling a static operator, the 'this' argument needs to be
+ // ignored after being evaluated.
+ if (MD->isInstance())
+ This = &ThisVal;
// If this is syntactically a simple assignment using a trivial
// assignment operator, start the lifetimes of union members as needed,
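For illustration only (not part of the patch), a hypothetical C++23 snippet of the case this change handles: a call through a static overloaded operator, where the object argument is still evaluated but then ignored.

    // Sketch, assuming -std=c++23 (static operator() is a C++23 feature).
    struct Add {
      static constexpr int operator()(int a, int b) { return a + b; }
    };
    static_assert(Add{}(2, 3) == 5);  // Add{} is evaluated as the object
                                      // argument, then discarded for the call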
diff --git a/contrib/llvm-project/clang/lib/AST/ItaniumMangle.cpp b/contrib/llvm-project/clang/lib/AST/ItaniumMangle.cpp
index 40b1e086ddd0..688141b30441 100644
--- a/contrib/llvm-project/clang/lib/AST/ItaniumMangle.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ItaniumMangle.cpp
@@ -3994,7 +3994,8 @@ void CXXNameMangler::mangleAArch64FixedSveVectorType(
}
void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
- assert(T->getVectorKind() == VectorKind::RVVFixedLengthData &&
+ assert((T->getVectorKind() == VectorKind::RVVFixedLengthData ||
+ T->getVectorKind() == VectorKind::RVVFixedLengthMask) &&
"expected fixed-length RVV vector!");
QualType EltType = T->getElementType();
@@ -4009,7 +4010,10 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
TypeNameOS << "int8";
break;
case BuiltinType::UChar:
- TypeNameOS << "uint8";
+ if (T->getVectorKind() == VectorKind::RVVFixedLengthData)
+ TypeNameOS << "uint8";
+ else
+ TypeNameOS << "bool";
break;
case BuiltinType::Short:
TypeNameOS << "int16";
@@ -4048,12 +4052,16 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
auto VScale = getASTContext().getTargetInfo().getVScaleRange(
getASTContext().getLangOpts());
unsigned VLen = VScale->first * llvm::RISCV::RVVBitsPerBlock;
- TypeNameOS << 'm';
- if (VecSizeInBits >= VLen)
- TypeNameOS << (VecSizeInBits / VLen);
- else
- TypeNameOS << 'f' << (VLen / VecSizeInBits);
+ if (T->getVectorKind() == VectorKind::RVVFixedLengthData) {
+ TypeNameOS << 'm';
+ if (VecSizeInBits >= VLen)
+ TypeNameOS << (VecSizeInBits / VLen);
+ else
+ TypeNameOS << 'f' << (VLen / VecSizeInBits);
+ } else {
+ TypeNameOS << (VLen / VecSizeInBits);
+ }
TypeNameOS << "_t";
Out << "9__RVV_VLSI" << 'u' << TypeNameStr.size() << TypeNameStr << "Lj"
@@ -4093,7 +4101,8 @@ void CXXNameMangler::mangleType(const VectorType *T) {
T->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
mangleAArch64FixedSveVectorType(T);
return;
- } else if (T->getVectorKind() == VectorKind::RVVFixedLengthData) {
+ } else if (T->getVectorKind() == VectorKind::RVVFixedLengthData ||
+ T->getVectorKind() == VectorKind::RVVFixedLengthMask) {
mangleRISCVFixedRVVVectorType(T);
return;
}
diff --git a/contrib/llvm-project/clang/lib/AST/JSONNodeDumper.cpp b/contrib/llvm-project/clang/lib/AST/JSONNodeDumper.cpp
index 3daba13d0fce..3c11b75d7472 100644
--- a/contrib/llvm-project/clang/lib/AST/JSONNodeDumper.cpp
+++ b/contrib/llvm-project/clang/lib/AST/JSONNodeDumper.cpp
@@ -703,6 +703,9 @@ void JSONNodeDumper::VisitVectorType(const VectorType *VT) {
case VectorKind::RVVFixedLengthData:
JOS.attribute("vectorKind", "fixed-length rvv data vector");
break;
+ case VectorKind::RVVFixedLengthMask:
+ JOS.attribute("vectorKind", "fixed-length rvv mask vector");
+ break;
}
}
diff --git a/contrib/llvm-project/clang/lib/AST/ODRHash.cpp b/contrib/llvm-project/clang/lib/AST/ODRHash.cpp
index 5b98646a1e8d..2dbc259138a8 100644
--- a/contrib/llvm-project/clang/lib/AST/ODRHash.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ODRHash.cpp
@@ -745,55 +745,8 @@ void ODRHash::AddEnumDecl(const EnumDecl *Enum) {
if (Enum->isScoped())
AddBoolean(Enum->isScopedUsingClassTag());
- if (Enum->getIntegerTypeSourceInfo()) {
- // FIMXE: This allows two enums with different spellings to have the same
- // hash.
- //
- // // mod1.cppm
- // module;
- // extern "C" {
- // typedef unsigned __int64 size_t;
- // }
- // namespace std {
- // using :: size_t;
- // }
- //
- // extern "C++" {
- // namespace std {
- // enum class align_val_t : std::size_t {};
- // }
- // }
- //
- // export module mod1;
- // export using std::align_val_t;
- //
- // // mod2.cppm
- // module;
- // extern "C" {
- // typedef unsigned __int64 size_t;
- // }
- //
- // extern "C++" {
- // namespace std {
- // enum class align_val_t : size_t {};
- // }
- // }
- //
- // export module mod2;
- // import mod1;
- // export using std::align_val_t;
- //
- // The above example should be disallowed since it violates
- // [basic.def.odr]p14:
- //
- // Each such definition shall consist of the same sequence of tokens
- //
- // The definitions of `std::align_val_t` in two module units have different
- // spellings but we failed to give an error here.
- //
- // See https://github.com/llvm/llvm-project/issues/76638 for details.
+ if (Enum->getIntegerTypeSourceInfo())
AddQualType(Enum->getIntegerType().getCanonicalType());
- }
// Filter out sub-Decls which will not be processed in order to get an
// accurate count of Decl's.
diff --git a/contrib/llvm-project/clang/lib/AST/TemplateBase.cpp b/contrib/llvm-project/clang/lib/AST/TemplateBase.cpp
index 2bdbeb08ef20..3310d7dc24c5 100644
--- a/contrib/llvm-project/clang/lib/AST/TemplateBase.cpp
+++ b/contrib/llvm-project/clang/lib/AST/TemplateBase.cpp
@@ -450,7 +450,8 @@ bool TemplateArgument::structurallyEquals(const TemplateArgument &Other) const {
getAsIntegral() == Other.getAsIntegral();
case StructuralValue: {
- if (getStructuralValueType() != Other.getStructuralValueType())
+ if (getStructuralValueType().getCanonicalType() !=
+ Other.getStructuralValueType().getCanonicalType())
return false;
llvm::FoldingSetNodeID A, B;
diff --git a/contrib/llvm-project/clang/lib/AST/TextNodeDumper.cpp b/contrib/llvm-project/clang/lib/AST/TextNodeDumper.cpp
index 48c6729a6738..ecf5de0be543 100644
--- a/contrib/llvm-project/clang/lib/AST/TextNodeDumper.cpp
+++ b/contrib/llvm-project/clang/lib/AST/TextNodeDumper.cpp
@@ -1623,6 +1623,9 @@ void TextNodeDumper::VisitVectorType(const VectorType *T) {
case VectorKind::RVVFixedLengthData:
OS << " fixed-length rvv data vector";
break;
+ case VectorKind::RVVFixedLengthMask:
+ OS << " fixed-length rvv mask vector";
+ break;
}
OS << " " << T->getNumElements();
}
diff --git a/contrib/llvm-project/clang/lib/AST/Type.cpp b/contrib/llvm-project/clang/lib/AST/Type.cpp
index 3db5ae182f32..d4103025591e 100644
--- a/contrib/llvm-project/clang/lib/AST/Type.cpp
+++ b/contrib/llvm-project/clang/lib/AST/Type.cpp
@@ -2479,6 +2479,9 @@ bool Type::isRVVVLSBuiltinType() const {
IsFP, IsBF) \
case BuiltinType::Id: \
return NF == 1;
+#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \
+ case BuiltinType::Id: \
+ return true;
#include "clang/Basic/RISCVVTypes.def"
default:
return false;
@@ -2491,7 +2494,17 @@ QualType Type::getRVVEltType(const ASTContext &Ctx) const {
assert(isRVVVLSBuiltinType() && "unsupported type!");
const BuiltinType *BTy = castAs<BuiltinType>();
- return Ctx.getBuiltinVectorTypeInfo(BTy).ElementType;
+
+ switch (BTy->getKind()) {
+#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \
+ case BuiltinType::Id: \
+ return Ctx.UnsignedCharTy;
+ default:
+ return Ctx.getBuiltinVectorTypeInfo(BTy).ElementType;
+#include "clang/Basic/RISCVVTypes.def"
+ }
+
+ llvm_unreachable("Unhandled type");
}
bool QualType::isPODType(const ASTContext &Context) const {
diff --git a/contrib/llvm-project/clang/lib/AST/TypePrinter.cpp b/contrib/llvm-project/clang/lib/AST/TypePrinter.cpp
index 80b42c8f84a0..e9b6e810b02e 100644
--- a/contrib/llvm-project/clang/lib/AST/TypePrinter.cpp
+++ b/contrib/llvm-project/clang/lib/AST/TypePrinter.cpp
@@ -694,6 +694,7 @@ void TypePrinter::printVectorBefore(const VectorType *T, raw_ostream &OS) {
printBefore(T->getElementType(), OS);
break;
case VectorKind::RVVFixedLengthData:
+ case VectorKind::RVVFixedLengthMask:
// FIXME: We prefer to print the size directly here, but have no way
// to get the size of the type.
OS << "__attribute__((__riscv_rvv_vector_bits__(";
@@ -773,6 +774,7 @@ void TypePrinter::printDependentVectorBefore(
printBefore(T->getElementType(), OS);
break;
case VectorKind::RVVFixedLengthData:
+ case VectorKind::RVVFixedLengthMask:
// FIXME: We prefer to print the size directly here, but have no way
// to get the size of the type.
OS << "__attribute__((__riscv_rvv_vector_bits__(";
diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp
index d47181bfca4f..336b7a5e3d72 100644
--- a/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp
+++ b/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp
@@ -387,6 +387,11 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4");
+ // These macros are set when Clang can parse declarations with these
+ // attributes.
+ Builder.defineMacro("__ARM_STATE_ZA", "1");
+ Builder.defineMacro("__ARM_STATE_ZT0", "1");
+
// 0xe implies support for half, single and double precision operations.
if (FPU & FPUMode)
Builder.defineMacro("__ARM_FP", "0xE");
@@ -431,6 +436,17 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasSVE2 && HasSVE2SM4)
Builder.defineMacro("__ARM_FEATURE_SVE2_SM4", "1");
+ if (HasSME) {
+ Builder.defineMacro("__ARM_FEATURE_SME");
+ Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
+ }
+
+ if (HasSME2) {
+ Builder.defineMacro("__ARM_FEATURE_SME");
+ Builder.defineMacro("__ARM_FEATURE_SME2");
+ Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
+ }
+
if (HasCRC)
Builder.defineMacro("__ARM_FEATURE_CRC32", "1");
@@ -686,6 +702,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
.Case("sve2-sha3", FPU & SveMode && HasSVE2SHA3)
.Case("sve2-sm4", FPU & SveMode && HasSVE2SM4)
.Case("sme", HasSME)
+ .Case("sme2", HasSME2)
.Case("sme-f64f64", HasSMEF64F64)
.Case("sme-i16i64", HasSMEI16I64)
.Case("sme-fa64", HasSMEFA64)
@@ -806,6 +823,12 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasBFloat16 = true;
HasFullFP16 = true;
}
+ if (Feature == "+sme2") {
+ HasSME = true;
+ HasSME2 = true;
+ HasBFloat16 = true;
+ HasFullFP16 = true;
+ }
if (Feature == "+sme-f64f64") {
HasSME = true;
HasSMEF64F64 = true;
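With the defines added above, code can distinguish SME2 from plain SME at preprocessing time. A hedged sketch of the intended use; the macro names are the ones defined in this hunk, the surrounding code is illustrative only:

    #if defined(__ARM_FEATURE_SME2)
    // SME2 path: ZT0 state and the SME2 instructions are available.
    #elif defined(__ARM_FEATURE_SME)
    // SME-only path.
    #else
    // Scalar/NEON fallback.
    #endif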
diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.h b/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.h
index f0e0782e7abe..9699222b0bf7 100644
--- a/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.h
+++ b/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.h
@@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
bool HasCCDP = false;
bool HasFRInt3264 = false;
bool HasSME = false;
+ bool HasSME2 = false;
bool HasSMEF64F64 = false;
bool HasSMEI16I64 = false;
bool HasSB = false;
diff --git a/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp
index ec203f6f28bc..7877e20d77f7 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/BackendUtil.cpp
@@ -401,6 +401,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
Options.UniqueBasicBlockSectionNames =
CodeGenOpts.UniqueBasicBlockSectionNames;
Options.TLSSize = CodeGenOpts.TLSSize;
+ Options.EnableTLSDESC = CodeGenOpts.EnableTLSDESC;
Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp
index 7ef764b8e1ac..a4f26a6f0eb1 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18279,65 +18279,216 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
- case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: {
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
// These operations perform a matrix multiplication and accumulation of
// the form:
// D = A * B + C
- // The return type always matches the type of matrix C.
- unsigned ArgForMatchingRetType;
+ // We need to specify one type for matrices AB and one for matrices CD.
+ // Sparse matrix operations can have different types for A and B as well as
+ // an additional type for sparsity index.
+ // Destination type should be put before types used for source operands.
+ SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
+ // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
+ // There is no need for the variable opsel argument, so always set it to
+ // "false".
+ bool AppendFalseForOpselArg = false;
unsigned BuiltinWMMAOp;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
- ArgForMatchingRetType = 2;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
- ArgForMatchingRetType = 2;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
+ AppendFalseForOpselArg = true;
+ LLVM_FALLTHROUGH;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
- ArgForMatchingRetType = 2;
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
+ AppendFalseForOpselArg = true;
+ LLVM_FALLTHROUGH;
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
- ArgForMatchingRetType = 2;
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
- ArgForMatchingRetType = 2;
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
- ArgForMatchingRetType = 2;
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
- ArgForMatchingRetType = 4;
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
- ArgForMatchingRetType = 4;
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
+ ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
+ ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
+ ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
+ ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
+ break;
}
SmallVector<Value *, 6> Args;
for (int i = 0, e = E->getNumArgs(); i != e; ++i)
Args.push_back(EmitScalarExpr(E->getArg(i)));
+ if (AppendFalseForOpselArg)
+ Args.push_back(Builder.getFalse());
- Function *F = CGM.getIntrinsic(BuiltinWMMAOp,
- {Args[ArgForMatchingRetType]->getType()});
+ SmallVector<llvm::Type *, 6> ArgTypes;
+ for (auto ArgIdx : ArgsForMatchingMatrixTypes)
+ ArgTypes.push_back(Args[ArgIdx]->getType());
+ Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
return Builder.CreateCall(F, Args);
}
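For reference, a hedged example of the D = A * B + C builtins this switch lowers. The vector typedefs and the wave32 gfx11 signature are assumptions based on BuiltinsAMDGPU.def, not taken from this patch, and the code assumes a gfx11 wave32 target:

    typedef _Float16 half16 __attribute__((ext_vector_type(16)));
    typedef float    float8 __attribute__((ext_vector_type(8)));

    float8 wmma_f32_f16(half16 a, half16 b, float8 c) {
      // Overload types for the intrinsic are now collected per operand index
      // (destination first), so the gfx12 and sparse (swmmac) variants can
      // reuse the same lowering.
      return __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(a, b, c);
    }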
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp
index c5f6b6d3a99f..f8f997909977 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGExpr.cpp
@@ -5846,6 +5846,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
// destruction order is not necessarily reverse construction order.
// FIXME: Revisit this based on C++ committee response to unimplementability.
EvaluationOrder Order = EvaluationOrder::Default;
+ bool StaticOperator = false;
if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(E)) {
if (OCE->isAssignmentOp())
Order = EvaluationOrder::ForceRightToLeft;
@@ -5863,10 +5864,22 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
break;
}
}
+
+ if (const auto *MD =
+ dyn_cast_if_present<CXXMethodDecl>(OCE->getCalleeDecl());
+ MD && MD->isStatic())
+ StaticOperator = true;
}
- EmitCallArgs(Args, dyn_cast<FunctionProtoType>(FnType), E->arguments(),
- E->getDirectCallee(), /*ParamsToSkip*/ 0, Order);
+ auto Arguments = E->arguments();
+ if (StaticOperator) {
+ // If we're calling a static operator, we need to emit the object argument
+ // and ignore it.
+ EmitIgnoredExpr(E->getArg(0));
+ Arguments = drop_begin(Arguments, 1);
+ }
+ EmitCallArgs(Args, dyn_cast<FunctionProtoType>(FnType), Arguments,
+ E->getDirectCallee(), /*ParamsToSkip=*/0, Order);
const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall(
Args, FnType, /*ChainCall=*/Chain);
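The StaticOperator path above covers C++23 static call operators: the object argument is still evaluated for its side effects, then dropped before the remaining call arguments are emitted. A small sketch, assuming -std=c++23:

    struct Adder {
      static int operator()(int x, int y) { return x + y; }  // C++23 static operator()
    };

    int use(Adder a) {
      return a(20, 22);  // `a` is emitted as an ignored expression,
                         // then Adder::operator()(20, 22) is called
    }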
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp
index 5eca00f22bb8..0c43317642bc 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -1812,8 +1812,10 @@ struct CounterCoverageMappingBuilder
assert(S->isConstexpr());
// evaluate constant condition...
- const auto *E = cast<ConstantExpr>(S->getCond());
- const bool isTrue = E->getResultAsAPSInt().getExtValue();
+ const bool isTrue =
+ S->getCond()
+ ->EvaluateKnownConstInt(CVM.getCodeGenModule().getContext())
+ .getBoolValue();
extendRegion(S);
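The coverage builder now folds the `if constexpr` condition through EvaluateKnownConstInt instead of expecting a ConstantExpr node. A hedged example of the kind of condition this handles:

    template <class T>
    int pick() {
      if constexpr (sizeof(T) > 4)  // condition folded while building the coverage map
        return 1;
      else
        return 0;
    }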
diff --git a/contrib/llvm-project/clang/lib/CodeGen/Targets/RISCV.cpp b/contrib/llvm-project/clang/lib/CodeGen/Targets/RISCV.cpp
index 0851d1993d0c..02c86ad2e58c 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -321,20 +321,28 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const {
assert(Ty->isVectorType() && "expected vector type!");
const auto *VT = Ty->castAs<VectorType>();
- assert(VT->getVectorKind() == VectorKind::RVVFixedLengthData &&
- "Unexpected vector kind");
-
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
auto VScale =
getContext().getTargetInfo().getVScaleRange(getContext().getLangOpts());
+
+ unsigned NumElts = VT->getNumElements();
+ llvm::Type *EltType;
+ if (VT->getVectorKind() == VectorKind::RVVFixedLengthMask) {
+ NumElts *= 8;
+ EltType = llvm::Type::getInt1Ty(getVMContext());
+ } else {
+ assert(VT->getVectorKind() == VectorKind::RVVFixedLengthData &&
+ "Unexpected vector kind");
+ EltType = CGT.ConvertType(VT->getElementType());
+ }
+
// The MinNumElts is simplified from equation:
// NumElts / VScale =
// (EltSize * NumElts / (VScale * RVVBitsPerBlock))
// * (RVVBitsPerBlock / EltSize)
llvm::ScalableVectorType *ResType =
- llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()),
- VT->getNumElements() / VScale->first);
+ llvm::ScalableVectorType::get(EltType, NumElts / VScale->first);
return ABIArgInfo::getDirect(ResType);
}
@@ -437,7 +445,8 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
}
if (const VectorType *VT = Ty->getAs<VectorType>())
- if (VT->getVectorKind() == VectorKind::RVVFixedLengthData)
+ if (VT->getVectorKind() == VectorKind::RVVFixedLengthData ||
+ VT->getVectorKind() == VectorKind::RVVFixedLengthMask)
return coerceVLSVector(Ty);
// Aggregates which are <= 2*XLen will be passed in registers if possible,
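As a worked assumption about the coercion above: a fixed-length RVV mask vector stores its bits in unsigned char elements, so the element count is scaled back up by 8 and divided by vscale. With VLEN = 256 (vscale lower bound 4), a 32-bit fixed-length vbool8_t value (4 x unsigned char in the AST) is passed as <vscale x 8 x i1>, the same scalable type it was derived from.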
diff --git a/contrib/llvm-project/clang/lib/Driver/Driver.cpp b/contrib/llvm-project/clang/lib/Driver/Driver.cpp
index 7109faa1072d..93cddf742d52 100644
--- a/contrib/llvm-project/clang/lib/Driver/Driver.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/Driver.cpp
@@ -4764,9 +4764,9 @@ Action *Driver::ConstructPhaseAction(
case phases::Backend: {
if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
types::ID Output;
- if (Args.hasArg(options::OPT_ffat_lto_objects))
- Output = Args.hasArg(options::OPT_emit_llvm) ? types::TY_LTO_IR
- : types::TY_PP_Asm;
+ if (Args.hasArg(options::OPT_ffat_lto_objects) &&
+ !Args.hasArg(options::OPT_emit_llvm))
+ Output = types::TY_PP_Asm;
else if (Args.hasArg(options::OPT_S))
Output = types::TY_LTO_IR;
else
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
index 5dc614e11aab..aa344b3465ab 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3942,6 +3942,10 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D,
Args.ClaimAllArgs(options::OPT_fmodules_disable_diagnostic_validation);
}
+ // FIXME: We provisionally don't check ODR violations for decls in the global
+ // module fragment.
+ CmdArgs.push_back("-fskip-odr-check-in-gmf");
+
// Claim `-fmodule-output` and `-fmodule-output=` to avoid unused warnings.
Args.ClaimAllArgs(options::OPT_fmodule_output);
Args.ClaimAllArgs(options::OPT_fmodule_output_EQ);
@@ -5779,6 +5783,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// NVPTX/AMDGPU does not care about the code model and will accept
// whatever works for the host.
Ok = true;
+ } else if (Triple.isSPARC64()) {
+ if (CM == "medlow")
+ CM = "small";
+ else if (CM == "medmid")
+ CM = "medium";
+ else if (CM == "medany")
+ CM = "large";
+ Ok = CM == "small" || CM == "medium" || CM == "large";
}
if (Ok) {
CmdArgs.push_back(Args.MakeArgString("-mcmodel=" + CM));
@@ -5822,6 +5834,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_mtls_size_EQ);
}
+ if (isTLSDESCEnabled(TC, Args))
+ CmdArgs.push_back("-enable-tlsdesc");
+
// Add the target cpu
std::string CPU = getCPUName(D, Args, Triple, /*FromAs*/ false);
if (!CPU.empty()) {
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
index fadaf3e60c66..2b916f000336 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -729,6 +729,33 @@ bool tools::isUseSeparateSections(const llvm::Triple &Triple) {
return Triple.isPS();
}
+bool tools::isTLSDESCEnabled(const ToolChain &TC,
+ const llvm::opt::ArgList &Args) {
+ const llvm::Triple &Triple = TC.getEffectiveTriple();
+ Arg *A = Args.getLastArg(options::OPT_mtls_dialect_EQ);
+ if (!A)
+ return Triple.hasDefaultTLSDESC();
+ StringRef V = A->getValue();
+ bool SupportedArgument = false, EnableTLSDESC = false;
+ bool Unsupported = !Triple.isOSBinFormatELF();
+ if (Triple.isRISCV()) {
+ SupportedArgument = V == "desc" || V == "trad";
+ EnableTLSDESC = V == "desc";
+ } else if (Triple.isX86()) {
+ SupportedArgument = V == "gnu";
+ } else {
+ Unsupported = true;
+ }
+ if (Unsupported) {
+ TC.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
+ << A->getSpelling() << Triple.getTriple();
+ } else if (!SupportedArgument) {
+ TC.getDriver().Diag(diag::err_drv_unsupported_option_argument_for_target)
+ << A->getSpelling() << V << Triple.getTriple();
+ }
+ return EnableTLSDESC;
+}
+
void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
ArgStringList &CmdArgs, const InputInfo &Output,
const InputInfo &Input, bool IsThinLTO) {
@@ -783,6 +810,28 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
"-generate-arange-section"));
}
+ // Pass vector library arguments to LTO.
+ Arg *ArgVecLib = Args.getLastArg(options::OPT_fveclib);
+ if (ArgVecLib && ArgVecLib->getNumValues() == 1) {
+ // Map the vector library names from clang front-end to opt front-end. The
+ // values are taken from the TargetLibraryInfo class command line options.
+ std::optional<StringRef> OptVal =
+ llvm::StringSwitch<std::optional<StringRef>>(ArgVecLib->getValue())
+ .Case("Accelerate", "Accelerate")
+ .Case("LIBMVEC", "LIBMVEC-X86")
+ .Case("MASSV", "MASSV")
+ .Case("SVML", "SVML")
+ .Case("SLEEF", "sleefgnuabi")
+ .Case("Darwin_libsystem_m", "Darwin_libsystem_m")
+ .Case("ArmPL", "ArmPL")
+ .Case("none", "none")
+ .Default(std::nullopt);
+
+ if (OptVal)
+ CmdArgs.push_back(Args.MakeArgString(
+ Twine(PluginOptPrefix) + "-vector-library=" + OptVal.value()));
+ }
+
// Try to pass driver level flags relevant to LTO code generation down to
// the plugin.
@@ -988,6 +1037,9 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
CmdArgs.push_back(
Args.MakeArgString(Twine(PluginOptPrefix) + "-emulated-tls"));
}
+ if (isTLSDESCEnabled(ToolChain, Args))
+ CmdArgs.push_back(
+ Args.MakeArgString(Twine(PluginOptPrefix) + "-enable-tlsdesc"));
if (Args.hasFlag(options::OPT_fstack_size_section,
options::OPT_fno_stack_size_section, false))
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h
index 25d68345a9f9..807867f13a5c 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -144,6 +144,9 @@ llvm::StringRef getLTOParallelism(const llvm::opt::ArgList &Args,
bool areOptimizationsEnabled(const llvm::opt::ArgList &Args);
bool isUseSeparateSections(const llvm::Triple &Triple);
+// Parse -mtls-dialect=. Return true if the target supports both general-dynamic
+// and TLSDESC, and TLSDESC is requested.
+bool isTLSDESCEnabled(const ToolChain &TC, const llvm::opt::ArgList &Args);
/// \p EnvVar is split by system delimiter for environment variables.
/// If \p ArgName is "-I", "-L", or an empty string, each entry from \p EnvVar
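A hedged sketch of what -mtls-dialect=desc changes for ordinary thread-local code on a supported ELF target (RISC-V here): the flag only selects the access model via the -enable-tlsdesc plugin/cc1 option added above, the source is unchanged.

    thread_local int counter;   // with -mtls-dialect=desc, general-dynamic accesses
                                // to this variable are lowered via TLS descriptors
    int bump() { return ++counter; }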
diff --git a/contrib/llvm-project/clang/lib/Format/Format.cpp b/contrib/llvm-project/clang/lib/Format/Format.cpp
index ff326dc78478..10fe35c79a4f 100644
--- a/contrib/llvm-project/clang/lib/Format/Format.cpp
+++ b/contrib/llvm-project/clang/lib/Format/Format.cpp
@@ -504,22 +504,6 @@ struct ScalarEnumerationTraits<FormatStyle::QualifierAlignmentStyle> {
}
};
-template <>
-struct MappingTraits<
- FormatStyle::SpaceBeforeParensCustom::AfterPlacementOperatorStyle> {
- static void
- mapping(IO &IO,
- FormatStyle::SpaceBeforeParensCustom::AfterPlacementOperatorStyle
- &Value) {
- IO.enumCase(Value, "Always",
- FormatStyle::SpaceBeforeParensCustom::APO_Always);
- IO.enumCase(Value, "Never",
- FormatStyle::SpaceBeforeParensCustom::APO_Never);
- IO.enumCase(Value, "Leave",
- FormatStyle::SpaceBeforeParensCustom::APO_Leave);
- }
-};
-
template <> struct MappingTraits<FormatStyle::RawStringFormat> {
static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) {
IO.mapOptional("Language", Format.Language);
@@ -1388,12 +1372,9 @@ static void expandPresetsSpaceBeforeParens(FormatStyle &Expanded) {
return;
// Reset all flags
Expanded.SpaceBeforeParensOptions = {};
+ Expanded.SpaceBeforeParensOptions.AfterPlacementOperator = true;
switch (Expanded.SpaceBeforeParens) {
- case FormatStyle::SBPO_Never:
- Expanded.SpaceBeforeParensOptions.AfterPlacementOperator =
- FormatStyle::SpaceBeforeParensCustom::APO_Never;
- break;
case FormatStyle::SBPO_ControlStatements:
Expanded.SpaceBeforeParensOptions.AfterControlStatements = true;
Expanded.SpaceBeforeParensOptions.AfterForeachMacros = true;
@@ -1405,8 +1386,6 @@ static void expandPresetsSpaceBeforeParens(FormatStyle &Expanded) {
case FormatStyle::SBPO_NonEmptyParentheses:
Expanded.SpaceBeforeParensOptions.BeforeNonEmptyParentheses = true;
break;
- case FormatStyle::SBPO_Always:
- break;
default:
break;
}
diff --git a/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp
index 25fcceb87864..d0c4273cfc7e 100644
--- a/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp
+++ b/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp
@@ -2488,6 +2488,8 @@ private:
(Tok.Next->Next->is(tok::numeric_constant) || Line.InPPDirective)) {
return false;
}
+ if (Line.InPPDirective && Tok.Next->is(tok::minus))
+ return false;
// Search for unexpected tokens.
for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
Prev = Prev->Previous) {
@@ -4272,14 +4274,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
Left.isOneOf(tok::kw_new, tok::kw_delete) &&
Right.isNot(TT_OverloadedOperatorLParen) &&
!(Line.MightBeFunctionDecl && Left.is(TT_FunctionDeclarationName))) {
- if (Style.SpaceBeforeParensOptions.AfterPlacementOperator ==
- FormatStyle::SpaceBeforeParensCustom::APO_Always ||
- (Style.SpaceBeforeParensOptions.AfterPlacementOperator ==
- FormatStyle::SpaceBeforeParensCustom::APO_Leave &&
- Right.hasWhitespaceBefore())) {
- return true;
- }
- return false;
+ return Style.SpaceBeforeParensOptions.AfterPlacementOperator;
}
if (Line.Type == LT_ObjCDecl)
return true;
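AfterPlacementOperator is now a plain boolean (the Always/Never/Leave enum was dropped) and the presets enable it, so the annotator can simply return the flag. A sketch of the formatting it governs, assuming SpaceBeforeParensOptions.AfterPlacementOperator is true:

    #include <new>

    int *construct(void *buf) {
      return new (buf) int(42);   // space kept between `new` and `(buf)`
    }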
diff --git a/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp b/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp
index 9f82a6d073e3..a980f4bcbae1 100644
--- a/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp
+++ b/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp
@@ -3288,7 +3288,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result,
return;
}
- emitMacroExpansionWarnings(MacroNameTok);
+ emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);
// Check to see if this is the last token on the #if[n]def line.
CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
diff --git a/contrib/llvm-project/clang/lib/Lex/PPExpressions.cpp b/contrib/llvm-project/clang/lib/Lex/PPExpressions.cpp
index 1feb0eb18d71..8f25c67ec9df 100644
--- a/contrib/llvm-project/clang/lib/Lex/PPExpressions.cpp
+++ b/contrib/llvm-project/clang/lib/Lex/PPExpressions.cpp
@@ -133,7 +133,9 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
DT.IncludedUndefinedIds = !Macro;
- PP.emitMacroExpansionWarnings(PeekTok);
+ PP.emitMacroExpansionWarnings(
+ PeekTok,
+ (II->getName() == "INFINITY" || II->getName() == "NAN") ? true : false);
// If there is a macro, mark it used.
if (Result.Val != 0 && ValueLive)
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaChecking.cpp b/contrib/llvm-project/clang/lib/Sema/SemaChecking.cpp
index 7833d5a2ea20..09b7e1c62fbd 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaChecking.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaChecking.cpp
@@ -7545,47 +7545,43 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
}
}
- // If the callee uses AArch64 SME ZA state but the caller doesn't define
- // any, then this is an error.
- FunctionType::ArmStateValue ArmZAState =
+ FunctionType::ArmStateValue CalleeArmZAState =
FunctionType::getArmZAState(ExtInfo.AArch64SMEAttributes);
- if (ArmZAState != FunctionType::ARM_None) {
+ FunctionType::ArmStateValue CalleeArmZT0State =
+ FunctionType::getArmZT0State(ExtInfo.AArch64SMEAttributes);
+ if (CalleeArmZAState != FunctionType::ARM_None ||
+ CalleeArmZT0State != FunctionType::ARM_None) {
bool CallerHasZAState = false;
+ bool CallerHasZT0State = false;
if (const auto *CallerFD = dyn_cast<FunctionDecl>(CurContext)) {
auto *Attr = CallerFD->getAttr<ArmNewAttr>();
if (Attr && Attr->isNewZA())
CallerHasZAState = true;
- else if (const auto *FPT =
- CallerFD->getType()->getAs<FunctionProtoType>())
- CallerHasZAState = FunctionType::getArmZAState(
- FPT->getExtProtoInfo().AArch64SMEAttributes) !=
- FunctionType::ARM_None;
- }
-
- if (!CallerHasZAState)
- Diag(Loc, diag::err_sme_za_call_no_za_state);
- }
-
- // If the callee uses AArch64 SME ZT0 state but the caller doesn't define
- // any, then this is an error.
- FunctionType::ArmStateValue ArmZT0State =
- FunctionType::getArmZT0State(ExtInfo.AArch64SMEAttributes);
- if (ArmZT0State != FunctionType::ARM_None) {
- bool CallerHasZT0State = false;
- if (const auto *CallerFD = dyn_cast<FunctionDecl>(CurContext)) {
- auto *Attr = CallerFD->getAttr<ArmNewAttr>();
if (Attr && Attr->isNewZT0())
CallerHasZT0State = true;
- else if (const auto *FPT =
- CallerFD->getType()->getAs<FunctionProtoType>())
- CallerHasZT0State =
+ if (const auto *FPT = CallerFD->getType()->getAs<FunctionProtoType>()) {
+ CallerHasZAState |=
+ FunctionType::getArmZAState(
+ FPT->getExtProtoInfo().AArch64SMEAttributes) !=
+ FunctionType::ARM_None;
+ CallerHasZT0State |=
FunctionType::getArmZT0State(
FPT->getExtProtoInfo().AArch64SMEAttributes) !=
FunctionType::ARM_None;
+ }
}
- if (!CallerHasZT0State)
+ if (CalleeArmZAState != FunctionType::ARM_None && !CallerHasZAState)
+ Diag(Loc, diag::err_sme_za_call_no_za_state);
+
+ if (CalleeArmZT0State != FunctionType::ARM_None && !CallerHasZT0State)
Diag(Loc, diag::err_sme_zt0_call_no_zt0_state);
+
+ if (CallerHasZAState && CalleeArmZAState == FunctionType::ARM_None &&
+ CalleeArmZT0State != FunctionType::ARM_None) {
+ Diag(Loc, diag::err_sme_unimplemented_za_save_restore);
+ Diag(Loc, diag::note_sme_use_preserves_za);
+ }
}
}
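The merged check now diagnoses missing ZA and ZT0 state in one pass and also reports the unimplemented ZA save/restore case. A hedged sketch using the SME ACLE keyword attributes as implemented in Clang 18; the spellings are assumptions, not quoted from this patch:

    void shares_za(void) __arm_inout("za");

    void caller_without_za(void) {
      shares_za();   // error: caller defines no ZA state (err_sme_za_call_no_za_state)
    }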
@@ -7643,9 +7639,8 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
unsigned NumArgs = TheCall->getNumArgs();
Expr *ImplicitThis = nullptr;
- if (IsMemberOperatorCall && !FDecl->isStatic() &&
- !FDecl->hasCXXExplicitFunctionObjectParameter()) {
- // If this is a call to a non-static member operator, hide the first
+ if (IsMemberOperatorCall && !FDecl->hasCXXExplicitFunctionObjectParameter()) {
+ // If this is a call to a member operator, hide the first
// argument from checkCall.
// FIXME: Our choice of AST representation here is less than ideal.
ImplicitThis = Args[0];
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaConcept.cpp b/contrib/llvm-project/clang/lib/Sema/SemaConcept.cpp
index acfc00f41254..88fc846c89e4 100755
--- a/contrib/llvm-project/clang/lib/Sema/SemaConcept.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaConcept.cpp
@@ -612,8 +612,12 @@ bool Sema::SetupConstraintScope(
// If this is a member function, make sure we get the parameters that
// reference the original primary template.
- if (const auto *FromMemTempl =
- PrimaryTemplate->getInstantiatedFromMemberTemplate()) {
+ // We walk up the instantiated template chain so that nested lambdas get
+ // handled properly.
+ for (FunctionTemplateDecl *FromMemTempl =
+ PrimaryTemplate->getInstantiatedFromMemberTemplate();
+ FromMemTempl;
+ FromMemTempl = FromMemTempl->getInstantiatedFromMemberTemplate()) {
if (addInstantiatedParametersToScope(FD, FromMemTempl->getTemplatedDecl(),
Scope, MLTAL))
return true;
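Walking the whole getInstantiatedFromMemberTemplate() chain matters when a constrained lambda is nested inside a member template of a class template, since each level contributes parameters to the constraint scope. A hedged sketch of that shape, not the reproducer from the patch:

    template <class T> struct S {
      template <class U> void f(U u) {
        auto l = [](auto v) requires requires { v + v; } { return v; };
        l(u);
      }
    };
    template void S<int>::f(int);   // instantiation must resolve the nested constraint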
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp
index f9bf1d14bdc4..a300badc6d02 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaDecl.cpp
@@ -12752,7 +12752,8 @@ namespace {
}
if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) {
- HandleValue(OVE->getSourceExpr());
+ if (Expr *SE = OVE->getSourceExpr())
+ HandleValue(SE);
return;
}
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp
index 6413a48f809a..4efcb3590355 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp
@@ -11142,7 +11142,8 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
if (VecType->getVectorKind() == VectorKind::SveFixedLengthData ||
VecType->getVectorKind() == VectorKind::SveFixedLengthPredicate)
return true;
- if (VecType->getVectorKind() == VectorKind::RVVFixedLengthData) {
+ if (VecType->getVectorKind() == VectorKind::RVVFixedLengthData ||
+ VecType->getVectorKind() == VectorKind::RVVFixedLengthMask) {
SVEorRVV = 1;
return true;
}
@@ -11173,7 +11174,8 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
SecondVecType->getVectorKind() ==
VectorKind::SveFixedLengthPredicate)
return true;
- if (SecondVecType->getVectorKind() == VectorKind::RVVFixedLengthData) {
+ if (SecondVecType->getVectorKind() == VectorKind::RVVFixedLengthData ||
+ SecondVecType->getVectorKind() == VectorKind::RVVFixedLengthMask) {
SVEorRVV = 1;
return true;
}
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaOverload.cpp b/contrib/llvm-project/clang/lib/Sema/SemaOverload.cpp
index c9eb67898356..940bcccb9e26 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaOverload.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaOverload.cpp
@@ -5664,10 +5664,15 @@ static ImplicitConversionSequence TryObjectArgumentInitialization(
assert(FromType->isRecordType());
QualType ClassType = S.Context.getTypeDeclType(ActingContext);
- // [class.dtor]p2: A destructor can be invoked for a const, volatile or
- // const volatile object.
+ // C++98 [class.dtor]p2:
+ // A destructor can be invoked for a const, volatile or const volatile
+ // object.
+ // C++98 [over.match.funcs]p4:
+ // For static member functions, the implicit object parameter is considered
+ // to match any object (since if the function is selected, the object is
+ // discarded).
Qualifiers Quals = Method->getMethodQualifiers();
- if (isa<CXXDestructorDecl>(Method)) {
+ if (isa<CXXDestructorDecl>(Method) || Method->isStatic()) {
Quals.addConst();
Quals.addVolatile();
}
@@ -15061,7 +15066,7 @@ ExprResult Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc,
CXXMethodDecl *Method = cast<CXXMethodDecl>(FnDecl);
SmallVector<Expr *, 2> MethodArgs;
- // Handle 'this' parameter if the selected function is not static.
+ // Initialize the object parameter.
if (Method->isExplicitObjectMemberFunction()) {
ExprResult Res =
InitializeExplicitObjectArgument(*this, Args[0], Method);
@@ -15069,7 +15074,7 @@ ExprResult Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc,
return ExprError();
Args[0] = Res.get();
ArgExpr = Args;
- } else if (Method->isInstance()) {
+ } else {
ExprResult Arg0 = PerformImplicitObjectArgumentInitialization(
Args[0], /*Qualifier=*/nullptr, Best->FoundDecl, Method);
if (Arg0.isInvalid())
@@ -15097,15 +15102,9 @@ ExprResult Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc,
ExprValueKind VK = Expr::getValueKindForType(ResultTy);
ResultTy = ResultTy.getNonLValueExprType(Context);
- CallExpr *TheCall;
- if (Method->isInstance())
- TheCall = CXXOperatorCallExpr::Create(
- Context, OO_Subscript, FnExpr.get(), MethodArgs, ResultTy, VK,
- RLoc, CurFPFeatureOverrides());
- else
- TheCall =
- CallExpr::Create(Context, FnExpr.get(), MethodArgs, ResultTy, VK,
- RLoc, CurFPFeatureOverrides());
+ CallExpr *TheCall = CXXOperatorCallExpr::Create(
+ Context, OO_Subscript, FnExpr.get(), MethodArgs, ResultTy, VK, RLoc,
+ CurFPFeatureOverrides());
if (CheckCallReturnType(FnDecl->getReturnType(), LLoc, TheCall, FnDecl))
return ExprError();
@@ -15733,15 +15732,13 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
bool IsError = false;
- // Initialize the implicit object parameter if needed.
- // Since C++23, this could also be a call to a static call operator
- // which we emit as a regular CallExpr.
+ // Initialize the object parameter.
llvm::SmallVector<Expr *, 8> NewArgs;
if (Method->isExplicitObjectMemberFunction()) {
// FIXME: we should do that during the definition of the lambda when we can.
DiagnoseInvalidExplicitObjectParameterInLambda(Method);
PrepareExplicitObjectArgument(*this, Method, Obj, Args, NewArgs);
- } else if (Method->isInstance()) {
+ } else {
ExprResult ObjRes = PerformImplicitObjectArgumentInitialization(
Object.get(), /*Qualifier=*/nullptr, Best->FoundDecl, Method);
if (ObjRes.isInvalid())
@@ -15775,14 +15772,9 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
ExprValueKind VK = Expr::getValueKindForType(ResultTy);
ResultTy = ResultTy.getNonLValueExprType(Context);
- CallExpr *TheCall;
- if (Method->isInstance())
- TheCall = CXXOperatorCallExpr::Create(Context, OO_Call, NewFn.get(),
- MethodArgs, ResultTy, VK, RParenLoc,
- CurFPFeatureOverrides());
- else
- TheCall = CallExpr::Create(Context, NewFn.get(), MethodArgs, ResultTy, VK,
- RParenLoc, CurFPFeatureOverrides());
+ CallExpr *TheCall = CXXOperatorCallExpr::Create(
+ Context, OO_Call, NewFn.get(), MethodArgs, ResultTy, VK, RParenLoc,
+ CurFPFeatureOverrides());
if (CheckCallReturnType(Method->getReturnType(), LParenLoc, TheCall, Method))
return true;
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp
index 9bfa71dc8bcf..a381d876a54c 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp
@@ -7412,9 +7412,9 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
if (ArgResult.isInvalid())
return ExprError();
- // Prior to C++20, enforce restrictions on possible template argument
- // values.
- if (!getLangOpts().CPlusPlus20 && Value.isLValue()) {
+ if (Value.isLValue()) {
+ APValue::LValueBase Base = Value.getLValueBase();
+ auto *VD = const_cast<ValueDecl *>(Base.dyn_cast<const ValueDecl *>());
// For a non-type template-parameter of pointer or reference type,
// the value of the constant expression shall not refer to
assert(ParamType->isPointerType() || ParamType->isReferenceType() ||
@@ -7423,8 +7423,6 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
// -- a string literal
// -- the result of a typeid expression, or
// -- a predefined __func__ variable
- APValue::LValueBase Base = Value.getLValueBase();
- auto *VD = const_cast<ValueDecl *>(Base.dyn_cast<const ValueDecl *>());
if (Base &&
(!VD ||
isa<LifetimeExtendedTemporaryDecl, UnnamedGlobalConstantDecl>(VD))) {
@@ -7432,24 +7430,30 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
<< Arg->getSourceRange();
return ExprError();
}
- // -- a subobject [until C++20]
- if (Value.hasLValuePath() && Value.getLValuePath().size() == 1 &&
- VD && VD->getType()->isArrayType() &&
+
+ if (Value.hasLValuePath() && Value.getLValuePath().size() == 1 && VD &&
+ VD->getType()->isArrayType() &&
Value.getLValuePath()[0].getAsArrayIndex() == 0 &&
!Value.isLValueOnePastTheEnd() && ParamType->isPointerType()) {
- // Per defect report (no number yet):
- // ... other than a pointer to the first element of a complete array
- // object.
- } else if (!Value.hasLValuePath() || Value.getLValuePath().size() ||
- Value.isLValueOnePastTheEnd()) {
- Diag(StartLoc, diag::err_non_type_template_arg_subobject)
- << Value.getAsString(Context, ParamType);
- return ExprError();
+ SugaredConverted = TemplateArgument(VD, ParamType);
+ CanonicalConverted = TemplateArgument(
+ cast<ValueDecl>(VD->getCanonicalDecl()), CanonParamType);
+ return ArgResult.get();
+ }
+
+ // -- a subobject [until C++20]
+ if (!getLangOpts().CPlusPlus20) {
+ if (!Value.hasLValuePath() || Value.getLValuePath().size() ||
+ Value.isLValueOnePastTheEnd()) {
+ Diag(StartLoc, diag::err_non_type_template_arg_subobject)
+ << Value.getAsString(Context, ParamType);
+ return ExprError();
+ }
+ assert((VD || !ParamType->isReferenceType()) &&
+ "null reference should not be a constant expression");
+ assert((!VD || !ParamType->isNullPtrType()) &&
+ "non-null value of type nullptr_t?");
}
- assert((VD || !ParamType->isReferenceType()) &&
- "null reference should not be a constant expression");
- assert((!VD || !ParamType->isNullPtrType()) &&
- "non-null value of type nullptr_t?");
}
if (Value.isAddrLabelDiff())
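The restructured code keeps accepting, in all language modes, a pointer template argument that designates the first element of a complete array object (per the defect report cited in the old comment), while the remaining subobject restrictions stay pre-C++20 only. A hedged sketch of the accepted form:

    template <const char *P> struct Tag {};
    extern const char Name[] = "hi";
    Tag<Name> t;   // decays to &Name[0]: pointer to the first element of a complete array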
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaType.cpp b/contrib/llvm-project/clang/lib/Sema/SemaType.cpp
index 9cb6c0a4ef24..92086d7277fd 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaType.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaType.cpp
@@ -8646,21 +8646,30 @@ static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType,
ASTContext::BuiltinVectorTypeInfo Info =
S.Context.getBuiltinVectorTypeInfo(CurType->castAs<BuiltinType>());
- unsigned EltSize = S.Context.getTypeSize(Info.ElementType);
unsigned MinElts = Info.EC.getKnownMinValue();
+ VectorKind VecKind = VectorKind::RVVFixedLengthData;
+ unsigned ExpectedSize = VScale->first * MinElts;
+ QualType EltType = CurType->getRVVEltType(S.Context);
+ unsigned EltSize = S.Context.getTypeSize(EltType);
+ unsigned NumElts;
+ if (Info.ElementType == S.Context.BoolTy) {
+ NumElts = VecSize / S.Context.getCharWidth();
+ VecKind = VectorKind::RVVFixedLengthMask;
+ } else {
+ ExpectedSize *= EltSize;
+ NumElts = VecSize / EltSize;
+ }
+
// The attribute vector size must match -mrvv-vector-bits.
- unsigned ExpectedSize = VScale->first * MinElts * EltSize;
- if (VecSize != ExpectedSize) {
+ if (ExpectedSize % 8 != 0 || VecSize != ExpectedSize) {
S.Diag(Attr.getLoc(), diag::err_attribute_bad_rvv_vector_size)
<< VecSize << ExpectedSize;
Attr.setInvalid();
return;
}
- VectorKind VecKind = VectorKind::RVVFixedLengthData;
- VecSize /= EltSize;
- CurType = S.Context.getVectorType(Info.ElementType, VecSize, VecKind);
+ CurType = S.Context.getVectorType(EltType, NumElts, VecKind);
}
/// Handle OpenCL Access Qualifier Attribute.
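With this change the riscv_rvv_vector_bits attribute also applies to mask (vbool) types; for a mask the attribute argument is the fixed VLEN divided by the bool ratio, and the resulting fixed-length vector uses unsigned char elements internally. A hedged example, assuming -march=rv64gcv_zvl256b and -mrvv-vector-bits=256 so that __riscv_v_fixed_vlen is 256:

    #include <riscv_vector.h>

    // 256 / 8 == 32 bits of mask, stored as a 4 x unsigned char fixed-length vector.
    typedef vbool8_t fixed_bool8_t
        __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8)));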
diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
index fecd94e875f6..028610deb300 100644
--- a/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
+++ b/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
@@ -9743,6 +9743,9 @@ void ASTReader::finishPendingActions() {
if (!FD->isLateTemplateParsed() &&
!NonConstDefn->isLateTemplateParsed() &&
+ // We only perform ODR checks for decls not in the explicit
+ // global module fragment.
+ !shouldSkipCheckingODR(FD) &&
FD->getODRHash() != NonConstDefn->getODRHash()) {
if (!isa<CXXMethodDecl>(FD)) {
PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn);
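The shouldSkipCheckingODR predicate targets declarations owned by the explicit global module fragment. A hedged sketch of where such declarations come from; the module name and header are illustrative:

    module;            // global module fragment: ODR hashes are neither written nor checked
    #include <cstddef>
    export module demo;
    export std::size_t one() { return 1; }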
diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
index a149d8215303..1fadd8039462 100644
--- a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -804,8 +804,10 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
ED->setScopedUsingClassTag(EnumDeclBits.getNextBit());
ED->setFixed(EnumDeclBits.getNextBit());
- ED->setHasODRHash(true);
- ED->ODRHash = Record.readInt();
+ if (!shouldSkipCheckingODR(ED)) {
+ ED->setHasODRHash(true);
+ ED->ODRHash = Record.readInt();
+ }
// If this is a definition subject to the ODR, and we already have a
// definition, merge this one into it.
@@ -827,7 +829,10 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef));
ED->demoteThisDefinitionToDeclaration();
Reader.mergeDefinitionVisibility(OldDef, ED);
- if (OldDef->getODRHash() != ED->getODRHash())
+ // We don't want to check the ODR hash value for declarations from global
+ // module fragment.
+ if (!shouldSkipCheckingODR(ED) &&
+ OldDef->getODRHash() != ED->getODRHash())
Reader.PendingEnumOdrMergeFailures[OldDef].push_back(ED);
} else {
OldDef = ED;
@@ -866,6 +871,9 @@ ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) {
void ASTDeclReader::VisitRecordDecl(RecordDecl *RD) {
VisitRecordDeclImpl(RD);
+ // We should only reach here if we're in C/Objective-C. There is no
+ // global module fragment.
+ assert(!shouldSkipCheckingODR(RD));
RD->setODRHash(Record.readInt());
// Maintain the invariant of a redeclaration chain containing only
@@ -1094,8 +1102,10 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
if (FD->isExplicitlyDefaulted())
FD->setDefaultLoc(readSourceLocation());
- FD->ODRHash = Record.readInt();
- FD->setHasODRHash(true);
+ if (!shouldSkipCheckingODR(FD)) {
+ FD->ODRHash = Record.readInt();
+ FD->setHasODRHash(true);
+ }
if (FD->isDefaulted()) {
if (unsigned NumLookups = Record.readInt()) {
@@ -1971,9 +1981,12 @@ void ASTDeclReader::ReadCXXDefinitionData(
#include "clang/AST/CXXRecordDeclDefinitionBits.def"
#undef FIELD
- // Note: the caller has deserialized the IsLambda bit already.
- Data.ODRHash = Record.readInt();
- Data.HasODRHash = true;
+ // We only perform ODR checks for decls not in GMF.
+ if (!shouldSkipCheckingODR(D)) {
+ // Note: the caller has deserialized the IsLambda bit already.
+ Data.ODRHash = Record.readInt();
+ Data.HasODRHash = true;
+ }
if (Record.readInt()) {
Reader.DefinitionSource[D] =
@@ -2134,6 +2147,10 @@ void ASTDeclReader::MergeDefinitionData(
}
}
+ // We don't want to check ODR for decls in the global module fragment.
+ if (shouldSkipCheckingODR(MergeDD.Definition))
+ return;
+
if (D->getODRHash() != MergeDD.ODRHash) {
DetectedOdrViolation = true;
}
@@ -3498,11 +3515,14 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
// If this declaration is from a merged context, make a note that we need to
// check that the canonical definition of that context contains the decl.
//
+ // Note that we don't perform ODR checks for decls from the global module
+ // fragment.
+ //
// FIXME: We should do something similar if we merge two definitions of the
// same template specialization into the same CXXRecordDecl.
auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext());
if (MergedDCIt != Reader.MergedDeclContexts.end() &&
- MergedDCIt->second == D->getDeclContext())
+ !shouldSkipCheckingODR(D) && MergedDCIt->second == D->getDeclContext())
Reader.PendingOdrMergeChecks.push_back(D);
return FindExistingResult(Reader, D, /*Existing=*/nullptr,
diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp
index 03bddfe0f504..3b79a9238d1a 100644
--- a/contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp
+++ b/contrib/llvm-project/clang/lib/Serialization/ASTWriter.cpp
@@ -6022,8 +6022,12 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
Record->push_back(DefinitionBits);
- // getODRHash will compute the ODRHash if it has not been previously computed.
- Record->push_back(D->getODRHash());
+ // We only perform ODR checks for decls not in GMF.
+ if (!shouldSkipCheckingODR(D)) {
+ // getODRHash will compute the ODRHash if it has not been previously
+ // computed.
+ Record->push_back(D->getODRHash());
+ }
bool ModulesDebugInfo =
Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType();
diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp
index bb1f51786d28..f224075643e9 100644
--- a/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -493,7 +493,9 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) {
EnumDeclBits.addBit(D->isFixed());
Record.push_back(EnumDeclBits);
- Record.push_back(D->getODRHash());
+ // We only perform ODR checks for decls not in GMF.
+ if (!shouldSkipCheckingODR(D))
+ Record.push_back(D->getODRHash());
if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) {
Record.AddDeclRef(MemberInfo->getInstantiatedFrom());
@@ -510,7 +512,7 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) {
!D->isTopLevelDeclInObjCContainer() &&
!CXXRecordDecl::classofKind(D->getKind()) &&
!D->getIntegerTypeSourceInfo() && !D->getMemberSpecializationInfo() &&
- !needsAnonymousDeclarationNumber(D) &&
+ !needsAnonymousDeclarationNumber(D) && !shouldSkipCheckingODR(D) &&
D->getDeclName().getNameKind() == DeclarationName::Identifier)
AbbrevToUse = Writer.getDeclEnumAbbrev();
@@ -701,7 +703,9 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
if (D->isExplicitlyDefaulted())
Record.AddSourceLocation(D->getDefaultLoc());
- Record.push_back(D->getODRHash());
+ // We only perform ODR checks for decls not in GMF.
+ if (!shouldSkipCheckingODR(D))
+ Record.push_back(D->getODRHash());
if (D->isDefaulted()) {
if (auto *FDI = D->getDefaultedFunctionInfo()) {
@@ -1506,7 +1510,8 @@ void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) {
D->getFirstDecl() == D->getMostRecentDecl() && !D->isInvalidDecl() &&
!D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() &&
D->getDeclName().getNameKind() == DeclarationName::Identifier &&
- !D->hasExtInfo() && !D->isExplicitlyDefaulted()) {
+ !shouldSkipCheckingODR(D) && !D->hasExtInfo() &&
+ !D->isExplicitlyDefaulted()) {
if (D->getTemplatedKind() == FunctionDecl::TK_NonTemplate ||
D->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate ||
D->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization ||
diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp
index 4f989ed59bee..427f51109853 100644
--- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp
+++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp
@@ -40,8 +40,11 @@ static const Expr *ignoreTransparentExprs(const Expr *E) {
switch (E->getStmtClass()) {
case Stmt::OpaqueValueExprClass:
- E = cast<OpaqueValueExpr>(E)->getSourceExpr();
- break;
+ if (const Expr *SE = cast<OpaqueValueExpr>(E)->getSourceExpr()) {
+ E = SE;
+ break;
+ }
+ return E;
case Stmt::ExprWithCleanupsClass:
E = cast<ExprWithCleanups>(E)->getSubExpr();
break;
@@ -98,7 +101,6 @@ SVal Environment::getSVal(const EnvironmentEntry &Entry,
case Stmt::CXXBindTemporaryExprClass:
case Stmt::ExprWithCleanupsClass:
case Stmt::GenericSelectionExprClass:
- case Stmt::OpaqueValueExprClass:
case Stmt::ConstantExprClass:
case Stmt::ParenExprClass:
case Stmt::SubstNonTypeTemplateParmExprClass:
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/i386/chkstk.S b/contrib/llvm-project/compiler-rt/lib/builtins/i386/chkstk.S
index a84bb0ee3007..cdd9a4c2a575 100644
--- a/contrib/llvm-project/compiler-rt/lib/builtins/i386/chkstk.S
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/i386/chkstk.S
@@ -14,7 +14,6 @@
.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function
-DEFINE_COMPILERRT_FUNCTION(_chkstk)
push %ecx
cmp $0x1000,%eax
lea 8(%esp),%ecx // esp before calling this routine -> ecx
@@ -35,7 +34,6 @@ DEFINE_COMPILERRT_FUNCTION(_chkstk)
push (%eax) // push return address onto the stack
sub %esp,%eax // restore the original value in eax
ret
-END_COMPILERRT_FUNCTION(_chkstk)
END_COMPILERRT_FUNCTION(_alloca)
#endif // __i386__
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/x86_64/chkstk.S b/contrib/llvm-project/compiler-rt/lib/builtins/x86_64/chkstk.S
index 494ee261193b..ad7953a116ac 100644
--- a/contrib/llvm-project/compiler-rt/lib/builtins/x86_64/chkstk.S
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/x86_64/chkstk.S
@@ -18,7 +18,6 @@
.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(___chkstk_ms)
-DEFINE_COMPILERRT_FUNCTION(__chkstk)
push %rcx
push %rax
cmp $0x1000,%rax
@@ -36,7 +35,6 @@ DEFINE_COMPILERRT_FUNCTION(__chkstk)
pop %rax
pop %rcx
ret
-END_COMPILERRT_FUNCTION(__chkstk)
END_COMPILERRT_FUNCTION(___chkstk_ms)
#endif // __x86_64__
diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c
index c976776ae59e..0751b28f81d0 100644
--- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c
+++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c
@@ -77,7 +77,7 @@ ValueProfNode *EndVNode = &VNodesEnd;
/* lld-link provides __buildid symbol which points to the 16 bytes build id when
* using /build-id flag. https://lld.llvm.org/windows_support.html#lld-flags */
#define BUILD_ID_LEN 16
-COMPILER_RT_WEAK extern uint8_t __buildid[BUILD_ID_LEN];
+COMPILER_RT_WEAK uint8_t __buildid[BUILD_ID_LEN];
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
if (*__buildid) {
if (Writer &&
diff --git a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
index 8438e019591b..f6b157c07c65 100644
--- a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
+++ b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
@@ -34,8 +34,10 @@ static bool FrameIsInternal(const SymbolizedStack *frame) {
return true;
const char *file = frame->info.file;
const char *module = frame->info.module;
+ // On Gentoo, the path is g++-*, so there's *not* a missing /.
if (file && (internal_strstr(file, "/compiler-rt/lib/") ||
- internal_strstr(file, "/include/c++/")))
+ internal_strstr(file, "/include/c++/") ||
+ internal_strstr(file, "/include/g++")))
return true;
if (module && (internal_strstr(module, "libclang_rt.")))
return true;
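The sanitizer hunk above extends the "internal frame" filter so report symbolization also skips frames from Gentoo's versioned libstdc++ layout; as the added comment notes, the matched path looks like .../include/g++-<version>, which is why the new pattern has no trailing slash. A hedged sketch of the same substring filter with an illustrative name:

    #include <cstring>

    static bool looks_internal(const char *file) {
      if (!file)
        return false;
      return std::strstr(file, "/compiler-rt/lib/") != nullptr ||
             std::strstr(file, "/include/c++/") != nullptr ||
             std::strstr(file, "/include/g++") != nullptr;  // also matches g++-<ver>
    }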
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/copy_move_common.h b/contrib/llvm-project/libcxx/include/__algorithm/copy_move_common.h
index b350507e32ba..0fc7a5e3cee7 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/copy_move_common.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/copy_move_common.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
// Type traits.
@@ -132,4 +135,6 @@ __dispatch_copy_or_move(_InIter __first, _Sent __last, _OutIter __out_first) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_COPY_MOVE_COMMON_H
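This copy_move_common.h hunk is the first of a long run of libc++ headers in this merge that gain the same guard: _LIBCPP_PUSH_MACROS plus #include <__undef_macros> after the system-header pragma, and _LIBCPP_POP_MACROS before the closing include guard. As libc++'s convention has it, the push/pop pair saves and restores user-visible macros such as min and max around the header body, so the header's own code is not broken by, for example, <windows.h> defining min and max. A hedged stand-alone illustration of the underlying pragma mechanism (not libc++ code):

    // A user macro that would otherwise mangle any 'min' used inside a header.
    #define min(a, b) ((a) < (b) ? (a) : (b))

    #pragma push_macro("min")   // save the user's macro
    #undef min                  // header body sees the plain identifier
    template <class T>
    const T& lib_min(const T& a, const T& b) { return b < a ? a : b; }
    #pragma pop_macro("min")    // restore it for user code

    int main() { return (lib_min(1, 2) == 1 && min(3, 4) == 3) ? 0 : 1; }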
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/equal.h b/contrib/llvm-project/libcxx/include/__algorithm/equal.h
index f03f010aa51a..3c0e3060e39a 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/equal.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/equal.h
@@ -30,6 +30,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
@@ -162,4 +165,6 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_EQUAL_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/equal_range.h b/contrib/llvm-project/libcxx/include/__algorithm/equal_range.h
index 7ce54965fff0..a94290431971 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/equal_range.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/equal_range.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Compare, class _Iter, class _Sent, class _Tp, class _Proj>
@@ -77,4 +80,6 @@ equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_EQUAL_RANGE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/fold.h b/contrib/llvm-project/libcxx/include/__algorithm/fold.h
index 88e6814d5cf9..1a9d76b50d83 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/fold.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/fold.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 23
@@ -122,4 +125,6 @@ inline constexpr auto fold_left = __fold_left();
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_FOLD_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/in_found_result.h b/contrib/llvm-project/libcxx/include/__algorithm/in_found_result.h
index 88a0255d1698..a67ae387974c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/in_found_result.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/in_found_result.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -46,4 +49,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_IN_FOUND_RESULT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/in_fun_result.h b/contrib/llvm-project/libcxx/include/__algorithm/in_fun_result.h
index 6110c1cf86cd..a22069a9a8dd 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/in_fun_result.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/in_fun_result.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -46,4 +49,6 @@ struct in_fun_result {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_IN_FUN_RESULT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/in_in_out_result.h b/contrib/llvm-project/libcxx/include/__algorithm/in_in_out_result.h
index 95ce4f4fd5bd..ba0380b5c681 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/in_in_out_result.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/in_in_out_result.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -51,4 +54,6 @@ struct in_in_out_result {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_IN_IN_OUT_RESULT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/in_in_result.h b/contrib/llvm-project/libcxx/include/__algorithm/in_in_result.h
index d1d62dae7f67..994573fc70fd 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/in_in_result.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/in_in_result.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -48,4 +51,6 @@ struct in_in_result {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_IN_IN_RESULT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/in_out_out_result.h b/contrib/llvm-project/libcxx/include/__algorithm/in_out_out_result.h
index 143642368750..8ceb452841a4 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/in_out_out_result.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/in_out_out_result.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -49,4 +52,6 @@ struct in_out_out_result {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_IN_OUT_OUT_RESULT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/includes.h b/contrib/llvm-project/libcxx/include/__algorithm/includes.h
index 531752e93175..05d45365eb80 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/includes.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/includes.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Comp, class _Proj1, class _Proj2>
@@ -71,4 +74,6 @@ includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __fi
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_INCLUDES_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/next_permutation.h b/contrib/llvm-project/libcxx/include/__algorithm/next_permutation.h
index d66ea9b97345..011ee028cc2f 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/next_permutation.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/next_permutation.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Compare, class _BidirectionalIterator, class _Sentinel>
@@ -67,4 +70,6 @@ next_permutation(_BidirectionalIterator __first, _BidirectionalIterator __last)
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_NEXT_PERMUTATION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/nth_element.h b/contrib/llvm-project/libcxx/include/__algorithm/nth_element.h
index 37ddfbdacf04..da748d7255ab 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/nth_element.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/nth_element.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _RandomAccessIterator>
@@ -253,4 +256,6 @@ nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomA
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_NTH_ELEMENT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/partial_sort.h b/contrib/llvm-project/libcxx/include/__algorithm/partial_sort.h
index 27511a124229..85a8fdc77aa2 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/partial_sort.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/partial_sort.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, class _Sentinel>
@@ -83,4 +86,6 @@ partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Ran
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PARTIAL_SORT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/partial_sort_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/partial_sort_copy.h
index e7d8df4de89f..ef7c9d34d949 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/partial_sort_copy.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy,
@@ -98,4 +101,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PARTIAL_SORT_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/partition.h b/contrib/llvm-project/libcxx/include/__algorithm/partition.h
index e2ceb07bf195..824e49b9ec21 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/partition.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/partition.h
@@ -19,6 +19,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Predicate, class _AlgPolicy, class _ForwardIterator, class _Sentinel>
@@ -82,4 +85,6 @@ partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PARTITION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/prev_permutation.h b/contrib/llvm-project/libcxx/include/__algorithm/prev_permutation.h
index 3e4bbb3fbb16..8d15b6806401 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/prev_permutation.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/prev_permutation.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Compare, class _BidirectionalIterator, class _Sentinel>
@@ -67,4 +70,6 @@ prev_permutation(_BidirectionalIterator __first, _BidirectionalIterator __last)
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PREV_PERMUTATION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_any_all_none_of.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_any_all_none_of.h
index d93fdba2224c..4b1e0e61b542 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_any_all_none_of.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_any_all_none_of.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -144,4 +147,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_ANY_ALL_NONE_OF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h
index ab2e3172b8b6..14a0d76741d4 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -194,4 +197,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_REDUCE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_copy.h
index 19f275a0d5d9..1069dcec0e11 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_copy.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -113,4 +116,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_count.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_count.h
index 28806fca0637..2781f6bfd3c9 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_count.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_count.h
@@ -29,6 +29,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -113,4 +116,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_COUNT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_equal.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_equal.h
index b343d2675980..d235c0f4f419 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_equal.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_equal.h
@@ -21,6 +21,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -167,4 +170,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_EQUAL_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_fill.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_fill.h
index 3057dcc04f1a..488b49a0feec 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_fill.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_fill.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -108,4 +111,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_FILL_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_find.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_find.h
index adc05ea1a9e5..5b694db68aea 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_find.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_find.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -133,4 +136,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_FIND_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_for_each.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_for_each.h
index 819a43d685ab..bb7b5a61a6dc 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_for_each.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_for_each.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -100,4 +103,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_FOR_EACH_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_generate.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_generate.h
index 56538392d5b5..7133c6f4f4c6 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_generate.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_generate.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -106,4 +109,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_GENERATE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_is_partitioned.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_is_partitioned.h
index 39cf6369339d..b65430212207 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_is_partitioned.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_is_partitioned.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -69,4 +72,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_IS_PARITTIONED
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_merge.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_merge.h
index ed8014510863..3d262db6bc0c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_merge.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_merge.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -84,4 +87,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_MERGE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_move.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_move.h
index 52baab57591e..d8441f1a6c2e 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_move.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_move.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -76,4 +79,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_MOVE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_replace.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_replace.h
index 05dee3f6a4f3..b1caf3fd4ac0 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_replace.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_replace.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -239,4 +242,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_REPLACE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_rotate_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_rotate_copy.h
index 33dc9a3635f7..346aab1d4a55 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_rotate_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_rotate_copy.h
@@ -19,6 +19,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -77,4 +80,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_ROTATE_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_sort.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_sort.h
index 3e71e0aa5ae0..a931f768111a 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_sort.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_sort.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_SORT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_stable_sort.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_stable_sort.h
index c9d375535fc4..8ea0bb3f9a8d 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_stable_sort.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_stable_sort.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -53,4 +56,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_STABLE_SORT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_transform.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_transform.h
index aad59d1f30e6..f95938782fc3 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_transform.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_transform.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -112,4 +115,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_PSTL_TRANSFORM_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_all_of.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_all_of.h
index 39a2ae4de01e..8976541d590c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_all_of.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_all_of.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -66,4 +69,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_ALL_OF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_any_of.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_any_of.h
index 2ca8531102ea..7c775f5f64de 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_any_of.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_any_of.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -66,4 +69,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_ANY_OF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_binary_search.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_binary_search.h
index 22008e0f1bc8..f3b7842d5ccc 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_binary_search.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_binary_search.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -65,4 +68,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_BINARY_SEARCH_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_clamp.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_clamp.h
index a1185e7278f0..f5ef5fd7f26e 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_clamp.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_clamp.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -58,4 +61,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_CLAMP_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_contains.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_contains.h
index f92fcec587d8..00d0e5401988 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_contains.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_contains.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 23
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -58,4 +61,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 23
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_CONTAINS_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy.h
index 1c87f074e7ca..e1d6d32f05f7 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -63,4 +66,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_backward.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_backward.h
index 865e944d4384..93e326042503 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_backward.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_backward.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -61,4 +64,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_COPY_BACKWARD_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_if.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_if.h
index b77dbd37fcee..4b41d2154e7f 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_if.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_if.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -79,4 +82,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_COPY_IF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_n.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_n.h
index 99e8eee14d0f..4353fa99278c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_n.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_copy_n.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -73,4 +76,6 @@ inline constexpr auto copy_n = __copy_n::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_COPY_N_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_count.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_count.h
index 4c8f1b2cbea7..a8965c1b961f 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_count.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_count.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -58,4 +61,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_COUNT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_count_if.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_count_if.h
index 92f37d049e0c..71b942dd5322 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_count_if.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_count_if.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -71,4 +74,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_COUNT_IF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_ends_with.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_ends_with.h
index 2afb74bff0f1..c2a3cae9f3b1 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_ends_with.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_ends_with.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 23
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -193,4 +196,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 23
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_ENDS_WITH_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_equal.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_equal.h
index 4cb1f7df1952..31c7ee261da6 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_equal.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_equal.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -101,4 +104,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_EQUAL_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_equal_range.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_equal_range.h
index 1ff8856ca03f..4c1c3834ba9f 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_equal_range.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_equal_range.h
@@ -30,6 +30,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -72,4 +75,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_EQUAL_RANGE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_fill.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_fill.h
index 88a892f5c278..7a177d85e9f0 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_fill.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_fill.h
@@ -20,6 +20,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -54,4 +57,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FILL_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_fill_n.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_fill_n.h
index dbd8ec27aef9..a6e988c0089c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_fill_n.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_fill_n.h
@@ -17,6 +17,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -45,4 +48,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FILL_N_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find.h
index de870e381184..7459fad717a5 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -72,4 +75,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FIND_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_end.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_end.h
index 2c57ad424bfd..0bda4f3e1cea 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_end.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_end.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -95,4 +98,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FIND_END_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_first_of.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_first_of.h
index ec6d52c63250..63a7b8335faa 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_first_of.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_first_of.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -98,4 +101,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FIND_FIRST_OF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if.h
index af54a5007ee2..52ae55ce96c3 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -67,4 +70,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FIND_IF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if_not.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if_not.h
index a18bea43165e..60c6796cbbfc 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if_not.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_find_if_not.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -61,4 +64,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FIND_IF_NOT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each.h
index 7878ed26709f..225dc774c876 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each_n.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each_n.h
index 53ccb9a6035a..d1fdca34cc5a 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each_n.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_for_each_n.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -56,4 +59,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_generate.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_generate.h
index 3ff1e13c4220..e6467198e6ba 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_generate.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_generate.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -65,4 +68,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_GENERATE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_generate_n.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_generate_n.h
index c025c621a191..cd5fd7483ab2 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_generate_n.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_generate_n.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -57,4 +60,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_includes.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_includes.h
index aa35080c8cfd..0bc4c043bd18 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_includes.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_includes.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -90,4 +93,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_INCLUDES_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_inplace_merge.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_inplace_merge.h
index 86001b003d5c..d94c0ad46567 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_inplace_merge.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_inplace_merge.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -76,4 +79,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_INPLACE_MERGE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap.h
index f298c347b747..122368c90d92 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_IS_HEAP_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap_until.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap_until.h
index 73f13fb50440..b2705d37a6d3 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap_until.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_heap_until.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_IS_HEAP_UNTIL_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_partitioned.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_partitioned.h
index 76db870efc70..c6a585c9f510 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_partitioned.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_partitioned.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -78,4 +81,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_IS_PARTITIONED_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_permutation.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_permutation.h
index 2b99839bc66f..e0423d722b5b 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_permutation.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_permutation.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -99,4 +102,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_IS_PERMUTATION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted.h
index 3eb2c768d66a..d71035d5aa1d 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -59,4 +62,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP__ALGORITHM_RANGES_IS_SORTED_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted_until.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted_until.h
index 19e9875d2757..dcfb6a4e1813 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted_until.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_is_sorted_until.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP__ALGORITHM_RANGES_IS_SORTED_UNTIL_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_iterator_concept.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_iterator_concept.h
index 9a9203040336..2af891d3af00 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_iterator_concept.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_iterator_concept.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -48,4 +51,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_ITERATOR_CONCEPT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_lexicographical_compare.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_lexicographical_compare.h
index 5b843dfd7b31..90e96b546516 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_lexicographical_compare.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_lexicographical_compare.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -98,4 +101,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_lower_bound.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_lower_bound.h
index 58b3f815b96a..ab1f80e7ab77 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_lower_bound.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_lower_bound.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -65,4 +68,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_LOWER_BOUND_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_make_heap.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_make_heap.h
index f17eabff43d2..fe9c024fbf8a 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_make_heap.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_make_heap.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -77,4 +80,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_MAKE_HEAP_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_max_element.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_max_element.h
index 2ba97042f1f6..83adf49b61ad 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_max_element.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_max_element.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -61,4 +64,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_MAX_ELEMENT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_merge.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_merge.h
index 7f49154ec922..bdf9a62d90bd 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_merge.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_merge.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -130,4 +133,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_MERGE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_min_element.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_min_element.h
index 07826a0e6b81..4b9cb76da578 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_min_element.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_min_element.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_MIN_ELEMENT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_minmax_element.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_minmax_element.h
index a52319f6b5d3..5132856ebcd5 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_minmax_element.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_minmax_element.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -70,4 +73,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_MINMAX_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_mismatch.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_mismatch.h
index db9bfc8e87db..037af3912623 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_mismatch.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_mismatch.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -86,4 +89,6 @@ constexpr inline auto mismatch = __mismatch::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_MISMATCH_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_move.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_move.h
index 8bd2409f891c..be869f36c973 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_move.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_move.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -66,4 +69,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_MOVE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_move_backward.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_move_backward.h
index ee390a40e489..6d4071a33b81 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_move_backward.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_move_backward.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -68,4 +71,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_MOVE_BACKWARD_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_next_permutation.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_next_permutation.h
index 9ebab3ea7c13..18535e0a6254 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_next_permutation.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_next_permutation.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -70,4 +73,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_NEXT_PERMUTATION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_none_of.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_none_of.h
index b0d363895e00..59bd87997d44 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_none_of.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_none_of.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -67,4 +70,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_NONE_OF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_nth_element.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_nth_element.h
index 7abdbd0889e0..90ade9efe10d 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_nth_element.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_nth_element.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -76,4 +79,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_NTH_ELEMENT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort.h
index 9ec8882097d7..c67247d2e0a7 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort.h
@@ -33,6 +33,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort_copy.h
index eba7d9ac4165..b3bdeb78fb6f 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partial_sort_copy.h
@@ -30,6 +30,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -106,4 +109,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition.h
index 89d192b51fd3..a67ac4c96757 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -80,4 +83,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_copy.h
index 6a16b02db3e5..d60c865dd2a8 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_copy.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -102,4 +105,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_point.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_point.h
index 6fc20e7d00e9..c5b11b5fed19 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_point.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_partition_point.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -85,4 +88,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_POINT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_pop_heap.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_pop_heap.h
index 364cfe94b161..01f92c0f2288 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_pop_heap.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_pop_heap.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -78,4 +81,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_POP_HEAP_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_prev_permutation.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_prev_permutation.h
index ae7a68cce5fd..225cee9b75ec 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_prev_permutation.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_prev_permutation.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -70,4 +73,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_PREV_PERMUTATION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_push_heap.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_push_heap.h
index 1ed9c953f54c..9d187af38c53 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_push_heap.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_push_heap.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -77,4 +80,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_PUSH_HEAP_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove.h
index e27c4bdd733d..315bed8fba77 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -60,4 +63,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy.h
index 5158a78e4814..84529eceac68 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy_if.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy_if.h
index c07b4813d7d0..56fe01753312 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy_if.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_copy_if.h
@@ -29,6 +29,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -87,4 +90,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_IF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_if.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_if.h
index 4b7aa2d2be78..943dbdd73807 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_if.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_remove_if.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -81,4 +84,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_IF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace.h
index b66a41aa8d0d..2b88dc032972 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -60,4 +63,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy.h
index a7627024812f..633f993e5c94 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -85,4 +88,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy_if.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy_if.h
index 10ed1fda6c5c..e065c3ac0acc 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy_if.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_copy_if.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -90,4 +93,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_IF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_if.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_if.h
index 519fa32029ac..6445f42aea19 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_if.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_replace_if.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_IF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_reverse_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_reverse_copy.h
index 35b9edba0bfb..60043787a717 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_reverse_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_reverse_copy.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -62,4 +65,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_REVERSE_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate.h
index ebed9bbd5426..8d33a6f0799b 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -63,4 +66,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_ROTATE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate_copy.h
index ab76c0944c47..26fe110b5389 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_rotate_copy.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -60,4 +63,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_ROTATE_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_sample.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_sample.h
index d347d82205a8..e4f60a7b66be 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_sample.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_sample.h
@@ -27,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -66,4 +69,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SAMPLE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_search_n.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_search_n.h
index 4e53f30f71f9..4c1d73d8e6c3 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_search_n.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_search_n.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -108,4 +111,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SEARCH_N_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_difference.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_difference.h
index a9453ed336f5..0841fb4ffd0c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_difference.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_difference.h
@@ -30,6 +30,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -100,4 +103,7 @@ inline constexpr auto set_difference = __set_difference::__fn{};
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SET_DIFFERENCE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_intersection.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_intersection.h
index 4cdcbb75051a..9427379745b6 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_intersection.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_intersection.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -105,4 +108,7 @@ inline constexpr auto set_intersection = __set_intersection::__fn{};
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SET_INTERSECTION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_symmetric_difference.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_symmetric_difference.h
index d8710a1c47b0..995eb0999d94 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_symmetric_difference.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_symmetric_difference.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -105,4 +108,7 @@ inline constexpr auto set_symmetric_difference = __set_symmetric_difference::__f
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SET_SYMMETRIC_DIFFERENCE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_union.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_union.h
index c627166fffed..e870e390cc66 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_union.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_set_union.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -107,4 +110,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SET_UNION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_shuffle.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_shuffle.h
index fca420058dec..ab98ea22caab 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_shuffle.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_shuffle.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -64,4 +67,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SHUFFLE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_sort.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_sort.h
index 2ad0e0c233be..0296c146b3ed 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_sort.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_sort.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -76,4 +79,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SORT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_sort_heap.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_sort_heap.h
index 365c7dba6156..bab30df1708c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_sort_heap.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_sort_heap.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -77,4 +80,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SORT_HEAP_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_partition.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_partition.h
index 44937fa58990..f34027ff772c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_partition.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_partition.h
@@ -34,6 +34,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -84,4 +87,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_STABLE_PARTITION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_sort.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_sort.h
index a4eed3836356..93909e253cc0 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_sort.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_stable_sort.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_STABLE_SORT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_starts_with.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_starts_with.h
index 7da78001d814..90e184aa9bcc 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_starts_with.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_starts_with.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 23
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -87,4 +90,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 23
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_swap_ranges.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_swap_ranges.h
index 1d0ebc0d5221..b6d9f618395a 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_swap_ranges.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_swap_ranges.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -62,4 +65,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_SWAP_RANGES_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_transform.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_transform.h
index f66a07ac026e..7850ec4f8465 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_transform.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_transform.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -169,4 +172,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_TRANSFORM_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_unique.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_unique.h
index b17e01fc5057..7340310eb36a 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_unique.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_unique.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_UNIQUE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/ranges_unique_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/ranges_unique_copy.h
index 7e89f9d97af7..61133885ae80 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/ranges_unique_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/ranges_unique_copy.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -112,4 +115,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_RANGES_UNIQUE_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/remove.h b/contrib/llvm-project/libcxx/include/__algorithm/remove.h
index 2b9d4ff26ed2..1498852c4361 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/remove.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/remove.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Tp>
@@ -38,4 +41,6 @@ remove(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_REMOVE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/remove_if.h b/contrib/llvm-project/libcxx/include/__algorithm/remove_if.h
index 6eceddce8d56..c77b78023f52 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/remove_if.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/remove_if.h
@@ -17,6 +17,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Predicate>
@@ -37,4 +40,6 @@ remove_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_REMOVE_IF_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/reverse.h b/contrib/llvm-project/libcxx/include/__algorithm/reverse.h
index 6bd0aa393280..4167c9116d96 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/reverse.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/reverse.h
@@ -19,6 +19,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _BidirectionalIterator>
@@ -54,4 +57,6 @@ reverse(_BidirectionalIterator __first, _BidirectionalIterator __last) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_REVERSE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/rotate.h b/contrib/llvm-project/libcxx/include/__algorithm/rotate.h
index d8162b1a94b2..9a4d07883e32 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/rotate.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/rotate.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _ForwardIterator>
@@ -190,4 +193,6 @@ rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __l
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_ROTATE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/set_difference.h b/contrib/llvm-project/libcxx/include/__algorithm/set_difference.h
index a924702ce5f2..f414bcecb50d 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/set_difference.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/set_difference.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Comp, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter>
@@ -71,4 +74,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_d
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SET_DIFFERENCE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/set_intersection.h b/contrib/llvm-project/libcxx/include/__algorithm/set_intersection.h
index f2603fe1365a..73d888d1b038 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/set_intersection.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/set_intersection.h
@@ -21,6 +21,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InIter1, class _InIter2, class _OutIter>
@@ -95,4 +98,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_i
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SET_INTERSECTION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/set_symmetric_difference.h b/contrib/llvm-project/libcxx/include/__algorithm/set_symmetric_difference.h
index 832c3979bfd7..db36665a6136 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/set_symmetric_difference.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/set_symmetric_difference.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InIter1, class _InIter2, class _OutIter>
@@ -101,4 +104,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_symmetri
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SET_SYMMETRIC_DIFFERENCE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/set_union.h b/contrib/llvm-project/libcxx/include/__algorithm/set_union.h
index cf48adae03be..a79c50fd3cf2 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/set_union.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/set_union.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InIter1, class _InIter2, class _OutIter>
@@ -97,4 +100,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_union(
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SET_UNION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/shift_left.h b/contrib/llvm-project/libcxx/include/__algorithm/shift_left.h
index 645c58c29119..06cd7c5f8764 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/shift_left.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/shift_left.h
@@ -17,6 +17,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -51,4 +54,6 @@ shift_left(_ForwardIterator __first,
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SHIFT_LEFT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/shift_right.h b/contrib/llvm-project/libcxx/include/__algorithm/shift_right.h
index 73ef98bd39de..01853057fc47 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/shift_right.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/shift_right.h
@@ -20,6 +20,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -97,4 +100,6 @@ shift_right(_ForwardIterator __first,
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SHIFT_RIGHT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/sort.h b/contrib/llvm-project/libcxx/include/__algorithm/sort.h
index 451133a2d193..8a5e0211cdf4 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/sort.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/sort.h
@@ -39,6 +39,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
// stable, 2-3 compares, 0-2 swaps
@@ -1009,4 +1012,6 @@ sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SORT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/sort_heap.h b/contrib/llvm-project/libcxx/include/__algorithm/sort_heap.h
index 0a6d992d0090..060fc33c3c6e 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/sort_heap.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/sort_heap.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
@@ -55,4 +58,6 @@ sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SORT_HEAP_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/stable_partition.h b/contrib/llvm-project/libcxx/include/__algorithm/stable_partition.h
index 8762abcf18e1..8bb1eaf2d224 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/stable_partition.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/stable_partition.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Predicate, class _ForwardIterator, class _Distance, class _Pair>
@@ -299,4 +302,6 @@ stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate _
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_STABLE_PARTITION_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/stable_sort.h b/contrib/llvm-project/libcxx/include/__algorithm/stable_sort.h
index ffc6e4ce2818..9be192bd65a6 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/stable_sort.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/stable_sort.h
@@ -29,6 +29,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
@@ -265,4 +268,6 @@ inline _LIBCPP_HIDE_FROM_ABI void stable_sort(_RandomAccessIterator __first, _Ra
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_STABLE_SORT_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/swap_ranges.h b/contrib/llvm-project/libcxx/include/__algorithm/swap_ranges.h
index 7fab5c49a656..54b453b72360 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/swap_ranges.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/swap_ranges.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
// 2+2 iterators: the shorter size will be used.
@@ -54,4 +57,6 @@ swap_ranges(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardItera
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_SWAP_RANGES_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/unique.h b/contrib/llvm-project/libcxx/include/__algorithm/unique.h
index 1717a00c8a93..056373d06fe4 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/unique.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/unique.h
@@ -21,6 +21,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
// unique
@@ -56,4 +59,6 @@ unique(_ForwardIterator __first, _ForwardIterator __last) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_UNIQUE_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/unique_copy.h b/contrib/llvm-project/libcxx/include/__algorithm/unique_copy.h
index 81fcd50f011d..16ce80cab32f 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/unique_copy.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/unique_copy.h
@@ -23,6 +23,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __unique_copy_tags {
@@ -119,4 +122,6 @@ unique_copy(_InputIterator __first, _InputIterator __last, _OutputIterator __res
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_UNIQUE_COPY_H
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/unwrap_iter.h b/contrib/llvm-project/libcxx/include/__algorithm/unwrap_iter.h
index a298a2b27105..50d815c97088 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/unwrap_iter.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/unwrap_iter.h
@@ -80,6 +80,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _OrigIter __rewrap_iter(_OrigIter __orig
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_PUSH_MACROS
+_LIBCPP_POP_MACROS
#endif // _LIBCPP___ALGORITHM_UNWRAP_ITER_H
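Unlike the additions elsewhere in this merge, the unwrap_iter.h hunk fixes an existing imbalance: the header already pushed the macros near the top but ended with a second _LIBCPP_PUSH_MACROS instead of a pop. A small sketch of why that matters, using a hypothetical min macro as the user-defined stand-in:

// Effect of an unbalanced push (illustrative only; not libc++ code).
#define min(a, b) ((a) < (b) ? (a) : (b)) // user's function-like macro

#pragma push_macro("min") // save the definition above
#undef min
// ... header body compiles without interference from min() ...
#pragma push_macro("min") // the bug the hunk fixes: pushes again instead of popping,
                          // so min stays undefined for the rest of the translation unit
// The corrected header instead ends with:
// #pragma pop_macro("min") // restores the user's macro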
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/unwrap_range.h b/contrib/llvm-project/libcxx/include/__algorithm/unwrap_range.h
index 053fd550b302..2d4b9bb5545a 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/unwrap_range.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/unwrap_range.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
// __unwrap_range and __rewrap_range are used to unwrap ranges which may have different iterator and sentinel types.
@@ -91,4 +94,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Iter __rewrap_range(_Iter __orig_iter,
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_UNWRAP_RANGE_H
diff --git a/contrib/llvm-project/libcxx/include/__config b/contrib/llvm-project/libcxx/include/__config
index 8248cf3d2239..fa5535facb14 100644
--- a/contrib/llvm-project/libcxx/include/__config
+++ b/contrib/llvm-project/libcxx/include/__config
@@ -176,11 +176,6 @@
// The implementation moved to the header, but we still export the symbols from
// the dylib for backwards compatibility.
# define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
-// Save memory by providing the allocator more freedom to allocate the most
-// efficient size class by dropping the alignment requirements for std::string's
-// pointer from 16 to 8. This changes the output of std::string::max_size,
-// which makes it ABI breaking
-# define _LIBCPP_ABI_STRING_8_BYTE_ALIGNMENT
# elif _LIBCPP_ABI_VERSION == 1
# if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF))
// Enable compiling copies of now inline methods into the dylib to support
@@ -1283,8 +1278,8 @@ __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, c
# endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES
// clang-format off
-# define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") _Pragma("push_macro(\"refresh()\")") _Pragma("push_macro(\"move(int, int)\")") _Pragma("push_macro(\"erase()\")")
-# define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") _Pragma("pop_macro(\"refresh()\")") _Pragma("pop_macro(\"move(int, int)\")") _Pragma("pop_macro(\"erase()\")")
+# define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") _Pragma("push_macro(\"refresh\")") _Pragma("push_macro(\"move\")") _Pragma("push_macro(\"erase\")")
+# define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") _Pragma("pop_macro(\"refresh\")") _Pragma("pop_macro(\"move\")") _Pragma("pop_macro(\"erase\")")
// clang-format on
# ifndef _LIBCPP_NO_AUTO_LINK
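The second hunk in the __config diff above changes how the push/pop pair is spelled: #pragma push_macro and #pragma pop_macro take a macro's name, so the old function-like strings such as "refresh()" and "move(int, int)" did not name the refresh and move macros and did not actually save or restore them. A brief sketch, with a curses-style move macro and a do_curses_move helper as illustrative stand-ins for user code:

// Why bare names are needed (do_curses_move is hypothetical).
#define move(x, y) do_curses_move(x, y) // function-like macro, e.g. from a curses header

#pragma push_macro("move") // saves the definition above; a string like "move(int, int)"
#undef move                // would not name this macro and would save nothing

// libc++ can declare std::move and its own move() overloads safely here

#pragma pop_macro("move")  // restores the curses-style macro for user code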
diff --git a/contrib/llvm-project/libcxx/include/__filesystem/directory_iterator.h b/contrib/llvm-project/libcxx/include/__filesystem/directory_iterator.h
index 5287a4d8b055..a5aa5ff5432d 100644
--- a/contrib/llvm-project/libcxx/include/__filesystem/directory_iterator.h
+++ b/contrib/llvm-project/libcxx/include/__filesystem/directory_iterator.h
@@ -29,6 +29,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM)
_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
@@ -144,4 +147,6 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool
#endif // _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM)
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H
diff --git a/contrib/llvm-project/libcxx/include/__filesystem/path.h b/contrib/llvm-project/libcxx/include/__filesystem/path.h
index 1ff992dd64e6..8c7d426f7a6f 100644
--- a/contrib/llvm-project/libcxx/include/__filesystem/path.h
+++ b/contrib/llvm-project/libcxx/include/__filesystem/path.h
@@ -36,6 +36,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
@@ -925,4 +928,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FILESYSTEM_PATH_H
diff --git a/contrib/llvm-project/libcxx/include/__filesystem/recursive_directory_iterator.h b/contrib/llvm-project/libcxx/include/__filesystem/recursive_directory_iterator.h
index 7519cc2f2932..a8af4f73b14a 100644
--- a/contrib/llvm-project/libcxx/include/__filesystem/recursive_directory_iterator.h
+++ b/contrib/llvm-project/libcxx/include/__filesystem/recursive_directory_iterator.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM)
_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
@@ -157,4 +160,6 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool
#endif // _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM)
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H
diff --git a/contrib/llvm-project/libcxx/include/__format/format_arg.h b/contrib/llvm-project/libcxx/include/__format/format_arg.h
index 10fca15d5a7a..34ed9bcd6d63 100644
--- a/contrib/llvm-project/libcxx/include/__format/format_arg.h
+++ b/contrib/llvm-project/libcxx/include/__format/format_arg.h
@@ -30,6 +30,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -289,4 +292,6 @@ _LIBCPP_HIDE_FROM_ABI decltype(auto) visit_format_arg(_Visitor&& __vis, basic_fo
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FORMAT_FORMAT_ARG_H
diff --git a/contrib/llvm-project/libcxx/include/__format/format_context.h b/contrib/llvm-project/libcxx/include/__format/format_context.h
index 5b252b81f691..edb0348b34f3 100644
--- a/contrib/llvm-project/libcxx/include/__format/format_context.h
+++ b/contrib/llvm-project/libcxx/include/__format/format_context.h
@@ -35,6 +35,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -205,4 +208,6 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(basic_format_context);
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FORMAT_FORMAT_CONTEXT_H
diff --git a/contrib/llvm-project/libcxx/include/__format/format_functions.h b/contrib/llvm-project/libcxx/include/__format/format_functions.h
index 015bff70f51d..cf833ad20554 100644
--- a/contrib/llvm-project/libcxx/include/__format/format_functions.h
+++ b/contrib/llvm-project/libcxx/include/__format/format_functions.h
@@ -48,6 +48,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -674,4 +677,6 @@ formatted_size(locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args)
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FORMAT_FORMAT_FUNCTIONS
diff --git a/contrib/llvm-project/libcxx/include/__format/formatter_output.h b/contrib/llvm-project/libcxx/include/__format/formatter_output.h
index eebe880d69ef..d5038eb158b0 100644
--- a/contrib/llvm-project/libcxx/include/__format/formatter_output.h
+++ b/contrib/llvm-project/libcxx/include/__format/formatter_output.h
@@ -35,6 +35,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -325,4 +328,6 @@ _LIBCPP_HIDE_FROM_ABI int __truncate(basic_string_view<_CharT>& __str, int __pre
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FORMAT_FORMATTER_OUTPUT_H
diff --git a/contrib/llvm-project/libcxx/include/__format/write_escaped.h b/contrib/llvm-project/libcxx/include/__format/write_escaped.h
index ec1283a173e9..43a074dd8d70 100644
--- a/contrib/llvm-project/libcxx/include/__format/write_escaped.h
+++ b/contrib/llvm-project/libcxx/include/__format/write_escaped.h
@@ -30,6 +30,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __formatter {
@@ -218,4 +221,6 @@ __format_escaped_string(basic_string_view<_CharT> __values,
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H
diff --git a/contrib/llvm-project/libcxx/include/__functional/function.h b/contrib/llvm-project/libcxx/include/__functional/function.h
index 6505bb587173..416c26a0c73f 100644
--- a/contrib/llvm-project/libcxx/include/__functional/function.h
+++ b/contrib/llvm-project/libcxx/include/__functional/function.h
@@ -45,6 +45,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -1032,4 +1035,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_CXX03_LANG
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FUNCTIONAL_FUNCTION_H
diff --git a/contrib/llvm-project/libcxx/include/__iterator/cpp17_iterator_concepts.h b/contrib/llvm-project/libcxx/include/__iterator/cpp17_iterator_concepts.h
index c4f49fe74227..d1ad2b4e2848 100644
--- a/contrib/llvm-project/libcxx/include/__iterator/cpp17_iterator_concepts.h
+++ b/contrib/llvm-project/libcxx/include/__iterator/cpp17_iterator_concepts.h
@@ -29,6 +29,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -182,4 +185,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ITERATOR_CPP17_ITERATOR_CONCEPTS_H
diff --git a/contrib/llvm-project/libcxx/include/__iterator/iterator_with_data.h b/contrib/llvm-project/libcxx/include/__iterator/iterator_with_data.h
index 06c2fa699c30..afdc0a4e12e2 100644
--- a/contrib/llvm-project/libcxx/include/__iterator/iterator_with_data.h
+++ b/contrib/llvm-project/libcxx/include/__iterator/iterator_with_data.h
@@ -24,6 +24,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -97,4 +100,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ITERATOR_ITERATOR_WITH_DATA_H
diff --git a/contrib/llvm-project/libcxx/include/__memory/ranges_uninitialized_algorithms.h b/contrib/llvm-project/libcxx/include/__memory/ranges_uninitialized_algorithms.h
index d836d00820a6..90090055bbbb 100644
--- a/contrib/llvm-project/libcxx/include/__memory/ranges_uninitialized_algorithms.h
+++ b/contrib/llvm-project/libcxx/include/__memory/ranges_uninitialized_algorithms.h
@@ -31,6 +31,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -317,4 +320,6 @@ inline constexpr auto uninitialized_move_n = __uninitialized_move_n::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___MEMORY_RANGES_UNINITIALIZED_ALGORITHMS_H
diff --git a/contrib/llvm-project/libcxx/include/__memory/raw_storage_iterator.h b/contrib/llvm-project/libcxx/include/__memory/raw_storage_iterator.h
index 33790a397c84..774878aa1c5e 100644
--- a/contrib/llvm-project/libcxx/include/__memory/raw_storage_iterator.h
+++ b/contrib/llvm-project/libcxx/include/__memory/raw_storage_iterator.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR)
@@ -79,4 +82,6 @@ public:
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___MEMORY_RAW_STORAGE_ITERATOR_H
diff --git a/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h b/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h
index 9a73d439306d..e6de615d76fa 100644
--- a/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h
+++ b/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h
@@ -61,6 +61,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
// NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively)
@@ -1662,4 +1665,6 @@ inline _LIBCPP_HIDE_FROM_ABI bool atomic_compare_exchange_weak_explicit(
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___MEMORY_SHARED_PTR_H
diff --git a/contrib/llvm-project/libcxx/include/__memory/uninitialized_algorithms.h b/contrib/llvm-project/libcxx/include/__memory/uninitialized_algorithms.h
index 2a4ecf655be2..9aff93a89648 100644
--- a/contrib/llvm-project/libcxx/include/__memory/uninitialized_algorithms.h
+++ b/contrib/llvm-project/libcxx/include/__memory/uninitialized_algorithms.h
@@ -42,6 +42,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
struct __always_false {
@@ -648,4 +651,6 @@ __uninitialized_allocator_move_if_noexcept(_Alloc&, _Iter1 __first1, _Iter1 __la
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___MEMORY_UNINITIALIZED_ALGORITHMS_H
diff --git a/contrib/llvm-project/libcxx/include/__mutex/once_flag.h b/contrib/llvm-project/libcxx/include/__mutex/once_flag.h
index 5a6f8e09055f..9d7baecbc708 100644
--- a/contrib/llvm-project/libcxx/include/__mutex/once_flag.h
+++ b/contrib/llvm-project/libcxx/include/__mutex/once_flag.h
@@ -25,6 +25,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
struct _LIBCPP_TEMPLATE_VIS once_flag;
@@ -151,4 +154,6 @@ inline _LIBCPP_HIDE_FROM_ABI void call_once(once_flag& __flag, const _Callable&
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___MUTEX_ONCE_FLAG_H
diff --git a/contrib/llvm-project/libcxx/include/__numeric/pstl_reduce.h b/contrib/llvm-project/libcxx/include/__numeric/pstl_reduce.h
index b19972a46db7..f9f666c2bb38 100644
--- a/contrib/llvm-project/libcxx/include/__numeric/pstl_reduce.h
+++ b/contrib/llvm-project/libcxx/include/__numeric/pstl_reduce.h
@@ -20,6 +20,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -101,4 +104,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___NUMERIC_PSTL_REDUCE_H
diff --git a/contrib/llvm-project/libcxx/include/__numeric/pstl_transform_reduce.h b/contrib/llvm-project/libcxx/include/__numeric/pstl_transform_reduce.h
index 112772604666..2f412d41f7f2 100644
--- a/contrib/llvm-project/libcxx/include/__numeric/pstl_transform_reduce.h
+++ b/contrib/llvm-project/libcxx/include/__numeric/pstl_transform_reduce.h
@@ -22,6 +22,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -148,4 +151,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___NUMERIC_PSTL_TRANSFORM_REDUCE_H
diff --git a/contrib/llvm-project/libcxx/include/__numeric/reduce.h b/contrib/llvm-project/libcxx/include/__numeric/reduce.h
index 1aeefce132b2..6c205bf581fb 100644
--- a/contrib/llvm-project/libcxx/include/__numeric/reduce.h
+++ b/contrib/llvm-project/libcxx/include/__numeric/reduce.h
@@ -19,6 +19,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 17
@@ -45,4 +48,6 @@ reduce(_InputIterator __first, _InputIterator __last) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___NUMERIC_REDUCE_H
diff --git a/contrib/llvm-project/libcxx/include/__numeric/saturation_arithmetic.h b/contrib/llvm-project/libcxx/include/__numeric/saturation_arithmetic.h
index 50274c6bbd9f..0e6f455cf228 100644
--- a/contrib/llvm-project/libcxx/include/__numeric/saturation_arithmetic.h
+++ b/contrib/llvm-project/libcxx/include/__numeric/saturation_arithmetic.h
@@ -19,6 +19,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 26
@@ -107,4 +110,6 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Rp saturate_cast(_Tp __x) noexcept {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___NUMERIC_SATURATION_ARITHMETIC_H
diff --git a/contrib/llvm-project/libcxx/include/__numeric/transform_reduce.h b/contrib/llvm-project/libcxx/include/__numeric/transform_reduce.h
index 6c0a81e5e4b0..f1150510f0c3 100644
--- a/contrib/llvm-project/libcxx/include/__numeric/transform_reduce.h
+++ b/contrib/llvm-project/libcxx/include/__numeric/transform_reduce.h
@@ -18,6 +18,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 17
@@ -51,4 +54,6 @@ transform_reduce(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterat
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/counted.h b/contrib/llvm-project/libcxx/include/__ranges/counted.h
index 337634895766..83d76f8fd210 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/counted.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/counted.h
@@ -29,6 +29,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -82,4 +85,6 @@ inline constexpr auto counted = __counted::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_COUNTED_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/drop_while_view.h b/contrib/llvm-project/libcxx/include/__ranges/drop_while_view.h
index 4e3ef61678f4..92f48bd0ecfb 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/drop_while_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/drop_while_view.h
@@ -37,6 +37,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -128,4 +131,6 @@ inline constexpr auto drop_while = __drop_while::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_DROP_WHILE_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/elements_view.h b/contrib/llvm-project/libcxx/include/__ranges/elements_view.h
index 325e4c9dca63..989d36fbcaaa 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/elements_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/elements_view.h
@@ -43,6 +43,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -410,4 +413,6 @@ inline constexpr auto values = elements<1>;
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_ELEMENTS_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/filter_view.h b/contrib/llvm-project/libcxx/include/__ranges/filter_view.h
index 6e6719c14470..5b938dd4c16e 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/filter_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/filter_view.h
@@ -44,6 +44,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -252,4 +255,6 @@ inline constexpr auto filter = __filter::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_FILTER_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/iota_view.h b/contrib/llvm-project/libcxx/include/__ranges/iota_view.h
index c6c9618cfe6c..c8314dd848b4 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/iota_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/iota_view.h
@@ -41,6 +41,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -395,4 +398,6 @@ inline constexpr auto iota = __iota::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_IOTA_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/join_view.h b/contrib/llvm-project/libcxx/include/__ranges/join_view.h
index 22473059133f..9c2c77995539 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/join_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/join_view.h
@@ -41,6 +41,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -415,4 +418,6 @@ struct __segmented_iterator_traits<_JoinViewIterator> {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_JOIN_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/lazy_split_view.h b/contrib/llvm-project/libcxx/include/__ranges/lazy_split_view.h
index e96398b14b58..6aedfdabffe3 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/lazy_split_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/lazy_split_view.h
@@ -47,6 +47,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -433,4 +436,6 @@ inline constexpr auto lazy_split = __lazy_split_view::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_LAZY_SPLIT_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/repeat_view.h b/contrib/llvm-project/libcxx/include/__ranges/repeat_view.h
index d9759abe1cba..d08f0e0d4e9f 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/repeat_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/repeat_view.h
@@ -34,6 +34,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 23
@@ -257,4 +260,6 @@ inline constexpr bool __is_repeat_specialization<repeat_view<_Tp, _Bound>> = tru
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_REPEAT_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/reverse_view.h b/contrib/llvm-project/libcxx/include/__ranges/reverse_view.h
index f7846259810c..ddbe8908414f 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/reverse_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/reverse_view.h
@@ -33,6 +33,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -196,4 +199,6 @@ inline constexpr auto reverse = __reverse::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_REVERSE_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/single_view.h b/contrib/llvm-project/libcxx/include/__ranges/single_view.h
index ead597a9be17..f91c7c352636 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/single_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/single_view.h
@@ -26,6 +26,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -101,4 +104,6 @@ inline constexpr auto single = __single_view::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_SINGLE_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/split_view.h b/contrib/llvm-project/libcxx/include/__ranges/split_view.h
index 7f03be3c346a..98f17be04f62 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/split_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/split_view.h
@@ -36,6 +36,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -224,4 +227,6 @@ inline constexpr auto split = __split_view::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_SPLIT_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/take_while_view.h b/contrib/llvm-project/libcxx/include/__ranges/take_while_view.h
index 46cfe4f70ac8..77ea9f7bb813 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/take_while_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/take_while_view.h
@@ -35,6 +35,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -162,4 +165,6 @@ inline constexpr auto take_while = __take_while::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_TAKE_WHILE_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__ranges/transform_view.h b/contrib/llvm-project/libcxx/include/__ranges/transform_view.h
index 3c8d825789cb..dc3aaa59ed8c 100644
--- a/contrib/llvm-project/libcxx/include/__ranges/transform_view.h
+++ b/contrib/llvm-project/libcxx/include/__ranges/transform_view.h
@@ -47,6 +47,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 20
@@ -416,4 +419,6 @@ inline constexpr auto transform = __transform::__fn{};
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___RANGES_TRANSFORM_VIEW_H
diff --git a/contrib/llvm-project/libcxx/include/__thread/jthread.h b/contrib/llvm-project/libcxx/include/__thread/jthread.h
index fc86b13afb13..2fbc8a36755e 100644
--- a/contrib/llvm-project/libcxx/include/__thread/jthread.h
+++ b/contrib/llvm-project/libcxx/include/__thread/jthread.h
@@ -28,6 +28,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_STOP_TOKEN)
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -127,4 +130,6 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_STOP_TOKEN)
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___THREAD_JTHREAD_H
diff --git a/contrib/llvm-project/libcxx/include/__thread/thread.h b/contrib/llvm-project/libcxx/include/__thread/thread.h
index 463bbd677255..0ecaac1b011b 100644
--- a/contrib/llvm-project/libcxx/include/__thread/thread.h
+++ b/contrib/llvm-project/libcxx/include/__thread/thread.h
@@ -32,6 +32,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
@@ -251,4 +254,6 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(thread& __x, thread& __y) _NOEXCEPT { __x
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___THREAD_THREAD_H
diff --git a/contrib/llvm-project/libcxx/include/array b/contrib/llvm-project/libcxx/include/array
index dcb419f536dc..41f016a4859a 100644
--- a/contrib/llvm-project/libcxx/include/array
+++ b/contrib/llvm-project/libcxx/include/array
@@ -159,6 +159,9 @@ template <size_t I, class T, size_t N> const T&& get(const array<T, N>&&) noexce
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, size_t _Size>
@@ -493,6 +496,8 @@ to_array(_Tp (&&__arr)[_Size]) noexcept(is_nothrow_move_constructible_v<_Tp>) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <algorithm>
# include <concepts>
diff --git a/contrib/llvm-project/libcxx/include/condition_variable b/contrib/llvm-project/libcxx/include/condition_variable
index e375c986e7f1..6aac3c13ef4a 100644
--- a/contrib/llvm-project/libcxx/include/condition_variable
+++ b/contrib/llvm-project/libcxx/include/condition_variable
@@ -139,6 +139,9 @@ public:
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#ifndef _LIBCPP_HAS_NO_THREADS
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -348,6 +351,8 @@ _LIBCPP_END_NAMESPACE_STD
#endif // !_LIBCPP_HAS_NO_THREADS
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <atomic>
# include <concepts>
diff --git a/contrib/llvm-project/libcxx/include/experimental/iterator b/contrib/llvm-project/libcxx/include/experimental/iterator
index 5bb1dd1ada63..e9c1fb6924ec 100644
--- a/contrib/llvm-project/libcxx/include/experimental/iterator
+++ b/contrib/llvm-project/libcxx/include/experimental/iterator
@@ -64,6 +64,9 @@ namespace std {
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
#if _LIBCPP_STD_VER >= 14
_LIBCPP_BEGIN_NAMESPACE_LFTS
@@ -115,6 +118,8 @@ _LIBCPP_END_NAMESPACE_LFTS
#endif // _LIBCPP_STD_VER >= 14
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <iosfwd>
# include <type_traits>
diff --git a/contrib/llvm-project/libcxx/include/future b/contrib/llvm-project/libcxx/include/future
index 5602ae41c142..4eeb401c9bbc 100644
--- a/contrib/llvm-project/libcxx/include/future
+++ b/contrib/llvm-project/libcxx/include/future
@@ -401,6 +401,9 @@ template <class R, class Alloc> struct uses_allocator<packaged_task<R>, Alloc>;
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
// enum class future_errc
@@ -2044,6 +2047,8 @@ inline shared_future<void> future<void>::share() _NOEXCEPT { return shared_futur
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <chrono>
#endif
diff --git a/contrib/llvm-project/libcxx/include/ios b/contrib/llvm-project/libcxx/include/ios
index d36f5fb2ca28..8465860d08dc 100644
--- a/contrib/llvm-project/libcxx/include/ios
+++ b/contrib/llvm-project/libcxx/include/ios
@@ -242,6 +242,9 @@ storage-class-specifier const error_category& iostream_category() noexcept;
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
typedef ptrdiff_t streamsize;
@@ -820,6 +823,8 @@ _LIBCPP_HIDE_FROM_ABI inline ios_base& defaultfloat(ios_base& __str) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <atomic>
# include <concepts>
diff --git a/contrib/llvm-project/libcxx/include/map b/contrib/llvm-project/libcxx/include/map
index f122f2ebb15b..2edbc0cf6245 100644
--- a/contrib/llvm-project/libcxx/include/map
+++ b/contrib/llvm-project/libcxx/include/map
@@ -617,6 +617,9 @@ erase_if(multimap<Key, T, Compare, Allocator>& c, Predicate pred); // C++20
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Key,
@@ -2182,6 +2185,8 @@ using multimap _LIBCPP_AVAILABILITY_PMR =
_LIBCPP_END_NAMESPACE_STD
#endif
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <concepts>
# include <cstdlib>
diff --git a/contrib/llvm-project/libcxx/include/ostream b/contrib/llvm-project/libcxx/include/ostream
index e2b2c0cbaaf2..180adda201d8 100644
--- a/contrib/llvm-project/libcxx/include/ostream
+++ b/contrib/llvm-project/libcxx/include/ostream
@@ -199,6 +199,9 @@ void vprint_nonunicode(ostream& os, string_view fmt, format_args args);
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _CharT, class _Traits>
@@ -1169,6 +1172,8 @@ println(ostream& __os, format_string<_Args...> __fmt, _Args&&... __args) {
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <atomic>
# include <concepts>
diff --git a/contrib/llvm-project/libcxx/include/queue b/contrib/llvm-project/libcxx/include/queue
index 692e38bb3522..76ef85945662 100644
--- a/contrib/llvm-project/libcxx/include/queue
+++ b/contrib/llvm-project/libcxx/include/queue
@@ -283,6 +283,9 @@ template <class T, class Container, class Compare>
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, class _Container = deque<_Tp> >
@@ -971,6 +974,8 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator<priority_queue<_Tp, _Container, _Comp
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <concepts>
# include <cstdlib>
diff --git a/contrib/llvm-project/libcxx/include/set b/contrib/llvm-project/libcxx/include/set
index 55ba8f8208be..7f8245f8b605 100644
--- a/contrib/llvm-project/libcxx/include/set
+++ b/contrib/llvm-project/libcxx/include/set
@@ -552,6 +552,9 @@ erase_if(multiset<Key, Compare, Allocator>& c, Predicate pred); // C++20
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Key, class _Compare, class _Allocator>
@@ -1488,6 +1491,8 @@ using multiset _LIBCPP_AVAILABILITY_PMR = std::multiset<_KeyT, _CompareT, polymo
_LIBCPP_END_NAMESPACE_STD
#endif
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <concepts>
# include <cstdlib>
diff --git a/contrib/llvm-project/libcxx/include/stack b/contrib/llvm-project/libcxx/include/stack
index 546380b0aacd..f1f6ee8482fd 100644
--- a/contrib/llvm-project/libcxx/include/stack
+++ b/contrib/llvm-project/libcxx/include/stack
@@ -138,6 +138,9 @@ template <class T, class Container>
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, class _Container = deque<_Tp> >
@@ -366,6 +369,8 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator<stack<_Tp, _Container>, _Alloc> : pub
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <concepts>
# include <functional>
diff --git a/contrib/llvm-project/libcxx/include/string b/contrib/llvm-project/libcxx/include/string
index e97139206d4f..ba169c3dbfc9 100644
--- a/contrib/llvm-project/libcxx/include/string
+++ b/contrib/llvm-project/libcxx/include/string
@@ -1937,12 +1937,7 @@ private:
return (__s + (__a - 1)) & ~(__a - 1);
}
enum {
- __alignment =
-#ifdef _LIBCPP_ABI_STRING_8_BYTE_ALIGNMENT
- 8
-#else
- 16
-#endif
+ __alignment = 8
};
static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __recommend(size_type __s) _NOEXCEPT {
if (__s < __min_cap) {
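
The hunk above hard-codes __alignment to 8; the rounding helper shown in context, (__s + (__a - 1)) & ~(__a - 1), rounds a requested size up to a multiple of that alignment. A standalone re-spelling of that arithmetic (the helper name mirrors basic_string's __align_it; nothing here is libc++ itself):

    #include <cstddef>
    #include <cstdio>

    // Round s up to the next multiple of A (a power of two).
    template <std::size_t A>
    constexpr std::size_t align_it(std::size_t s) {
      return (s + (A - 1)) & ~(A - 1);
    }

    int main() {
      // With __alignment = 8, capacities round to multiples of 8 rather than 16.
      std::printf("%zu %zu %zu\n", align_it<8>(17), align_it<8>(24), align_it<16>(17));
      // prints: 24 24 32
    }
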
diff --git a/contrib/llvm-project/libcxx/include/strstream b/contrib/llvm-project/libcxx/include/strstream
index 7843184e4da4..e20c86baa6df 100644
--- a/contrib/llvm-project/libcxx/include/strstream
+++ b/contrib/llvm-project/libcxx/include/strstream
@@ -139,6 +139,9 @@ private:
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
class _LIBCPP_DEPRECATED _LIBCPP_EXPORTED_FROM_ABI strstreambuf : public streambuf {
@@ -340,4 +343,6 @@ private:
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP_STRSTREAM
diff --git a/contrib/llvm-project/libcxx/include/unordered_map b/contrib/llvm-project/libcxx/include/unordered_map
index 4be25fc1cdd8..2c1782dc879e 100644
--- a/contrib/llvm-project/libcxx/include/unordered_map
+++ b/contrib/llvm-project/libcxx/include/unordered_map
@@ -625,6 +625,9 @@ template <class Key, class T, class Hash, class Pred, class Alloc>
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Key,
@@ -2544,6 +2547,8 @@ using unordered_multimap _LIBCPP_AVAILABILITY_PMR =
_LIBCPP_END_NAMESPACE_STD
#endif
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <algorithm>
# include <bit>
diff --git a/contrib/llvm-project/libcxx/include/unordered_set b/contrib/llvm-project/libcxx/include/unordered_set
index 6414885f4c51..50b616907f00 100644
--- a/contrib/llvm-project/libcxx/include/unordered_set
+++ b/contrib/llvm-project/libcxx/include/unordered_set
@@ -570,6 +570,9 @@ template <class Value, class Hash, class Pred, class Alloc>
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Value, class _Hash, class _Pred, class _Alloc>
@@ -1810,6 +1813,8 @@ using unordered_multiset _LIBCPP_AVAILABILITY_PMR =
_LIBCPP_END_NAMESPACE_STD
#endif
+_LIBCPP_POP_MACROS
+
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <concepts>
# include <cstdlib>
diff --git a/contrib/llvm-project/libcxx/include/version b/contrib/llvm-project/libcxx/include/version
index 9e26da8c1b24..d356976d6454 100644
--- a/contrib/llvm-project/libcxx/include/version
+++ b/contrib/llvm-project/libcxx/include/version
@@ -266,7 +266,9 @@ __cpp_lib_within_lifetime 202306L <type_traits>
# define __cpp_lib_make_reverse_iterator 201402L
# define __cpp_lib_make_unique 201304L
# define __cpp_lib_null_iterators 201304L
-# define __cpp_lib_quoted_string_io 201304L
+# if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+# define __cpp_lib_quoted_string_io 201304L
+# endif
# define __cpp_lib_result_of_sfinae 201210L
# define __cpp_lib_robust_nonmodifying_seq_ops 201304L
# if !defined(_LIBCPP_HAS_NO_THREADS)
@@ -294,7 +296,7 @@ __cpp_lib_within_lifetime 202306L <type_traits>
# define __cpp_lib_clamp 201603L
# define __cpp_lib_enable_shared_from_this 201603L
// # define __cpp_lib_execution 201603L
-# if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY
+# if !defined(_LIBCPP_HAS_NO_FILESYSTEM) && _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY
# define __cpp_lib_filesystem 201703L
# endif
# define __cpp_lib_gcd_lcm 201606L
@@ -323,7 +325,9 @@ __cpp_lib_within_lifetime 202306L <type_traits>
// # define __cpp_lib_parallel_algorithm 201603L
# define __cpp_lib_raw_memory_algorithms 201606L
# define __cpp_lib_sample 201603L
-# define __cpp_lib_scoped_lock 201703L
+# if !defined(_LIBCPP_HAS_NO_THREADS)
+# define __cpp_lib_scoped_lock 201703L
+# endif
# if !defined(_LIBCPP_HAS_NO_THREADS)
# define __cpp_lib_shared_mutex 201505L
# endif
@@ -496,7 +500,9 @@ __cpp_lib_within_lifetime 202306L <type_traits>
// # define __cpp_lib_freestanding_optional 202311L
// # define __cpp_lib_freestanding_string_view 202311L
// # define __cpp_lib_freestanding_variant 202311L
-# define __cpp_lib_fstream_native_handle 202306L
+# if !defined(_LIBCPP_HAS_NO_FILESYSTEM) && !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+# define __cpp_lib_fstream_native_handle 202306L
+# endif
// # define __cpp_lib_function_ref 202306L
// # define __cpp_lib_hazard_pointer 202306L
// # define __cpp_lib_linalg 202311L
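
These <version> hunks stop defining feature-test macros for features the build configuration removes (no localization, no filesystem, no threads). A small consumer-side sketch, assuming nothing beyond <version>, of how such a guarded macro is meant to be probed:

    #include <version>

    #if defined(__cpp_lib_scoped_lock) && __cpp_lib_scoped_lock >= 201703L
    #  include <mutex>
    using lock_type = std::scoped_lock<std::mutex>;
    #else
    struct lock_type {};   // fallback for configurations built without threads
    #endif

    int main() { return 0; }
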
diff --git a/contrib/llvm-project/libcxx/modules/std/atomic.inc b/contrib/llvm-project/libcxx/modules/std/atomic.inc
index 5139b7531093..88b31ccdb208 100644
--- a/contrib/llvm-project/libcxx/modules/std/atomic.inc
+++ b/contrib/llvm-project/libcxx/modules/std/atomic.inc
@@ -60,7 +60,9 @@ export namespace std {
using std::atomic_char;
using std::atomic_char16_t;
using std::atomic_char32_t;
+#ifndef _LIBCPP_HAS_NO_CHAR8_T
using std::atomic_char8_t;
+#endif
using std::atomic_int;
using std::atomic_llong;
using std::atomic_long;
diff --git a/contrib/llvm-project/libcxx/modules/std/iosfwd.inc b/contrib/llvm-project/libcxx/modules/std/iosfwd.inc
index ec8b434ca0c5..410fb6aefed8 100644
--- a/contrib/llvm-project/libcxx/modules/std/iosfwd.inc
+++ b/contrib/llvm-project/libcxx/modules/std/iosfwd.inc
@@ -14,7 +14,9 @@ export namespace std {
#endif
using std::u16streampos;
using std::u32streampos;
+#ifndef _LIBCPP_HAS_NO_CHAR8_T
using std::u8streampos;
+#endif
using std::basic_osyncstream;
using std::basic_syncbuf;
diff --git a/contrib/llvm-project/libcxx/modules/std/string.inc b/contrib/llvm-project/libcxx/modules/std/string.inc
index c83ee7643f87..9808a96215a1 100644
--- a/contrib/llvm-project/libcxx/modules/std/string.inc
+++ b/contrib/llvm-project/libcxx/modules/std/string.inc
@@ -34,7 +34,9 @@ export namespace std {
using std::string;
using std::u16string;
using std::u32string;
+#ifndef _LIBCPP_HAS_NO_CHAR8_T
using std::u8string;
+#endif
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
using std::wstring;
#endif
@@ -58,7 +60,9 @@ export namespace std {
using std::pmr::string;
using std::pmr::u16string;
using std::pmr::u32string;
+#ifndef _LIBCPP_HAS_NO_CHAR8_T
using std::pmr::u8string;
+#endif
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
using std::pmr::wstring;
#endif
diff --git a/contrib/llvm-project/libcxx/modules/std/string_view.inc b/contrib/llvm-project/libcxx/modules/std/string_view.inc
index 1fa63a773953..f4f9d80ddb83 100644
--- a/contrib/llvm-project/libcxx/modules/std/string_view.inc
+++ b/contrib/llvm-project/libcxx/modules/std/string_view.inc
@@ -27,7 +27,9 @@ export namespace std {
using std::string_view;
using std::u16string_view;
using std::u32string_view;
+#ifndef _LIBCPP_HAS_NO_CHAR8_T
using std::u8string_view;
+#endif
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
using std::wstring_view;
#endif
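
The module interface units above now export the u8/char8_t names only when the language mode provides char8_t. An illustrative consumer-side equivalent (standard headers rather than the module build, keyed off the __cpp_char8_t feature-test macro):

    #include <string_view>

    #if defined(__cpp_char8_t)
    using utf8_view = std::u8string_view;   // exported by the std module only in this case
    #else
    using utf8_view = std::string_view;     // e.g. a -fno-char8_t build
    #endif

    int main() { utf8_view v; return static_cast<int>(v.size()); }
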
diff --git a/contrib/llvm-project/lld/ELF/Arch/RISCV.cpp b/contrib/llvm-project/lld/ELF/Arch/RISCV.cpp
index d7d3d3e47814..8ce92b4badfb 100644
--- a/contrib/llvm-project/lld/ELF/Arch/RISCV.cpp
+++ b/contrib/llvm-project/lld/ELF/Arch/RISCV.cpp
@@ -61,6 +61,7 @@ enum Op {
AUIPC = 0x17,
JALR = 0x67,
LD = 0x3003,
+ LUI = 0x37,
LW = 0x2003,
SRLI = 0x5013,
SUB = 0x40000033,
@@ -73,6 +74,7 @@ enum Reg {
X_T0 = 5,
X_T1 = 6,
X_T2 = 7,
+ X_A0 = 10,
X_T3 = 28,
};
@@ -102,6 +104,26 @@ static uint32_t setLO12_S(uint32_t insn, uint32_t imm) {
(extractBits(imm, 4, 0) << 7);
}
+namespace {
+struct SymbolAnchor {
+ uint64_t offset;
+ Defined *d;
+ bool end; // true for the anchor of st_value+st_size
+};
+} // namespace
+
+struct elf::RISCVRelaxAux {
+ // This records symbol start and end offsets which will be adjusted according
+ // to the nearest relocDeltas element.
+ SmallVector<SymbolAnchor, 0> anchors;
+ // For relocations[i], the actual offset is
+ // r_offset - (i ? relocDeltas[i-1] : 0).
+ std::unique_ptr<uint32_t[]> relocDeltas;
+ // For relocations[i], the actual type is relocTypes[i].
+ std::unique_ptr<RelType[]> relocTypes;
+ SmallVector<uint32_t, 0> writes;
+};
+
RISCV::RISCV() {
copyRel = R_RISCV_COPY;
pltRel = R_RISCV_JUMP_SLOT;
@@ -119,6 +141,7 @@ RISCV::RISCV() {
tlsGotRel = R_RISCV_TLS_TPREL32;
}
gotRel = symbolicRel;
+ tlsDescRel = R_RISCV_TLSDESC;
// .got[0] = _DYNAMIC
gotHeaderEntriesNum = 1;
@@ -187,6 +210,8 @@ int64_t RISCV::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_RISCV_JUMP_SLOT:
// These relocations are defined as not having an implicit addend.
return 0;
+ case R_RISCV_TLSDESC:
+ return config->is64 ? read64le(buf + 8) : read32le(buf + 4);
}
}
@@ -295,6 +320,12 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
case R_RISCV_PCREL_LO12_I:
case R_RISCV_PCREL_LO12_S:
return R_RISCV_PC_INDIRECT;
+ case R_RISCV_TLSDESC_HI20:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ case R_RISCV_TLSDESC_ADD_LO12:
+ return R_TLSDESC_PC;
+ case R_RISCV_TLSDESC_CALL:
+ return R_TLSDESC_CALL;
case R_RISCV_TLS_GD_HI20:
return R_TLSGD_PC;
case R_RISCV_TLS_GOT_HI20:
@@ -419,6 +450,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_RISCV_GOT_HI20:
case R_RISCV_PCREL_HI20:
+ case R_RISCV_TLSDESC_HI20:
case R_RISCV_TLS_GD_HI20:
case R_RISCV_TLS_GOT_HI20:
case R_RISCV_TPREL_HI20:
@@ -430,6 +462,8 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
}
case R_RISCV_PCREL_LO12_I:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ case R_RISCV_TLSDESC_ADD_LO12:
case R_RISCV_TPREL_LO12_I:
case R_RISCV_LO12_I: {
uint64_t hi = (val + 0x800) >> 12;
@@ -513,32 +547,133 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
break;
case R_RISCV_RELAX:
- return; // Ignored (for now)
-
+ return;
+ case R_RISCV_TLSDESC:
+ // The addend is stored in the second word.
+ if (config->is64)
+ write64le(loc + 8, val);
+ else
+ write32le(loc + 4, val);
+ break;
default:
llvm_unreachable("unknown relocation");
}
}
+static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
+ return i + 1 != relocs.size() && relocs[i + 1].type == R_RISCV_RELAX;
+}
+
+static void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) {
+ switch (rel.type) {
+ case R_RISCV_TLSDESC_HI20:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ write32le(loc, 0x00000013); // nop
+ break;
+ case R_RISCV_TLSDESC_ADD_LO12:
+ write32le(loc, utype(AUIPC, X_A0, hi20(val))); // auipc a0,<hi20>
+ break;
+ case R_RISCV_TLSDESC_CALL:
+ if (config->is64)
+ write32le(loc, itype(LD, X_A0, X_A0, lo12(val))); // ld a0,<lo12>(a0)
+ else
+ write32le(loc, itype(LW, X_A0, X_A0, lo12(val))); // lw a0,<lo12>(a0)
+ break;
+ default:
+ llvm_unreachable("unsupported relocation for TLSDESC to IE");
+ }
+}
+
+static void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
+ switch (rel.type) {
+ case R_RISCV_TLSDESC_HI20:
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ write32le(loc, 0x00000013); // nop
+ return;
+ case R_RISCV_TLSDESC_ADD_LO12:
+ if (isInt<12>(val))
+ write32le(loc, 0x00000013); // nop
+ else
+ write32le(loc, utype(LUI, X_A0, hi20(val))); // lui a0,<hi20>
+ return;
+ case R_RISCV_TLSDESC_CALL:
+ if (isInt<12>(val))
+ write32le(loc, itype(ADDI, X_A0, 0, val)); // addi a0,zero,<lo12>
+ else
+ write32le(loc, itype(ADDI, X_A0, X_A0, lo12(val))); // addi a0,a0,<lo12>
+ return;
+ default:
+ llvm_unreachable("unsupported relocation for TLSDESC to LE");
+ }
+}
+
void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
uint64_t secAddr = sec.getOutputSection()->addr;
if (auto *s = dyn_cast<InputSection>(&sec))
secAddr += s->outSecOff;
else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
secAddr += ehIn->getParent()->outSecOff;
- for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
- const Relocation &rel = sec.relocs()[i];
+ uint64_t tlsdescVal = 0;
+ bool tlsdescRelax = false, isToLe = false;
+ const ArrayRef<Relocation> relocs = sec.relocs();
+ for (size_t i = 0, size = relocs.size(); i != size; ++i) {
+ const Relocation &rel = relocs[i];
uint8_t *loc = buf + rel.offset;
- const uint64_t val =
+ uint64_t val =
sec.getRelocTargetVA(sec.file, rel.type, rel.addend,
secAddr + rel.offset, *rel.sym, rel.expr);
switch (rel.expr) {
case R_RELAX_HINT:
+ continue;
+ case R_TLSDESC_PC:
+ // For R_RISCV_TLSDESC_HI20, store &got(sym)-PC to be used by the
+ // following two instructions L[DW] and ADDI.
+ if (rel.type == R_RISCV_TLSDESC_HI20)
+ tlsdescVal = val;
+ else
+ val = tlsdescVal;
break;
+ case R_RELAX_TLS_GD_TO_IE:
+ // Only R_RISCV_TLSDESC_HI20 reaches here. tlsdescVal will be finalized
+ // after we see R_RISCV_TLSDESC_ADD_LO12 in the R_RELAX_TLS_GD_TO_LE case.
+ // The net effect is that tlsdescVal will be smaller than `val` to take
+ // into account of NOP instructions (in the absence of R_RISCV_RELAX)
+ // before AUIPC.
+ tlsdescVal = val + rel.offset;
+ isToLe = false;
+ tlsdescRelax = relaxable(relocs, i);
+ if (!tlsdescRelax)
+ tlsdescToIe(loc, rel, val);
+ continue;
+ case R_RELAX_TLS_GD_TO_LE:
+ // See the comment in handleTlsRelocation. For TLSDESC=>IE,
+      // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} also reach here. If isToLe is
+      // false, this is actually TLSDESC=>IE optimization.
+ if (rel.type == R_RISCV_TLSDESC_HI20) {
+ tlsdescVal = val;
+ isToLe = true;
+ tlsdescRelax = relaxable(relocs, i);
+ } else {
+ if (!isToLe && rel.type == R_RISCV_TLSDESC_ADD_LO12)
+ tlsdescVal -= rel.offset;
+ val = tlsdescVal;
+ }
+ // When NOP conversion is eligible and relaxation applies, don't write a
+ // NOP in case an unrelated instruction follows the current instruction.
+ if (tlsdescRelax &&
+ (rel.type == R_RISCV_TLSDESC_HI20 ||
+ rel.type == R_RISCV_TLSDESC_LOAD_LO12 ||
+ (rel.type == R_RISCV_TLSDESC_ADD_LO12 && isToLe && !hi20(val))))
+ continue;
+ if (isToLe)
+ tlsdescToLe(loc, rel, val);
+ else
+ tlsdescToIe(loc, rel, val);
+ continue;
case R_RISCV_LEB128:
if (i + 1 < size) {
- const Relocation &rel1 = sec.relocs()[i + 1];
+ const Relocation &rel1 = relocs[i + 1];
if (rel.type == R_RISCV_SET_ULEB128 &&
rel1.type == R_RISCV_SUB_ULEB128 && rel.offset == rel1.offset) {
auto val = rel.sym->getVA(rel.addend) - rel1.sym->getVA(rel1.addend);
@@ -554,32 +689,12 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128");
return;
default:
- relocate(loc, rel, val);
break;
}
+ relocate(loc, rel, val);
}
}
-namespace {
-struct SymbolAnchor {
- uint64_t offset;
- Defined *d;
- bool end; // true for the anchor of st_value+st_size
-};
-} // namespace
-
-struct elf::RISCVRelaxAux {
- // This records symbol start and end offsets which will be adjusted according
- // to the nearest relocDeltas element.
- SmallVector<SymbolAnchor, 0> anchors;
- // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] :
- // 0).
- std::unique_ptr<uint32_t[]> relocDeltas;
- // For relocations[i], the actual type is relocTypes[i].
- std::unique_ptr<RelType[]> relocTypes;
- SmallVector<uint32_t, 0> writes;
-};
-
static void initSymbolAnchors() {
SmallVector<InputSection *, 0> storage;
for (OutputSection *osec : outputSections) {
@@ -715,14 +830,16 @@ static void relaxHi20Lo12(const InputSection &sec, size_t i, uint64_t loc,
static bool relax(InputSection &sec) {
const uint64_t secAddr = sec.getVA();
+ const MutableArrayRef<Relocation> relocs = sec.relocs();
auto &aux = *sec.relaxAux;
bool changed = false;
ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
uint64_t delta = 0;
+ bool tlsdescRelax = false, toLeShortForm = false;
- std::fill_n(aux.relocTypes.get(), sec.relocs().size(), R_RISCV_NONE);
+ std::fill_n(aux.relocTypes.get(), relocs.size(), R_RISCV_NONE);
aux.writes.clear();
- for (auto [i, r] : llvm::enumerate(sec.relocs())) {
+ for (auto [i, r] : llvm::enumerate(relocs)) {
const uint64_t loc = secAddr + r.offset - delta;
uint32_t &cur = aux.relocDeltas[i], remove = 0;
switch (r.type) {
@@ -743,25 +860,37 @@ static bool relax(InputSection &sec) {
}
case R_RISCV_CALL:
case R_RISCV_CALL_PLT:
- if (i + 1 != sec.relocs().size() &&
- sec.relocs()[i + 1].type == R_RISCV_RELAX)
+ if (relaxable(relocs, i))
relaxCall(sec, i, loc, r, remove);
break;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
case R_RISCV_TPREL_LO12_I:
case R_RISCV_TPREL_LO12_S:
- if (i + 1 != sec.relocs().size() &&
- sec.relocs()[i + 1].type == R_RISCV_RELAX)
+ if (relaxable(relocs, i))
relaxTlsLe(sec, i, loc, r, remove);
break;
case R_RISCV_HI20:
case R_RISCV_LO12_I:
case R_RISCV_LO12_S:
- if (i + 1 != sec.relocs().size() &&
- sec.relocs()[i + 1].type == R_RISCV_RELAX)
+ if (relaxable(relocs, i))
relaxHi20Lo12(sec, i, loc, r, remove);
break;
+ case R_RISCV_TLSDESC_HI20:
+ // For TLSDESC=>LE, we can use the short form if hi20 is zero.
+ tlsdescRelax = relaxable(relocs, i);
+ toLeShortForm = tlsdescRelax && r.expr == R_RELAX_TLS_GD_TO_LE &&
+ !hi20(r.sym->getVA(r.addend));
+ [[fallthrough]];
+ case R_RISCV_TLSDESC_LOAD_LO12:
+ // For TLSDESC=>LE/IE, AUIPC and L[DW] are removed if relaxable.
+ if (tlsdescRelax && r.expr != R_TLSDESC_PC)
+ remove = 4;
+ break;
+ case R_RISCV_TLSDESC_ADD_LO12:
+ if (toLeShortForm)
+ remove = 4;
+ break;
}
// For all anchors whose offsets are <= r.offset, they are preceded by
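
The relocation writers above split a 32-bit value into a hi20/lo12 pair (the hi20(val) and lo12(val) helpers in RISCV.cpp): AUIPC or LUI carries the rounded-up upper 20 bits and the following I-type instruction carries a signed low 12 bits. A standalone check of that arithmetic, using re-spelled helpers rather than the file's statics:

    #include <cassert>
    #include <cstdint>

    uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }
    int32_t lo12(uint32_t val) {
      int32_t lo = val & 0xfff;
      return lo >= 0x800 ? lo - 0x1000 : lo;   // sign-extend the low 12 bits
    }

    int main() {
      for (int32_t val : {0, 1, 0x7ff, 0x800, 0x801, -1, -0x800, 123456}) {
        // AUIPC/LUI (hi20) plus ADDI/L[DW] (lo12) reconstruct the original value.
        assert(int32_t(hi20(val) << 12) + lo12(val) == val);
      }
    }
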
diff --git a/contrib/llvm-project/lld/ELF/InputFiles.cpp b/contrib/llvm-project/lld/ELF/InputFiles.cpp
index 75e5ee1d0da4..a292e873e72f 100644
--- a/contrib/llvm-project/lld/ELF/InputFiles.cpp
+++ b/contrib/llvm-project/lld/ELF/InputFiles.cpp
@@ -1788,7 +1788,12 @@ void BinaryFile::parse() {
}
InputFile *elf::createInternalFile(StringRef name) {
- return make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name));
+ auto *file =
+ make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name));
+ // References from an internal file do not lead to --warn-backrefs
+ // diagnostics.
+ file->groupId = 0;
+ return file;
}
ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName,
diff --git a/contrib/llvm-project/lld/ELF/InputSection.cpp b/contrib/llvm-project/lld/ELF/InputSection.cpp
index c728dd6c6306..0e0b9783bd88 100644
--- a/contrib/llvm-project/lld/ELF/InputSection.cpp
+++ b/contrib/llvm-project/lld/ELF/InputSection.cpp
@@ -961,12 +961,11 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
// vector. The computed value is st_value plus a non-negative offset.
// Negative values are invalid, so -1 can be used as the tombstone value.
//
- // If the referenced symbol is discarded (made Undefined), or the
- // section defining the referenced symbol is garbage collected,
- // sym.getOutputSection() is nullptr. `ds->folded` catches the ICF folded
- // case. However, resolving a relocation in .debug_line to -1 would stop
- // debugger users from setting breakpoints on the folded-in function, so
- // exclude .debug_line.
+ // If the referenced symbol is relative to a discarded section (due to
+    // --gc-sections, COMDAT, etc), it has been converted to an Undefined.
+ // `ds->folded` catches the ICF folded case. However, resolving a
+ // relocation in .debug_line to -1 would stop debugger users from setting
+ // breakpoints on the folded-in function, so exclude .debug_line.
//
// For pre-DWARF-v5 .debug_loc and .debug_ranges, -1 is a reserved value
// (base address selection entry), use 1 (which is used by GNU ld for
@@ -974,7 +973,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
//
// TODO To reduce disruption, we use 0 instead of -1 as the tombstone
// value. Enable -1 in a future release.
- if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) {
+ if (!ds || (ds->folded && !isDebugLine)) {
// If -z dead-reloc-in-nonalloc= is specified, respect it.
uint64_t value = SignExtend64<bits>(*tombstone);
// For a 32-bit local TU reference in .debug_names, X86_64::relocate
diff --git a/contrib/llvm-project/lld/ELF/Relocations.cpp b/contrib/llvm-project/lld/ELF/Relocations.cpp
index b6a317bc3b6d..79c8230724ad 100644
--- a/contrib/llvm-project/lld/ELF/Relocations.cpp
+++ b/contrib/llvm-project/lld/ELF/Relocations.cpp
@@ -1274,29 +1274,34 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
if (config->emachine == EM_MIPS)
return handleMipsTlsRelocation(type, sym, c, offset, addend, expr);
+ bool isRISCV = config->emachine == EM_RISCV;
if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
R_TLSDESC_GOTPLT>(expr) &&
config->shared) {
+    // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} reference a label. Do not
+ // set NEEDS_TLSDESC on the label.
if (expr != R_TLSDESC_CALL) {
- sym.setFlags(NEEDS_TLSDESC);
+ if (!isRISCV || type == R_RISCV_TLSDESC_HI20)
+ sym.setFlags(NEEDS_TLSDESC);
c.addReloc({expr, type, offset, addend, &sym});
}
return 1;
}
// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
- // relaxation.
+ // optimizations.
+ // RISC-V supports TLSDESC to IE/LE optimizations.
// For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
- // relaxation as well.
- bool toExecRelax = !config->shared && config->emachine != EM_ARM &&
- config->emachine != EM_HEXAGON &&
- config->emachine != EM_LOONGARCH &&
- config->emachine != EM_RISCV &&
- !c.file->ppc64DisableTLSRelax;
+ // optimization as well.
+ bool execOptimize =
+ !config->shared && config->emachine != EM_ARM &&
+ config->emachine != EM_HEXAGON && config->emachine != EM_LOONGARCH &&
+ !(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
+ !c.file->ppc64DisableTLSRelax;
// If we are producing an executable and the symbol is non-preemptable, it
- // must be defined and the code sequence can be relaxed to use Local-Exec.
+ // must be defined and the code sequence can be optimized to use Local-Exec.
//
// ARM and RISC-V do not support any relaxations for TLS relocations, however,
// we can omit the DTPMOD dynamic relocations and resolve them at link time
@@ -1309,8 +1314,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
// module index, with a special value of 0 for the current module. GOT[e1] is
// unused. There only needs to be one module index entry.
if (oneof<R_TLSLD_GOT, R_TLSLD_GOTPLT, R_TLSLD_PC, R_TLSLD_HINT>(expr)) {
- // Local-Dynamic relocs can be relaxed to Local-Exec.
- if (toExecRelax) {
+ // Local-Dynamic relocs can be optimized to Local-Exec.
+ if (execOptimize) {
c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), type,
offset, addend, &sym});
return target->getTlsGdRelaxSkip(type);
@@ -1322,16 +1327,17 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
return 1;
}
- // Local-Dynamic relocs can be relaxed to Local-Exec.
+ // Local-Dynamic relocs can be optimized to Local-Exec.
if (expr == R_DTPREL) {
- if (toExecRelax)
+ if (execOptimize)
expr = target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE);
c.addReloc({expr, type, offset, addend, &sym});
return 1;
}
// Local-Dynamic sequence where offset of tls variable relative to dynamic
- // thread pointer is stored in the got. This cannot be relaxed to Local-Exec.
+ // thread pointer is stored in the got. This cannot be optimized to
+ // Local-Exec.
if (expr == R_TLSLD_GOT_OFF) {
sym.setFlags(NEEDS_GOT_DTPREL);
c.addReloc({expr, type, offset, addend, &sym});
@@ -1341,14 +1347,18 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC,
R_LOONGARCH_TLSGD_PAGE_PC>(expr)) {
- if (!toExecRelax) {
+ if (!execOptimize) {
sym.setFlags(NEEDS_TLSGD);
c.addReloc({expr, type, offset, addend, &sym});
return 1;
}
- // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
+ // Global-Dynamic/TLSDESC can be optimized to Initial-Exec or Local-Exec
// depending on the symbol being locally defined or not.
+ //
+  // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} reference a non-preemptible
+ // label, so the LE optimization will be categorized as
+ // R_RELAX_TLS_GD_TO_LE. We fix the categorization in RISCV::relocateAlloc.
if (sym.isPreemptible) {
sym.setFlags(NEEDS_TLSGD_TO_IE);
c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type,
@@ -1363,9 +1373,9 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
if (oneof<R_GOT, R_GOTPLT, R_GOT_PC, R_AARCH64_GOT_PAGE_PC,
R_LOONGARCH_GOT_PAGE_PC, R_GOT_OFF, R_TLSIE_HINT>(expr)) {
ctx.hasTlsIe.store(true, std::memory_order_relaxed);
- // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
- // defined.
- if (toExecRelax && isLocalInExecutable) {
+ // Initial-Exec relocs can be optimized to Local-Exec if the symbol is
+ // locally defined.
+ if (execOptimize && isLocalInExecutable) {
c.addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
} else if (expr != R_TLSIE_HINT) {
sym.setFlags(NEEDS_TLSIE);
@@ -1463,7 +1473,7 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
}
- // Process TLS relocations, including relaxing TLS relocations. Note that
+ // Process TLS relocations, including TLS optimizations. Note that
// R_TPREL and R_TPREL_NEG relocations are resolved in processAux.
if (sym.isTls()) {
if (unsigned processed =
diff --git a/contrib/llvm-project/lld/ELF/Writer.cpp b/contrib/llvm-project/lld/ELF/Writer.cpp
index 6f66f3615fa4..501c10f35849 100644
--- a/contrib/llvm-project/lld/ELF/Writer.cpp
+++ b/contrib/llvm-project/lld/ELF/Writer.cpp
@@ -1518,12 +1518,12 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
if (auto *osd = dyn_cast<OutputDesc>(cmd))
osd->osec.sortRank = getSectionRank(osd->osec);
if (!script->hasSectionsCommand) {
- // We know that all the OutputSections are contiguous in this case.
- auto isSection = [](SectionCommand *cmd) { return isa<OutputDesc>(cmd); };
- std::stable_sort(
- llvm::find_if(script->sectionCommands, isSection),
- llvm::find_if(llvm::reverse(script->sectionCommands), isSection).base(),
- compareSections);
+ // OutputDescs are mostly contiguous, but may be interleaved with
+ // SymbolAssignments in the presence of INSERT commands.
+ auto mid = std::stable_partition(
+ script->sectionCommands.begin(), script->sectionCommands.end(),
+ [](SectionCommand *cmd) { return isa<OutputDesc>(cmd); });
+ std::stable_sort(script->sectionCommands.begin(), mid, compareSections);
}
// Process INSERT commands and update output section attributes. From this
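
The Writer change above stops assuming that OutputDescs form one contiguous run: it stably partitions them to the front and sorts only that prefix, which tolerates SymbolAssignments interleaved by INSERT commands. A generic sketch of the same pattern on a toy command list (types and values invented for illustration):

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct Cmd { bool isSection; int rank; std::string name; };

    int main() {
      std::vector<Cmd> cmds = {
          {true, 3, ".data"}, {false, 0, "sym = 1;"}, {true, 1, ".text"},
          {false, 0, "other = 2;"}, {true, 2, ".rodata"}};
      // Move all "sections" to the front, preserving relative order of the rest.
      auto mid = std::stable_partition(cmds.begin(), cmds.end(),
                                       [](const Cmd &c) { return c.isSection; });
      // Sort only the section prefix by rank.
      std::stable_sort(cmds.begin(), mid,
                       [](const Cmd &a, const Cmd &b) { return a.rank < b.rank; });
      for (const Cmd &c : cmds)
        std::printf("%s\n", c.name.c_str());   // .text .rodata .data sym = 1; other = 2;
    }
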
diff --git a/contrib/llvm-project/lld/docs/ReleaseNotes.rst b/contrib/llvm-project/lld/docs/ReleaseNotes.rst
index 01669543cd50..fa0e7f2bc0b3 100644
--- a/contrib/llvm-project/lld/docs/ReleaseNotes.rst
+++ b/contrib/llvm-project/lld/docs/ReleaseNotes.rst
@@ -29,8 +29,50 @@ ELF Improvements
* ``--fat-lto-objects`` option is added to support LLVM FatLTO.
Without ``--fat-lto-objects``, LLD will link LLVM FatLTO objects using the
relocatable object file. (`D146778 <https://reviews.llvm.org/D146778>`_)
+* ``-Bsymbolic-non-weak`` is added to directly bind non-weak definitions.
+ (`D158322 <https://reviews.llvm.org/D158322>`_)
+* ``--lto-validate-all-vtables-have-type-infos``, which complements
+ ``--lto-whole-program-visibility``, is added to disable unsafe whole-program
+ devirtualization. ``--lto-known-safe-vtables=<glob>`` can be used
+ to mark known-safe vtable symbols.
+ (`D155659 <https://reviews.llvm.org/D155659>`_)
+* ``--save-temps --lto-emit-asm`` now derives ELF/asm file names from bitcode file names.
+ ``ld.lld --save-temps a.o d/b.o -o out`` will create ELF relocatable files
+ ``out.lto.a.o``/``d/out.lto.b.o`` instead of ``out1.lto.o``/``out2.lto.o``.
+ (`#78835 <https://github.com/llvm/llvm-project/pull/78835>`_)
+* ``--no-allow-shlib-undefined`` now reports errors for DSO referencing
+ non-exported definitions.
+ (`#70769 <https://github.com/llvm/llvm-project/pull/70769>`_)
* common-page-size can now be larger than the system page-size.
(`#57618 <https://github.com/llvm/llvm-project/issues/57618>`_)
+* When call graph profile information is available due to instrumentation or
+ sample PGO, input sections are now sorted using the new ``cdsort`` algorithm,
+ better than the previous ``hfsort`` algorithm.
+ (`D152840 <https://reviews.llvm.org/D152840>`_)
+* Symbol assignments like ``a = DEFINED(a) ? a : 0;`` are now handled.
+ (`#65866 <https://github.com/llvm/llvm-project/pull/65866>`_)
+* ``OVERLAY`` now supports an optional start address and LMA.
+ (`#77272 <https://github.com/llvm/llvm-project/pull/77272>`_)
+* Relocations referencing a symbol defined in ``/DISCARD/`` section now lead to
+ an error.
+ (`#69295 <https://github.com/llvm/llvm-project/pull/69295>`_)
+* For AArch64 MTE, global variable descriptors have been implemented.
+ (`D152921 <https://reviews.llvm.org/D152921>`_)
+* ``R_AARCH64_GOTPCREL32`` is now supported.
+ (`#72584 <https://github.com/llvm/llvm-project/pull/72584>`_)
+* ``R_LARCH_PCREL20_S2``/``R_LARCH_ADD6``/``R_LARCH_CALL36`` and extreme code
+ model relocations are now supported.
+* ``--emit-relocs`` is now supported for RISC-V linker relaxation.
+ (`D159082 <https://reviews.llvm.org/D159082>`_)
+* Call relaxation respects RVC when mixing +c and -c relocatable files.
+ (`#73977 <https://github.com/llvm/llvm-project/pull/73977>`_)
+* ``R_RISCV_GOT32_PCREL`` is now supported.
+ (`#72587 <https://github.com/llvm/llvm-project/pull/72587>`_)
+* ``R_RISCV_SET_ULEB128``/``R_RISCV_SUB_ULEB128`` relocations are now supported.
+ (`#72610 <https://github.com/llvm/llvm-project/pull/72610>`_)
+ (`#77261 <https://github.com/llvm/llvm-project/pull/77261>`_)
+* RISC-V TLSDESC is now supported.
+ (`#79239 <https://github.com/llvm/llvm-project/pull/79239>`_)
Breaking changes
----------------
@@ -41,9 +83,29 @@ COFF Improvements
* Added support for ``--time-trace`` and associated ``--time-trace-granularity``.
This generates a .json profile trace of the linker execution.
+* LLD now prefers library paths specified with ``-libpath:`` over the implicitly
+ detected toolchain paths.
+
MinGW Improvements
------------------
+* Added support for many LTO and ThinLTO options (most LTO options supported
+  by the ELF driver and implemented by the COFF backend as well should now
+  be supported).
+
+* LLD no longer tries to autodetect and use library paths from MSVC/WinSDK
+  installations when run in MinGW mode; that mode of operation should never
+  be needed there and could be a source of unexpected behaviour.
+
+* The ``--icf=safe`` option now works as expected; it was previously a no-op.
+
+* More correctly handle LTO of files that define ``__imp_`` prefixed dllimport
+ redirections.
+
+* The strip flags ``-S`` and ``-s`` now can be used to strip out DWARF debug
+ info and symbol tables while emitting a PDB debug info file.
+
MachO Improvements
------------------
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
index d6f732d35fd4..e8e4f491be5a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -287,6 +287,10 @@ public:
/// store %l, ...
bool MayBeCrossIteration = false;
+ /// Whether alias analysis is allowed to use the dominator tree, for use by
+ /// passes that lazily update the DT while performing AA queries.
+ bool UseDominatorTree = true;
+
AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {}
};
@@ -668,6 +672,9 @@ public:
void enableCrossIterationMode() {
AAQI.MayBeCrossIteration = true;
}
+
+ /// Disable the use of the dominator tree during alias analysis queries.
+ void disableDominatorTree() { AAQI.UseDominatorTree = false; }
};
/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index afc1811239f2..7eca82729430 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase {
const Function &F;
const TargetLibraryInfo &TLI;
AssumptionCache &AC;
- DominatorTree *DT;
+ /// Use getDT() instead of accessing this member directly, in order to
+ /// respect the AAQI.UseDominatorTree option.
+ DominatorTree *DT_;
+
+ DominatorTree *getDT(const AAQueryInfo &AAQI) const {
+ return AAQI.UseDominatorTree ? DT_ : nullptr;
+ }
public:
BasicAAResult(const DataLayout &DL, const Function &F,
const TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree *DT = nullptr)
- : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {}
+ : DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {}
BasicAAResult(const BasicAAResult &Arg)
: AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC),
- DT(Arg.DT) {}
+ DT_(Arg.DT_) {}
BasicAAResult(BasicAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI),
- AC(Arg.AC), DT(Arg.DT) {}
+ AC(Arg.AC), DT_(Arg.DT_) {}
/// Handle invalidation events in the new pass manager.
bool invalidate(Function &Fn, const PreservedAnalyses &PA,
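
The two headers above add a per-query UseDominatorTree flag and route every dominator-tree access through getDT(AAQI). A minimal, self-contained sketch of the pattern with stand-in types (nothing below is LLVM API beyond the names it imitates):

    #include <cassert>

    struct DomTree {};                 // stand-in for llvm::DominatorTree

    struct QueryInfo {
      bool UseDominatorTree = true;    // mirrors AAQueryInfo::UseDominatorTree
    };

    class Analysis {
      DomTree *DT_ = nullptr;          // never read directly
      DomTree *getDT(const QueryInfo &QI) const {
        return QI.UseDominatorTree ? DT_ : nullptr;
      }
    public:
      explicit Analysis(DomTree *DT) : DT_(DT) {}
      bool queryUsesDomTree(const QueryInfo &QI) const { return getDT(QI) != nullptr; }
    };

    int main() {
      DomTree DT;
      Analysis A(&DT);
      QueryInfo Lazy;
      Lazy.UseDominatorTree = false;   // what BatchAAResults::disableDominatorTree() sets
      assert(A.queryUsesDomTree(QueryInfo{}) && !A.queryUsesDomTree(Lazy));
    }
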
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 6b9d17818201..91e1872e9bd6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -122,16 +122,23 @@ public:
}
BranchProbabilityInfo(BranchProbabilityInfo &&Arg)
- : Probs(std::move(Arg.Probs)), LastF(Arg.LastF),
- EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {}
+ : Handles(std::move(Arg.Handles)), Probs(std::move(Arg.Probs)),
+ LastF(Arg.LastF),
+ EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {
+ for (auto &Handle : Handles)
+ Handle.setBPI(this);
+ }
BranchProbabilityInfo(const BranchProbabilityInfo &) = delete;
BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete;
BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) {
releaseMemory();
+ Handles = std::move(RHS.Handles);
Probs = std::move(RHS.Probs);
EstimatedBlockWeight = std::move(RHS.EstimatedBlockWeight);
+ for (auto &Handle : Handles)
+ Handle.setBPI(this);
return *this;
}
@@ -279,6 +286,8 @@ private:
}
public:
+ void setBPI(BranchProbabilityInfo *BPI) { this->BPI = BPI; }
+
BasicBlockCallbackVH(const Value *V, BranchProbabilityInfo *BPI = nullptr)
: CallbackVH(const_cast<Value *>(V)), BPI(BPI) {}
};
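
The BranchProbabilityInfo hunks move the Handles container along with the rest of the state and then re-point each callback handle at the new owner via setBPI(this). A stripped-down illustration of why that loop is needed when an object owns self-referencing handles (toy types, not the LLVM classes):

    #include <cassert>
    #include <vector>

    struct Owner;
    struct Handle {
      Owner *owner;                    // back-pointer that can dangle
      void setOwner(Owner *o) { owner = o; }
    };

    struct Owner {
      std::vector<Handle> handles;
      Owner() = default;
      Owner(Owner &&other) : handles(std::move(other.handles)) {
        for (Handle &h : handles)      // without this loop, each handle would
          h.setOwner(this);            // still point at the moved-from object
      }
    };

    int main() {
      Owner a;
      a.handles.push_back(Handle{&a});
      Owner b(std::move(a));
      assert(b.handles[0].owner == &b);
    }
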
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
index 2880ed33a34c..0926093bba99 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
@@ -18,7 +18,7 @@
namespace llvm {
-class AAResults;
+class BatchAAResults;
class AssumptionCache;
class DataLayout;
class DominatorTree;
@@ -129,11 +129,10 @@ extern cl::opt<unsigned> DefMaxInstsToScan;
/// location in memory, as opposed to the value operand of a store.
///
/// \returns The found value, or nullptr if no value is found.
-Value *FindAvailableLoadedValue(LoadInst *Load,
- BasicBlock *ScanBB,
+Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = DefMaxInstsToScan,
- AAResults *AA = nullptr,
+ BatchAAResults *AA = nullptr,
bool *IsLoadCSE = nullptr,
unsigned *NumScanedInst = nullptr);
@@ -141,7 +140,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
/// FindAvailableLoadedValue() for the case where we are not interested in
/// finding the closest clobbering instruction if no available load is found.
/// This overload cannot be used to scan across multiple blocks.
-Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
+Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
+ bool *IsLoadCSE,
unsigned MaxInstsToScan = DefMaxInstsToScan);
/// Scan backwards to see if we have the value of the given pointer available
@@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy,
bool AtLeastAtomic, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
- unsigned MaxInstsToScan, AAResults *AA,
+ unsigned MaxInstsToScan, BatchAAResults *AA,
bool *IsLoadCSE, unsigned *NumScanedInst);
/// Returns true if a pointer value \p A can be replace with another pointer
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
index f09e12f3038c..07edf68c667a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
@@ -771,8 +771,8 @@ TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8")
-TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+TLI_DEFINE_VECFUNC("modf", "_ZGVsNxvl8_modf", SCALABLE(2), NOMASK, "_ZGVsNxvl8")
+TLI_DEFINE_VECFUNC("modff", "_ZGVsNxvl4_modff", SCALABLE(4), NOMASK, "_ZGVsNxvl4")
TLI_DEFINE_VECFUNC("nextafter", "_ZGVsMxvv_nextafter", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("nextafterf", "_ZGVsMxvv_nextafterf", SCALABLE(4), MASKED, "_ZGVsMxvv")
@@ -787,11 +787,11 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
-TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+TLI_DEFINE_VECFUNC("sincos", "_ZGVsNxvl8l8_sincos", SCALABLE(2), NOMASK, "_ZGVsNxvl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVsNxvl4l4_sincosf", SCALABLE(4), NOMASK, "_ZGVsNxvl4l4")
-TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
-TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+TLI_DEFINE_VECFUNC("sincospi", "_ZGVsNxvl8l8_sincospi", SCALABLE(2), NOMASK, "_ZGVsNxvl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "_ZGVsNxvl4l4_sincospif", SCALABLE(4), NOMASK, "_ZGVsNxvl4l4")
TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -1005,8 +1005,6 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED, "
TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8")
TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4")
-TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8")
-TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4")
TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
@@ -1035,13 +1033,9 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_Z
TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
-TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
-TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
-TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
-TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 76bb34d270a2..1d40b1cbb0ea 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -193,11 +193,18 @@ void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs);
void computeAndAddLiveIns(LivePhysRegs &LiveRegs,
MachineBasicBlock &MBB);
-/// Convenience function for recomputing live-in's for \p MBB.
-static inline void recomputeLiveIns(MachineBasicBlock &MBB) {
+/// Convenience function for recomputing live-in's for an MBB. Returns true if
+/// any changes were made.
+static inline bool recomputeLiveIns(MachineBasicBlock &MBB) {
LivePhysRegs LPR;
+ auto oldLiveIns = MBB.getLiveIns();
+
MBB.clearLiveIns();
computeAndAddLiveIns(LPR, MBB);
+ MBB.sortUniqueLiveIns();
+
+ auto newLiveIns = MBB.getLiveIns();
+ return oldLiveIns != newLiveIns;
}
} // end namespace llvm
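
recomputeLiveIns now reports whether the live-in set actually changed, which lets callers iterate the recomputation to a fixed point instead of guessing how many passes are enough. A toy stand-in showing the call pattern the bool return enables (the dataflow below is invented purely for illustration):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Toy stand-in for recomputeLiveIns: merge the successor's set into this
    // block's set and report whether anything was added.
    bool recompute(std::vector<bool> &self, const std::vector<bool> &succ) {
      bool changed = false;
      for (std::size_t i = 0; i < self.size(); ++i)
        if (succ[i] && !self[i]) {
          self[i] = true;
          changed = true;
        }
      return changed;
    }

    int main() {
      std::vector<std::vector<bool>> live = {{false, false, true},
                                             {false, true, false},
                                             {true, false, false}};
      bool changed;
      int rounds = 0;
      do {                             // the bool return makes this loop possible
        changed = false;
        for (std::size_t b = 0; b + 1 < live.size(); ++b)
          changed |= recompute(live[b], live[b + 1]);
        ++rounds;
      } while (changed);
      std::printf("converged after %d rounds\n", rounds);
    }
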
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index c84fd281c6a5..dc2035fa598c 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -111,6 +111,10 @@ public:
RegisterMaskPair(MCPhysReg PhysReg, LaneBitmask LaneMask)
: PhysReg(PhysReg), LaneMask(LaneMask) {}
+
+ bool operator==(const RegisterMaskPair &other) const {
+ return PhysReg == other.PhysReg && LaneMask == other.LaneMask;
+ }
};
private:
@@ -473,6 +477,8 @@ public:
/// Remove entry from the livein set and return iterator to the next.
livein_iterator removeLiveIn(livein_iterator I);
+ std::vector<RegisterMaskPair> getLiveIns() const { return LiveIns; }
+
class liveout_iterator {
public:
using iterator_category = std::input_iterator_tag;
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index e6db9da5526a..c5f43d17d1c1 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2601,6 +2601,11 @@ def int_amdgcn_ds_bvh_stack_rtn :
[ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
+def int_amdgcn_s_wait_event_export_ready :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
+>;
+
// WMMA (Wave Matrix Multiply-Accumulate) intrinsics
//
// These operations perform a matrix multiplication and accumulation of
@@ -2608,10 +2613,10 @@ def int_amdgcn_ds_bvh_stack_rtn :
class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> :
Intrinsic<
- [CD], // %D
+ [CD], // %D
[
AB, // %A
- AB, // %B
+ LLVMMatchType<1>, // %B
LLVMMatchType<0>, // %C
],
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]
@@ -2619,49 +2624,50 @@ class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> :
class AMDGPUWmmaIntrinsicOPSEL<LLVMType AB, LLVMType CD> :
Intrinsic<
- [CD], // %D
+ [CD], // %D
[
AB, // %A
- AB, // %B
+ LLVMMatchType<1>, // %B
LLVMMatchType<0>, // %C
- llvm_i1_ty, // %high
+ llvm_i1_ty, // %high (op_sel) for GFX11, 0 for GFX12
],
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
class AMDGPUWmmaIntrinsicIU<LLVMType AB, LLVMType CD> :
Intrinsic<
- [CD], // %D
+ [CD], // %D
[
llvm_i1_ty, // %A_sign
AB, // %A
llvm_i1_ty, // %B_sign
- AB, // %B
+ LLVMMatchType<1>, // %B
LLVMMatchType<0>, // %C
llvm_i1_ty, // %clamp
],
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
-def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_v16f16_ty, llvm_anyfloat_ty>;
-def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_v16i16_ty, llvm_anyfloat_ty>;
-// The regular, untied f16/bf16 wmma intrinsics only write to one half
-// of the registers (set via the op_sel bit).
-// The content of the other 16-bit of the registers is undefined.
-def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>;
-def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>;
-// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix
-// registers to the input accumulator registers.
-// Essentially, the content of the other 16-bit is preserved from the input.
-def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>;
-def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>;
-def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_v4i32_ty, llvm_anyint_ty>;
-def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_v2i32_ty, llvm_anyint_ty>;
+// WMMA GFX11Only
-def int_amdgcn_s_wait_event_export_ready :
- ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
- Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
->;
+// The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit.
+// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix registers to the input accumulator registers.
+// The content of the other 16-bit half is preserved from the input.
+def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty, llvm_anyint_ty>;
+
+// WMMA GFX11Plus
+
+def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_anyfloat_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>;
+
+// GFX11: The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit.
+// The content of the other 16-bit half is undefined.
+// GFX12: The op_sel bit must be 0.
+def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty, llvm_anyint_ty>;
//===----------------------------------------------------------------------===//
// GFX12 Intrinsics
@@ -2681,6 +2687,65 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var"
[IntrNoMem, IntrConvergent, IntrWillReturn,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
+
+// WMMA (Wave Matrix Multiply-Accumulate) intrinsics
+//
+// These operations perform a matrix multiplication and accumulation of
+// the form: D = A * B + C .
+
+// A and B are <8 x fp8> or <8 x bf8>, but since fp8 and bf8 are not supported by llvm we use <2 x i32>.
+def int_amdgcn_wmma_f32_16x16x16_fp8_fp8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_fp8_bf8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_bf8_fp8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_bf8_bf8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+// A and B are <16 x iu4>.
+def int_amdgcn_wmma_i32_16x16x32_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>;
+
+// SWMMAC (Wave Matrix (sparse) Multiply-Accumulate) intrinsics
+//
+// These operations perform a sparse matrix multiplication and accumulation of
+// the form: D = A * B + C.
+// A is a sparse matrix, half the size of B, and is expanded using the sparsity index.
+
+class AMDGPUSWmmacIntrinsicIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
+ Intrinsic<
+ [CD], // %D
+ [
+ A, // %A
+ B, // %B
+ LLVMMatchType<0>, // %C
+ Index // %Sparsity index for A
+ ],
+ [IntrNoMem, IntrConvergent, IntrWillReturn]
+>;
+
+class AMDGPUSWmmacIntrinsicIUIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
+ Intrinsic<
+ [CD], // %D
+ [
+ llvm_i1_ty, // %A_sign
+ A, // %A
+ llvm_i1_ty, // %B_sign
+ B, // %B
+ LLVMMatchType<0>, // %C
+ Index, // %Sparsity index for A
+ llvm_i1_ty, // %clamp
+ ],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>]
+>;
+
+def int_amdgcn_swmmac_f32_16x16x32_f16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_bf16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f16_16x16x32_f16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_bf16_16x16x32_bf16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_i32_16x16x32_iu8 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_i32_16x16x32_iu4 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_i32_16x16x64_iu4 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_fp8_fp8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_fp8_bf8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_bf8_fp8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_bf8_bf8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+
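To make the "A is half the size of B and is expanded using the sparsity index" comment concrete, here is a hedged sketch of one structured-sparsity expansion scheme (two stored values per group of four, positions taken from 2-bit index fields). The exact index encoding used by the hardware is not spelled out in this file, so treat the layout below as an assumption for illustration only.

    #include <array>
    #include <cstdint>

    // Expand one compressed group of A (2 stored values out of 4 positions) into
    // its dense form; the dense rows then feed the usual D = A * B + C product.
    std::array<float, 4> expandGroup(const std::array<float, 2> &stored,
                                     uint8_t index) {
      std::array<float, 4> dense{};            // zero-initialised group of 4
      dense[index & 0x3] = stored[0];          // low 2 bits: position of element 0
      dense[(index >> 2) & 0x3] = stored[1];   // next 2 bits: position of element 1
      return dense;
    }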
def int_amdgcn_global_atomic_ordered_add_b64 : AMDGPUAtomicRtn<llvm_i64_ty, global_ptr_ty>;
def int_amdgcn_flat_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
@@ -2712,6 +2777,10 @@ class AMDGPULoadTr<LLVMType ptr_ty>:
def int_amdgcn_global_load_tr : AMDGPULoadTr<global_ptr_ty>;
+// i32 @llvm.amdgcn.wave.id()
+def int_amdgcn_wave_id :
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h b/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h
index ed244febc38d..77d32cc7fb37 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h
@@ -46,11 +46,12 @@ enum {
// Broadcast type.
// (stored in bits 12 - 14)
TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3,
- TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_W = 0 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_D = 1 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_Q = 2 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SS = 3 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SD = 4 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SH = 5 << TB_BCAST_TYPE_SHIFT,
TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT,
// Unused bits 15-16
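With the new TB_BCAST_W entry, the broadcast type uses six of the eight encodable values in bits 12-14. A hedged sketch of how such a flags word could be decoded, assuming the TB_BCAST_* constants above are in scope; getBroadcastKind is a hypothetical helper, not an existing LLVM API.

    // Mask bits 12-14 and shift them down, yielding 0=W, 1=D, 2=Q, 3=SS, 4=SD, 5=SH.
    inline unsigned getBroadcastKind(unsigned Flags) {
      return (Flags & TB_BCAST_MASK) >> TB_BCAST_TYPE_SHIFT;
    }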
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
index 82c4c7b23a49..b5419cb9f386 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
@@ -152,6 +152,34 @@ class CheckImmOperand_s<int Index, string Value> : CheckOperandBase<Index> {
string ImmVal = Value;
}
+// Check that the operand at position `Index` is less than `Imm`.
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperandLT<int Index, int Imm> : CheckOperandBase<Index> {
+ int ImmVal = Imm;
+}
+
+// Check that the operand at position `Index` is greater than `Imm`.
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperandGT<int Index, int Imm> : CheckOperandBase<Index> {
+ int ImmVal = Imm;
+}
+
+// Check that the operand at position `Index` is less than or equal to `Imm`.
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperandLE<int Index, int Imm> : CheckNot<CheckImmOperandGT<Index, Imm>>;
+
+// Check that the operand at position `Index` is greater than or equal to `Imm`.
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperandGE<int Index, int Imm> : CheckNot<CheckImmOperandLT<Index, Imm>>;
+
// Expands to a call to `FunctionMapper` if field `FunctionMapper` is set.
// Otherwise, it expands to a CheckNot<CheckInvalidRegOperand<Index>>.
class CheckRegOperandSimple<int Index> : CheckOperandBase<Index>;
@@ -203,6 +231,12 @@ class CheckAll<list<MCInstPredicate> Sequence>
class CheckAny<list<MCInstPredicate> Sequence>
: CheckPredicateSequence<Sequence>;
+// Check that the operand at position `Index` is in range [Start, End].
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against range [Start, End].
+class CheckImmOperandRange<int Index, int Start, int End>
+ : CheckAll<[CheckImmOperandGE<Index, Start>, CheckImmOperandLE<Index, End>]>;
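The range predicate is just the composition of the two strict comparisons above: LE is NOT(GT) and GE is NOT(LT). A plain C++ sanity check of that logic; inRange is a hypothetical stand-in for what the generated predicate evaluates.

    // "Operand in [Start, End]" expressed the same way the TableGen classes compose it.
    inline bool inRange(int Operand, int Start, int End) {
      return !(Operand < Start) && !(Operand > End);
    }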
// Used to expand the body of a function predicate. See the definition of
// TIIPredicate below.
diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 623fdc21ba65..6d82748d8004 100644
--- a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -813,7 +813,8 @@ struct CpuAlias {
StringRef Name;
};
-inline constexpr CpuAlias CpuAliases[] = {{"grace", "neoverse-v2"}};
+inline constexpr CpuAlias CpuAliases[] = {{"cobalt-100", "neoverse-n2"},
+ {"grace", "neoverse-v2"}};
bool getExtensionFeatures(
const AArch64::ExtensionBitset &Extensions,
diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h
index 870dc75b1c1f..49ec8de9c528 100644
--- a/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h
+++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h
@@ -1033,11 +1033,11 @@ public:
isWindowsCygwinEnvironment() || isOHOSFamily();
}
- /// Tests whether the target uses TLS Descriptor by default.
+ /// True if the target supports both general-dynamic and TLSDESC, and TLSDESC
+ /// is enabled by default.
bool hasDefaultTLSDESC() const {
// TODO: Improve check for other platforms, like Android, and RISC-V
- // Note: This is currently only used on RISC-V.
- return isOSBinFormatELF() && isAArch64();
+ return false;
}
/// Tests whether the target uses -data-sections as default.
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 3178e2d27816..1028b52a7912 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -89,7 +89,7 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
// may be created without handles to some analyses and in that case don't
// depend on them.
if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
- (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
+ (DT_ && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
return true;
// Otherwise this analysis result remains valid.
@@ -1063,6 +1063,7 @@ AliasResult BasicAAResult::aliasGEP(
: AliasResult::MayAlias;
}
+ DominatorTree *DT = getDT(AAQI);
DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
@@ -1556,6 +1557,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
const Value *HintO1 = getUnderlyingObject(Hint1);
const Value *HintO2 = getUnderlyingObject(Hint2);
+ DominatorTree *DT = getDT(AAQI);
auto ValidAssumeForPtrContext = [&](const Value *Ptr) {
if (const Instruction *PtrI = dyn_cast<Instruction>(Ptr)) {
return isValidAssumeForContext(Assume, PtrI, DT,
@@ -1735,7 +1737,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
if (!Inst || Inst->getParent()->isEntryBlock())
return true;
- return isNotInCycle(Inst, DT, /*LI*/ nullptr);
+ return isNotInCycle(Inst, getDT(AAQI), /*LI*/ nullptr);
}
/// Computes the symbolic difference between two de-composed GEPs.
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
index 1ebc593016bc..16635097d20a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp
@@ -657,11 +657,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
BasicBlock::iterator BBI = L->getIterator();
BasicBlock *BB = L->getParent();
SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+ BatchAAResults BatchAA(*AA);
for (;;) {
if (!VisitedBlocks.insert(BB).second)
break;
if (Value *U =
- FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
+ FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA))
return findValueImpl(U, OffsetOk, Visited);
if (BBI != BB->begin())
break;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
index 97d21db86abf..6bf0d2f56eb4 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp
@@ -450,11 +450,10 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
"to scan backward from a given instruction, when searching for "
"available loaded value"));
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
- BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AAResults *AA, bool *IsLoad,
+ BatchAAResults *AA, bool *IsLoad,
unsigned *NumScanedInst) {
// Don't CSE load that is volatile or anything stronger than unordered.
if (!Load->isUnordered())
@@ -583,7 +582,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
Value *llvm::findAvailablePtrLoadStore(
const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic,
BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan,
- AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
+ BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
@@ -664,7 +663,7 @@ Value *llvm::findAvailablePtrLoadStore(
return nullptr;
}
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
bool *IsLoadCSE,
unsigned MaxInstsToScan) {
const DataLayout &DL = Load->getModule()->getDataLayout();
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7e67c9015282..dd6b88fee415 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -657,16 +657,18 @@ public:
AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
- PredicatedScalarEvolution &PSE)
- : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
+ PredicatedScalarEvolution &PSE,
+ SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
+ : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
+ LoopAliasScopes(LoopAliasScopes) {
// We're analyzing dependences across loop iterations.
BAA.enableCrossIterationMode();
}
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
- Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
+ Value *Ptr = const_cast<Value *>(Loc.Ptr);
+ AST.add(adjustLoc(Loc));
Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
@@ -674,8 +676,8 @@ public:
/// Register a store.
void addStore(MemoryLocation &Loc, Type *AccessTy) {
- Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
+ Value *Ptr = const_cast<Value *>(Loc.Ptr);
+ AST.add(adjustLoc(Loc));
Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
}
@@ -731,6 +733,32 @@ public:
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
+ /// Adjust the MemoryLocation so that it represents accesses to this
+ /// location across all iterations, rather than a single one.
+ MemoryLocation adjustLoc(MemoryLocation Loc) const {
+ // The accessed location varies within the loop, but remains within the
+ // underlying object.
+ Loc.Size = LocationSize::beforeOrAfterPointer();
+ Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
+ Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
+ return Loc;
+ }
+
+ /// Drop alias scopes that are only valid within a single loop iteration.
+ MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
+ if (!ScopeList)
+ return nullptr;
+
+ // For the sake of simplicity, drop the whole scope list if any scope is
+ // iteration-local.
+ if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
+ return LoopAliasScopes.contains(cast<MDNode>(Scope));
+ }))
+ return nullptr;
+
+ return ScopeList;
+ }
+
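A hedged source-level illustration of how an iteration-local alias scope arises: when a callee with a __restrict parameter is inlined into the loop body, the inliner materialises a noalias scope declaration (llvm.experimental.noalias.scope.decl) inside the loop. That scope only relates accesses within a single iteration, which is exactly what adjustAliasScopeList now drops for cross-iteration queries. The functions below are made up for the example.

    // After inlining, bump()'s noalias scope is declared inside the loop and is
    // valid only within one iteration, so it cannot be used to disambiguate
    // accesses coming from different iterations.
    static inline void bump(float *__restrict p) { *p += 1.0f; }

    void scale(float *a, const int *idx, int n) {
      for (int i = 0; i < n; ++i)
        bump(&a[idx[i]]);   // different iterations may hit the same element
    }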
/// Go over all memory access and check whether runtime pointer checks
/// are needed and build sets of dependency check candidates.
void processMemAccesses();
@@ -775,6 +803,10 @@ private:
PredicatedScalarEvolution &PSE;
DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
+
+ /// Alias scopes that are declared inside the loop, and as such not valid
+ /// across iterations.
+ SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
};
} // end anonymous namespace
@@ -2283,6 +2315,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
+ SmallPtrSet<MDNode *, 8> LoopAliasScopes;
// Holds all the different accesses in the loop.
unsigned NumReads = 0;
@@ -2326,6 +2359,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (HasComplexMemInst)
continue;
+ // Record alias scopes defined inside the loop.
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ for (Metadata *Op : Decl->getScopeList()->operands())
+ LoopAliasScopes.insert(cast<MDNode>(Op));
+
// Many math library functions read the rounding mode. We will only
// vectorize a loop if it contains known function calls that don't set
// the flag. Therefore, it is safe to ignore this read from memory.
@@ -2407,7 +2445,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
}
MemoryDepChecker::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
+ AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
+ LoopAliasScopes);
// Holds the analyzed pointers. We don't want to call getUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
index e87ae7d71fff..aa550f0b6a7b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -692,25 +692,9 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
continue;
// Determine incoming value and add it as incoming from IncBB.
- if (MemoryUseOrDef *IncMUD = dyn_cast<MemoryUseOrDef>(IncomingAccess)) {
- if (!MSSA->isLiveOnEntryDef(IncMUD)) {
- Instruction *IncI = IncMUD->getMemoryInst();
- assert(IncI && "Found MemoryUseOrDef with no Instruction.");
- if (Instruction *NewIncI =
- cast_or_null<Instruction>(VMap.lookup(IncI))) {
- IncMUD = MSSA->getMemoryAccess(NewIncI);
- assert(IncMUD &&
- "MemoryUseOrDef cannot be null, all preds processed.");
- }
- }
- NewPhi->addIncoming(IncMUD, IncBB);
- } else {
- MemoryPhi *IncPhi = cast<MemoryPhi>(IncomingAccess);
- if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi))
- NewPhi->addIncoming(NewDefPhi, IncBB);
- else
- NewPhi->addIncoming(IncPhi, IncBB);
- }
+ NewPhi->addIncoming(
+ getNewDefiningAccessForClone(IncomingAccess, VMap, MPhiMap, MSSA),
+ IncBB);
}
if (auto *SingleAccess = onlySingleValue(NewPhi)) {
MPhiMap[Phi] = SingleAccess;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index a9f78358e57b..ecf7bc30913f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -2048,8 +2048,10 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
FBB->erase(FBB->begin(), FIB);
if (UpdateLiveIns) {
- recomputeLiveIns(*TBB);
- recomputeLiveIns(*FBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*TBB) || recomputeLiveIns(*FBB);
+ } while (anyChange);
}
++NumHoist;
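The replacement loop above iterates to a fixed point because each block's live-ins are derived from its successors' live-ins, so a single recomputation pass can be insufficient. A minimal sketch of the general pattern, assuming recomputeLiveIns() returns true when a block's live-in set changed (as the new code relies on); Blocks is a hypothetical container of the affected machine basic blocks.

    bool Changed;
    do {
      Changed = false;
      for (MachineBasicBlock *MBB : Blocks)
        Changed |= recomputeLiveIns(*MBB);
    } while (Changed);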
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index da8e1d87319d..a357b4cb4921 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -14,6 +14,7 @@
#include "llvm/ProfileData/Coverage/CoverageMapping.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -583,6 +584,160 @@ static unsigned getMaxBitmapSize(const CounterMappingContext &Ctx,
return MaxBitmapID + (SizeInBits / CHAR_BIT);
}
+namespace {
+
+/// Collect Decisions, Branches, and Expansions and associate them.
+class MCDCDecisionRecorder {
+private:
+ /// This holds the DecisionRegion and MCDCBranches under it.
+ /// Also traverses Expansion(s).
+ /// The Decision has the number of MCDCBranches and will complete
+ /// when it is filled with unique ConditionID of MCDCBranches.
+ struct DecisionRecord {
+ const CounterMappingRegion *DecisionRegion;
+
+ /// They are reflected from DecisionRegion for convenience.
+ LineColPair DecisionStartLoc;
+ LineColPair DecisionEndLoc;
+
+ /// This is passed to `MCDCRecordProcessor`, so this should be compatible
+    /// with `ArrayRef<const CounterMappingRegion *>`.
+ SmallVector<const CounterMappingRegion *> MCDCBranches;
+
+ /// IDs that are stored in MCDCBranches
+ /// Complete when all IDs (1 to NumConditions) are met.
+ DenseSet<CounterMappingRegion::MCDCConditionID> ConditionIDs;
+
+ /// Set of IDs of Expansion(s) that are relevant to DecisionRegion
+ /// and its children (via expansions).
+ /// FileID pointed by ExpandedFileID is dedicated to the expansion, so
+ /// the location in the expansion doesn't matter.
+ DenseSet<unsigned> ExpandedFileIDs;
+
+ DecisionRecord(const CounterMappingRegion &Decision)
+ : DecisionRegion(&Decision), DecisionStartLoc(Decision.startLoc()),
+ DecisionEndLoc(Decision.endLoc()) {
+ assert(Decision.Kind == CounterMappingRegion::MCDCDecisionRegion);
+ }
+
+ /// Determine whether DecisionRecord dominates `R`.
+ bool dominates(const CounterMappingRegion &R) const {
+ // Determine whether `R` is included in `DecisionRegion`.
+ if (R.FileID == DecisionRegion->FileID &&
+ R.startLoc() >= DecisionStartLoc && R.endLoc() <= DecisionEndLoc)
+ return true;
+
+ // Determine whether `R` is pointed by any of Expansions.
+ return ExpandedFileIDs.contains(R.FileID);
+ }
+
+ enum Result {
+ NotProcessed = 0, /// Irrelevant to this Decision
+ Processed, /// Added to this Decision
+ Completed, /// Added and filled this Decision
+ };
+
+ /// Add Branch into the Decision
+ /// \param Branch expects MCDCBranchRegion
+ /// \returns NotProcessed/Processed/Completed
+ Result addBranch(const CounterMappingRegion &Branch) {
+ assert(Branch.Kind == CounterMappingRegion::MCDCBranchRegion);
+
+ auto ConditionID = Branch.MCDCParams.ID;
+ assert(ConditionID > 0 && "ConditionID should begin with 1");
+
+ if (ConditionIDs.contains(ConditionID) ||
+ ConditionID > DecisionRegion->MCDCParams.NumConditions)
+ return NotProcessed;
+
+ if (!this->dominates(Branch))
+ return NotProcessed;
+
+ assert(MCDCBranches.size() < DecisionRegion->MCDCParams.NumConditions);
+
+ // Put `ID=1` in front of `MCDCBranches` for convenience
+      // even if `MCDCBranches` is not in topological order.
+ if (ConditionID == 1)
+ MCDCBranches.insert(MCDCBranches.begin(), &Branch);
+ else
+ MCDCBranches.push_back(&Branch);
+
+ // Mark `ID` as `assigned`.
+ ConditionIDs.insert(ConditionID);
+
+ // `Completed` when `MCDCBranches` is full
+ return (MCDCBranches.size() == DecisionRegion->MCDCParams.NumConditions
+ ? Completed
+ : Processed);
+ }
+
+ /// Record Expansion if it is relevant to this Decision.
+ /// Each `Expansion` may nest.
+ /// \returns true if recorded.
+ bool recordExpansion(const CounterMappingRegion &Expansion) {
+ if (!this->dominates(Expansion))
+ return false;
+
+ ExpandedFileIDs.insert(Expansion.ExpandedFileID);
+ return true;
+ }
+ };
+
+private:
+ /// Decisions in progress
+ /// DecisionRecord is added for each MCDCDecisionRegion.
+ /// DecisionRecord is removed when Decision is completed.
+ SmallVector<DecisionRecord> Decisions;
+
+public:
+ ~MCDCDecisionRecorder() {
+    assert(Decisions.empty() && "Not all Decisions have been resolved");
+ }
+
+ /// Register Region and start recording.
+ void registerDecision(const CounterMappingRegion &Decision) {
+ Decisions.emplace_back(Decision);
+ }
+
+ void recordExpansion(const CounterMappingRegion &Expansion) {
+ any_of(Decisions, [&Expansion](auto &Decision) {
+ return Decision.recordExpansion(Expansion);
+ });
+ }
+
+ using DecisionAndBranches =
+ std::pair<const CounterMappingRegion *, /// Decision
+ SmallVector<const CounterMappingRegion *> /// Branches
+ >;
+
+ /// Add MCDCBranchRegion to DecisionRecord.
+ /// \param Branch to be processed
+  /// \returns DecisionAndBranches if the DecisionRecord is completed,
+  ///          or std::nullopt otherwise.
+ std::optional<DecisionAndBranches>
+ processBranch(const CounterMappingRegion &Branch) {
+ // Seek each Decision and apply Region to it.
+ for (auto DecisionIter = Decisions.begin(), DecisionEnd = Decisions.end();
+ DecisionIter != DecisionEnd; ++DecisionIter)
+ switch (DecisionIter->addBranch(Branch)) {
+ case DecisionRecord::NotProcessed:
+ continue;
+ case DecisionRecord::Processed:
+ return std::nullopt;
+ case DecisionRecord::Completed:
+ DecisionAndBranches Result =
+ std::make_pair(DecisionIter->DecisionRegion,
+ std::move(DecisionIter->MCDCBranches));
+ Decisions.erase(DecisionIter); // No longer used.
+ return Result;
+ }
+
+ llvm_unreachable("Branch not found in Decisions");
+ }
+};
+
+} // namespace
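For context on the terms used by the recorder above: a "decision" is a full boolean expression and each leaf condition gets its own MCDCBranchRegion. A hedged source-level example with three conditions; the function is made up for illustration.

    // One MCDCDecisionRegion spans the whole expression below; each of a, b and c
    // contributes one MCDCBranchRegion, so NumConditions == 3 and the DecisionRecord
    // completes once all three branch regions have been added.
    bool decide(bool a, bool b, bool c) {
      return a && (b || c);
    }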
+
Error CoverageMapping::loadFunctionRecord(
const CoverageMappingRecord &Record,
IndexedInstrProfReader &ProfileReader) {
@@ -639,18 +794,13 @@ Error CoverageMapping::loadFunctionRecord(
Record.MappingRegions[0].Count.isZero() && Counts[0] > 0)
return Error::success();
- unsigned NumConds = 0;
- const CounterMappingRegion *MCDCDecision;
- std::vector<const CounterMappingRegion *> MCDCBranches;
-
+ MCDCDecisionRecorder MCDCDecisions;
FunctionRecord Function(OrigFuncName, Record.Filenames);
for (const auto &Region : Record.MappingRegions) {
- // If an MCDCDecisionRegion is seen, track the BranchRegions that follow
- // it according to Region.NumConditions.
+    // MCDCDecisionRegion should be handled first, since it overlaps with
+    // the regions nested inside it.
if (Region.Kind == CounterMappingRegion::MCDCDecisionRegion) {
- assert(NumConds == 0);
- MCDCDecision = &Region;
- NumConds = Region.MCDCParams.NumConditions;
+ MCDCDecisions.registerDecision(Region);
continue;
}
Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
@@ -665,43 +815,47 @@ Error CoverageMapping::loadFunctionRecord(
}
Function.pushRegion(Region, *ExecutionCount, *AltExecutionCount);
- // If a MCDCDecisionRegion was seen, store the BranchRegions that
- // correspond to it in a vector, according to the number of conditions
- // recorded for the region (tracked by NumConds).
- if (NumConds > 0 && Region.Kind == CounterMappingRegion::MCDCBranchRegion) {
- MCDCBranches.push_back(&Region);
-
- // As we move through all of the MCDCBranchRegions that follow the
- // MCDCDecisionRegion, decrement NumConds to make sure we account for
- // them all before we calculate the bitmap of executed test vectors.
- if (--NumConds == 0) {
- // Evaluating the test vector bitmap for the decision region entails
- // calculating precisely what bits are pertinent to this region alone.
- // This is calculated based on the recorded offset into the global
- // profile bitmap; the length is calculated based on the recorded
- // number of conditions.
- Expected<BitVector> ExecutedTestVectorBitmap =
- Ctx.evaluateBitmap(MCDCDecision);
- if (auto E = ExecutedTestVectorBitmap.takeError()) {
- consumeError(std::move(E));
- return Error::success();
- }
+ // Record ExpansionRegion.
+ if (Region.Kind == CounterMappingRegion::ExpansionRegion) {
+ MCDCDecisions.recordExpansion(Region);
+ continue;
+ }
- // Since the bitmap identifies the executed test vectors for an MC/DC
- // DecisionRegion, all of the information is now available to process.
- // This is where the bulk of the MC/DC progressing takes place.
- Expected<MCDCRecord> Record = Ctx.evaluateMCDCRegion(
- *MCDCDecision, *ExecutedTestVectorBitmap, MCDCBranches);
- if (auto E = Record.takeError()) {
- consumeError(std::move(E));
- return Error::success();
- }
+ // Do nothing unless MCDCBranchRegion.
+ if (Region.Kind != CounterMappingRegion::MCDCBranchRegion)
+ continue;
- // Save the MC/DC Record so that it can be visualized later.
- Function.pushMCDCRecord(*Record);
- MCDCBranches.clear();
- }
+ auto Result = MCDCDecisions.processBranch(Region);
+    if (!Result) // No Decision was completed by this branch.
+ continue;
+
+ auto MCDCDecision = Result->first;
+ auto &MCDCBranches = Result->second;
+
+ // Evaluating the test vector bitmap for the decision region entails
+ // calculating precisely what bits are pertinent to this region alone.
+ // This is calculated based on the recorded offset into the global
+ // profile bitmap; the length is calculated based on the recorded
+ // number of conditions.
+ Expected<BitVector> ExecutedTestVectorBitmap =
+ Ctx.evaluateBitmap(MCDCDecision);
+ if (auto E = ExecutedTestVectorBitmap.takeError()) {
+ consumeError(std::move(E));
+ return Error::success();
}
+
+ // Since the bitmap identifies the executed test vectors for an MC/DC
+ // DecisionRegion, all of the information is now available to process.
+ // This is where the bulk of the MC/DC progressing takes place.
+ Expected<MCDCRecord> Record = Ctx.evaluateMCDCRegion(
+ *MCDCDecision, *ExecutedTestVectorBitmap, MCDCBranches);
+ if (auto E = Record.takeError()) {
+ consumeError(std::move(E));
+ return Error::success();
+ }
+
+ // Save the MC/DC Record so that it can be visualized later.
+ Function.pushMCDCRecord(*Record);
}
// Don't create records for (filenames, function) pairs we've already seen.
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index 1c7d8a8909c4..27727f216b05 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -167,7 +167,15 @@ void CoverageMappingWriter::write(raw_ostream &OS) {
return LHS.FileID < RHS.FileID;
if (LHS.startLoc() != RHS.startLoc())
return LHS.startLoc() < RHS.startLoc();
- return LHS.Kind < RHS.Kind;
+
+ // Put `Decision` before `Expansion`.
+ auto getKindKey = [](CounterMappingRegion::RegionKind Kind) {
+ return (Kind == CounterMappingRegion::MCDCDecisionRegion
+ ? 2 * CounterMappingRegion::ExpansionRegion - 1
+ : 2 * Kind);
+ };
+
+ return getKindKey(LHS.Kind) < getKindKey(RHS.Kind);
});
// Write out the fileid -> filename mapping.
diff --git a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
index 3c02492e99f1..db2e4ca92ae4 100644
--- a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
@@ -128,6 +128,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"zicclsm", {1, 0}},
{"ziccrse", {1, 0}},
{"zicntr", {2, 0}},
+ {"zicond", {1, 0}},
{"zicsr", {2, 0}},
{"zifencei", {2, 0}},
{"zihintntl", {1, 0}},
@@ -200,8 +201,6 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
{"zicfilp", {0, 4}},
{"zicfiss", {0, 4}},
- {"zicond", {1, 0}},
-
{"zimop", {0, 1}},
{"ztso", {0, 1}},
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 352c61d48e2f..1af064b6de3c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1544,6 +1544,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
return true;
}
+ case AArch64::COALESCER_BARRIER_FPR16:
+ case AArch64::COALESCER_BARRIER_FPR32:
+ case AArch64::COALESCER_BARRIER_FPR64:
+ case AArch64::COALESCER_BARRIER_FPR128:
+ MI.eraseFromParent();
+ return true;
case AArch64::LD1B_2Z_IMM_PSEUDO:
return expandMultiVecPseudo(
MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d55deec97600..732e787d2a32 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4339,8 +4339,10 @@ AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
MBB.addSuccessor(LoopMBB);
// Update liveins.
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
+ } while (anyChange);
return ExitMBB->begin();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 332fb3765528..e97f5e322014 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2375,6 +2375,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((AArch64ISD::NodeType)Opcode) {
case AArch64ISD::FIRST_NUMBER:
break;
+ MAKE_CASE(AArch64ISD::COALESCER_BARRIER)
MAKE_CASE(AArch64ISD::SMSTART)
MAKE_CASE(AArch64ISD::SMSTOP)
MAKE_CASE(AArch64ISD::RESTORE_ZA)
@@ -7154,13 +7155,18 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
}
}
+static bool isPassedInFPR(EVT VT) {
+ return VT.isFixedLengthVector() ||
+ (VT.isFloatingPoint() && !VT.isScalableVector());
+}
+
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<CCValAssign> &RVLocs, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
- SDValue ThisVal) const {
+ SDValue ThisVal, bool RequiresSMChange) const {
DenseMap<unsigned, SDValue> CopiedRegs;
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -7205,6 +7211,10 @@ SDValue AArch64TargetLowering::LowerCallResult(
break;
}
+ if (RequiresSMChange && isPassedInFPR(VA.getValVT()))
+ Val = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL, Val.getValueType(),
+ Val);
+
InVals.push_back(Val);
}
@@ -7915,6 +7925,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
return ArgReg.Reg == VA.getLocReg();
});
} else {
+ // Add an extra level of indirection for streaming mode changes by
+ // using a pseudo copy node that cannot be rematerialised between a
+ // smstart/smstop and the call by the simple register coalescer.
+ if (RequiresSMChange && isPassedInFPR(Arg.getValueType()))
+ Arg = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL,
+ Arg.getValueType(), Arg);
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
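A hedged illustration of when RequiresSMChange holds and the barrier nodes matter: calling a streaming function from a non-streaming caller (using Clang's SME __arm_streaming keyword, compiled for AArch64 with SME enabled) forces an smstart/smstop pair around the call, and any FP/NEON argument or result that lives across that toggle gets wrapped in a COALESCER_BARRIER. The functions are made up for the example.

    // callee runs in streaming SVE mode, caller does not, so the call requires a
    // streaming-mode change and x is passed in an FPR across it.
    float callee(float x) __arm_streaming;

    float caller(float x) {
      return callee(x) + 1.0f;   // smstart ... call ... smstop around this call
    }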
@@ -8151,9 +8167,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- SDValue Result = LowerCallResult(Chain, InGlue, CallConv, IsVarArg, RVLocs,
- DL, DAG, InVals, IsThisReturn,
- IsThisReturn ? OutVals[0] : SDValue());
+ SDValue Result = LowerCallResult(
+ Chain, InGlue, CallConv, IsVarArg, RVLocs, DL, DAG, InVals, IsThisReturn,
+ IsThisReturn ? OutVals[0] : SDValue(), RequiresSMChange);
if (!Ins.empty())
InGlue = Result.getValue(Result->getNumValues() - 1);
@@ -26899,7 +26915,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
return false;
// If the vector is scalable, SVE is enabled, implying support for complex
- // numbers. Otherwirse, we need to ensure complex number support is avaialble
+ // numbers. Otherwise, we need to ensure complex number support is available
if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
return false;
@@ -26915,7 +26931,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
!llvm::isPowerOf2_32(VTyWidth))
return false;
- if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2()) {
+ if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
return 8 <= ScalarWidth && ScalarWidth <= 64;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6505931e17e1..541a810fb5cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -58,6 +58,8 @@ enum NodeType : unsigned {
CALL_BTI, // Function call followed by a BTI instruction.
+ COALESCER_BARRIER,
+
SMSTART,
SMSTOP,
RESTORE_ZA,
@@ -1026,7 +1028,7 @@ private:
const SmallVectorImpl<CCValAssign> &RVLocs,
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
- SDValue ThisVal) const;
+ SDValue ThisVal, bool RequiresSMChange) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 2e8d8c63d6be..9b4bb7c88bc8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4098,16 +4098,6 @@ AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
return MI.getOperand(Idx);
}
-const MachineOperand &
-AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode");
- case AArch64::LDRBBroX:
- return MI.getOperand(4);
- }
-}
-
static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
Register Reg) {
if (MI.getParent() == nullptr)
@@ -9597,9 +9587,13 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
// Update liveins.
if (MF.getRegInfo().reservedRegsFrozen()) {
- recomputeLiveIns(*LoopTestMBB);
- recomputeLiveIns(*LoopBodyMBB);
- recomputeLiveIns(*ExitMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*ExitMBB) ||
+ recomputeLiveIns(*LoopBodyMBB) ||
+ recomputeLiveIns(*LoopTestMBB);
+ } while (anyChange);
+ ;
}
return ExitMBB->begin();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index db24a19fe5f8..6526f6740747 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -111,9 +111,6 @@ public:
/// Returns the immediate offset operator of a load/store.
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);
- /// Returns the shift amount operator of a load/store.
- static const MachineOperand &getLdStAmountOp(const MachineInstr &MI);
-
/// Returns whether the instruction is FP or NEON.
static bool isFpOrNEON(const MachineInstr &MI);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index e90b8a8ca7ac..926a89466255 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -62,8 +62,6 @@ STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
-STATISTIC(NumConstOffsetFolded,
- "Number of const offset of index address folded");
DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
"Controls which pairs are considered for renaming");
@@ -77,11 +75,6 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
cl::Hidden);
-// The LdStConstLimit limits how far we search for const offset instructions
-// when we form index address load/store instructions.
-static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
- cl::init(10), cl::Hidden);
-
// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
cl::init(true), cl::Hidden);
@@ -178,13 +171,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
int UnscaledOffset, unsigned Limit);
- // Scan the instruction list to find a register assigned with a const
- // value that can be combined with the current instruction (a load or store)
- // using base addressing with writeback. Scan forwards.
- MachineBasicBlock::iterator
- findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
- unsigned &Offset);
-
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan backwards.
@@ -196,19 +182,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
unsigned BaseReg, int Offset);
- bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
- unsigned IndexReg, unsigned &Offset);
-
// Merge a pre- or post-index base register update into a ld/st instruction.
MachineBasicBlock::iterator
mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update, bool IsPreIdx);
- MachineBasicBlock::iterator
- mergeConstOffsetInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update, unsigned Offset,
- int Scale);
-
// Find and merge zero store instructions.
bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
@@ -221,9 +199,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and merge a base register updates before or after a ld/st instruction.
bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
- // Find and merge a index ldr/st instructions into a base ld/st instruction.
- bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
-
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -506,16 +481,6 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
}
}
-static unsigned getBaseAddressOpcode(unsigned Opc) {
- // TODO: Add more index address loads/stores.
- switch (Opc) {
- default:
- llvm_unreachable("Opcode has no base address equivalent!");
- case AArch64::LDRBBroX:
- return AArch64::LDRBBui;
- }
-}
-
static unsigned getPostIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
@@ -757,20 +722,6 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
}
}
-// Make sure this is a reg+reg Ld/St
-static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- default:
- return false;
- // Scaled instructions.
- // TODO: Add more index address loads/stores.
- case AArch64::LDRBBroX:
- Scale = 1;
- return true;
- }
-}
-
static bool isRewritableImplicitDef(unsigned Opc) {
switch (Opc) {
default:
@@ -2097,63 +2048,6 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
return NextI;
}
-MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update,
- unsigned Offset, int Scale) {
- assert((Update->getOpcode() == AArch64::MOVKWi) &&
- "Unexpected const mov instruction to merge!");
- MachineBasicBlock::iterator E = I->getParent()->end();
- MachineBasicBlock::iterator NextI = next_nodbg(I, E);
- MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
- MachineInstr &MemMI = *I;
- unsigned Mask = (1 << 12) * Scale - 1;
- unsigned Low = Offset & Mask;
- unsigned High = Offset - Low;
- Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
- Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
- MachineInstrBuilder AddMIB, MemMIB;
-
- // Add IndexReg, BaseReg, High (the BaseReg may be SP)
- AddMIB =
- BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
- .addDef(IndexReg)
- .addUse(BaseReg)
- .addImm(High >> 12) // shifted value
- .addImm(12); // shift 12
- (void)AddMIB;
- // Ld/St DestReg, IndexReg, Imm12
- unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
- MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .add(getLdStRegOp(MemMI))
- .add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
- .addImm(Low / Scale)
- .setMemRefs(I->memoperands())
- .setMIFlags(I->mergeFlagsWith(*Update));
- (void)MemMIB;
-
- ++NumConstOffsetFolded;
- LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
- LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
- LLVM_DEBUG(PrevI->print(dbgs()));
- LLVM_DEBUG(dbgs() << " ");
- LLVM_DEBUG(Update->print(dbgs()));
- LLVM_DEBUG(dbgs() << " ");
- LLVM_DEBUG(I->print(dbgs()));
- LLVM_DEBUG(dbgs() << " with instruction:\n ");
- LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
- LLVM_DEBUG(dbgs() << " ");
- LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
- LLVM_DEBUG(dbgs() << "\n");
-
- // Erase the old instructions for the block.
- I->eraseFromParent();
- PrevI->eraseFromParent();
- Update->eraseFromParent();
-
- return NextI;
-}
-
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
MachineInstr &MI,
unsigned BaseReg, int Offset) {
@@ -2201,31 +2095,6 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
return false;
}
-bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
- MachineInstr &MI,
- unsigned IndexReg,
- unsigned &Offset) {
- // The update instruction source and destination register must be the
- // same as the load/store index register.
- if (MI.getOpcode() == AArch64::MOVKWi &&
- TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
-
- // movz + movk hold a large offset of a Ld/St instruction.
- MachineBasicBlock::iterator B = MI.getParent()->begin();
- MachineBasicBlock::iterator MBBI = &MI;
- MBBI = prev_nodbg(MBBI, B);
- MachineInstr &MovzMI = *MBBI;
- if (MovzMI.getOpcode() == AArch64::MOVZWi) {
- unsigned Low = MovzMI.getOperand(1).getImm();
- unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
- Offset = High + Low;
- // 12-bit optionally shifted immediates are legal for adds.
- return Offset >> 24 == 0;
- }
- }
- return false;
-}
-
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
@@ -2381,60 +2250,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
}
-MachineBasicBlock::iterator
-AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
- MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
- MachineBasicBlock::iterator B = I->getParent()->begin();
- MachineBasicBlock::iterator E = I->getParent()->end();
- MachineInstr &MemMI = *I;
- MachineBasicBlock::iterator MBBI = I;
-
- // If the load is the first instruction in the block, there's obviously
- // not any matching load or store.
- if (MBBI == B)
- return E;
-
- // Make sure the IndexReg is killed and the shift amount is zero.
- // TODO: Relex this restriction to extend, simplify processing now.
- if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
- !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
- (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
- return E;
-
- Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
-
- // Track which register units have been modified and used between the first
- // insn (inclusive) and the second insn.
- ModifiedRegUnits.clear();
- UsedRegUnits.clear();
- unsigned Count = 0;
- do {
- MBBI = prev_nodbg(MBBI, B);
- MachineInstr &MI = *MBBI;
-
- // Don't count transient instructions towards the search limit since there
- // may be different numbers of them if e.g. debug information is present.
- if (!MI.isTransient())
- ++Count;
-
- // If we found a match, return it.
- if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
- return MBBI;
- }
-
- // Update the status of what the instruction clobbered and used.
- LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
-
- // Otherwise, if the index register is used or modified, we have no match,
- // so return early.
- if (!ModifiedRegUnits.available(IndexReg) ||
- !UsedRegUnits.available(IndexReg))
- return E;
-
- } while (MBBI != B && Count < Limit);
- return E;
-}
-
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
@@ -2619,34 +2434,6 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
return false;
}
-bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
- int Scale) {
- MachineInstr &MI = *MBBI;
- MachineBasicBlock::iterator E = MI.getParent()->end();
- MachineBasicBlock::iterator Update;
-
- // Don't know how to handle unscaled pre/post-index versions below, so bail.
- if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
- return false;
-
- // Look back to try to find a const offset for index LdSt instruction. For
- // example,
- // mov x8, #LargeImm ; = a * (1<<12) + imm12
- // ldr x1, [x0, x8]
- // merged into:
- // add x8, x0, a * (1<<12)
- // ldr x1, [x8, imm12]
- unsigned Offset;
- Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
- if (Update != E && (Offset & (Scale - 1)) == 0) {
- // Merge the imm12 into the ld/st.
- MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
- return true;
- }
-
- return false;
-}
-
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool EnableNarrowZeroStOpt) {
@@ -2725,22 +2512,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
}
- // 5) Find a register assigned with a const value that can be combined with
- // into the load or store. e.g.,
- // mov x8, #LargeImm ; = a * (1<<12) + imm12
- // ldr x1, [x0, x8]
- // ; becomes
- // add x8, x0, a * (1<<12)
- // ldr x1, [x8, imm12]
- for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- MBBI != E;) {
- int Scale;
- if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
- Modified = true;
- else
- ++MBBI;
- }
-
return Modified;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ea9882160d6f..f86e6947c9cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1015,6 +1015,8 @@ bool AArch64RegisterInfo::shouldCoalesce(
MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
const TargetRegisterClass *DstRC, unsigned DstSubReg,
const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
+ MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
+
if (MI->isCopy() &&
((DstRC->getID() == AArch64::GPR64RegClassID) ||
(DstRC->getID() == AArch64::GPR64commonRegClassID)) &&
@@ -1023,5 +1025,38 @@ bool AArch64RegisterInfo::shouldCoalesce(
// which implements a 32 to 64 bit zero extension
// which relies on the upper 32 bits being zeroed.
return false;
+
+ auto IsCoalescerBarrier = [](const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AArch64::COALESCER_BARRIER_FPR16:
+ case AArch64::COALESCER_BARRIER_FPR32:
+ case AArch64::COALESCER_BARRIER_FPR64:
+ case AArch64::COALESCER_BARRIER_FPR128:
+ return true;
+ default:
+ return false;
+ }
+ };
+
+ // For calls that temporarily have to toggle streaming mode as part of the
+ // call-sequence, we need to be more careful when coalescing copy instructions
+ // so that we don't end up coalescing the NEON/FP result or argument register
+  // with a whole Z-register; otherwise, after coalescing, the register allocator
+  // would try to spill/reload the entire Z register.
+ //
+ // We do this by checking if the node has any defs/uses that are
+ // COALESCER_BARRIER pseudos. These are 'nops' in practice, but they exist to
+ // instruct the coalescer to avoid coalescing the copy.
+ if (MI->isCopy() && SubReg != DstSubReg &&
+ (AArch64::ZPRRegClass.hasSubClassEq(DstRC) ||
+ AArch64::ZPRRegClass.hasSubClassEq(SrcRC))) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (any_of(MRI.def_instructions(SrcReg), IsCoalescerBarrier))
+ return false;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (any_of(MRI.use_nodbg_instructions(DstReg), IsCoalescerBarrier))
+ return false;
+ }
+
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index eeae5303a3f8..acf067f2cc5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -28,6 +28,8 @@ def AArch64_restore_zt : SDNode<"AArch64ISD::RESTORE_ZT", SDTypeProfile<0, 2,
def AArch64_save_zt : SDNode<"AArch64ISD::SAVE_ZT", SDTypeProfile<0, 2,
[SDTCisInt<0>, SDTCisPtrTy<1>]>,
[SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
+def AArch64CoalescerBarrier
+ : SDNode<"AArch64ISD::COALESCER_BARRIER", SDTypeProfile<1, 1, []>, []>;
//===----------------------------------------------------------------------===//
// Instruction naming conventions.
@@ -189,6 +191,26 @@ def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
(MSR 0xde85, GPR64:$val)>;
def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
(MRS 0xde85)>;
+
+multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> {
+ def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> {
+ let Constraints = "$dst = $src";
+ }
+ foreach vt = vts in {
+ def : Pat<(vt (AArch64CoalescerBarrier (vt rc:$src))),
+ (!cast<Instruction>(NAME) rc:$src)>;
+ }
+}
+
+multiclass CoalescerBarriers {
+ defm _FPR16 : CoalescerBarrierPseudo<FPR16, [bf16, f16]>;
+ defm _FPR32 : CoalescerBarrierPseudo<FPR32, [f32]>;
+ defm _FPR64 : CoalescerBarrierPseudo<FPR64, [f64, v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16]>;
+ defm _FPR128 : CoalescerBarrierPseudo<FPR128, [f128, v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16]>;
+}
+
+defm COALESCER_BARRIER : CoalescerBarriers;
+
} // End let Predicates = [HasSME]
// Pseudo to match to smstart/smstop. This expands:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d611338fc268..992b11da7eee 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -233,15 +233,20 @@ static bool hasPossibleIncompatibleOps(const Function *F) {
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
- SMEAttrs CallerAttrs(*Caller);
- SMEAttrs CalleeAttrs(*Callee);
+ SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);
+
+ // When inlining, we should consider the body of the function, not the
+ // interface.
+ if (CalleeAttrs.hasStreamingBody()) {
+ CalleeAttrs.set(SMEAttrs::SM_Compatible, false);
+ CalleeAttrs.set(SMEAttrs::SM_Enabled, true);
+ }
+
if (CalleeAttrs.hasNewZABody())
return false;
if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
- (CallerAttrs.requiresSMChange(CalleeAttrs) &&
- (!CallerAttrs.hasStreamingInterfaceOrBody() ||
- !CalleeAttrs.hasStreamingBody()))) {
+ CallerAttrs.requiresSMChange(CalleeAttrs)) {
if (hasPossibleIncompatibleOps(Callee))
return false;
}
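To make the "body, not the interface" comment concrete, a hedged example using Clang's __arm_locally_streaming keyword: the callee's interface is non-streaming, but its body runs in streaming mode, so the inlining decision now treats it as streaming (SM_Enabled) rather than as a plain non-streaming function. The functions are made up for the example.

    // callee() toggles into streaming mode internally. With this change, inlining
    // callee() into caller() is evaluated as if callee were a streaming function,
    // and is refused if its body contains ops incompatible with a mode change.
    __arm_locally_streaming void callee(void) { /* streaming body */ }

    void caller(void) {
      callee();
    }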
@@ -4062,4 +4067,4 @@ bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
cast<BranchInst>(I->getNextNode())->isUnconditional())
return true;
return BaseT::shouldTreatInstructionLikeSelect(I);
-}
\ No newline at end of file
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
index cb29d5d94759..250e3e350c02 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1506,6 +1506,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureFlatAtomicFaddF32Inst,
FeatureImageInsts,
FeatureExtendedImageInsts,
+ FeatureFP8ConversionInsts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureSALUFloatInsts,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index a19b03b92923..152f495a452b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -59,6 +59,30 @@ def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
+def gi_wmmavisrc :
+ GIComplexOperandMatcher<s32, "selectWMMAVISrc">,
+ GIComplexPatternEquiv<WMMAVISrc>;
+
+def gi_wmmamods :
+ GIComplexOperandMatcher<s32, "selectWMMAModsF32NegAbs">,
+ GIComplexPatternEquiv<WMMAModsF32NegAbs>;
+
+def gi_wmmamodsf16Neg :
+ GIComplexOperandMatcher<s32, "selectWMMAModsF16Neg">,
+ GIComplexPatternEquiv<WMMAModsF16Neg>;
+
+def gi_wmmamodsf16NegAbs :
+ GIComplexOperandMatcher<s32, "selectWMMAModsF16NegAbs">,
+ GIComplexPatternEquiv<WMMAModsF16NegAbs>;
+
+def gi_swmmacindex8 :
+ GIComplexOperandMatcher<s32, "selectSWMMACIndex8">,
+ GIComplexPatternEquiv<SWMMACIndex8>;
+
+def gi_swmmacindex16 :
+ GIComplexOperandMatcher<s32, "selectSWMMACIndex16">,
+ GIComplexPatternEquiv<SWMMACIndex16>;
+
def gi_vop3opselmods :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
GIComplexPatternEquiv<VOP3OpSelMods>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4c35649cec6c..4f7bf3f7d35e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3048,6 +3048,336 @@ bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
return true;
}
+static MachineSDNode *buildRegSequence32(SmallVectorImpl<SDValue> &Elts,
+ llvm::SelectionDAG *CurDAG,
+ const SDLoc &DL) {
+ unsigned DstRegClass;
+ EVT DstTy;
+ switch (Elts.size()) {
+ case 8:
+ DstRegClass = AMDGPU::VReg_256RegClassID;
+ DstTy = MVT::v8i32;
+ break;
+ case 4:
+ DstRegClass = AMDGPU::VReg_128RegClassID;
+ DstTy = MVT::v4i32;
+ break;
+ case 2:
+ DstRegClass = AMDGPU::VReg_64RegClassID;
+ DstTy = MVT::v2i32;
+ break;
+ default:
+ llvm_unreachable("unhandled Reg sequence size");
+ }
+
+ SmallVector<SDValue, 17> Ops;
+ Ops.push_back(CurDAG->getTargetConstant(DstRegClass, DL, MVT::i32));
+ for (unsigned i = 0; i < Elts.size(); ++i) {
+ Ops.push_back(Elts[i]);
+ Ops.push_back(CurDAG->getTargetConstant(
+ SIRegisterInfo::getSubRegFromChannel(i), DL, MVT::i32));
+ }
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, DstTy, Ops);
+}
+
+static MachineSDNode *buildRegSequence16(SmallVectorImpl<SDValue> &Elts,
+ llvm::SelectionDAG *CurDAG,
+ const SDLoc &DL) {
+ SmallVector<SDValue, 8> PackedElts;
+ assert("unhandled Reg sequence size" &&
+ (Elts.size() == 8 || Elts.size() == 16));
+
+  // Pack 16-bit elements in pairs into a 32-bit register. If both elements are
+  // unpacked from the same 32-bit source, reuse it; otherwise pack them with v_perm.
+ for (unsigned i = 0; i < Elts.size(); i += 2) {
+ SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
+ SDValue HiSrc;
+ if (isExtractHiElt(Elts[i + 1], HiSrc) && LoSrc == HiSrc) {
+ PackedElts.push_back(HiSrc);
+ } else {
+ SDValue PackLoLo = CurDAG->getTargetConstant(0x05040100, DL, MVT::i32);
+ MachineSDNode *Packed =
+ CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64, DL, MVT::i32,
+ {Elts[i + 1], Elts[i], PackLoLo});
+ PackedElts.push_back(SDValue(Packed, 0));
+ }
+ }
+
+ return buildRegSequence32(PackedElts, CurDAG, DL);
+}
+
+static MachineSDNode *buildRegSequence(SmallVectorImpl<SDValue> &Elts,
+ llvm::SelectionDAG *CurDAG,
+ const SDLoc &DL, unsigned ElementSize) {
+ if (ElementSize == 16)
+ return buildRegSequence16(Elts, CurDAG, DL);
+ if (ElementSize == 32)
+ return buildRegSequence32(Elts, CurDAG, DL);
+ llvm_unreachable("Unhandled element size");
+}
+
+static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
+ SmallVectorImpl<SDValue> &Elts, SDValue &Src,
+ llvm::SelectionDAG *CurDAG, const SDLoc &DL,
+ unsigned ElementSize) {
+ if (ModOpcode == ISD::FNEG) {
+ Mods |= SISrcMods::NEG;
+ // Check if all elements also have abs modifier
+ SmallVector<SDValue, 8> NegAbsElts;
+ for (auto El : Elts) {
+ if (El.getOpcode() != ISD::FABS)
+ break;
+ NegAbsElts.push_back(El->getOperand(0));
+ }
+ if (Elts.size() != NegAbsElts.size()) {
+ // Neg
+ Src = SDValue(buildRegSequence(Elts, CurDAG, DL, ElementSize), 0);
+ } else {
+ // Neg and Abs
+ Mods |= SISrcMods::NEG_HI;
+ Src = SDValue(buildRegSequence(NegAbsElts, CurDAG, DL, ElementSize), 0);
+ }
+ } else {
+ assert(ModOpcode == ISD::FABS);
+ // Abs
+ Mods |= SISrcMods::NEG_HI;
+ Src = SDValue(buildRegSequence(Elts, CurDAG, DL, ElementSize), 0);
+ }
+}
+
+// Check all f16 elements for modifiers while looking through b32 and v2b16
+// build vectors; stop if an element does not satisfy ModifierCheck.
+static void
+checkWMMAElementsModifiersF16(BuildVectorSDNode *BV,
+ std::function<bool(SDValue)> ModifierCheck) {
+ for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
+ if (auto *F16Pair =
+ dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
+ for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
+ SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
+ if (!ModifierCheck(ElF16))
+ break;
+ }
+ }
+ }
+}
+
+bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ Src = In;
+ unsigned Mods = SISrcMods::OP_SEL_1;
+
+ // mods are on f16 elements
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
+ SmallVector<SDValue, 8> EltsF16;
+
+ checkWMMAElementsModifiersF16(BV, [&](SDValue Element) -> bool {
+ if (Element.getOpcode() != ISD::FNEG)
+ return false;
+ EltsF16.push_back(Element.getOperand(0));
+ return true;
+ });
+
+ // All elements have neg modifier
+ if (BV->getNumOperands() * 2 == EltsF16.size()) {
+ Src = SDValue(buildRegSequence16(EltsF16, CurDAG, SDLoc(In)), 0);
+ Mods |= SISrcMods::NEG;
+ Mods |= SISrcMods::NEG_HI;
+ }
+ }
+
+ // mods are on v2f16 elements
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
+ SmallVector<SDValue, 8> EltsV2F16;
+ for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
+ SDValue ElV2f16 = stripBitcast(BV->getOperand(i));
+ // Based on first element decide which mod we match, neg or abs
+ if (ElV2f16.getOpcode() != ISD::FNEG)
+ break;
+ EltsV2F16.push_back(ElV2f16.getOperand(0));
+ }
+
+ // All pairs of elements have neg modifier
+ if (BV->getNumOperands() == EltsV2F16.size()) {
+ Src = SDValue(buildRegSequence32(EltsV2F16, CurDAG, SDLoc(In)), 0);
+ Mods |= SISrcMods::NEG;
+ Mods |= SISrcMods::NEG_HI;
+ }
+ }
+
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ Src = In;
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned ModOpcode;
+
+ // mods are on f16 elements
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
+ SmallVector<SDValue, 8> EltsF16;
+ checkWMMAElementsModifiersF16(BV, [&](SDValue ElF16) -> bool {
+ // Based on first element decide which mod we match, neg or abs
+ if (EltsF16.empty())
+ ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
+ if (ElF16.getOpcode() != ModOpcode)
+ return false;
+ EltsF16.push_back(ElF16.getOperand(0));
+ return true;
+ });
+
+ // All elements have ModOpcode modifier
+ if (BV->getNumOperands() * 2 == EltsF16.size())
+ selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, CurDAG, SDLoc(In),
+ 16);
+ }
+
+ // mods are on v2f16 elements
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
+ SmallVector<SDValue, 8> EltsV2F16;
+
+ for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
+ SDValue ElV2f16 = stripBitcast(BV->getOperand(i));
+ // Based on first element decide which mod we match, neg or abs
+ if (EltsV2F16.empty())
+ ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
+ if (ElV2f16->getOpcode() != ModOpcode)
+ break;
+ EltsV2F16.push_back(ElV2f16->getOperand(0));
+ }
+
+ // All elements have ModOpcode modifier
+ if (BV->getNumOperands() == EltsV2F16.size())
+ selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, CurDAG, SDLoc(In),
+ 32);
+ }
+
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ Src = In;
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned ModOpcode;
+ SmallVector<SDValue, 8> EltsF32;
+
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
+ for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
+ SDValue ElF32 = stripBitcast(BV->getOperand(i));
+ // Based on first element decide which mod we match, neg or abs
+ if (EltsF32.empty())
+ ModOpcode = (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
+ if (ElF32.getOpcode() != ModOpcode)
+ break;
+ EltsF32.push_back(ElF32.getOperand(0));
+ }
+
+ // All elements had ModOpcode modifier
+ if (BV->getNumOperands() == EltsF32.size())
+ selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, CurDAG, SDLoc(In),
+ 32);
+ }
+
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
+ BitVector UndefElements;
+ if (SDValue Splat = BV->getSplatValue(&UndefElements))
+ if (isInlineImmediate(Splat.getNode())) {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat)) {
+ unsigned Imm = C->getAPIntValue().getSExtValue();
+ Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
+ return true;
+ }
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat)) {
+ unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
+ return true;
+ }
+ llvm_unreachable("unhandled Constant node");
+ }
+ }
+
+ // 16 bit splat
+ SDValue SplatSrc32 = stripBitcast(In);
+ if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32)) {
+ if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
+ SDValue SplatSrc16 = stripBitcast(Splat32);
+ if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16)) {
+ if (SDValue Splat = SplatSrc16BV->getSplatValue()) {
+
+ // f16
+ if (isInlineImmediate(Splat.getNode())) {
+ const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat);
+ int64_t Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i16);
+ return true;
+ }
+
+ // bf16
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat)) {
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ APInt BF16Value = C->getAPIntValue();
+ APInt F32Value = BF16Value.zext(32).shl(16);
+ if (TII->isInlineConstant(F32Value)) {
+ int64_t Imm = F32Value.getSExtValue();
+ Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
+ SDValue &IndexKey) const {
+ unsigned Key = 0;
+ Src = In;
+
+ if (In.getOpcode() == ISD::SRL) {
+ const llvm::SDValue &ShiftSrc = In.getOperand(0);
+ ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ if (ShiftSrc.getValueType().getSizeInBits() == 32 && ShiftAmt &&
+ ShiftAmt->getZExtValue() % 8 == 0) {
+ Key = ShiftAmt->getZExtValue() / 8;
+ Src = ShiftSrc;
+ }
+ }
+
+ IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
+ SDValue &IndexKey) const {
+ unsigned Key = 0;
+ Src = In;
+
+ if (In.getOpcode() == ISD::SRL) {
+ const llvm::SDValue &ShiftSrc = In.getOperand(0);
+ ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ if (ShiftSrc.getValueType().getSizeInBits() == 32 && ShiftAmt &&
+ ShiftAmt->getZExtValue() == 16) {
+ Key = 1;
+ Src = ShiftSrc;
+ }
+ }
+
+ IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
Src = In;
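
Two of the new selections above rely on bit-level reasoning that is easier to follow in scalar form. The sketch below is an illustration only, written as plain integer arithmetic rather than DAG node building: the first helper models what the v_perm with selector 0x05040100 is used for (packing the low 16 bits of two 32-bit sources, even element in the low half, odd element in the high half), and the second mirrors how SelectSWMMACIndex8 folds a right shift by a multiple of 8 into the index_key operand.

    #include <cstdint>

    // Model of the packing done via V_PERM_B32 with selector 0x05040100:
    // keep the low 16 bits of each source and concatenate them.
    uint32_t packLo16Pair(uint32_t evenElt, uint32_t oddElt) {
      return (evenElt & 0xFFFFu) | ((oddElt & 0xFFFFu) << 16);
    }

    // Model of SelectSWMMACIndex8: when the index comes from (srl x, c) with
    // c a multiple of 8, the shift selects byte c/8 of x, so the shift can be
    // dropped and c/8 emitted as the index_key operand instead.
    bool foldIndexKey8(uint32_t shiftAmt, unsigned &key) {
      if (shiftAmt % 8 != 0)
        return false;   // keep the shifted value and index_key 0
      key = shiftAmt / 8;
      return true;
    }
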
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 8645490f0b16..3b42d88df0c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -240,6 +240,16 @@ private:
bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
+ bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const;
+ bool SelectWMMAModsF16Neg(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const;
+ bool SelectWMMAVISrc(SDValue In, SDValue &Src) const;
+
+ bool SelectSWMMACIndex8(SDValue In, SDValue &Src, SDValue &IndexKey) const;
+ bool SelectSWMMACIndex16(SDValue In, SDValue &Src, SDValue &IndexKey) const;
+
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 55d95154c758..2af53a664ff1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -577,6 +577,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});
setMaxAtomicSizeInBitsSupported(64);
+ setMaxDivRemBitWidthSupported(64);
}
bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index fdee74d58d26..f255d098b631 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3956,6 +3956,219 @@ AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
}};
}
+static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
+ MachineInstr *InsertPt,
+ MachineRegisterInfo &MRI) {
+ const TargetRegisterClass *DstRegClass;
+ switch (Elts.size()) {
+ case 8:
+ DstRegClass = &AMDGPU::VReg_256RegClass;
+ break;
+ case 4:
+ DstRegClass = &AMDGPU::VReg_128RegClass;
+ break;
+ case 2:
+ DstRegClass = &AMDGPU::VReg_64RegClass;
+ break;
+ default:
+ llvm_unreachable("unhandled Reg sequence size");
+ }
+
+ MachineIRBuilder B(*InsertPt);
+ auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
+ .addDef(MRI.createVirtualRegister(DstRegClass));
+ for (unsigned i = 0; i < Elts.size(); ++i) {
+ MIB.addReg(Elts[i]);
+ MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i));
+ }
+ return MIB->getOperand(0).getReg();
+}
+
+static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
+ SmallVectorImpl<Register> &Elts, Register &Src,
+ MachineInstr *InsertPt,
+ MachineRegisterInfo &MRI) {
+ if (ModOpcode == TargetOpcode::G_FNEG) {
+ Mods |= SISrcMods::NEG;
+ // Check if all elements also have abs modifier
+ SmallVector<Register, 8> NegAbsElts;
+ for (auto El : Elts) {
+ Register FabsSrc;
+ if (!mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc))))
+ break;
+ NegAbsElts.push_back(FabsSrc);
+ }
+ if (Elts.size() != NegAbsElts.size()) {
+ // Neg
+ Src = buildRegSequence(Elts, InsertPt, MRI);
+ } else {
+ // Neg and Abs
+ Mods |= SISrcMods::NEG_HI;
+ Src = buildRegSequence(NegAbsElts, InsertPt, MRI);
+ }
+ } else {
+ assert(ModOpcode == TargetOpcode::G_FABS);
+ // Abs
+ Mods |= SISrcMods::NEG_HI;
+ Src = buildRegSequence(Elts, InsertPt, MRI);
+ }
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
+ Register Src = Root.getReg();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned ModOpcode;
+ SmallVector<Register, 8> EltsF32;
+
+ if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
+ for (unsigned i = 0; i < BV->getNumSources(); ++i) {
+ MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
+ // Based on first element decide which mod we match, neg or abs
+ if (EltsF32.empty())
+ ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG) ? AMDGPU::G_FNEG
+ : AMDGPU::G_FABS;
+ if (ElF32->getOpcode() != ModOpcode)
+ break;
+ EltsF32.push_back(ElF32->getOperand(1).getReg());
+ }
+
+ // All elements had ModOpcode modifier
+ if (BV->getNumSources() == EltsF32.size()) {
+ selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(),
+ *MRI);
+ }
+ }
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
+ Register Src = Root.getReg();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ SmallVector<Register, 8> EltsV2F16;
+
+ if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
+ for (unsigned i = 0; i < CV->getNumSources(); ++i) {
+ Register FNegSrc;
+ if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc))))
+ break;
+ EltsV2F16.push_back(FNegSrc);
+ }
+
+ // All elements had ModOpcode modifier
+ if (CV->getNumSources() == EltsV2F16.size()) {
+ Mods |= SISrcMods::NEG;
+ Mods |= SISrcMods::NEG_HI;
+ Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI);
+ }
+ }
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
+ Register Src = Root.getReg();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned ModOpcode;
+ SmallVector<Register, 8> EltsV2F16;
+
+ if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
+ for (unsigned i = 0; i < CV->getNumSources(); ++i) {
+ MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
+ // Based on first element decide which mod we match, neg or abs
+ if (EltsV2F16.empty())
+ ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG) ? AMDGPU::G_FNEG
+ : AMDGPU::G_FABS;
+ if (ElV2F16->getOpcode() != ModOpcode)
+ break;
+ EltsV2F16.push_back(ElV2F16->getOperand(1).getReg());
+ }
+
+ // All elements had ModOpcode modifier
+ if (CV->getNumSources() == EltsV2F16.size()) {
+ MachineIRBuilder B(*Root.getParent());
+ selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(),
+ *MRI);
+ }
+ }
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
+ std::optional<FPValueAndVReg> FPValReg;
+ if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) {
+ if (TII.isInlineConstant(FPValReg->Value.bitcastToAPInt())) {
+ return {{[=](MachineInstrBuilder &MIB) {
+ MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
+ }}};
+ }
+ // Non-inlineable splat floats should not fall-through for integer immediate
+ // checks.
+ return {};
+ }
+
+ APInt ICst;
+ if (mi_match(Root.getReg(), *MRI, m_ICstOrSplat(ICst))) {
+ if (TII.isInlineConstant(ICst)) {
+ return {
+ {[=](MachineInstrBuilder &MIB) { MIB.addImm(ICst.getSExtValue()); }}};
+ }
+ }
+
+ return {};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
+ Register Src =
+ getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
+ unsigned Key = 0;
+
+ Register ShiftSrc;
+ std::optional<ValueAndVReg> ShiftAmt;
+ if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
+ MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
+ ShiftAmt->Value.getZExtValue() % 8 == 0) {
+ Key = ShiftAmt->Value.getZExtValue() / 8;
+ Src = ShiftSrc;
+ }
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
+
+ Register Src =
+ getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
+ unsigned Key = 0;
+
+ Register ShiftSrc;
+ std::optional<ValueAndVReg> ShiftAmt;
+ if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
+ MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
+ ShiftAmt->Value.getZExtValue() == 16) {
+ Src = ShiftSrc;
+ Key = 1;
+ }
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
+ }};
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
Register Src;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 12ea46c2895b..ef7630f137ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -200,6 +200,19 @@ private:
selectWMMAOpSelVOP3PMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectWMMAModsF32NegAbs(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectWMMAModsF16Neg(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectWMMAModsF16NegAbs(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectWMMAVISrc(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectSWMMACIndex8(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectSWMMACIndex16(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
selectVOP3OpSelMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 8e74d4c0e945..17ffb7ec988f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4178,10 +4178,45 @@ bool AMDGPULegalizerInfo::loadInputValue(
Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
- const ArgDescriptor *Arg;
+ const ArgDescriptor *Arg = nullptr;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
- std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
+
+ CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
+ const ArgDescriptor WorkGroupIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP9);
+ // If GridZ is not programmed in an entry function then the hardware will set
+ // it to all zeros, so there is no need to mask the GridY value in the low
+ // order bits.
+ const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister(
+ AMDGPU::TTMP7,
+ AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
+ const ArgDescriptor WorkGroupIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+ if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+ switch (ArgType) {
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
+ Arg = &WorkGroupIDX;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
+ Arg = &WorkGroupIDY;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
+ Arg = &WorkGroupIDZ;
+ ArgRC = &AMDGPU::SReg_32RegClass;
+ ArgTy = LLT::scalar(32);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Arg)
+ std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
if (!Arg) {
if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
@@ -6848,6 +6883,21 @@ bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
+ if (!ST.hasArchitectedSGPRs())
+ return false;
+ LLT S32 = LLT::scalar(32);
+ Register DstReg = MI.getOperand(0).getReg();
+ auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8));
+ auto LSB = B.buildConstant(S32, 25);
+ auto Width = B.buildConstant(S32, 5);
+ B.buildUbfx(DstReg, TTMP8, LSB, Width);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &B = Helper.MIRBuilder;
@@ -6970,6 +7020,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_workgroup_id_z:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_wave_id:
+ return legalizeWaveID(MI, B);
case Intrinsic::amdgcn_lds_kernel_id:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
@@ -7134,6 +7186,29 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID);
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntrinsic(MI, B);
+ case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
+ case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: {
+ Register Index = MI.getOperand(5).getReg();
+ LLT S32 = LLT::scalar(32);
+ if (MRI.getType(Index) != S32)
+ MI.getOperand(5).setReg(B.buildAnyExt(S32, Index).getReg(0));
+ return true;
+ }
+ case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: {
+ Register Index = MI.getOperand(7).getReg();
+ LLT S32 = LLT::scalar(32);
+ if (MRI.getType(Index) != S32)
+ MI.getOperand(7).setReg(B.buildAnyExt(S32, Index).getReg(0));
+ return true;
+ }
case Intrinsic::amdgcn_fmed3: {
GISelChangeObserver &Observer = Helper.Observer;
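
The argument-loading hunks above read the IDs straight out of architected trap-temporary SGPRs rather than preloaded user SGPRs. A scalar illustration of the layout described by the comments in the diff (illustration only, not how the lowering actually emits code):

    #include <cstdint>

    struct WorkgroupIds { uint32_t x, y, z; };

    // Workgroup IDs with architected SGPRs: X lives in TTMP9, Y in the low
    // half of TTMP7 and Z in the high half. The Y mask is skipped in entry
    // functions when GridZ is not programmed, because the hardware then
    // zeroes the upper half of TTMP7 anyway.
    WorkgroupIds readWorkgroupIds(uint32_t ttmp9, uint32_t ttmp7,
                                  bool mustMaskY) {
      return {ttmp9, mustMaskY ? (ttmp7 & 0xFFFFu) : ttmp7, ttmp7 >> 16};
    }

    // legalizeWaveID / lowerWaveID: the wave-in-group ID sits in TTMP8[29:25],
    // i.e. an unsigned bit-field extract of 5 bits starting at bit 25.
    uint32_t waveIdInGroup(uint32_t ttmp8) { return (ttmp8 >> 25) & 0x1Fu; }
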
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 56aabd4f6ab7..ecbe42681c66 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -212,6 +212,7 @@ public:
bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 5e73411cae9b..c1b244f50d93 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -521,10 +521,18 @@ static Value *promoteAllocaUserToVector(
// For memset, we don't need to know the previous value because we
// currently only allow memsets that cover the whole alloca.
Value *Elt = MSI->getOperand(1);
- if (DL.getTypeStoreSize(VecEltTy) > 1) {
- Value *EltBytes =
- Builder.CreateVectorSplat(DL.getTypeStoreSize(VecEltTy), Elt);
- Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
+ const unsigned BytesPerElt = DL.getTypeStoreSize(VecEltTy);
+ if (BytesPerElt > 1) {
+ Value *EltBytes = Builder.CreateVectorSplat(BytesPerElt, Elt);
+
+ // If the element type of the vector is a pointer, we need to first cast
+ // to an integer, then use a PtrCast.
+ if (VecEltTy->isPointerTy()) {
+ Type *PtrInt = Builder.getIntNTy(BytesPerElt * 8);
+ Elt = Builder.CreateBitCast(EltBytes, PtrInt);
+ Elt = Builder.CreateIntToPtr(Elt, VecEltTy);
+ } else
+ Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
}
return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
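
The memset fix above exists because LLVM IR has no direct bitcast from a vector of bytes to a pointer element type, so the splatted bytes are first reassembled into an integer of the element's store size and only then converted with inttoptr. Reduced to scalar arithmetic, the byte replication performed by the splat looks like this (assumed simplification; the real code emits IR via CreateVectorSplat, CreateBitCast and CreateIntToPtr):

    #include <cstddef>
    #include <cstdint>

    // Replicate the single memset byte across an element of bytesPerElt bytes;
    // for a pointer-typed element this integer would then go through inttoptr.
    uint64_t splatMemsetByte(uint8_t byte, size_t bytesPerElt) {
      uint64_t value = 0;
      for (size_t i = 0; i < bytesPerElt; ++i)
        value |= static_cast<uint64_t>(byte) << (8 * i);
      return value;
    }
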
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index bdd4e891f158..09fac963d222 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4505,6 +4505,22 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
+ case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
+ case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
+ case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
+ case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
+ case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
+ case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
+ case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_log:
case Intrinsic::amdgcn_exp2:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 67263f23b983..bb1c6b733729 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -414,6 +414,22 @@ def : SourceOfDivergence<int_amdgcn_wmma_f16_16x16x16_f16>;
def : SourceOfDivergence<int_amdgcn_wmma_bf16_16x16x16_bf16>;
def : SourceOfDivergence<int_amdgcn_wmma_i32_16x16x16_iu8>;
def : SourceOfDivergence<int_amdgcn_wmma_i32_16x16x16_iu4>;
+def : SourceOfDivergence<int_amdgcn_wmma_f32_16x16x16_fp8_fp8>;
+def : SourceOfDivergence<int_amdgcn_wmma_f32_16x16x16_fp8_bf8>;
+def : SourceOfDivergence<int_amdgcn_wmma_f32_16x16x16_bf8_fp8>;
+def : SourceOfDivergence<int_amdgcn_wmma_f32_16x16x16_bf8_bf8>;
+def : SourceOfDivergence<int_amdgcn_wmma_i32_16x16x32_iu4>;
+def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_f16>;
+def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_bf16>;
+def : SourceOfDivergence<int_amdgcn_swmmac_f16_16x16x32_f16>;
+def : SourceOfDivergence<int_amdgcn_swmmac_bf16_16x16x32_bf16>;
+def : SourceOfDivergence<int_amdgcn_swmmac_i32_16x16x32_iu8>;
+def : SourceOfDivergence<int_amdgcn_swmmac_i32_16x16x32_iu4>;
+def : SourceOfDivergence<int_amdgcn_swmmac_i32_16x16x64_iu4>;
+def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_fp8_fp8>;
+def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_fp8_bf8>;
+def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_bf8_fp8>;
+def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_bf8_bf8>;
def : SourceOfDivergence<int_amdgcn_global_load_tr>;
// The dummy boolean output is divergent from the IR's perspective,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 489cf85693ed..9ab657f4e7bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -151,6 +151,8 @@ public:
ImmTyOpSelHi,
ImmTyNegLo,
ImmTyNegHi,
+ ImmTyIndexKey8bit,
+ ImmTyIndexKey16bit,
ImmTyDPP8,
ImmTyDppCtrl,
ImmTyDppRowMask,
@@ -383,6 +385,8 @@ public:
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isCPol() const { return isImmTy(ImmTyCPol); }
+ bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
+ bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
@@ -656,6 +660,14 @@ public:
return isVISrcF16() || isVISrcB32();
}
+ bool isVISrc_64F16() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
+ }
+
+ bool isVISrc_64B32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
+ }
+
bool isVISrc_64B64() const {
return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
}
@@ -672,6 +684,14 @@ public:
return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
}
+ bool isVISrc_256B32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
+ }
+
+ bool isVISrc_256F32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
+ }
+
bool isVISrc_256B64() const {
return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
}
@@ -1047,6 +1067,8 @@ public:
case ImmTyOffset1: OS << "Offset1"; break;
case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
case ImmTyCPol: OS << "CPol"; break;
+ case ImmTyIndexKey8bit: OS << "index_key"; break;
+ case ImmTyIndexKey16bit: OS << "index_key"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
case ImmTyFORMAT: OS << "FORMAT"; break;
@@ -1604,6 +1626,11 @@ public:
ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
ParseStatus parseVReg32OrOff(OperandVector &Operands);
+ ParseStatus tryParseIndexKey(OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy);
+ ParseStatus parseIndexKey8bit(OperandVector &Operands);
+ ParseStatus parseIndexKey16bit(OperandVector &Operands);
+
ParseStatus parseDfmtNfmt(int64_t &Format);
ParseStatus parseUfmt(int64_t &Format);
ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
@@ -1784,6 +1811,8 @@ public:
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
+ void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
+
void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
@@ -3500,6 +3529,9 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
return !isInlineConstant(Inst, OpIdx);
} else if (MO.isReg()) {
auto Reg = MO.getReg();
+ if (!Reg) {
+ return false;
+ }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
auto PReg = mc2PseudoReg(Reg);
return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
@@ -4364,7 +4396,11 @@ bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
uint64_t TSFlags = MII.get(Opc).TSFlags;
// v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
- if (!(TSFlags & SIInstrFlags::IsDOT))
+ // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
+ // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
+ // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
+ if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
+ !(TSFlags & SIInstrFlags::IsSWMMAC))
return true;
int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
@@ -6465,6 +6501,33 @@ bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
return true;
}
+ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy) {
+ const char *Pref = "index_key";
+ int64_t ImmVal = 0;
+ SMLoc Loc = getLoc();
+ auto Res = parseIntWithPrefix(Pref, ImmVal);
+ if (!Res.isSuccess())
+ return Res;
+
+ if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
+ return Error(Loc, Twine("out of range ", StringRef(Pref)));
+
+ if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
+ return Error(Loc, Twine("out of range ", StringRef(Pref)));
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
+ return ParseStatus::Success;
+}
+
+ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
+ return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
+}
+
+ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
+ return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
+}
+
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
@@ -8303,12 +8366,20 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
- Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
Inst.addOperand(Inst.getOperand(0));
}
- if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
+ // Adding vdst_in operand is already covered for these DPP instructions in
+ // cvtVOP3DPP.
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
+ !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
assert(!IsPacked);
Inst.addOperand(Inst.getOperand(0));
}
@@ -8329,10 +8400,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
}
int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
- if (NegLoIdx != -1) {
+ if (NegLoIdx != -1)
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
+
+ int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
+ if (NegHiIdx != -1)
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
- }
const int Ops[] = { AMDGPU::OpName::src0,
AMDGPU::OpName::src1,
@@ -8352,11 +8425,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
if (OpSelHiIdx != -1)
OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
- if (NegLoIdx != -1) {
- int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
+ if (NegLoIdx != -1)
NegLo = Inst.getOperand(NegLoIdx).getImm();
+
+ if (NegHiIdx != -1)
NegHi = Inst.getOperand(NegHiIdx).getImm();
- }
for (int J = 0; J < 3; ++J) {
int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
@@ -8392,6 +8465,43 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
cvtVOP3P(Inst, Operands, OptIdx);
}
+static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
+ unsigned i, unsigned Opc, unsigned OpName) {
+ if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
+ ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
+ else
+ ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
+}
+
+void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
+ unsigned Opc = Inst.getOpcode();
+
+ ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
+ addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
+ addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
+ ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
+ ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
+
+ OptionalImmIndexMap OptIdx;
+ for (unsigned i = 5; i < Operands.size(); ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ OptIdx[Op.getImmTy()] = i;
+ }
+
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyIndexKey8bit);
+
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
+ addOptionalImmOperand(Inst, Operands, OptIdx,
+ AMDGPUOperand::ImmTyIndexKey16bit);
+
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
+
+ cvtVOP3P(Inst, Operands, OptIdx);
+}
+
//===----------------------------------------------------------------------===//
// VOPD
//===----------------------------------------------------------------------===//
@@ -8770,6 +8880,22 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
}
}
+ int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+ if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
+ Inst.addOperand(Inst.getOperand(0));
+ }
+
+ bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
+ if (IsVOP3CvtSrDpp) {
+ if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createReg(0));
+ }
+ }
+
auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
MCOI::TIED_TO);
if (TiedTo != -1) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 86096b0d80b4..a9968cfe25b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -260,8 +260,12 @@ DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)
@@ -704,6 +708,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableWMMAGFX1264, MI, QW, Address, CS);
} while (false);
if (Res && AMDGPU::isMAC(MI.getOpcode())) {
@@ -712,6 +720,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
AMDGPU::OpName::src2_modifiers);
}
+ if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
+ // Insert dummy unused src2_modifiers.
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src2_modifiers);
+ }
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
!AMDGPU::hasGDS(STI)) {
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
@@ -942,6 +957,7 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
// first add optional MI operands to check FI
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
+
if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
convertVOP3PDPPInst(MI);
} else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
@@ -951,6 +967,15 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);
+ int VDstInIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+ if (VDstInIdx != -1)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
@@ -977,6 +1002,15 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);
+ int VDstInIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+ if (VDstInIdx != -1)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index b6e4e65ff5b0..08bef7ad3002 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1716,14 +1716,14 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
}
bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
- if (!SIInstrInfo::isWMMA(*MI))
+ if (!SIInstrInfo::isWMMA(*MI) && !SIInstrInfo::isSWMMAC(*MI))
return false;
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- auto IsHazardFn = [MI, TII, TRI](const MachineInstr &I) {
- if (!SIInstrInfo::isWMMA(I))
+ auto IsHazardFn = [MI, TII, TRI, this](const MachineInstr &I) {
+ if (!SIInstrInfo::isWMMA(I) && !SIInstrInfo::isSWMMAC(I))
return false;
// Src0 or Src1 of the current wmma instruction overlaps with the dest of
@@ -1753,6 +1753,7 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
const MachineOperand *Src2Mods =
TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
const bool NoSrc2Mods =
+ !Src2Mods ||
(Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0;
// Exception: there is no hazard if the wmma instructions are of the same
// type and there is no input modifier on src2 of the current instruction.
@@ -1760,6 +1761,18 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
TII->pseudoToMCOpcode(MI->getOpcode())));
}
+ // GFX12+ allows overlap of matrix C with PrevDstReg (hardware will stall)
+ // but Index can't overlap with PrevDstReg.
+ if (AMDGPU::isGFX12Plus(ST)) {
+ if (SIInstrInfo::isSWMMAC(*MI)) {
+ const Register CurIndex =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
+ if (TRI->regsOverlap(PrevDstReg, CurIndex))
+ return true;
+ }
+ return false;
+ }
+
return false;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index e73e53aa270f..abfa4a3531e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1275,6 +1275,23 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
(ModIdx != -1) ? MI->getOperand(ModIdx).getImm() : DefaultValue;
}
+  // Print three neg/opsel values for wmma instructions (print 0 when there is
+  // no src_modifier operand, rather than omitting the value).
+ if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsSWMMAC ||
+ MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsWMMA) {
+ NumOps = 0;
+ int DefaultValue = Mod == SISrcMods::OP_SEL_1;
+ for (int OpName :
+ {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers}) {
+ int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
+ if (Idx != -1)
+ Ops[NumOps++] = MI->getOperand(Idx).getImm();
+ else
+ Ops[NumOps++] = DefaultValue;
+ }
+ }
+
const bool HasDstSel =
NumOps > 0 &&
Mod == SISrcMods::OP_SEL_0 &&
@@ -1305,6 +1322,16 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Opc = MI->getOpcode();
+ if (isCvt_F32_Fp8_Bf8_e64(Opc)) {
+ auto SrcMod =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ unsigned Mod = MI->getOperand(SrcMod).getImm();
+ unsigned Index0 = !!(Mod & SISrcMods::OP_SEL_0);
+ unsigned Index1 = !!(Mod & SISrcMods::OP_SEL_1);
+ if (Index0 || Index1)
+ O << " op_sel:[" << Index0 << ',' << Index1 << ']';
+ return;
+ }
if (isPermlane16(Opc)) {
auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
@@ -1336,6 +1363,26 @@ void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
}
+void AMDGPUInstPrinter::printIndexKey8bit(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
+ if (Imm == 0)
+ return;
+
+ O << " index_key:" << Imm;
+}
+
+void AMDGPUInstPrinter::printIndexKey16bit(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
+ if (Imm == 0)
+ return;
+
+ O << " index_key:" << Imm;
+}
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index e3958f88277d..e91ff86b219a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -139,6 +139,10 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printNegHi(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printIndexKey8bit(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printIndexKey16bit(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpAttr(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
index 8ab66d4fd5b8..19596d53b453 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -167,6 +167,9 @@ enum : uint64_t {
// ds_gws_* instructions.
GWS = UINT64_C(1) << 62,
+
+ // Is a SWMMAC instruction.
+ IsSWMMAC = UINT64_C(1) << 63,
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 2862a7787e75..a812cdc61500 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -208,6 +208,7 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
assert(Old.isReg() && Fold.isImm());
if (!(TSFlags & SIInstrFlags::IsPacked) || (TSFlags & SIInstrFlags::IsMAI) ||
+ (TSFlags & SIInstrFlags::IsWMMA) || (TSFlags & SIInstrFlags::IsSWMMAC) ||
(ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT)))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cf947dccafac..d6bf0d8cb2ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2072,11 +2072,45 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
const SIMachineFunctionInfo &MFI,
EVT VT,
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
- const ArgDescriptor *Reg;
+ const ArgDescriptor *Reg = nullptr;
const TargetRegisterClass *RC;
LLT Ty;
- std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
+ CallingConv::ID CC = DAG.getMachineFunction().getFunction().getCallingConv();
+ const ArgDescriptor WorkGroupIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP9);
+ // If GridZ is not programmed in an entry function then the hardware will set
+ // it to all zeros, so there is no need to mask the GridY value in the low
+ // order bits.
+ const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister(
+ AMDGPU::TTMP7,
+ AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
+ const ArgDescriptor WorkGroupIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+ if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+ switch (PVID) {
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
+ Reg = &WorkGroupIDX;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
+ Reg = &WorkGroupIDY;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
+ Reg = &WorkGroupIDZ;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Reg)
+ std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
if (!Reg) {
if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
// It's possible for a kernarg intrinsic call to appear in a kernel with
@@ -2505,28 +2539,24 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
}
}
- if (Info.hasWorkGroupIDX()) {
- Register Reg = Info.addWorkGroupIDX(HasArchitectedSGPRs);
- if (!HasArchitectedSGPRs)
+ if (!HasArchitectedSGPRs) {
+ if (Info.hasWorkGroupIDX()) {
+ Register Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDY()) {
- Register Reg = Info.addWorkGroupIDY(HasArchitectedSGPRs);
- if (!HasArchitectedSGPRs)
+ if (Info.hasWorkGroupIDY()) {
+ Register Reg = Info.addWorkGroupIDY();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDZ()) {
- Register Reg = Info.addWorkGroupIDZ(HasArchitectedSGPRs);
- if (!HasArchitectedSGPRs)
+ if (Info.hasWorkGroupIDZ()) {
+ Register Reg = Info.addWorkGroupIDZ();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
-
- CCInfo.AllocateReg(Reg);
+ CCInfo.AllocateReg(Reg);
+ }
}
if (Info.hasWorkGroupInfo()) {
@@ -7890,6 +7920,17 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
return Loads[0];
}
+SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
+ // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
+ if (!Subtarget->hasArchitectedSGPRs())
+ return {};
+ SDLoc SL(Op);
+ MVT VT = MVT::i32;
+ SDValue TTMP8 = DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP8, VT);
+ return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8,
+ DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
+}
+
SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
unsigned Dim,
const ArgDescriptor &Arg) const {
@@ -8060,6 +8101,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_workgroup_id_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_wave_id:
+ return lowerWaveID(DAG, Op);
case Intrinsic::amdgcn_lds_kernel_id: {
if (MFI->isEntryFunction())
return getLDSKernelId(DAG, DL);
@@ -8242,6 +8285,36 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SIInstrInfo::MO_ABS32_LO);
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
}
+ case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
+ case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: {
+ if (Op.getOperand(4).getValueType() == MVT::i32)
+ return SDValue();
+
+ SDLoc SL(Op);
+ auto IndexKeyi32 = DAG.getAnyExtOrTrunc(Op.getOperand(4), SL, MVT::i32);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
+ Op.getOperand(3), IndexKeyi32);
+ }
+ case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: {
+ if (Op.getOperand(6).getValueType() == MVT::i32)
+ return SDValue();
+
+ SDLoc SL(Op);
+ auto IndexKeyi32 = DAG.getAnyExtOrTrunc(Op.getOperand(6), SL, MVT::i32);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, Op.getValueType(),
+ {Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
+ Op.getOperand(3), Op.getOperand(4), Op.getOperand(5),
+ IndexKeyi32, Op.getOperand(7)});
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
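
The lowerWaveID hunk above reads the per-workgroup wave ID from the architected SGPR TTMP8, bits [29:25]. As a minimal stand-alone sketch (not part of the commit; waveIdInGroup is an illustrative name), the bitfield extract it emits amounts to:

    #include <cstdint>

    // Sketch only: models the BFE_U32 that lowerWaveID builds for
    // Intrinsic::amdgcn_wave_id, assuming ttmp8 holds the raw register value.
    uint32_t waveIdInGroup(uint32_t ttmp8) {
      return (ttmp8 >> 25) & 0x1f; // 5-bit field at TTMP8[29:25]
    }
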
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
index d66ba0b59ba9..e436c23af5bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -80,6 +80,7 @@ private:
SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
unsigned NewOpcode) const;
+ SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
const ArgDescriptor &ArgDesc) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 1b66d163714f..ab536f8f49d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -161,6 +161,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// ds_gws_* instructions.
field bit GWS = 0;
+ // This bit indicates that this is one of the SWMMAC instructions.
+ field bit IsSWMMAC = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -248,6 +251,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{62} = GWS;
+ let TSFlags{63} = IsSWMMAC;
+
let SchedRW = [Write32Bit];
let AsmVariantName = AMDGPUAsmVariants.Default;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index fc85b089aa47..1c9dacc09f81 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -802,6 +802,14 @@ public:
return isMFMA(MI) || isWMMA(MI);
}
+ static bool isSWMMAC(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
+ }
+
+ bool isSWMMAC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
+ }
+
bool isDOT(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a6820544f4b4..45be81950aa3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1088,6 +1088,9 @@ def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
+def IndexKey16bit : CustomOperand<i32, 1>;
+def IndexKey8bit : CustomOperand<i32, 1>;
+
def dpp8 : CustomOperand<i32, 0, "DPP8">;
def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;
@@ -1344,6 +1347,13 @@ def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
+def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
+def WMMAModsF16Neg : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">;
+def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
+def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
+def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
+def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;
+
def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
@@ -1684,8 +1694,9 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if(HasOMod,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod0:$clamp, omod0:$omod),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- clampmod0:$clamp))
+ !if (HasClamp,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod0:$clamp),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0)))
/* else */,
// VOP1 without modifiers
!if (HasClamp,
@@ -2278,6 +2289,9 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field bit IsDOT = 0;
field bit IsSingle = 0;
field bit IsWMMA = 0;
+ field bit IsSWMMAC = 0;
+
+ field bit IsFP8 = 0;
field bit HasDst = !ne(DstVT.Value, untyped.Value);
field bit HasDst32 = HasDst;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9ff66a094f99..0336ec4985ea 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -751,35 +751,21 @@ public:
}
// Add system SGPRs.
- Register addWorkGroupIDX(bool HasArchitectedSGPRs) {
- Register Reg =
- HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP9 : getNextSystemSGPR();
- ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(Reg);
- if (!HasArchitectedSGPRs)
- NumSystemSGPRs += 1;
-
+ Register addWorkGroupIDX() {
+ ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
+ NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDX.getRegister();
}
- Register addWorkGroupIDY(bool HasArchitectedSGPRs) {
- Register Reg =
- HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP7 : getNextSystemSGPR();
- unsigned Mask = HasArchitectedSGPRs && hasWorkGroupIDZ() ? 0xffff : ~0u;
- ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(Reg, Mask);
- if (!HasArchitectedSGPRs)
- NumSystemSGPRs += 1;
-
+ Register addWorkGroupIDY() {
+ ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
+ NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDY.getRegister();
}
- Register addWorkGroupIDZ(bool HasArchitectedSGPRs) {
- Register Reg =
- HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP7 : getNextSystemSGPR();
- unsigned Mask = HasArchitectedSGPRs ? 0xffff << 16 : ~0u;
- ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(Reg, Mask);
- if (!HasArchitectedSGPRs)
- NumSystemSGPRs += 1;
-
+ Register addWorkGroupIDZ() {
+ ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
+ NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDZ.getRegister();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index f42af89cf5e6..b3265b73fa7e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1341,9 +1341,14 @@ def VCSrc_v2f16 : RegOrV2F16 <"VS_32", "OPERAND_REG_INLINE_C">;
// VISrc_* Operands with a VGPR or an inline constant
//===----------------------------------------------------------------------===//
+def VISrc_64_f16 : RegOrF16 <"VReg_64", "OPERAND_REG_INLINE_C">;
+def VISrc_64_b32 : RegOrB32 <"VReg_64", "OPERAND_REG_INLINE_C">;
def VISrc_64_f64 : RegOrF64 <"VReg_64", "OPERAND_REG_INLINE_C">;
+def VISrc_128_f16 : RegOrF16 <"VReg_128", "OPERAND_REG_INLINE_C">;
def VISrc_128_b32 : RegOrB32 <"VReg_128", "OPERAND_REG_INLINE_C">;
def VISrc_128_f32 : RegOrF32 <"VReg_128", "OPERAND_REG_INLINE_C">;
+def VISrc_256_b32 : RegOrB32 <"VReg_256", "OPERAND_REG_INLINE_C">;
+def VISrc_256_f32 : RegOrF32 <"VReg_256", "OPERAND_REG_INLINE_C">;
def VISrc_256_f64 : RegOrF64 <"VReg_256", "OPERAND_REG_INLINE_C">;
def VISrc_512_b32 : RegOrB32 <"VReg_512", "OPERAND_REG_INLINE_C">;
def VISrc_512_f32 : RegOrF32 <"VReg_512", "OPERAND_REG_INLINE_C">;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 0bf9452d822e..106fdb19f278 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -529,6 +529,17 @@ bool isPermlane16(unsigned Opc) {
Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}
+bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
+ return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
+ Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
+}
+
bool isGenericAtomic(unsigned Opc) {
return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index d3f55c792017..11b0bc5c8171 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -535,6 +535,9 @@ bool isPermlane16(unsigned Opc);
LLVM_READNONE
bool isGenericAtomic(unsigned Opc);
+LLVM_READNONE
+bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
+
namespace VOPD {
enum Component : unsigned {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 95a1d8696347..ef652fce6548 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -571,6 +571,7 @@ let SubtargetPredicate = isGFX9Only in {
} // End SubtargetPredicate = isGFX9Only
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
+ let HasExtDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 1;
let HasExt = 1;
@@ -599,6 +600,7 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
(inst_sdwa 0, $src, 0, 0, index)
>;
+let SubtargetPredicate = isGFX9Only in {
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
@@ -617,6 +619,7 @@ foreach Index = [1, 2, 3] in {
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
+} // End SubtargetPredicate = isGFX9Only
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
@@ -626,11 +629,77 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
(inst_e32 $src))
>;
-foreach Index = [0, -1] in {
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
- V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
- V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+let SubtargetPredicate = isGFX9Only in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+ }
+}
+
+
+// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions.
+def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> {
+ let HasOpSel = 1;
+ let HasExtVOP3DPP = 0;
+}
+
+def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]> {
+ let HasOpSel = 1;
+ let HasExtDPP = 1;
+ let HasExtVOP3DPP = 1;
+ let IsFP8 = 1;
+ let HasClamp = 0;
+ let HasOMod = 0;
+ let HasModifiers = 1;
+ let Src1VOP3DPP = Src1RC64;
+}
+
+let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
+ SchedRW = [WriteFloatCvt] in {
+ defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+}
+
+class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 !if(index{0},
+ !if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1),
+ SRCMODS.OP_SEL_0),
+ !if(index{1}, SRCMODS.OP_SEL_1, 0)),
+ $src, 0),
+ (inst_e32 $src))
+>;
+
+let SubtargetPredicate = isGFX12Plus in {
+ foreach Index = [0, 1, 2, 3] in {
+ def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index,
+ V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>;
+ def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_bf8, Index,
+ V_CVT_F32_BF8_e32, V_CVT_F32_BF8_OP_SEL_e64>;
+ }
+}
+
+class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (v2f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE),
+ (inst_e32 $src))
+>;
+
+let SubtargetPredicate = isGFX12Plus in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>;
+ def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_OP_SEL_e64>;
+ }
}
let SubtargetPredicate = isGFX10Plus in {
@@ -853,6 +922,20 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
+// Define VOP1 instructions using the pseudo instruction with its old profile and
+// VOP3 using the OpSel profile for the pseudo instruction.
+defm V_CVT_F32_FP8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06c, "V_CVT_F32_FP8", "v_cvt_f32_fp8">;
+defm V_CVT_F32_FP8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
+
+defm V_CVT_F32_BF8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06d, "V_CVT_F32_BF8", "v_cvt_f32_bf8">;
+defm V_CVT_F32_BF8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
+
+defm V_CVT_PK_F32_FP8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_OP_SEL", "v_cvt_pk_f32_fp8">;
+
+defm V_CVT_PK_F32_BF8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">;
+
defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
"V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d,
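
The *_OP_SEL conversion variants defined above use the two op_sel bits to choose which byte of the packed i32 source v_cvt_f32_fp8/bf8 converts (index 0-3 in Cvt_F32_F8_Pat_OpSel). A rough sketch of that byte selection, with decode_fp8 as a hypothetical stand-in for the hardware conversion rather than any LLVM API:

    #include <cstdint>

    // Hypothetical helper standing in for the fp8 -> f32 hardware conversion.
    float decode_fp8(uint8_t bits);

    // Sketch only: index corresponds to the op_sel_0/op_sel_1 encoding and
    // picks one byte out of the packed 32-bit source.
    float cvt_f32_fp8_byte(uint32_t packed, unsigned index) {
      return decode_fp8((packed >> (8 * index)) & 0xff);
    }
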
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 713b4712d563..14db52210214 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -520,8 +520,26 @@ def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
VGPR_32:$vdst_in, op_sel0:$op_sel);
+ let InsVOP3DPP = (ins VGPR_32:$old,
+ FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
+ FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
+ VGPR_32:$vdst_in, op_sel0:$op_sel,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+
+ let InsVOP3DPP16 = (ins VGPR_32:$old,
+ FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
+ FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
+ VGPR_32:$vdst_in, op_sel0:$op_sel,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
+ let InsVOP3DPP8 = (ins VGPR_32:$old,
+ FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
+ FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
+ VGPR_32:$vdst_in, op_sel0:$op_sel, dpp8:$dpp8, FI:$fi);
+
let HasClamp = 0;
- let HasExtVOP3DPP = 0;
+ let HasExtVOP3DPP = 1;
}
def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
@@ -530,14 +548,36 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
FP32InputMods:$src2_modifiers, VGPR_32:$src2,
op_sel0:$op_sel);
+ let InsVOP3DPP16 = (ins VGPR_32:$old,
+ FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
+ FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
+ FP32InputMods:$src2_modifiers, VGPR_32:$src2,
+ op_sel0:$op_sel, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
+ let InsVOP3DPP8 = (ins VGPR_32:$old,
+ FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
+ FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
+ FP32InputMods:$src2_modifiers, VGPR_32:$src2,
+ op_sel0:$op_sel, dpp8:$dpp8, FI:$fi);
let HasClamp = 0;
let HasSrc2 = 0;
let HasSrc2Mods = 1;
+ let HasExtVOP3DPP = 1;
+ let HasOpSel = 1;
let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
getAsmVOP3OpSel<3, HasClamp, HasOMod,
HasSrc0FloatMods, HasSrc1FloatMods,
HasSrc2FloatMods>.ret);
- let HasExtVOP3DPP = 0;
+ let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
+ getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
+ HasOMod, 0, 1, HasSrc0FloatMods,
+ HasSrc1FloatMods,
+ HasSrc2FloatMods>.ret>.ret);
+ let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
+ getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
+ HasOMod, 0, 1, HasSrc0FloatMods,
+ HasSrc1FloatMods,
+ HasSrc2FloatMods>.ret>.ret);
}
def IsPow2Plus1: PatLeaf<(i32 imm), [{
@@ -618,13 +658,13 @@ let SubtargetPredicate = HasFP8ConversionInsts, mayRaiseFPException = 0,
class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat<
(i32 (node f32:$src0, f32:$src1, i32:$old, index)),
- (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0))
+ (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, 0)
>;
class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat<
(i32 (node f32:$src0, i32:$src1, i32:$old, index)),
(inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1,
- !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0))
+ !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0)
>;
foreach Index = [0, -1] in {
@@ -998,6 +1038,11 @@ defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
+defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_gfx12<0x369>;
+defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36a>;
+defm V_CVT_SR_FP8_F32 : VOP3Only_Realtriple_gfx12<0x36b>;
+defm V_CVT_SR_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36c>;
+
//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 0c7a08cd4bc9..107b95a9ca8e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -936,16 +936,19 @@ multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator
!cast<Instruction>(NAME # _threeaddr # Suffix)>;
}
- if !eq(Type, WMMAOpSel) then {
- def : WMMAOpSelPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
- } else if !eq(Type, WMMAUIClamp) then {
- def : WMMAUIClampPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
- } else {
- def : WMMARegularPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
+ let SubtargetPredicate = isGFX11Only in {
+ if !eq(Type, WMMAOpSel) then {
+ def : WMMAOpSelPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
+ } else if !eq(Type, WMMAUIClamp) then {
+ def : WMMAUIClampPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
+ } else {
+ def : WMMARegularPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
+ }
}
}
+
let WaveSizePredicate = isWave32 in {
defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16", VOP_V8F32_V16F16_V16F16_V8F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular, 1>;
defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular, 1>;
@@ -969,6 +972,398 @@ let WaveSizePredicate = isWave64 in {
}
+class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
+ bit _IsIU, bit _IsFP8BF8>
+ : VOP3P_Profile<VOPProfile<ArgTy>> {
+ bit IsIU = _IsIU;
+ bit IsFP8BF8 = _IsFP8BF8;
+ bit IsF16BF16 = !not(!or(IsIU, IsFP8BF8));
+
+ int IndexType = _IndexType;
+
+ let IsPacked = 1;
+ let IsWMMA = !not(_IsSWMMAC);
+ let IsSWMMAC = _IsSWMMAC;
+
+ bit IsAB_F16 = !and(IsF16BF16, ArgTy[1].isFP);
+ bit IsAB_BF16 = !and(IsF16BF16, isIntType<ArgTy[1]>.ret);
+ bit IsC_F32 = !or(!eq(ArgTy[3], v8f32), !eq(ArgTy[3], v4f32));
+ bit IsC_BF16 = !or(!eq(ArgTy[3], v8i16), !eq(ArgTy[3], v4i16));
+ bit IsC_F16 = !or(!eq(ArgTy[3], v8f16), !eq(ArgTy[3], v4f16));
+
+ bit NegLo01 = !or(IsF16BF16, IsIU);
+ bit NegLo2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
+ bit NegHi01 = IsF16BF16;
+ bit NegHi2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
+ bit NegLoAny = !or(NegLo01, NegLo2);
+ bit NegHiAny = !or(NegHi01, NegHi2);
+
+ let DstRC = !cond(!eq(ArgTy[0], v8f32): VDst_256,
+ !eq(ArgTy[0], v8i32): VDst_256,
+ !eq(ArgTy[0], v8f16): VDst_128,
+ !eq(ArgTy[0], v8i16): VDst_128,
+ !eq(ArgTy[0], v4f32): VDst_128,
+ !eq(ArgTy[0], v4i32): VDst_128,
+ !eq(ArgTy[0], v4f16): VDst_64,
+ !eq(ArgTy[0], v4i16): VDst_64);
+ let Src0RC64 = !cond(!eq(ArgTy[1], v8f16): VRegSrc_128,
+ !eq(ArgTy[1], v4f16): VRegSrc_64,
+ !eq(ArgTy[1], v4i16): VRegSrc_64,
+ !eq(ArgTy[1], v8i16): VRegSrc_128,
+ !eq(ArgTy[1], v4i32): VRegSrc_128,
+ !eq(ArgTy[1], v2i32): VRegSrc_64,
+ !eq(ArgTy[1], i32) : VRegSrc_32);
+ let Src1RC64 = !cond(!eq(ArgTy[2], v16f16): VRegSrc_256,
+ !eq(ArgTy[2], v16i16): VRegSrc_256,
+ !eq(ArgTy[2], v8f16): VRegSrc_128,
+ !eq(ArgTy[2], v8i16): VRegSrc_128,
+ !eq(ArgTy[2], v4i32): VRegSrc_128,
+ !eq(ArgTy[1], v4i16): VRegSrc_64,
+ !eq(ArgTy[1], v4f16): VRegSrc_64,
+ !eq(ArgTy[2], v2i32): VRegSrc_64,
+ !eq(ArgTy[2], i32) : VRegSrc_32);
+ let Src2RC64 = !if(IsSWMMAC, DstRC,
+ !cond(!eq(ArgTy[3], v8f32): VISrc_256_f32,
+ !eq(ArgTy[3], v8i32): VISrc_256_b32,
+ !eq(ArgTy[3], v8f16): VISrc_128_f16,
+ !eq(ArgTy[3], v8i16): VISrc_128_f32, // bf16
+ !eq(ArgTy[3], v4f16): VISrc_64_f16,
+ !eq(ArgTy[3], v4i16): VISrc_64_b32,
+ !eq(ArgTy[3], v4i32): VISrc_128_b32,
+ !eq(ArgTy[3], v4f32): VISrc_128_f32));
+
+ // For f16 and bf16 matrices A and B, each element can be modified by
+ // fneg (neg_lo, neg_hi = 1). For iu4 and iu8 matrices A and B, neg_lo is
+ // overloaded to mean unsigned/signed: neg_lo = 0 (u4 and u8) is unsigned (zext),
+ // neg_lo = 1 (i4 and i8) is signed (sext). For an f16, bf16 or f32 matrix C, each
+ // element can be modified by fneg (neg_lo = 1) or fabs (neg_hi = 1).
+
+ // Opcode | src0/src1 - matrix A/B | src2 - matrix C or Index
+ // ---------------------------------------------------------------------------
+ // wmma f32_f16 | both neg_lo,neg_hi = 1 | neg_lo = 1 neg C(f32)
+ // wmma f32_bf16 | neg A/B (f16 or bf16) | neg_hi = 1 abs C(f32)
+ // ---------------------------------------------------------------------------
+ // wmma f16_f16 | both neg_lo,neg_hi = 1 | neg_lo = 1 neg C(f16 or bf16)
+ // wmma bf16_bf16 | neg A/B (f16 or bf16) | neg_hi = 1 abs C(f16 or bf16)
+ // ---------------------------------------------------------------------------
+ // wmma i32_iu8/iu4 | neg_lo = 0 u4/u8(zext) | not allowed for
+ // | neg_lo = 1 i4/i8(sext) | i32 matrices
+ // ---------------------------------------------------------------------------
+ // wmma f32_fp8/bf8 | not allowed for | neg_lo = 1 neg C(f32)
+ // (4 instructions) | f8 and bf8 matrices | neg_hi = 1 abs C(f32)
+ // ---------------------------------------------------------------------------
+ // swmmac f32_f16 | both neg_lo,neg_hi = 1 | not allowed for sparse matrix
+ // swmmac f32_bf16 | neg A/B (f16 or bf16) | A Index - matrix C is in dst
+ // ---------------------------------------------------------------------------
+ // swmmac f16_f16 | both neg_lo,neg_hi = 1 | not allowed for sparse matrix
+ // swmmac bf16_bf16 | neg A/B (f16 or bf16) | A Index - matrix C is in dst
+ // ---------------------------------------------------------------------------
+ // swmmac i32_iu8/iu4 | neg_lo = 0 u4/u8(zext) | not allowed for sparse matrix
+ // | neg_lo = 1 i4/i8(sext) | A Index - matrix C is in dst
+ // ---------------------------------------------------------------------------
+ // swmmac f32_fp8/bf8 | not allowed for | not allowed for sparse matrix
+ // (4 instructions) | f8 and bf8 matrices | A Index - matrix C is in dst
+
+ // pseudo
+
+ // fp8/bf8 wmmas don't use src0/src1 modifiers; iu wmmas use neg_lo; f16 and
+ // bf16 wmmas use neg_lo and neg_hi. iu wmmas (C is i32) don't use src2
+ // modifiers; the remaining wmmas (f16, bf16 and f8/bf8) use neg_lo and neg_hi
+ // for C (C is f32, f16 or bf16). swmmac uses index_key and doesn't use src2
+ // modifiers.
+
+ dag Src0Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src0_modifiers));
+ dag Src1Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src1_modifiers));
+ dag Src2Mods = !if(IsIU, (ins), (ins PackedF16InputMods:$src2_modifiers));
+ dag IndexKey = !cond(!eq(IndexType, 0) : (ins),
+ !eq(IndexType, 8) : (ins IndexKey8bit:$index_key_8bit),
+ !eq(IndexType, 16): (ins IndexKey16bit:$index_key_16bit));
+ dag Clamp = !if(IsIU, (ins clampmod0:$clamp), (ins));
+ dag Neg = !cond(!and(NegLoAny, NegHiAny) : (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi),
+ !and(NegLoAny, !not(NegHiAny)) : (ins neg_lo0:$neg_lo),
+ !and(!not(NegLoAny), !not(NegHiAny)) : (ins));
+
+ let InsVOP3P = !con(Src0Mods, (ins Src0RC64:$src0), Src1Mods, (ins Src1RC64:$src1),
+ !cond(IsWMMA : !con(Src2Mods, (ins Src2RC64:$src2)),
+ IsSWMMAC : !con((ins DstRC:$srcTiedDef), (ins VRegSrc_32:$src2), IndexKey)),
+ Clamp, Neg);
+
+ // asm
+
+ string IndexKeyAsm = !cond(!eq(IndexType, 0) : "",
+ !eq(IndexType, 8) : "$index_key_8bit",
+ !eq(IndexType, 16) : "$index_key_16bit");
+ string ClampAsm = !if(IsIU, "$clamp", "");
+ string NegAsm = !cond(!and(NegLoAny, NegHiAny) : "$neg_lo$neg_hi",
+ !and(NegLoAny, !not(NegHiAny)) : "$neg_lo",
+ !and(!not(NegLoAny), !not(NegHiAny)) : "");
+
+ let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#NegAsm#ClampAsm;
+
+ // isel patterns
+
+ dag Src0InPat = !cond(IsAB_F16 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
+ IsAB_BF16 : (ins Src0VT:$src0),
+ IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
+ IsFP8BF8 : (ins Src0VT:$src0));
+ dag Src0OutPat = !cond(IsAB_F16 : (ins i32:$src0_modifiers, Src0VT:$src0),
+ IsAB_BF16 : (ins (i32 8), Src0VT:$src0),
+ IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
+ IsFP8BF8 : (ins Src0VT:$src0));
+ dag Src1InPat = !cond(IsAB_F16 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
+ IsAB_BF16 : (ins Src1VT:$src1),
+ IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
+ IsFP8BF8 : (ins Src1VT:$src1));
+ dag Src1OutPat = !cond(IsAB_F16 : (ins i32:$src1_modifiers, Src1VT:$src1),
+ IsAB_BF16 : (ins (i32 8), Src1VT:$src1),
+ IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
+ IsFP8BF8 : (ins Src1VT:$src1));
+ dag Src2InPatWmma = !cond(IsC_F32 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
+ IsC_F16 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
+ IsC_BF16 : (ins Src2VT:$src2),
+ IsIU : (ins Src2VT:$src2),
+ IsSWMMAC : (ins));
+ dag Src2OutPatWmma = !cond(IsC_F32 : (ins i32:$src2_modifiers, Src2VT:$src2),
+ IsC_F16 : (ins i32:$src2_modifiers, Src2VT:$src2),
+ IsC_BF16 : (ins (i32 8), Src2VT:$src2),
+ IsIU : (ins Src2VT:$src2),
+ IsSWMMAC : (ins));
+ dag ClampPat = !if(IsIU, (ins i1:$clamp), (ins));
+ dag IndexInPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
+ !eq(IndexType, 8) : (ins (i32 (SWMMACIndex8 i32:$src2, i32:$index_key_8bit))),
+ !eq(IndexType, 16): (ins (i32 (SWMMACIndex16 i32:$src2, i32:$index_key_16bit))));
+ dag IndexOutPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
+ !eq(IndexType, 8) : (ins i32:$src2, i32:$index_key_8bit),
+ !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit));
+ dag Src2InlineInPat = (ins (Src2VT (WMMAVISrc Src2VT:$src2)));
+ dag Src2InlineOutPat = !con(!if(IsIU, (ins), (ins (i32 8))), (ins Src2VT:$src2));
+
+
+ dag WmmaInPat = !con(Src0InPat, Src1InPat, Src2InPatWmma, ClampPat);
+ dag WmmaOutPat = !con(Src0OutPat, Src1OutPat, Src2OutPatWmma, ClampPat);
+
+ dag SwmmacInPat = !con(Src0InPat, Src1InPat, (ins Src2VT:$srcTiedDef), IndexInPat, ClampPat);
+ dag SwmmacOutPat = !con(Src0OutPat, Src1OutPat, (ins Src2VT:$srcTiedDef), IndexOutPat, ClampPat);
+
+ // wmma pattern where src2 is inline imm uses _threeaddr pseudo,
+ // can't use _twoaddr since it would violate src2 tied to vdst constraint.
+ dag WmmaInlineInPat = !con(Src0InPat, Src1InPat, Src2InlineInPat, ClampPat);
+ dag WmmaInlineOutPat = !con(Src0OutPat, Src1OutPat, Src2InlineOutPat, ClampPat);
+}
+
+multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
+ let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let Constraints = "@earlyclobber $vdst,$vdst = $src2", isConvertibleToThreeAddress = 1 in
+ def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+ let PseudoInstr = Instr#PseudoInstrSuffix;
+ }
+
+ let Constraints = "@earlyclobber $vdst", SchedRW = [Write32Bit, Write32Bit] in
+ def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+ let PseudoInstr = Instr#PseudoInstrSuffix;
+ }
+
+ }
+ def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr),
+ !cast<Instruction>(NAME # _threeaddr)>;
+}
+
+multiclass SWMMACInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
+ def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
+ let Mnemonic = Instr;
+ let PseudoInstr = Instr#PseudoInstrSuffix;
+ let mayRaiseFPException = 0;
+ let ReadsModeReg = 0;
+ let AsmMatchConverter = "cvtSWMMAC";
+
+ let Constraints = "@earlyclobber $vdst,$vdst = $srcTiedDef";
+ }
+}
+
+// The first argument in the Profile lists the types of matrices D, A, B and C
+// (D = A * B + C) as used by LLVM IR; the types are vectors whose elements are
+// the matrix elements.
+// wave32:
+// For 16x16 matrices, lanes 0 to 31 each hold 8 matrix elements;
+// for 16x32 they hold 16 elements and for 16x64 they hold 32 elements.
+// wave64:
+// Lanes hold half as many elements as in wave32, with the exception of
+// 16x16_iu4: lanes 0-31 hold 8xi4 and the remaining lanes are ignored.
+
+// General idea of the element distribution differences:
+// wave32: lane n has 8 matrix elements
+// wave64: lane n has the first 4 elements, lane n+32 has the other 4
+
+// Index size: for every 2 elements in a lane you need 4 bits of index.
+
+// Non-standard types (iu8, iu4, fp8, bf8) are packed in vectors of i32s.
+// Their original type is given in the comment on the right and refers to A and B.
+
+def F32_F16_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v8f16, v8f16, v8f32], 0, 0, 0, 0>;
+def F32_BF16_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v8i16, v8i16, v8f32], 0, 0, 0, 0>;
+def F16_F16_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v8f16, v8f16, v8f16], 0, 0, 0, 0>;
+def BF16_BF16_WMMA_w32 : VOP3PWMMA_Profile<[v8i16, v8i16, v8i16, v8i16], 0, 0, 0, 0>;
+def I32_IU8_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v2i32, v8i32], 0, 0, 1, 0>; // 8xi8
+def I32_IU4X16_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, i32, i32, v8i32], 0, 0, 1, 0>; // 8xi4
+def F32_FP8BF8_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v2i32, v2i32, v8f32], 0, 0, 0, 1>; // 8xf8
+def I32_IU4X32_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v2i32, v8i32], 0, 0, 1, 0>; // 16xi4
+
+def F32_F16_WMMA_w64 : VOP3PWMMA_Profile<[v4f32, v4f16, v4f16, v4f32], 0, 0, 0, 0>;
+def F32_BF16_WMMA_w64 : VOP3PWMMA_Profile<[v4f32, v4i16, v4i16, v4f32], 0, 0, 0, 0>;
+def F16_F16_WMMA_w64 : VOP3PWMMA_Profile<[v4f16, v4f16, v4f16, v4f16], 0, 0, 0, 0>;
+def BF16_BF16_WMMA_w64 : VOP3PWMMA_Profile<[v4i16, v4i16, v4i16, v4i16], 0, 0, 0, 0>;
+def I32_IU8_WMMA_w64 : VOP3PWMMA_Profile<[v4i32, i32, i32, v4i32], 0, 0, 1, 0>; // 4xi8
+def I32_IU4X16_WMMA_w64 : VOP3PWMMA_Profile<[v4i32, i32, i32, v4i32], 0, 0, 1, 0>; // 8xi4 *
+def F32_FP8BF8_WMMA_w64 : VOP3PWMMA_Profile<[v4f32, i32, i32, v4f32], 0, 0, 0, 1>; // 4xf8
+def I32_IU4X32_WMMA_w64 : VOP3PWMMA_Profile<[v4i32, i32, i32, v4i32], 0, 0, 1, 0>; // 8xi4
+
+def F32_F16_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v8f16, v16f16, v8f32], 1, 16, 0, 0>;
+def F32_BF16_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v8i16, v16i16, v8f32], 1, 16, 0, 0>;
+def F16_F16_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f16, v8f16, v16f16, v8f16], 1, 16, 0, 0>;
+def BF16_BF16_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i16, v8i16, v16i16, v8i16], 1, 16, 0, 0>;
+def I32_IU8_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v4i32, v8i32], 1, 16, 1, 0>; // 8xi8, 16xi8
+def I32_IU4X32_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, i32, v2i32, v8i32], 1, 16, 1, 0>; // 8xi4, 16xi4
+def I32_IU4X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v4i32, v8i32], 1, 0, 1, 0>; // 16xi4, 32xi4 **
+def F32_FP8BF8_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v2i32, v4i32, v8f32], 1, 16, 0, 1>; // 8xf8, 16xf8
+
+def F32_F16_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32, v4f16, v8f16, v4f32], 1, 8, 0, 0>;
+def F32_BF16_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32, v4i16, v8i16, v4f32], 1, 8, 0, 0>;
+def F16_F16_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f16, v4f16, v8f16, v4f16], 1, 8, 0, 0>;
+def BF16_BF16_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i16, v4i16, v8i16, v4i16], 1, 8, 0, 0>;
+def I32_IU8_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i32, i32, v2i32, v4i32], 1, 8, 1, 0>; // 4xi8, 8xi8
+def I32_IU4X32_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i32, i32, i32, v4i32], 1, 16, 1, 0>; // 8xi4, 8xi4 ***
+def I32_IU4X64_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i32, i32, v2i32, v4i32], 1, 16, 1, 0>; // 8xi4, 16xi4
+def F32_FP8BF8_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32, i32, v2i32, v4f32], 1, 8, 0, 1>; // 4xf8, 8xf8
+
+// * IU4X16_WMMA_w64 lanes 0-31 will have 8xi4, remaining lanes are ignored
+// ** IU4X64_SWMMAC_w32 index is i32, index_key is not used
+// *** IU4X32_SWMMAC_w64 lanes 0-31 will have 8xi4 remaining lanes are ignored
+// for matrix A, index is i16; Matrix B uses all lanes
+
+let WaveSizePredicate = isWave32 in {
+defm V_WMMA_F32_16X16X16_F16_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16", F32_F16_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X16_BF16_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16", F32_BF16_WMMA_w32, "_w32">;
+defm V_WMMA_F16_16X16X16_F16_w32 : WMMAInstGFX12<"v_wmma_f16_16x16x16_f16", F16_F16_WMMA_w32, "_w32">;
+defm V_WMMA_BF16_16X16X16_BF16_w32 : WMMAInstGFX12<"v_wmma_bf16_16x16x16_bf16", BF16_BF16_WMMA_w32, "_w32">;
+defm V_WMMA_I32_16X16X16_IU8_w32 : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu8", I32_IU8_WMMA_w32, "_w32">;
+defm V_WMMA_I32_16X16X16_IU4_w32 : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu4", I32_IU4X16_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_fp8", F32_FP8BF8_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_bf8", F32_FP8BF8_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_fp8", F32_FP8BF8_WMMA_w32, "_w32">;
+defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_bf8", F32_FP8BF8_WMMA_w32, "_w32">;
+defm V_WMMA_I32_16X16X32_IU4_w32 : WMMAInstGFX12<"v_wmma_i32_16x16x32_iu4", I32_IU4X32_WMMA_w32, "_w32">;
+
+defm V_SWMMAC_F32_16X16X32_F16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_f16", F32_F16_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X32_BF16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf16", F32_BF16_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F16_16X16X32_F16_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x32_f16", F16_F16_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_BF16_16X16X32_BF16_w32 : SWMMACInstGFX12<"v_swmmac_bf16_16x16x32_bf16", BF16_BF16_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_I32_16X16X32_IU8_w32 : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu8", I32_IU8_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_I32_16X16X32_IU4_w32 : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu4", I32_IU4X32_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_I32_16X16X64_IU4_w32 : SWMMACInstGFX12<"v_swmmac_i32_16x16x64_iu4", I32_IU4X64_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_fp8", F32_FP8BF8_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_bf8", F32_FP8BF8_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_fp8", F32_FP8BF8_SWMMAC_w32, "_w32">;
+defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_bf8", F32_FP8BF8_SWMMAC_w32, "_w32">;
+}
+
+let WaveSizePredicate = isWave64 in {
+defm V_WMMA_F32_16X16X16_F16_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16", F32_F16_WMMA_w64, "_w64">;
+defm V_WMMA_F32_16X16X16_BF16_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16", F32_BF16_WMMA_w64, "_w64">;
+defm V_WMMA_F16_16X16X16_F16_w64 : WMMAInstGFX12<"v_wmma_f16_16x16x16_f16", F16_F16_WMMA_w64, "_w64">;
+defm V_WMMA_BF16_16X16X16_BF16_w64 : WMMAInstGFX12<"v_wmma_bf16_16x16x16_bf16", BF16_BF16_WMMA_w64, "_w64">;
+defm V_WMMA_I32_16X16X16_IU8_w64 : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu8", I32_IU8_WMMA_w64, "_w64">;
+defm V_WMMA_I32_16X16X16_IU4_w64 : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu4", I32_IU4X16_WMMA_w64, "_w64">;
+defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_fp8", F32_FP8BF8_WMMA_w64, "_w64">;
+defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_bf8", F32_FP8BF8_WMMA_w64, "_w64">;
+defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_fp8", F32_FP8BF8_WMMA_w64, "_w64">;
+defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_bf8", F32_FP8BF8_WMMA_w64, "_w64">;
+defm V_WMMA_I32_16X16X32_IU4_w64 : WMMAInstGFX12<"v_wmma_i32_16x16x32_iu4", I32_IU4X32_WMMA_w64, "_w64">;
+
+defm V_SWMMAC_F32_16X16X32_F16_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_f16", F32_F16_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_F32_16X16X32_BF16_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf16", F32_BF16_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_F16_16X16X32_F16_w64 : SWMMACInstGFX12<"v_swmmac_f16_16x16x32_f16", F16_F16_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_BF16_16X16X32_BF16_w64 : SWMMACInstGFX12<"v_swmmac_bf16_16x16x32_bf16", BF16_BF16_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_I32_16X16X32_IU8_w64 : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu8", I32_IU8_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_I32_16X16X32_IU4_w64 : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu4", I32_IU4X32_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_I32_16X16X64_IU4_w64 : SWMMACInstGFX12<"v_swmmac_i32_16x16x64_iu4", I32_IU4X64_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_fp8", F32_FP8BF8_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_bf8", F32_FP8BF8_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_fp8", F32_FP8BF8_SWMMAC_w64, "_w64">;
+defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_bf8", F32_FP8BF8_SWMMAC_w64, "_w64">;
+}
+
+// IsGFX11OpselIntrinsic: f16_f16 and bf16_bf16 Intrinsics have imm operand that
+// controls opsel. Used by gfx11, removed in gfx12 (operand must be 0).
+multiclass WMMAPat<string Inst, SDPatternOperator node, VOP3PWMMA_Profile P, bit IsGFX11OpselIntrinsic = 0> {
+ def : GCNPat <(P.DstVT !setdagop(!con(P.WmmaInPat, !if(IsGFX11OpselIntrinsic, (ins 0), (ins))), node)),
+ (P.DstVT !setdagop(P.WmmaOutPat, !cast<Instruction>(Inst#"_twoaddr")))>;
+ let AddedComplexity = 4 in
+ def : GCNPat <(P.DstVT !setdagop(!con(P.WmmaInlineInPat, !if(IsGFX11OpselIntrinsic, (ins 0), (ins))), node)),
+ (P.DstVT !setdagop(P.WmmaInlineOutPat, !cast<Instruction>(Inst#"_threeaddr")))>;
+}
+
+class SWMMACPat<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile P> :
+ GCNPat <(P.DstVT !setdagop(P.SwmmacInPat, node)),
+ (P.DstVT !setdagop(P.SwmmacOutPat, Inst))>;
+
+class SWMMACPat_w64<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile P> :
+ GCNPat <(P.DstVT !setdagop(P.SwmmacInPat, node)),
+ (P.DstVT !setdagop(P.SwmmacOutPat, Inst))>{
+ let WaveSizePredicate = isWave64;
+ }
+
+let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX12Plus in {
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w32", int_amdgcn_wmma_f32_16x16x16_f16, F32_F16_WMMA_w32>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w32", int_amdgcn_wmma_f32_16x16x16_bf16, F32_BF16_WMMA_w32>;
+ defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w32", int_amdgcn_wmma_f16_16x16x16_f16, F16_F16_WMMA_w32,1>;
+ defm : WMMAPat<"V_WMMA_BF16_16X16X16_BF16_w32", int_amdgcn_wmma_bf16_16x16x16_bf16, BF16_BF16_WMMA_w32,1>;
+ defm : WMMAPat<"V_WMMA_I32_16X16X16_IU8_w32", int_amdgcn_wmma_i32_16x16x16_iu8, I32_IU8_WMMA_w32>;
+ defm : WMMAPat<"V_WMMA_I32_16X16X16_IU4_w32", int_amdgcn_wmma_i32_16x16x16_iu4, I32_IU4X16_WMMA_w32>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_fp8_fp8, F32_FP8BF8_WMMA_w32>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_fp8_bf8, F32_FP8BF8_WMMA_w32>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w32>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w32>;
+ defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w32", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w32>;
+
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w32>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w32>;
+ def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w32>;
+ def : SWMMACPat<V_SWMMAC_BF16_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_bf16_16x16x32_bf16, BF16_BF16_SWMMAC_w32>;
+ def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU8_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu8, I32_IU8_SWMMAC_w32>;
+ def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU4_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu4, I32_IU4X32_SWMMAC_w32>;
+ def : GCNPat <(I32_IU4X64_SWMMAC_w32.DstVT !setdagop(I32_IU4X64_SWMMAC_w32.SwmmacInPat, int_amdgcn_swmmac_i32_16x16x64_iu4)),
+ (I32_IU4X64_SWMMAC_w32.DstVT !setdagop(I32_IU4X64_SWMMAC_w32.SwmmacOutPat, V_SWMMAC_I32_16X16X64_IU4_w32_twoaddr))>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_FP8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_fp8, F32_FP8BF8_SWMMAC_w32>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_bf8, F32_FP8BF8_SWMMAC_w32>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_FP8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_fp8, F32_FP8BF8_SWMMAC_w32>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w32>;
+}
+
+let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX12Plus in {
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w64", int_amdgcn_wmma_f32_16x16x16_f16, F32_F16_WMMA_w64>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w64", int_amdgcn_wmma_f32_16x16x16_bf16, F32_BF16_WMMA_w64>;
+ defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w64", int_amdgcn_wmma_f16_16x16x16_f16, F16_F16_WMMA_w64,1>;
+ defm : WMMAPat<"V_WMMA_BF16_16X16X16_BF16_w64", int_amdgcn_wmma_bf16_16x16x16_bf16, BF16_BF16_WMMA_w64,1>;
+ defm : WMMAPat<"V_WMMA_I32_16X16X16_IU8_w64", int_amdgcn_wmma_i32_16x16x16_iu8, I32_IU8_WMMA_w64>;
+ defm : WMMAPat<"V_WMMA_I32_16X16X16_IU4_w64", int_amdgcn_wmma_i32_16x16x16_iu4, I32_IU4X16_WMMA_w64>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_fp8_fp8, F32_FP8BF8_WMMA_w64>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_fp8_bf8, F32_FP8BF8_WMMA_w64>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w64>;
+ defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w64>;
+ defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w64", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w64>;
+
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_BF16_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_bf16_16x16x32_bf16, BF16_BF16_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU8_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu8, I32_IU8_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU4_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu4, I32_IU4X32_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_I32_16X16X64_IU4_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x64_iu4, I32_IU4X64_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_FP8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_fp8, F32_FP8BF8_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_BF8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_bf8, F32_FP8BF8_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_FP8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_fp8, F32_FP8BF8_SWMMAC_w64>;
+ def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w64>;
+}
+
+
//===----------------------------------------------------------------------===//
// Begin Real Encodings
//===----------------------------------------------------------------------===//
@@ -1005,6 +1400,99 @@ multiclass VOP3P_Real_Base<GFXGen Gen, bits<7> op, string backing_ps_name = NAME
VOP3Pe_gfx11_gfx12<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl>;
}
+class VOP3PeWmma<bits<7> op, VOPProfile P, VOP3PWMMA_Profile WMMAP>
+ : VOP3Pe_gfx11_gfx12<op, P>{
+ // opsel
+ let Inst{11} = !cond(!eq(WMMAP.IndexType, 0) : 0,
+ !eq(WMMAP.IndexType, 8) : index_key_8bit{0},
+ !eq(WMMAP.IndexType, 16) : index_key_16bit{0});
+ let Inst{12} = !if(!eq(WMMAP.IndexType, 8), index_key_8bit{1}, 0);
+ let Inst{13} = 0;
+ // opsel_hi
+ let Inst{59} = 1;
+ let Inst{60} = 1;
+ let Inst{14} = 1;
+ // neg_lo
+ let Inst{61} = !if(WMMAP.NegLo01, src0_modifiers{0}, 0);
+ let Inst{62} = !if(WMMAP.NegLo01, src1_modifiers{0}, 0);
+ let Inst{63} = !if(WMMAP.NegLo2, src2_modifiers{0}, 0);
+ // neg_hi
+ let Inst{8} = !if(WMMAP.NegHi01, src0_modifiers{1}, 0);
+ let Inst{9} = !if(WMMAP.NegHi01, src1_modifiers{1}, 0);
+ let Inst{10} = !if(WMMAP.NegHi2, src2_modifiers{1}, 0);
+ // clamp
+ let Inst{15} = !if(WMMAP.IsIU, clamp{0}, 0);
+}
+
+multiclass VOP3P_WMMA_Real_Base<GFXGen Gen, bits<7> op, VOP3PWMMA_Profile WMMAP,
+ string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
+ def Gen.Suffix :
+ VOP3P_Real_Gen<!cast<VOP3P_Pseudo>(backing_ps_name), Gen, asmName>,
+ VOP3PeWmma<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl, WMMAP>;
+}
+
+multiclass VOP3P_Real_WMMA_gfx12 <bits<7> op, VOP3PWMMA_Profile WMMAP> {
+ let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in {
+ defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>;
+ }
+}
+
+multiclass VOP3P_Real_WMMA_gfx12w64 <bits<7> op, VOP3PWMMA_Profile WMMAP> {
+ let WaveSizePredicate = isWave64, DecoderNamespace = "WMMAGFX12" in {
+ defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>;
+ }
+}
+
+defm V_WMMA_F32_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>;
+defm V_WMMA_F16_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>;
+defm V_WMMA_BF16_16X16X16_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x043, BF16_BF16_WMMA_w32>;
+defm V_WMMA_I32_16X16X16_IU8_w32 : VOP3P_Real_WMMA_gfx12 <0x044, I32_IU8_WMMA_w32>;
+defm V_WMMA_I32_16X16X16_IU4_w32 : VOP3P_Real_WMMA_gfx12 <0x045, I32_IU4X16_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x046, F32_FP8BF8_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x047, F32_FP8BF8_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x048, F32_FP8BF8_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x049, F32_FP8BF8_WMMA_w32>;
+defm V_WMMA_I32_16X16X32_IU4_w32 : VOP3P_Real_WMMA_gfx12 <0x04a, I32_IU4X32_WMMA_w32>;
+
+defm V_WMMA_F32_16X16X16_F16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x040, F32_F16_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_BF16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x041, F32_BF16_WMMA_w64>;
+defm V_WMMA_F16_16X16X16_F16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x042, F16_F16_WMMA_w64>;
+defm V_WMMA_BF16_16X16X16_BF16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x043, BF16_BF16_WMMA_w64>;
+defm V_WMMA_I32_16X16X16_IU8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x044, I32_IU8_WMMA_w64>;
+defm V_WMMA_I32_16X16X16_IU4_w64 : VOP3P_Real_WMMA_gfx12w64 <0x045, I32_IU4X16_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x046, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x047, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x048, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x049, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_I32_16X16X32_IU4_w64 : VOP3P_Real_WMMA_gfx12w64 <0x04a, I32_IU4X32_WMMA_w64>;
+
+
+defm V_SWMMAC_F32_16X16X32_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x050, F32_F16_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x051, F32_BF16_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X32_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x052, F16_F16_SWMMAC_w32>;
+defm V_SWMMAC_BF16_16X16X32_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x053, BF16_BF16_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X32_IU8_w32 : VOP3P_Real_WMMA_gfx12 <0x054, I32_IU8_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X32_IU4_w32 : VOP3P_Real_WMMA_gfx12 <0x055, I32_IU4X32_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X64_IU4_w32 : VOP3P_Real_WMMA_gfx12 <0x056, I32_IU4X64_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x057, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x058, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x059, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x05a, F32_FP8BF8_SWMMAC_w32>;
+
+defm V_SWMMAC_F32_16X16X32_F16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x050, F32_F16_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x051, F32_BF16_SWMMAC_w64>;
+defm V_SWMMAC_F16_16X16X32_F16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x052, F16_F16_SWMMAC_w64>;
+defm V_SWMMAC_BF16_16X16X32_BF16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x053, BF16_BF16_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X32_IU8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x054, I32_IU8_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X32_IU4_w64 : VOP3P_Real_WMMA_gfx12w64 <0x055, I32_IU4X32_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X64_IU4_w64 : VOP3P_Real_WMMA_gfx12w64 <0x056, I32_IU4X64_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x057, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x058, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x059, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x05a, F32_FP8BF8_SWMMAC_w64>;
+
multiclass VOP3P_Real_with_name<GFXGen Gen, bits<7> op,
string backing_ps_name = NAME,
string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
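
Following the index-size comment in the SWMMAC profiles above (4 bits of index per pair of matrix-A elements in a lane), the 16-bit index keys for wave32 and 8-bit keys for wave64 fall out of simple arithmetic. A small sketch, assuming nothing beyond that comment; indexBitsPerLane is an illustrative name:

    // Sketch only: 4 bits of sparsity index per pair of matrix-A elements.
    constexpr unsigned indexBitsPerLane(unsigned EltsPerLane) {
      return (EltsPerLane / 2) * 4;
    }

    static_assert(indexBitsPerLane(8) == 16, "wave32: 8 elts -> 16-bit index key");
    static_assert(indexBitsPerLane(4) == 8, "wave64: 4 elts -> 8-bit index key");
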
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
index df505c3365cb..20d7c88fb7e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -124,6 +124,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let IsPacked = P.IsPacked;
let IsMAI = P.IsMAI;
let IsWMMA = P.IsWMMA;
+ let IsSWMMAC = P.IsSWMMAC;
let AsmOperands = !if(isVop3OpSel,
P.AsmVOP3OpSel,
@@ -305,6 +306,11 @@ class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
class VOP3OpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>;
+class VOP3FP8OpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
+ let Inst{11} = !if(p.HasSrc0, src0_modifiers{2}, 0);
+ let Inst{12} = !if(p.HasSrc0, src0_modifiers{3}, 0);
+}
+
class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11_gfx12<op, p>{
let Inst{11} = ?;
let Inst{12} = ?;
@@ -378,6 +384,8 @@ class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
bits<4> src2_modifiers;
bits<9> src2;
bits<1> clamp;
+ bits<2> index_key_8bit;
+ bits<1> index_key_16bit;
let Inst{7-0} = vdst;
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
@@ -738,7 +746,7 @@ class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
// OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
let Inst{11} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{2}, 0),?);
- let Inst{12} = !if(P.HasOpSel,!if(P.HasSrc1Mods, src1_modifiers{2}, 0),?);
+ let Inst{12} = !if(P.HasOpSel,!if(P.HasSrc1Mods, src1_modifiers{2}, !if((P.IsFP8), src0_modifiers{3}, 0)), ?);
let Inst{13} = !if(P.HasOpSel,!if(P.HasSrc2Mods, src2_modifiers{2}, 0),?);
let Inst{14} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{3}, 0),?);
let Inst{15} = !if(P.HasClamp, clamp, 0);
@@ -1406,14 +1414,20 @@ multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.AsmOperands,
IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
- if ps.Pfl.HasOpSel then
- def _e64#Gen.Suffix :
- VOP3_Real_Gen<ps, Gen>,
- VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
- if !not(ps.Pfl.HasOpSel) then
- def _e64#Gen.Suffix :
- VOP3_Real_Gen<ps, Gen>,
- VOP3e_gfx11_gfx12<op, ps.Pfl>;
+ if ps.Pfl.IsFP8 then {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3FP8OpSel_gfx11_gfx12<op, ps.Pfl>;
+ } else {
+ if ps.Pfl.HasOpSel then
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
+ if !not(ps.Pfl.HasOpSel) then
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
+ VOP3e_gfx11_gfx12<op, ps.Pfl>;
+ }
}
def Gen.Suffix#"_VOP3_alias" : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>, LetDummies;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 5c1c7046fdbf..8629551152cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1806,12 +1806,13 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
DFS.ProcessLoop();
const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();
- for (auto *MBB : PostOrder) {
- recomputeLiveIns(*MBB);
- // FIXME: For some reason, the live-in print order is non-deterministic for
- // our tests and I can't out why... So just sort them.
- MBB->sortUniqueLiveIns();
- }
+ bool anyChange = false;
+ do {
+ anyChange = false;
+ for (auto *MBB : PostOrder) {
+ anyChange = recomputeLiveIns(*MBB) || anyChange;
+ }
+ } while (anyChange);
for (auto *MBB : reverse(PostOrder))
recomputeLivenessFlags(*MBB);
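
The ARMLowOverheadLoops hunk above reruns recomputeLiveIns until no block reports a change, since live-ins flow from successors to predecessors and a single post-order pass may not converge when the CFG contains cycles. A generic sketch of the same fixed-point idea (recomputeUntilFixedPoint is illustrative, not an LLVM API):

    #include <vector>

    // Sketch only: Recompute returns true when a block's live-in set changed,
    // so iterating until nothing changes reaches a fixed point.
    template <typename Block, typename RecomputeFn>
    void recomputeUntilFixedPoint(const std::vector<Block *> &Blocks,
                                  RecomputeFn Recompute) {
      bool Changed;
      do {
        Changed = false;
        for (Block *B : Blocks)
          Changed |= Recompute(*B);
      } while (Changed);
    }
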
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 04349aa52b54..d47dded9ea6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -21,17 +21,20 @@ using namespace llvm;
TypeSize LoongArchTTIImpl::getRegisterBitWidth(
TargetTransformInfo::RegisterKind K) const {
+ TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K);
switch (K) {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
case TargetTransformInfo::RGK_FixedWidthVector:
- if (ST->hasExtLASX() && ST->hasExpAutoVec())
+ if (!ST->hasExpAutoVec())
+ return DefSize;
+ if (ST->hasExtLASX())
return TypeSize::getFixed(256);
- if (ST->hasExtLSX() && ST->hasExpAutoVec())
+ if (ST->hasExtLSX())
return TypeSize::getFixed(128);
- return TypeSize::getFixed(0);
+ [[fallthrough]];
case TargetTransformInfo::RGK_ScalableVector:
- return TypeSize::getScalable(0);
+ return DefSize;
}
llvm_unreachable("Unsupported register kind");
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 718844bc36ff..66b2b0de8d52 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -471,45 +471,6 @@ void MipsAsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
TS.emitDirectiveInsn();
}
-/// isBlockOnlyReachableByFallthough - Return true if the basic block has
-/// exactly one predecessor and the control transfer mechanism between
-/// the predecessor and this block is a fall-through.
-bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
- MBB) const {
- // The predecessor has to be immediately before this block.
- const MachineBasicBlock *Pred = *MBB->pred_begin();
-
- // If the predecessor is a switch statement, assume a jump table
- // implementation, so it is not a fall through.
- if (const BasicBlock *bb = Pred->getBasicBlock())
- if (isa<SwitchInst>(bb->getTerminator()))
- return false;
-
- // If this is a landing pad, it isn't a fall through. If it has no preds,
- // then nothing falls through to it.
- if (MBB->isEHPad() || MBB->pred_empty())
- return false;
-
- // If there isn't exactly one predecessor, it can't be a fall through.
- if (MBB->pred_size() != 1)
- return false;
-
- // The predecessor has to be immediately before this block.
- if (!Pred->isLayoutSuccessor(MBB))
- return false;
-
- // If the block is completely empty, then it definitely does fall through.
- if (Pred->empty())
- return true;
-
- // Otherwise, check the last instruction.
- // Check if the last terminator is an unconditional branch.
- MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->isTerminator()) ;
-
- return !I->isBarrier();
-}
-
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const char *ExtraCode, raw_ostream &O) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 64424b181504..0b55089385d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -142,8 +142,6 @@ public:
void emitFunctionBodyStart() override;
void emitFunctionBodyEnd() override;
void emitBasicBlockEnd(const MachineBasicBlock &MBB) override;
- bool isBlockOnlyReachableByFallthrough(
- const MachineBasicBlock* MBB) const override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index aee57a5075ff..b43eee8fdd8c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -208,8 +208,10 @@ bool PPCExpandAtomicPseudo::expandAtomicRMW128(
.addMBB(LoopMBB);
CurrentMBB->addSuccessor(LoopMBB);
CurrentMBB->addSuccessor(ExitMBB);
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
+ } while (anyChange);
NMBBI = MBB.end();
MI.eraseFromParent();
return true;
@@ -286,9 +288,11 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
CurrentMBB->addSuccessor(LoopCmpMBB);
CurrentMBB->addSuccessor(ExitMBB);
- recomputeLiveIns(*LoopCmpMBB);
- recomputeLiveIns(*CmpSuccMBB);
- recomputeLiveIns(*ExitMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*CmpSuccMBB) ||
+ recomputeLiveIns(*LoopCmpMBB);
+ } while (anyChange);
NMBBI = MBB.end();
MI.eraseFromParent();
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 245e78641ed6..6792842f8550 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1441,8 +1441,11 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
}
// Update liveins.
- recomputeLiveIns(*ProbeLoopBodyMBB);
- recomputeLiveIns(*ProbeExitMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*ProbeExitMBB) ||
+ recomputeLiveIns(*ProbeLoopBodyMBB);
+ } while (anyChange);
return ProbeExitMBB;
};
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
@@ -1534,8 +1537,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
}
// Update liveins.
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
+ } while (anyChange);
}
}
++NumPrologProbed;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
index e6e879282241..27d52c16a4f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
@@ -31,6 +31,12 @@ include "RISCVInstrInfo.td"
include "GISel/RISCVRegisterBanks.td"
//===----------------------------------------------------------------------===//
+// RISC-V macro fusions.
+//===----------------------------------------------------------------------===//
+
+include "RISCVMacroFusion.td"
+
+//===----------------------------------------------------------------------===//
// RISC-V Scheduling Models
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
index 3878be680c04..26451c80f57b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -72,7 +72,7 @@ def FeatureStdExtZicntr
[FeatureStdExtZicsr]>;
def FeatureStdExtZicond
- : SubtargetFeature<"experimental-zicond", "HasStdExtZicond", "true",
+ : SubtargetFeature<"zicond", "HasStdExtZicond", "true",
"'Zicond' (Integer Conditional Operations)">;
def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">,
AssemblerPredicate<(all_of FeatureStdExtZicond),
@@ -1044,30 +1044,6 @@ def TuneDLenFactor2
: SubtargetFeature<"dlen-factor-2", "DLenFactor2", "true",
"Vector unit DLEN(data path width) is half of VLEN">;
-def TuneLUIADDIFusion
- : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
- "true", "Enable LUI+ADDI macrofusion">;
-
-def TuneAUIPCADDIFusion
- : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
- "true", "Enable AUIPC+ADDI macrofusion">;
-
-def TuneZExtHFusion
- : SubtargetFeature<"zexth-fusion", "HasZExtHFusion",
- "true", "Enable SLLI+SRLI to be fused to zero extension of halfword">;
-
-def TuneZExtWFusion
- : SubtargetFeature<"zextw-fusion", "HasZExtWFusion",
- "true", "Enable SLLI+SRLI to be fused to zero extension of word">;
-
-def TuneShiftedZExtWFusion
- : SubtargetFeature<"shifted-zextw-fusion", "HasShiftedZExtWFusion",
- "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension of word">;
-
-def TuneLDADDFusion
- : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
- "true", "Enable LD+ADD macrofusion.">;
-
def TuneNoDefaultUnroll
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
"Disable default unroll preference.">;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 47c6cd6e5487..7895d74f06d1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4718,7 +4718,7 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
if (SrcVecIdx == -1)
continue;
unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
- SDValue SrcVec = (unsigned)SrcVecIdx > VRegsPerSrc ? V2 : V1;
+ SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
DAG.getVectorIdxConstant(ExtractIdx, DL));
SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
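
Editorial note: the one-character change above fixes an off-by-one in source selection. SrcVecIdx indexes the M1 register groups of both shuffle sources as if they were concatenated, so indices 0 .. VRegsPerSrc-1 come from V1 and VRegsPerSrc .. 2*VRegsPerSrc-1 come from V2; the boundary value VRegsPerSrc already belongs to V2. A hedged restatement of the corrected predicate (illustrative only, names mirror the hunk):

// With VRegsPerSrc = 2, indices 0 and 1 select from V1 and indices 2 and 3
// select from V2, so index 2 must map to V2 -- hence ">=" rather than ">".
static bool selectsFromSecondSource(unsigned SrcVecIdx, unsigned VRegsPerSrc) {
  return SrcVecIdx >= VRegsPerSrc;
}
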
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
index 0790a941823b..35d3fdae0bd7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
@@ -8,8 +8,6 @@
//
// This file describes the RISC-V instructions from the standard Integer
// Conditional operations extension (Zicond).
-// This version is still experimental as the 'Zicond' extension hasn't been
-// ratified yet. It is based on v1.0-rc1 of the specification.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
deleted file mode 100644
index f948f05b22f7..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-//===- RISCVMacroFusion.cpp - RISC-V Macro Fusion -------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This file contains the RISC-V implementation of the DAG scheduling
-/// mutation to pair instructions back to back.
-//
-//===----------------------------------------------------------------------===//
-//
-#include "RISCVMacroFusion.h"
-#include "RISCVSubtarget.h"
-#include "llvm/CodeGen/MacroFusion.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-
-using namespace llvm;
-
-static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) {
- if (!SecondMI.getOperand(1).isReg())
- return false;
-
- if (SecondMI.getOperand(1).getReg() != FirstDest)
- return false;
-
- // If the input is virtual make sure this is the only user.
- if (FirstDest.isVirtual()) {
- auto &MRI = SecondMI.getMF()->getRegInfo();
- return MRI.hasOneNonDBGUse(FirstDest);
- }
-
- return SecondMI.getOperand(0).getReg() == FirstDest;
-}
-
-// Fuse load with add:
-// add rd, rs1, rs2
-// ld rd, 0(rd)
-static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::LD)
- return false;
-
- if (!SecondMI.getOperand(2).isImm())
- return false;
-
- if (SecondMI.getOperand(2).getImm() != 0)
- return false;
-
- // Given SecondMI, when FirstMI is unspecified, we must return
- // if SecondMI may be part of a fused pair at all.
- if (!FirstMI)
- return true;
-
- if (FirstMI->getOpcode() != RISCV::ADD)
- return true;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse zero extension of halfword:
-// slli rd, rs1, 48
-// srli rd, rd, 48
-static bool isZExtH(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::SRLI)
- return false;
-
- if (!SecondMI.getOperand(2).isImm())
- return false;
-
- if (SecondMI.getOperand(2).getImm() != 48)
- return false;
-
- // Given SecondMI, when FirstMI is unspecified, we must return
- // if SecondMI may be part of a fused pair at all.
- if (!FirstMI)
- return true;
-
- if (FirstMI->getOpcode() != RISCV::SLLI)
- return false;
-
- if (FirstMI->getOperand(2).getImm() != 48)
- return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse zero extension of word:
-// slli rd, rs1, 32
-// srli rd, rd, 32
-static bool isZExtW(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::SRLI)
- return false;
-
- if (!SecondMI.getOperand(2).isImm())
- return false;
-
- if (SecondMI.getOperand(2).getImm() != 32)
- return false;
-
- // Given SecondMI, when FirstMI is unspecified, we must return
- // if SecondMI may be part of a fused pair at all.
- if (!FirstMI)
- return true;
-
- if (FirstMI->getOpcode() != RISCV::SLLI)
- return false;
-
- if (FirstMI->getOperand(2).getImm() != 32)
- return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse shifted zero extension of word:
-// slli rd, rs1, 32
-// srli rd, rd, x
-// where 0 <= x < 32
-static bool isShiftedZExtW(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::SRLI)
- return false;
-
- if (!SecondMI.getOperand(2).isImm())
- return false;
-
- unsigned SRLIImm = SecondMI.getOperand(2).getImm();
- if (SRLIImm >= 32)
- return false;
-
- // Given SecondMI, when FirstMI is unspecified, we must return
- // if SecondMI may be part of a fused pair at all.
- if (!FirstMI)
- return true;
-
- if (FirstMI->getOpcode() != RISCV::SLLI)
- return false;
-
- if (FirstMI->getOperand(2).getImm() != 32)
- return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse AUIPC followed by ADDI
-// auipc rd, imm20
-// addi rd, rd, imm12
-static bool isAUIPCADDI(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::ADDI)
- return false;
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
- return true;
-
- if (FirstMI->getOpcode() != RISCV::AUIPC)
- return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse LUI followed by ADDI or ADDIW.
-// rd = imm[31:0] which decomposes to
-// lui rd, imm[31:12]
-// addi(w) rd, rd, imm[11:0]
-static bool isLUIADDI(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::ADDI &&
- SecondMI.getOpcode() != RISCV::ADDIW)
- return false;
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
- return true;
-
- if (FirstMI->getOpcode() != RISCV::LUI)
- return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
- const TargetSubtargetInfo &TSI,
- const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
- const RISCVSubtarget &ST = static_cast<const RISCVSubtarget &>(TSI);
-
- if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
- return true;
-
- if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
- return true;
-
- if (ST.hasZExtHFusion() && isZExtH(FirstMI, SecondMI))
- return true;
-
- if (ST.hasZExtWFusion() && isZExtW(FirstMI, SecondMI))
- return true;
-
- if (ST.hasShiftedZExtWFusion() && isShiftedZExtW(FirstMI, SecondMI))
- return true;
-
- if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
- return true;
-
- return false;
-}
-
-std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
- return createMacroFusionDAGMutation(shouldScheduleAdjacent);
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.h
deleted file mode 100644
index 7598db3f8fe1..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//===- RISCVMacroFusion.h - RISC-V Macro Fusion -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This file contains the RISC-V definition of the DAG scheduling
-/// mutation to pair instructions back to back.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
-#define LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
-
-#include "llvm/CodeGen/MachineScheduler.h"
-
-namespace llvm {
-
-/// Note that you have to add:
-/// DAG.addMutation(createRISCVMacroFusionDAGMutation());
-/// to RISCVPassConfig::createMachineScheduler() to have an effect.
-std::unique_ptr<ScheduleDAGMutation> createRISCVMacroFusionDAGMutation();
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.td
new file mode 100644
index 000000000000..875a93d09a2c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -0,0 +1,93 @@
+//==----- RISCVMacroFusion.td - Macro Fusion Definitions -----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the macro fusion predicators.
+
+// Fuse LUI followed by ADDI or ADDIW:
+// rd = imm[31:0] which decomposes to
+// lui rd, imm[31:12]
+// addi(w) rd, rd, imm[11:0]
+def TuneLUIADDIFusion
+ : SimpleFusion<"lui-addi-fusion", "HasLUIADDIFusion",
+ "Enable LUI+ADDI macro fusion",
+ CheckOpcode<[LUI]>,
+ CheckOpcode<[ADDI, ADDIW]>>;
+
+// Fuse AUIPC followed by ADDI:
+// auipc rd, imm20
+// addi rd, rd, imm12
+def TuneAUIPCADDIFusion
+ : SimpleFusion<"auipc-addi-fusion", "HasAUIPCADDIFusion",
+ "Enable AUIPC+ADDI macrofusion",
+ CheckOpcode<[AUIPC]>,
+ CheckOpcode<[ADDI]>>;
+
+// Fuse zero extension of halfword:
+// slli rd, rs1, 48
+// srli rd, rd, 48
+def TuneZExtHFusion
+ : SimpleFusion<"zexth-fusion", "HasZExtHFusion",
+ "Enable SLLI+SRLI to be fused to zero extension of halfword",
+ CheckAll<[
+ CheckOpcode<[SLLI]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperand<2, 48>
+ ]>,
+ CheckAll<[
+ CheckOpcode<[SRLI]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperand<2, 48>
+ ]>>;
+
+// Fuse zero extension of word:
+// slli rd, rs1, 32
+// srli rd, rd, 32
+def TuneZExtWFusion
+ : SimpleFusion<"zextw-fusion", "HasZExtWFusion",
+ "Enable SLLI+SRLI to be fused to zero extension of word",
+ CheckAll<[
+ CheckOpcode<[SLLI]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperand<2, 32>
+ ]>,
+ CheckAll<[
+ CheckOpcode<[SRLI]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperand<2, 32>
+ ]>>;
+
+// Fuse shifted zero extension of word:
+// slli rd, rs1, 32
+// srli rd, rd, x
+// where 0 <= x < 32
+def TuneShiftedZExtWFusion
+ : SimpleFusion<"shifted-zextw-fusion", "HasShiftedZExtWFusion",
+ "Enable SLLI+SRLI to be fused when computing (shifted) word zero extension",
+ CheckAll<[
+ CheckOpcode<[SLLI]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperand<2, 32>
+ ]>,
+ CheckAll<[
+ CheckOpcode<[SRLI]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperandRange<2, 0, 31>
+ ]>>;
+
+// Fuse load with add:
+// add rd, rs1, rs2
+// ld rd, 0(rd)
+def TuneLDADDFusion
+ : SimpleFusion<"ld-add-fusion", "HasLDADDFusion", "Enable LD+ADD macrofusion",
+ CheckOpcode<[ADD]>,
+ CheckAll<[
+ CheckOpcode<[LD]>,
+ CheckIsImmOperand<2>,
+ CheckImmOperand<2, 0>
+ ]>>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 7b64d3cee9c8..d3236bb07d56 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -16,8 +16,9 @@
#include "GISel/RISCVRegisterBankInfo.h"
#include "RISCV.h"
#include "RISCVFrameLowering.h"
-#include "RISCVMacroFusion.h"
#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
@@ -29,6 +30,9 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "RISCVGenSubtargetInfo.inc"
+#define GET_RISCV_MACRO_FUSION_PRED_IMPL
+#include "RISCVGenMacroFusion.inc"
+
namespace llvm::RISCVTuneInfoTable {
#define GET_RISCVTuneInfoTable_IMPL
@@ -187,7 +191,7 @@ bool RISCVSubtarget::enableSubRegLiveness() const {
void RISCVSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(createRISCVMacroFusionDAGMutation());
+ Mutations.push_back(createMacroFusionDAGMutation(getMacroFusions()));
}
/// Enable use of alias analysis during code generation (during MI
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 2ba93764facd..8c55efa69a6a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -27,6 +27,9 @@
#include "llvm/Target/TargetMachine.h"
#include <bitset>
+#define GET_RISCV_MACRO_FUSION_PRED_DECL
+#include "RISCVGenMacroFusion.inc"
+
#define GET_SUBTARGETINFO_HEADER
#include "RISCVGenSubtargetInfo.inc"
@@ -196,11 +199,6 @@ public:
return UserReservedRegister[i];
}
- bool hasMacroFusion() const {
- return hasLUIADDIFusion() || hasAUIPCADDIFusion() || hasZExtHFusion() ||
- hasZExtWFusion() || hasShiftedZExtWFusion() || hasLDADDFusion();
- }
-
// Vector codegen related methods.
bool hasVInstructions() const { return HasStdExtZve32x; }
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b4b81b545a54..2285c99d7901 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -14,7 +14,6 @@
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
-#include "RISCVMacroFusion.h"
#include "RISCVTargetObjectFile.h"
#include "RISCVTargetTransformInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
@@ -26,6 +25,8 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -361,9 +362,10 @@ public:
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
}
- if (ST.hasMacroFusion()) {
+ const auto &MacroFusions = ST.getMacroFusions();
+ if (!MacroFusions.empty()) {
DAG = DAG ? DAG : createGenericSchedLive(C);
- DAG->addMutation(createRISCVMacroFusionDAGMutation());
+ DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
}
return DAG;
}
@@ -371,9 +373,10 @@ public:
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
- if (ST.hasMacroFusion()) {
+ const auto &MacroFusions = ST.getMacroFusions();
+ if (!MacroFusions.empty()) {
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
- DAG->addMutation(createRISCVMacroFusionDAGMutation());
+ DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
return DAG;
}
return nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index db19c8881c68..80c994a32ea9 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -840,8 +840,10 @@ void SystemZELFFrameLowering::inlineStackProbe(
StackAllocMI->eraseFromParent();
if (DoneMBB != nullptr) {
// Compute the live-in lists for the new blocks.
- recomputeLiveIns(*DoneMBB);
- recomputeLiveIns(*LoopMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*DoneMBB) || recomputeLiveIns(*LoopMBB);
+ } while (anyChange);
}
}
@@ -1439,8 +1441,10 @@ void SystemZXPLINKFrameLowering::inlineStackProbe(
StackAllocMI->eraseFromParent();
// Compute the live-in lists for the new blocks.
- recomputeLiveIns(*NextMBB);
- recomputeLiveIns(*StackExtMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*StackExtMBB) || recomputeLiveIns(*NextMBB);
+ } while (anyChange);
}
bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 9f0fd4d0938e..87ec8aa23080 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -877,7 +877,6 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
OutStreamer->emitInt32(FeatureFlagsAnd); // data
emitAlignment(WordSize == 4 ? Align(4) : Align(8)); // padding
- OutStreamer->endSection(Nt);
OutStreamer->switchSection(Cur);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
index c0d358ead278..c2f76a3b8abb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -885,8 +885,10 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
}
// Update Live In information
- recomputeLiveIns(*testMBB);
- recomputeLiveIns(*tailMBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*tailMBB) || recomputeLiveIns(*testMBB);
+ } while (anyChange);
}
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
@@ -1378,10 +1380,11 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
footMBB->addSuccessor(&MBB);
}
- recomputeLiveIns(*headMBB);
- recomputeLiveIns(*bodyMBB);
- recomputeLiveIns(*footMBB);
- recomputeLiveIns(MBB);
+ bool anyChange = false;
+ do {
+ anyChange = recomputeLiveIns(*footMBB) || recomputeLiveIns(*bodyMBB) ||
+ recomputeLiveIns(*headMBB) || recomputeLiveIns(MBB);
+ } while (anyChange);
}
} else {
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index fe7d90fbcdf7..bb5e22c71427 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12422,7 +12422,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
: avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
let ExeDomain = VTI.ExeDomain in
defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
- (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
+ (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
"$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
(OpNode (VTI.VT VTI.RC:$src1),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index c9d0f66c6e46..63136af2295f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -291,12 +291,15 @@ struct X86BroadcastFoldTable {
static bool matchBroadcastSize(const X86FoldTableEntry &Entry,
unsigned BroadcastBits) {
switch (Entry.Flags & TB_BCAST_MASK) {
- case TB_BCAST_SD:
- case TB_BCAST_Q:
- return BroadcastBits == 64;
- case TB_BCAST_SS:
+ case TB_BCAST_W:
+ case TB_BCAST_SH:
+ return BroadcastBits == 16;
case TB_BCAST_D:
+ case TB_BCAST_SS:
return BroadcastBits == 32;
+ case TB_BCAST_Q:
+ case TB_BCAST_SD:
+ return BroadcastBits == 64;
}
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
index d6f9aa6d6ace..9ac1f783b7f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2354,33 +2354,26 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VBLENDPSrri:
// If we're optimizing for size, try to use MOVSD/MOVSS.
if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
- unsigned Mask;
- switch (Opc) {
- default:
- llvm_unreachable("Unreachable!");
- case X86::BLENDPDrri:
- Opc = X86::MOVSDrr;
- Mask = 0x03;
- break;
- case X86::BLENDPSrri:
- Opc = X86::MOVSSrr;
- Mask = 0x0F;
- break;
- case X86::VBLENDPDrri:
- Opc = X86::VMOVSDrr;
- Mask = 0x03;
- break;
- case X86::VBLENDPSrri:
- Opc = X86::VMOVSSrr;
- Mask = 0x0F;
- break;
- }
+ unsigned Mask = (Opc == X86::BLENDPDrri || Opc == X86::VBLENDPDrri) ? 0x03: 0x0F;
if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
+#define FROM_TO(FROM, TO) \
+ case X86::FROM: \
+ Opc = X86::TO; \
+ break;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unreachable!");
+ FROM_TO(BLENDPDrri, MOVSDrr)
+ FROM_TO(BLENDPSrri, MOVSSrr)
+ FROM_TO(VBLENDPDrri, VMOVSDrr)
+ FROM_TO(VBLENDPSrri, VMOVSSrr)
+ }
WorkingMI = CloneIfNew(MI);
WorkingMI->setDesc(get(Opc));
WorkingMI->removeOperand(3);
break;
}
+#undef FROM_TO
}
[[fallthrough]];
case X86::PBLENDWrri:
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp
index 3cbe974ff314..20f324604aa5 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/TargetParser.cpp
@@ -294,6 +294,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gfx12-insts"] = true;
Features["atomic-fadd-rtn-insts"] = true;
Features["image-insts"] = true;
+ Features["fp8-conversion-insts"] = true;
break;
case GK_GFX1151:
case GK_GFX1150:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index bb2a77daa60a..1254a050027a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1032,7 +1032,8 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
bool IsLoadCSE = false;
- if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
+ BatchAAResults BatchAA(*AA);
+ if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
if (IsLoadCSE)
combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a8a5f9831e15..79873a9b4cbb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -802,6 +802,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return InsertNewInstWith(LShr, I->getIterator());
} else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
Known.One |= HighBits;
+ // SignBits may be out-of-sync with Known.countMinSignBits(). Mask out
+ // high bits of Known.Zero to avoid conflicts.
+ Known.Zero &= ~HighBits;
}
} else {
computeKnownBits(I, Known, Depth, CxtI);
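
Editorial note: the added masking keeps the KnownBits invariant that a bit is never simultaneously known zero and known one; setting the high bits in Known.One without clearing them in Known.Zero could otherwise leave a conflicting, meaningless result. A minimal sketch using only the public KnownBits API (illustrative only, not part of the patch):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

// Mark HighBits as known one, drop any stale known-zero claims on the same
// bits, and check the KnownBits invariant.
static void setHighBitsKnownOne(llvm::KnownBits &Known,
                                const llvm::APInt &HighBits) {
  Known.One |= HighBits;
  Known.Zero &= ~HighBits;
  assert(!Known.hasConflict() && "bit cannot be both known zero and one");
}
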
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 8f09569d0d9c..7b672e89b67a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1061,11 +1061,16 @@ void State::addInfoFor(BasicBlock &BB) {
FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
break;
// Enqueue the intrinsics to add extra info.
- case Intrinsic::abs:
case Intrinsic::umin:
case Intrinsic::umax:
case Intrinsic::smin:
case Intrinsic::smax:
+ // TODO: Check if it is possible to instead only add the min/max facts
+ // when simplifying uses of the min/max intrinsics.
+ if (!isGuaranteedNotToBePoison(&I))
+ break;
+ [[fallthrough]];
+ case Intrinsic::abs:
WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I));
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 8603c5cf9c02..87c01ead634f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1260,8 +1260,11 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// the entry to its block.
BasicBlock::iterator BBIt(LoadI);
bool IsLoadCSE;
+ BatchAAResults BatchAA(*AA);
+ // The dominator tree is updated lazily and may not be valid at this point.
+ BatchAA.disableDominatorTree();
if (Value *AvailableVal = FindAvailableLoadedValue(
- LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
+ LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
@@ -1322,9 +1325,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
AATags);
- PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
- PredBB, BBIt, DefMaxInstsToScan,
- AA, &IsLoadCSE, &NumScanedInst);
+ PredAvailable = findAvailablePtrLoadStore(
+ Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
+ &BatchAA, &IsLoadCSE, &NumScanedInst);
// If PredBB has a single predecessor, continue scanning through the
// single predecessor.
@@ -1336,7 +1339,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
BBIt = SinglePredBB->end();
PredAvailable = findAvailablePtrLoadStore(
Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
- (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
+ (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
&NumScanedInst);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6ca93e15719f..dd596c567cd4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1957,6 +1957,8 @@ class GeneratedRTChecks {
bool CostTooHigh = false;
const bool AddBranchWeights;
+ Loop *OuterLoop = nullptr;
+
public:
GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI,
TargetTransformInfo *TTI, const DataLayout &DL,
@@ -2053,6 +2055,9 @@ public:
DT->eraseNode(SCEVCheckBlock);
LI->removeBlock(SCEVCheckBlock);
}
+
+ // Outer loop is used as part of the later cost calculations.
+ OuterLoop = L->getParentLoop();
}
InstructionCost getCost() {
@@ -2076,16 +2081,61 @@ public:
LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
RTCheckCost += C;
}
- if (MemCheckBlock)
+ if (MemCheckBlock) {
+ InstructionCost MemCheckCost = 0;
for (Instruction &I : *MemCheckBlock) {
if (MemCheckBlock->getTerminator() == &I)
continue;
InstructionCost C =
TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput);
LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
- RTCheckCost += C;
+ MemCheckCost += C;
}
+ // If the runtime memory checks are being created inside an outer loop
+ // we should find out if these checks are outer loop invariant. If so,
+ // the checks will likely be hoisted out and so the effective cost will
+ // reduce according to the outer loop trip count.
+ if (OuterLoop) {
+ ScalarEvolution *SE = MemCheckExp.getSE();
+ // TODO: If profitable, we could refine this further by analysing every
+ // individual memory check, since there could be a mixture of loop
+ // variant and invariant checks that mean the final condition is
+ // variant.
+ const SCEV *Cond = SE->getSCEV(MemRuntimeCheckCond);
+ if (SE->isLoopInvariant(Cond, OuterLoop)) {
+ // It seems reasonable to assume that we can reduce the effective
+ // cost of the checks even when we know nothing about the trip
+ // count. Assume that the outer loop executes at least twice.
+ unsigned BestTripCount = 2;
+
+ // If exact trip count is known use that.
+ if (unsigned SmallTC = SE->getSmallConstantTripCount(OuterLoop))
+ BestTripCount = SmallTC;
+ else if (LoopVectorizeWithBlockFrequency) {
+ // Else use profile data if available.
+ if (auto EstimatedTC = getLoopEstimatedTripCount(OuterLoop))
+ BestTripCount = *EstimatedTC;
+ }
+
+ InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
+
+ // Let's ensure the cost is always at least 1.
+ NewMemCheckCost = std::max(*NewMemCheckCost.getValue(),
+ (InstructionCost::CostType)1);
+
+ LLVM_DEBUG(dbgs()
+ << "We expect runtime memory checks to be hoisted "
+ << "out of the outer loop. Cost reduced from "
+ << MemCheckCost << " to " << NewMemCheckCost << '\n');
+
+ MemCheckCost = NewMemCheckCost;
+ }
+ }
+
+ RTCheckCost += MemCheckCost;
+ }
+
if (SCEVCheckBlock || MemCheckBlock)
LLVM_DEBUG(dbgs() << "Total cost of runtime checks: " << RTCheckCost
<< "\n");
@@ -2144,8 +2194,8 @@ public:
BranchInst::Create(LoopVectorPreHeader, SCEVCheckBlock);
// Create new preheader for vector loop.
- if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
- PL->addBasicBlockToLoop(SCEVCheckBlock, *LI);
+ if (OuterLoop)
+ OuterLoop->addBasicBlockToLoop(SCEVCheckBlock, *LI);
SCEVCheckBlock->getTerminator()->eraseFromParent();
SCEVCheckBlock->moveBefore(LoopVectorPreHeader);
@@ -2179,8 +2229,8 @@ public:
DT->changeImmediateDominator(LoopVectorPreHeader, MemCheckBlock);
MemCheckBlock->moveBefore(LoopVectorPreHeader);
- if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
- PL->addBasicBlockToLoop(MemCheckBlock, *LI);
+ if (OuterLoop)
+ OuterLoop->addBasicBlockToLoop(MemCheckBlock, *LI);
BranchInst &BI =
*BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheckCond);
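
Editorial note: the cost-model change earlier in this file divides the memory-check cost by the best known trip count of the enclosing outer loop when the check condition is outer-loop invariant (and thus expected to be hoisted), clamping the result to at least 1. A standalone sketch of that arithmetic, with the function name and fallback handling chosen here for illustration:

#include <algorithm>
#include <cstdint>

// Amortize a hoistable runtime-check cost over the outer loop's trip count.
// BestTripCount falls back to 2 when neither an exact nor a
// profile-estimated trip count is available.
static int64_t amortizedMemCheckCost(int64_t MemCheckCost,
                                     unsigned BestTripCount) {
  if (BestTripCount == 0)
    BestTripCount = 2;
  return std::max<int64_t>(MemCheckCost / BestTripCount, 1);
}
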
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bbeb5da2cfec..ae2fc522ba40 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -597,13 +597,15 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
for (const auto &I : enumerate(operands())) {
// Some intrinsics have a scalar argument - don't replace it with a
// vector.
- // Some vectorized function variants may also take a scalar argument,
- // e.g. linear parameters for pointers.
Value *Arg;
- if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) ||
- (UseIntrinsic &&
- isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())))
+ if (UseIntrinsic &&
+ isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
Arg = State.get(I.value(), VPIteration(0, 0));
+ // Some vectorized function variants may also take a scalar argument,
+ // e.g. linear parameters for pointers. This needs to be the scalar value
+ // from the start of the respective part when interleaving.
+ else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
+ Arg = State.get(I.value(), VPIteration(Part, 0));
else
Arg = State.get(I.value(), Part);
if (UseIntrinsic &&
diff --git a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 455183987b7b..50156d34528c 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -57,7 +57,8 @@ class MatcherTableEmitter {
// We de-duplicate the predicates by code string, and use this map to track
// all the patterns with "identical" predicates.
- StringMap<TinyPtrVector<TreePattern *>> NodePredicatesByCodeToRun;
+ MapVector<std::string, TinyPtrVector<TreePattern *>, StringMap<unsigned>>
+ NodePredicatesByCodeToRun;
std::vector<std::string> PatternPredicates;
diff --git a/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp b/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp
index d3a73e02cd91..0b9b6389fe38 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.cpp
@@ -59,6 +59,30 @@ void PredicateExpander::expandCheckImmOperandSimple(raw_ostream &OS,
OS << ")";
}
+void PredicateExpander::expandCheckImmOperandLT(raw_ostream &OS, int OpIndex,
+ int ImmVal,
+ StringRef FunctionMapper) {
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
+ OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+ << ").getImm()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+ OS << (shouldNegate() ? " >= " : " < ") << ImmVal;
+}
+
+void PredicateExpander::expandCheckImmOperandGT(raw_ostream &OS, int OpIndex,
+ int ImmVal,
+ StringRef FunctionMapper) {
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
+ OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+ << ").getImm()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+ OS << (shouldNegate() ? " <= " : " > ") << ImmVal;
+}
+
void PredicateExpander::expandCheckRegOperand(raw_ostream &OS, int OpIndex,
const Record *Reg,
StringRef FunctionMapper) {
@@ -352,6 +376,16 @@ void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) {
Rec->getValueAsString("ImmVal"),
Rec->getValueAsString("FunctionMapper"));
+ if (Rec->isSubClassOf("CheckImmOperandLT"))
+ return expandCheckImmOperandLT(OS, Rec->getValueAsInt("OpIndex"),
+ Rec->getValueAsInt("ImmVal"),
+ Rec->getValueAsString("FunctionMapper"));
+
+ if (Rec->isSubClassOf("CheckImmOperandGT"))
+ return expandCheckImmOperandGT(OS, Rec->getValueAsInt("OpIndex"),
+ Rec->getValueAsInt("ImmVal"),
+ Rec->getValueAsString("FunctionMapper"));
+
if (Rec->isSubClassOf("CheckImmOperandSimple"))
return expandCheckImmOperandSimple(OS, Rec->getValueAsInt("OpIndex"),
Rec->getValueAsString("FunctionMapper"));
diff --git a/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h b/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h
index cfb0a3d51e67..a0dc63023978 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/PredicateExpander.h
@@ -61,6 +61,10 @@ public:
StringRef FunctionMapperer);
void expandCheckImmOperandSimple(raw_ostream &OS, int OpIndex,
StringRef FunctionMapper);
+ void expandCheckImmOperandLT(raw_ostream &OS, int OpIndex, int ImmVal,
+ StringRef FunctionMapper);
+ void expandCheckImmOperandGT(raw_ostream &OS, int OpIndex, int ImmVal,
+ StringRef FunctionMapper);
void expandCheckRegOperand(raw_ostream &OS, int OpIndex, const Record *Reg,
StringRef FunctionMapper);
void expandCheckRegOperandSimple(raw_ostream &OS, int OpIndex,
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 8a860d0945bb..7ea02ecba324 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -14,6 +14,7 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/X86FoldTablesUtils.h"
#include "llvm/TableGen/Record.h"
@@ -80,6 +81,7 @@ class X86FoldTablesEmitter {
bool FoldStore = false;
enum BcastType {
BCAST_NONE,
+ BCAST_W,
BCAST_D,
BCAST_Q,
BCAST_SS,
@@ -114,6 +116,9 @@ class X86FoldTablesEmitter {
switch (BroadcastKind) {
case BCAST_NONE:
break;
+ case BCAST_W:
+ Attrs += "TB_BCAST_W|";
+ break;
case BCAST_D:
Attrs += "TB_BCAST_D|";
break;
@@ -529,45 +534,22 @@ void X86FoldTablesEmitter::addBroadcastEntry(
assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");
X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
- Record *RegRec = RegInst->TheDef;
- StringRef RegInstName = RegRec->getName();
- StringRef MemInstName = MemInst->TheDef->getName();
- Record *Domain = RegRec->getValueAsDef("ExeDomain");
- bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt";
- if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") ||
- RegInstName.contains("Dr") || RegInstName.contains("I32")) &&
- IsSSEPackedInt) {
- assert((MemInstName.contains("DZ") || RegInstName.contains("DWZ") ||
- MemInstName.contains("Dr") || MemInstName.contains("I32")) &&
- "Unmatched names for broadcast");
- Result.BroadcastKind = X86FoldTableEntry::BCAST_D;
- } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") ||
- RegInstName.contains("Qr") || RegInstName.contains("I64")) &&
- IsSSEPackedInt) {
- assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") ||
- MemInstName.contains("Qr") || MemInstName.contains("I64")) &&
- "Unmatched names for broadcast");
- Result.BroadcastKind = X86FoldTableEntry::BCAST_Q;
- } else if ((RegInstName.contains("PS") || RegInstName.contains("F32") ||
- RegInstName.contains("CPH")) &&
- !RegInstName.contains("PH2PS")) {
- assert((MemInstName.contains("PS") || MemInstName.contains("F32") ||
- MemInstName.contains("CPH")) &&
- "Unmatched names for broadcast");
- Result.BroadcastKind = X86FoldTableEntry::BCAST_SS;
- } else if ((RegInstName.contains("PD") || RegInstName.contains("F64")) &&
- !RegInstName.contains("PH2PD")) {
- assert((MemInstName.contains("PD") || MemInstName.contains("F64")) &&
- "Unmatched names for broadcast");
- Result.BroadcastKind = X86FoldTableEntry::BCAST_SD;
- } else if (RegInstName.contains("PH")) {
- assert(MemInstName.contains("PH") && "Unmatched names for broadcast");
- Result.BroadcastKind = X86FoldTableEntry::BCAST_SH;
- } else {
- errs() << RegInstName << ", " << MemInstName << "\n";
- llvm_unreachable("Name is not canoicalized for broadcast or "
- "ExeDomain is incorrect");
+ DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList");
+ for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) {
+ Result.BroadcastKind =
+ StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString())
+ .Case("i16mem", X86FoldTableEntry::BCAST_W)
+ .Case("i32mem", X86FoldTableEntry::BCAST_D)
+ .Case("i64mem", X86FoldTableEntry::BCAST_Q)
+ .Case("f16mem", X86FoldTableEntry::BCAST_SH)
+ .Case("f32mem", X86FoldTableEntry::BCAST_SS)
+ .Case("f64mem", X86FoldTableEntry::BCAST_SD)
+ .Default(X86FoldTableEntry::BCAST_NONE);
+ if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE)
+ break;
}
+ assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE &&
+ "Unknown memory operand for broadcast");
Table[RegInst] = Result;
}