diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:11:55 +0000 |
commit | 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch) | |
tree | 1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/libcxx/include/__algorithm/pstl_backends | |
parent | 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff) | |
parent | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff) |
Diffstat (limited to 'contrib/llvm-project/libcxx/include/__algorithm/pstl_backends')
12 files changed, 454 insertions, 285 deletions
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backend.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backend.h index e54f331b9430..6980ded189ea 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backend.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backend.h @@ -15,10 +15,11 @@ // _Functor takes a subrange for [__first, __last) that should be executed in serial template <class _RandomAccessIterator, class _Functor> - void __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func); + optional<__empty> __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func); template <class _Iterator, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduction> - _Tp __parallel_transform_reduce(_Iterator __first, _Iterator __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduction); + optional<_Tp> + __parallel_transform_reduce(_Iterator __first, _Iterator __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduction); // Cancel the execution of other jobs - they aren't needed anymore void __cancel_execution(); @@ -28,7 +29,7 @@ class _RandomAccessIterator3, class _Compare, class _LeafMerge> - void __parallel_merge( + optional<void> __parallel_merge( _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, @@ -44,6 +45,14 @@ _LeafSort __leaf_sort); TODO: Document the parallel backend + +Exception handling +================== + +CPU backends are expected to report errors (i.e. failure to allocate) by returning a disengaged `optional` from their +implementation. Exceptions shouldn't be used to report an internal failure-to-allocate, since all exceptions are turned +into a program termination at the front-end level. When a backend returns a disengaged `optional` to the frontend, the +frontend will turn that into a call to `std::__throw_bad_alloc();` to report the internal failure to the user. */ #include <__algorithm/pstl_backends/cpu_backends/any_of.h> diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h index 8fe26797bf15..13dff80086e7 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h @@ -16,26 +16,32 @@ #include <__atomic/memory_order.h> #include <__config> #include <__functional/operations.h> -#include <__iterator/iterator_traits.h> +#include <__iterator/concepts.h> #include <__type_traits/is_execution_policy.h> +#include <__utility/move.h> #include <__utility/pair.h> -#include <__utility/terminate_on_exception.h> #include <cstdint> +#include <optional> #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Index, class _Brick> -_LIBCPP_HIDE_FROM_ABI bool __parallel_or(_Index __first, _Index __last, _Brick __f) { +_LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) { std::atomic<bool> __found(false); - __par_backend::__parallel_for(__first, __last, [__f, &__found](_Index __i, _Index __j) { + auto __ret = __par_backend::__parallel_for(__first, __last, [__f, &__found](_Index __i, _Index __j) { if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) { __found.store(true, std::memory_order_relaxed); __par_backend::__cancel_execution(); } }); - return __found; + if (!__ret) + return nullopt; + return static_cast<bool>(__found); } // TODO: check whether __simd_first() can be used here @@ -64,19 +70,19 @@ _LIBCPP_HIDE_FROM_ABI bool __simd_or(_Index __first, _DifferenceType __n, _Pred } template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate> -_LIBCPP_HIDE_FROM_ABI bool +_LIBCPP_HIDE_FROM_ABI optional<bool> __pstl_any_of(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { - return std::__terminate_on_exception([&] { - return std::__parallel_or( - __first, __last, [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - return std::__pstl_any_of<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, __brick_first, __brick_last, __pred); - }); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { + return std::__parallel_or( + __first, __last, [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { + auto __res = std::__pstl_any_of<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, __brick_first, __brick_last, __pred); + _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); + return *std::move(__res); + }); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { return std::__simd_or(__first, __last - __first, __pred); } else { return std::any_of(__first, __last, __pred); @@ -85,6 +91,8 @@ __pstl_any_of(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __la _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_ANY_OF_H diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h index 5e5e0a23bf89..64babe9fd2bd 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h @@ -12,9 +12,10 @@ #include <__algorithm/fill.h> #include <__algorithm/pstl_backends/cpu_backends/backend.h> #include <__config> -#include <__iterator/iterator_traits.h> +#include <__iterator/concepts.h> #include <__type_traits/is_execution_policy.h> -#include <__utility/terminate_on_exception.h> +#include <__utility/empty.h> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -34,22 +35,23 @@ _LIBCPP_HIDE_FROM_ABI _Index __simd_fill_n(_Index __first, _DifferenceType __n, } template <class _ExecutionPolicy, class _ForwardIterator, class _Tp> -_LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI optional<__empty> __pstl_fill(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { - std::__terminate_on_exception([&] { - __par_backend::__parallel_for( - __first, __last, [&__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - std::__pstl_fill<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, __brick_first, __brick_last, __value); - }); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { + return __par_backend::__parallel_for( + __first, __last, [&__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { + [[maybe_unused]] auto __res = std::__pstl_fill<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, __brick_first, __brick_last, __value); + _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); + }); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { std::__simd_fill_n(__first, __last - __first, __value); + return __empty{}; } else { std::fill(__first, __last, __value); + return __empty{}; } } diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h index 3fa49549e64e..170470e4fb7e 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h @@ -14,11 +14,13 @@ #include <__atomic/atomic.h> #include <__config> #include <__functional/operations.h> +#include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__type_traits/is_execution_policy.h> +#include <__utility/move.h> #include <__utility/pair.h> -#include <__utility/terminate_on_exception.h> #include <cstddef> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -26,31 +28,37 @@ #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Index, class _Brick, class _Compare> -_LIBCPP_HIDE_FROM_ABI _Index +_LIBCPP_HIDE_FROM_ABI optional<_Index> __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) { typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; _DifferenceType __initial_dist = __b_first ? __n : -1; std::atomic<_DifferenceType> __extremum(__initial_dist); // TODO: find out what is better here: parallel_for or parallel_reduce - __par_backend::__parallel_for(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { - // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of - // why using a shared variable scales fairly well in this situation. - if (__comp(__i - __first, __extremum)) { - _Index __res = __f(__i, __j); - // If not '__last' returned then we found what we want so put this to extremum - if (__res != __j) { - const _DifferenceType __k = __res - __first; - for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) { - __extremum.compare_exchange_weak(__old, __k); + auto __res = + __par_backend::__parallel_for(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { + // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of + // why using a shared variable scales fairly well in this situation. + if (__comp(__i - __first, __extremum)) { + _Index __result = __f(__i, __j); + // If not '__last' returned then we found what we want so put this to extremum + if (__result != __j) { + const _DifferenceType __k = __result - __first; + for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) { + __extremum.compare_exchange_weak(__old, __k); + } + } } - } - } - }); - return __extremum != __initial_dist ? __first + __extremum : __last; + }); + if (!__res) + return nullopt; + return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last; } template <class _Index, class _DifferenceType, class _Compare> @@ -90,23 +98,23 @@ __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Co } template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate> -_LIBCPP_HIDE_FROM_ABI _ForwardIterator +_LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator> __pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { - return std::__terminate_on_exception([&] { - return std::__parallel_find( - __first, - __last, - [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - return std::__pstl_find_if<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, __brick_first, __brick_last, __pred); - }, - less<>{}, - true); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { + return std::__parallel_find( + __first, + __last, + [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { + auto __res = std::__pstl_find_if<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, __brick_first, __brick_last, __pred); + _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); + return *std::move(__res); + }, + less<>{}, + true); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { using __diff_t = __iter_diff_t<_ForwardIterator>; return std::__simd_first(__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) { return __pred(__iter[__i]); @@ -118,6 +126,8 @@ __pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __l _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h index 36d0ac238e24..81fd4526b8db 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h @@ -12,9 +12,10 @@ #include <__algorithm/for_each.h> #include <__algorithm/pstl_backends/cpu_backends/backend.h> #include <__config> -#include <__iterator/iterator_traits.h> +#include <__iterator/concepts.h> #include <__type_traits/is_execution_policy.h> -#include <__utility/terminate_on_exception.h> +#include <__utility/empty.h> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -25,7 +26,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Iterator, class _DifferenceType, class _Function> -_LIBCPP_HIDE_FROM_ABI _Iterator __simd_walk_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept { +_LIBCPP_HIDE_FROM_ABI _Iterator __simd_walk(_Iterator __first, _DifferenceType __n, _Function __f) noexcept { _PSTL_PRAGMA_SIMD for (_DifferenceType __i = 0; __i < __n; ++__i) __f(__first[__i]); @@ -34,22 +35,23 @@ _LIBCPP_HIDE_FROM_ABI _Iterator __simd_walk_1(_Iterator __first, _DifferenceType } template <class _ExecutionPolicy, class _ForwardIterator, class _Functor> -_LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI optional<__empty> __pstl_for_each(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Functor __func) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { - std::__terminate_on_exception([&] { - std::__par_backend::__parallel_for( - __first, __last, [__func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - std::__pstl_for_each<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, __brick_first, __brick_last, __func); - }); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { + return std::__par_backend::__parallel_for( + __first, __last, [__func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { + [[maybe_unused]] auto __res = std::__pstl_for_each<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, __brick_first, __brick_last, __func); + _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); + }); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { - std::__simd_walk_1(__first, __last - __first, __func); + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { + std::__simd_walk(__first, __last - __first, __func); + return __empty{}; } else { std::for_each(__first, __last, __func); + return __empty{}; } } diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h index bab6a3639bd0..e885e7f22517 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h @@ -9,8 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H +#include <__algorithm/inplace_merge.h> #include <__algorithm/lower_bound.h> #include <__algorithm/max.h> +#include <__algorithm/merge.h> #include <__algorithm/upper_bound.h> #include <__atomic/atomic.h> #include <__config> @@ -21,12 +23,13 @@ #include <__memory/construct_at.h> #include <__memory/unique_ptr.h> #include <__numeric/reduce.h> +#include <__utility/empty.h> #include <__utility/exception_guard.h> #include <__utility/move.h> #include <__utility/pair.h> -#include <__utility/terminate_on_exception.h> #include <cstddef> #include <new> +#include <optional> _LIBCPP_PUSH_MACROS #include <__undef_macros> @@ -57,13 +60,11 @@ struct __chunk_partitions { ptrdiff_t __first_chunk_size_; }; -[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size); +[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept; template <class _RandomAccessIterator, class _Functor> -_LIBCPP_HIDE_FROM_ABI void -__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) { - auto __partitions = __libdispatch::__partition_chunks(__last - __first); - +_LIBCPP_HIDE_FROM_ABI optional<__empty> +__dispatch_parallel_for(__chunk_partitions __partitions, _RandomAccessIterator __first, _Functor __func) { // Perform the chunked execution. __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) { auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_; @@ -73,6 +74,15 @@ __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fun : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_); __func(__first + __index, __first + __index + __this_chunk_size); }); + + return __empty{}; +} + +template <class _RandomAccessIterator, class _Functor> +_LIBCPP_HIDE_FROM_ABI optional<__empty> +__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) { + return __libdispatch::__dispatch_parallel_for( + __libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func)); } template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut> @@ -90,23 +100,23 @@ template <typename _RandomAccessIterator1, typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge> -_LIBCPP_HIDE_FROM_ABI void __parallel_merge( +_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_merge( _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp, - _LeafMerge __leaf_merge) { + _LeafMerge __leaf_merge) noexcept { __chunk_partitions __partitions = __libdispatch::__partition_chunks(std::max<ptrdiff_t>(__last1 - __first1, __last2 - __first2)); if (__partitions.__chunk_count_ == 0) - return; + return __empty{}; if (__partitions.__chunk_count_ == 1) { __leaf_merge(__first1, __last1, __first2, __last2, __result, __comp); - return; + return __empty{}; } using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>; @@ -117,61 +127,76 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_merge( std::destroy_n(__ptr, __n_ranges); std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges); }; - unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges( - std::allocator<__merge_range_t>().allocate(__n_ranges), __destroy); - // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case - std::__terminate_on_exception([&] { - __merge_range_t* __r = __ranges.get(); - std::__construct_at(__r++, __first1, __first2, __result); - - bool __iterate_first_range = __last1 - __first1 > __last2 - __first2; - - auto __compute_chunk = [&](size_t __chunk_size) -> __merge_range_t { - auto [__mid1, __mid2] = [&] { - if (__iterate_first_range) { - auto __m1 = __first1 + __chunk_size; - auto __m2 = std::lower_bound(__first2, __last2, __m1[-1], __comp); - return std::make_pair(__m1, __m2); - } else { - auto __m2 = __first2 + __chunk_size; - auto __m1 = std::lower_bound(__first1, __last1, __m2[-1], __comp); - return std::make_pair(__m1, __m2); + unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges( + [&]() -> __merge_range_t* { +# ifndef _LIBCPP_HAS_NO_EXCEPTIONS + try { +# endif + return std::allocator<__merge_range_t>().allocate(__n_ranges); +# ifndef _LIBCPP_HAS_NO_EXCEPTIONS + } catch (const std::bad_alloc&) { + return nullptr; } - }(); +# endif + }(), + __destroy); - __result += (__mid1 - __first1) + (__mid2 - __first2); - __first1 = __mid1; - __first2 = __mid2; - return {std::move(__mid1), std::move(__mid2), __result}; - }; + if (!__ranges) + return nullopt; + + // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case + __merge_range_t* __r = __ranges.get(); + std::__construct_at(__r++, __first1, __first2, __result); + + bool __iterate_first_range = __last1 - __first1 > __last2 - __first2; + + auto __compute_chunk = [&](size_t __chunk_size) -> __merge_range_t { + auto [__mid1, __mid2] = [&] { + if (__iterate_first_range) { + auto __m1 = __first1 + __chunk_size; + auto __m2 = std::lower_bound(__first2, __last2, __m1[-1], __comp); + return std::make_pair(__m1, __m2); + } else { + auto __m2 = __first2 + __chunk_size; + auto __m1 = std::lower_bound(__first1, __last1, __m2[-1], __comp); + return std::make_pair(__m1, __m2); + } + }(); + + __result += (__mid1 - __first1) + (__mid2 - __first2); + __first1 = __mid1; + __first2 = __mid2; + return {std::move(__mid1), std::move(__mid2), __result}; + }; - // handle first chunk - std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_)); - - // handle 2 -> N - 1 chunks - for (ptrdiff_t __i = 0; __i != __partitions.__chunk_count_ - 2; ++__i) - std::__construct_at(__r++, __compute_chunk(__partitions.__chunk_size_)); - - // handle last chunk - std::__construct_at(__r, __last1, __last2, __result); - - __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __index) { - auto __first_iters = __ranges[__index]; - auto __last_iters = __ranges[__index + 1]; - __leaf_merge( - __first_iters.__mid1_, - __last_iters.__mid1_, - __first_iters.__mid2_, - __last_iters.__mid2_, - __first_iters.__result_, - __comp); - }); + // handle first chunk + std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_)); + + // handle 2 -> N - 1 chunks + for (ptrdiff_t __i = 0; __i != __partitions.__chunk_count_ - 2; ++__i) + std::__construct_at(__r++, __compute_chunk(__partitions.__chunk_size_)); + + // handle last chunk + std::__construct_at(__r, __last1, __last2, __result); + + __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __index) { + auto __first_iters = __ranges[__index]; + auto __last_iters = __ranges[__index + 1]; + __leaf_merge( + __first_iters.__mid1_, + __last_iters.__mid1_, + __first_iters.__mid2_, + __last_iters.__mid2_, + __first_iters.__result_, + __comp); }); + + return __empty{}; } template <class _RandomAccessIterator, class _Transform, class _Value, class _Combiner, class _Reduction> -_LIBCPP_HIDE_FROM_ABI _Value __parallel_transform_reduce( +_LIBCPP_HIDE_FROM_ABI optional<_Value> __parallel_transform_reduce( _RandomAccessIterator __first, _RandomAccessIterator __last, _Transform __transform, @@ -211,20 +236,101 @@ _LIBCPP_HIDE_FROM_ABI _Value __parallel_transform_reduce( } }); - return std::__terminate_on_exception([&] { - return std::reduce( - std::make_move_iterator(__values.get()), - std::make_move_iterator(__values.get() + __partitions.__chunk_count_), - std::move(__init), - __combiner); - }); + return std::reduce( + std::make_move_iterator(__values.get()), + std::make_move_iterator(__values.get() + __partitions.__chunk_count_), + std::move(__init), + __combiner); } -// TODO: parallelize this template <class _RandomAccessIterator, class _Comp, class _LeafSort> -_LIBCPP_HIDE_FROM_ABI void __parallel_stable_sort( +_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_stable_sort( _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) { - __leaf_sort(__first, __last, __comp); + const auto __size = __last - __first; + auto __partitions = __libdispatch::__partition_chunks(__size); + + if (__partitions.__chunk_count_ == 0) + return __empty{}; + + if (__partitions.__chunk_count_ == 1) { + __leaf_sort(__first, __last, __comp); + return __empty{}; + } + + using _Value = __iter_value_type<_RandomAccessIterator>; + + auto __destroy = [__size](_Value* __ptr) { + std::destroy_n(__ptr, __size); + std::allocator<_Value>().deallocate(__ptr, __size); + }; + + // TODO: use __uninitialized_buffer + unique_ptr<_Value[], decltype(__destroy)> __values(std::allocator<_Value>().allocate(__size), __destroy); + + // Initialize all elements to a moved-from state + // TODO: Don't do this - this can be done in the first merge - see https://llvm.org/PR63928 + std::__construct_at(__values.get(), std::move(*__first)); + for (__iter_diff_t<_RandomAccessIterator> __i = 1; __i != __size; ++__i) { + std::__construct_at(__values.get() + __i, std::move(__values.get()[__i - 1])); + } + *__first = std::move(__values.get()[__size - 1]); + + __libdispatch::__dispatch_parallel_for( + __partitions, + __first, + [&__leaf_sort, &__comp](_RandomAccessIterator __chunk_first, _RandomAccessIterator __chunk_last) { + __leaf_sort(std::move(__chunk_first), std::move(__chunk_last), __comp); + }); + + bool __objects_are_in_buffer = false; + do { + const auto __old_chunk_size = __partitions.__chunk_size_; + if (__partitions.__chunk_count_ % 2 == 1) { + auto __inplace_merge_chunks = [&__comp, &__partitions](auto __first_chunk_begin) { + std::inplace_merge( + __first_chunk_begin, + __first_chunk_begin + __partitions.__first_chunk_size_, + __first_chunk_begin + __partitions.__first_chunk_size_ + __partitions.__chunk_size_, + __comp); + }; + if (__objects_are_in_buffer) + __inplace_merge_chunks(__values.get()); + else + __inplace_merge_chunks(__first); + __partitions.__first_chunk_size_ += 2 * __partitions.__chunk_size_; + } else { + __partitions.__first_chunk_size_ += __partitions.__chunk_size_; + } + + __partitions.__chunk_size_ *= 2; + __partitions.__chunk_count_ /= 2; + + auto __merge_chunks = [__partitions, __old_chunk_size, &__comp](auto __from_first, auto __to_first) { + __libdispatch::__dispatch_parallel_for( + __partitions, + __from_first, + [__old_chunk_size, &__from_first, &__to_first, &__comp](auto __chunk_first, auto __chunk_last) { + std::merge(std::make_move_iterator(__chunk_first), + std::make_move_iterator(__chunk_last - __old_chunk_size), + std::make_move_iterator(__chunk_last - __old_chunk_size), + std::make_move_iterator(__chunk_last), + __to_first + (__chunk_first - __from_first), + __comp); + }); + }; + + if (__objects_are_in_buffer) + __merge_chunks(__values.get(), __first); + else + __merge_chunks(__first, __values.get()); + __objects_are_in_buffer = !__objects_are_in_buffer; + } while (__partitions.__chunk_count_ > 1); + + if (__objects_are_in_buffer) { + std::move(__values.get(), __values.get() + __size, __first); + } + + return __empty{}; } _LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {} diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/merge.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/merge.h index d5be1e302d35..b0db70f58b2e 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/merge.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/merge.h @@ -12,10 +12,10 @@ #include <__algorithm/merge.h> #include <__algorithm/pstl_backends/cpu_backends/backend.h> #include <__config> -#include <__iterator/iterator_traits.h> +#include <__iterator/concepts.h> #include <__type_traits/is_execution_policy.h> #include <__utility/move.h> -#include <__utility/terminate_on_exception.h> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -23,6 +23,9 @@ #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _ExecutionPolicy, @@ -30,7 +33,7 @@ template <class _ExecutionPolicy, class _ForwardIterator2, class _ForwardOutIterator, class _Comp> -_LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_merge( +_LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_merge( __cpu_backend_tag, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -39,34 +42,35 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_merge( _ForwardOutIterator __result, _Comp __comp) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator1>::value && - __has_random_access_iterator_category<_ForwardIterator2>::value && - __has_random_access_iterator_category<_ForwardOutIterator>::value) { - return std::__terminate_on_exception([&] { - __par_backend::__parallel_merge( - __first1, - __last1, - __first2, - __last2, - __result, - __comp, - [](_ForwardIterator1 __g_first1, - _ForwardIterator1 __g_last1, - _ForwardIterator2 __g_first2, - _ForwardIterator2 __g_last2, - _ForwardOutIterator __g_result, - _Comp __g_comp) { - return std::__pstl_merge<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, - std::move(__g_first1), - std::move(__g_last1), - std::move(__g_first2), - std::move(__g_last2), - std::move(__g_result), - std::move(__g_comp)); - }); - return __result + (__last1 - __first1) + (__last2 - __first2); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && + __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value && + __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) { + auto __res = __par_backend::__parallel_merge( + __first1, + __last1, + __first2, + __last2, + __result, + __comp, + [](_ForwardIterator1 __g_first1, + _ForwardIterator1 __g_last1, + _ForwardIterator2 __g_first2, + _ForwardIterator2 __g_last2, + _ForwardOutIterator __g_result, + _Comp __g_comp) { + [[maybe_unused]] auto __g_res = std::__pstl_merge<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, + std::move(__g_first1), + std::move(__g_last1), + std::move(__g_first2), + std::move(__g_last2), + std::move(__g_result), + std::move(__g_comp)); + _LIBCPP_ASSERT_INTERNAL(__g_res, "unsed/sed should never try to allocate!"); + }); + if (!__res) + return nullopt; + return __result + (__last1 - __first1) + (__last2 - __first2); } else { return std::merge(__first1, __last1, __first2, __last2, __result, __comp); } @@ -74,6 +78,8 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_merge( _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_MERGE_H diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h index f151c3b098f6..afcc7ffb2661 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h @@ -11,8 +11,10 @@ #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_SERIAL_H #include <__config> +#include <__utility/empty.h> #include <__utility/move.h> #include <cstddef> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,26 +22,32 @@ #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD namespace __par_backend { inline namespace __serial_cpu_backend { template <class _RandomAccessIterator, class _Fp> -_LIBCPP_HIDE_FROM_ABI void __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) { +_LIBCPP_HIDE_FROM_ABI optional<__empty> +__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) { __f(__first, __last); + return __empty{}; } template <class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce> -_LIBCPP_HIDE_FROM_ABI _Tp +_LIBCPP_HIDE_FROM_ABI optional<_Tp> __parallel_transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) { return __reduce(std::move(__first), std::move(__last), std::move(__init)); } template <class _RandomAccessIterator, class _Compare, class _LeafSort> -_LIBCPP_HIDE_FROM_ABI void __parallel_stable_sort( +_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_stable_sort( _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) { __leaf_sort(__first, __last, __comp); + return __empty{}; } _LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {} @@ -49,7 +57,7 @@ template <class _RandomAccessIterator1, class _RandomAccessIterator3, class _Compare, class _LeafMerge> -_LIBCPP_HIDE_FROM_ABI void __parallel_merge( +_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_merge( _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, @@ -58,6 +66,7 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_merge( _Compare __comp, _LeafMerge __leaf_merge) { __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); + return __empty{}; } // TODO: Complete this list @@ -67,6 +76,8 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_merge( _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && && _LIBCPP_STD_VER >= 17 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_SERIAL_H diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/stable_sort.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/stable_sort.h index 0a701443b3c4..34c423586c4b 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/stable_sort.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/stable_sort.h @@ -13,7 +13,8 @@ #include <__algorithm/stable_sort.h> #include <__config> #include <__type_traits/is_execution_policy.h> -#include <__utility/terminate_on_exception.h> +#include <__utility/empty.h> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -24,17 +25,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _ExecutionPolicy, class _RandomAccessIterator, class _Comp> -_LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI optional<__empty> __pstl_stable_sort(__cpu_backend_tag, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy>) { - std::__terminate_on_exception([&] { - __par_backend::__parallel_stable_sort( - __first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) { - std::stable_sort(__g_first, __g_last, __g_comp); - }); - }); + return __par_backend::__parallel_stable_sort( + __first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) { + std::stable_sort(__g_first, __g_last, __g_comp); + }); } else { std::stable_sort(__first, __last, __comp); + return __empty{}; } } diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/thread.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/thread.h index 30eb0ae3626d..eb11a961b760 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/thread.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/thread.h @@ -11,8 +11,10 @@ #include <__assert> #include <__config> +#include <__utility/empty.h> #include <__utility/move.h> #include <cstddef> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -32,20 +34,23 @@ namespace __par_backend { inline namespace __thread_cpu_backend { template <class _RandomAccessIterator, class _Fp> -_LIBCPP_HIDE_FROM_ABI void __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) { +_LIBCPP_HIDE_FROM_ABI optional<__empty> +__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) { __f(__first, __last); + return __empty{}; } template <class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce> -_LIBCPP_HIDE_FROM_ABI _Tp +_LIBCPP_HIDE_FROM_ABI optional<_Tp> __parallel_transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) { return __reduce(std::move(__first), std::move(__last), std::move(__init)); } template <class _RandomAccessIterator, class _Compare, class _LeafSort> -_LIBCPP_HIDE_FROM_ABI void __parallel_stable_sort( +_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_stable_sort( _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) { __leaf_sort(__first, __last, __comp); + return __empty{}; } _LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {} @@ -55,7 +60,7 @@ template <class _RandomAccessIterator1, class _RandomAccessIterator3, class _Compare, class _LeafMerge> -_LIBCPP_HIDE_FROM_ABI void __parallel_merge( +_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_merge( _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, @@ -64,6 +69,7 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_merge( _Compare __comp, _LeafMerge __leaf_merge) { __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); + return __empty{}; } } // namespace __thread_cpu_backend diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h index ef25ff0238da..fdf1a2e78dad 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h @@ -12,11 +12,12 @@ #include <__algorithm/pstl_backends/cpu_backends/backend.h> #include <__algorithm/transform.h> #include <__config> +#include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_execution_policy.h> #include <__type_traits/remove_cvref.h> -#include <__utility/terminate_on_exception.h> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -24,11 +25,14 @@ #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function> _LIBCPP_HIDE_FROM_ABI _Iterator2 -__simd_walk_2(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept { +__simd_walk(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept { _PSTL_PRAGMA_SIMD for (_DifferenceType __i = 0; __i < __n; ++__i) __f(__first1[__i], __first2[__i]); @@ -36,27 +40,27 @@ __simd_walk_2(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Fu } template <class _ExecutionPolicy, class _ForwardIterator, class _ForwardOutIterator, class _UnaryOperation> -_LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform( +_LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform( __cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _ForwardOutIterator __result, _UnaryOperation __op) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value && - __has_random_access_iterator_category<_ForwardOutIterator>::value) { - std::__terminate_on_exception([&] { - std::__par_backend::__parallel_for( - __first, __last, [__op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - return std::__pstl_transform<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, __brick_first, __brick_last, __result + (__brick_first - __first), __op); - }); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value && + __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) { + std::__par_backend::__parallel_for( + __first, __last, [__op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { + auto __res = std::__pstl_transform<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, __brick_first, __brick_last, __result + (__brick_first - __first), __op); + _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); + return *std::move(__res); + }); return __result + (__last - __first); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value && - __has_random_access_iterator_category<_ForwardOutIterator>::value) { - return std::__simd_walk_2( + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value && + __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) { + return std::__simd_walk( __first, __last - __first, __result, @@ -69,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform( } template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Iterator3, class _Function> -_LIBCPP_HIDE_FROM_ABI _Iterator3 __simd_walk_3( +_LIBCPP_HIDE_FROM_ABI _Iterator3 __simd_walk( _Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Iterator3 __first3, _Function __f) noexcept { _PSTL_PRAGMA_SIMD for (_DifferenceType __i = 0; __i < __n; ++__i) @@ -82,7 +86,7 @@ template <class _ExecutionPolicy, class _ForwardOutIterator, class _BinaryOperation, enable_if_t<is_execution_policy_v<__remove_cvref_t<_ExecutionPolicy>>, int> = 0> -_LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform( +_LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform( __cpu_backend_tag, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -90,29 +94,29 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform( _ForwardOutIterator __result, _BinaryOperation __op) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator1>::value && - __has_random_access_iterator_category<_ForwardIterator2>::value && - __has_random_access_iterator_category<_ForwardOutIterator>::value) { - std::__terminate_on_exception([&] { - std::__par_backend::__parallel_for( - __first1, - __last1, - [__op, __first1, __first2, __result](_ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last) { - return std::__pstl_transform<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, - __brick_first, - __brick_last, - __first2 + (__brick_first - __first1), - __result + (__brick_first - __first1), - __op); - }); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && + __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value && + __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) { + auto __res = std::__par_backend::__parallel_for( + __first1, + __last1, + [__op, __first1, __first2, __result](_ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last) { + return std::__pstl_transform<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, + __brick_first, + __brick_last, + __first2 + (__brick_first - __first1), + __result + (__brick_first - __first1), + __op); + }); + if (!__res) + return nullopt; return __result + (__last1 - __first1); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator1>::value && - __has_random_access_iterator_category<_ForwardIterator2>::value && - __has_random_access_iterator_category<_ForwardOutIterator>::value) { - return std::__simd_walk_3( + __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && + __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value && + __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) { + return std::__simd_walk( __first1, __last1 - __first1, __first2, @@ -127,6 +131,8 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator __pstl_transform( _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_H diff --git a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h index c51c312d938a..ab2e3172b8b6 100644 --- a/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h +++ b/contrib/llvm-project/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h @@ -11,14 +11,15 @@ #include <__algorithm/pstl_backends/cpu_backends/backend.h> #include <__config> +#include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__numeric/transform_reduce.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_execution_policy.h> #include <__type_traits/operation_traits.h> #include <__utility/move.h> -#include <__utility/terminate_on_exception.h> #include <new> +#include <optional> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -28,12 +29,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template < - typename _DifferenceType, - typename _Tp, - typename _BinaryOperation, - typename _UnaryOperation, - __enable_if_t<__is_trivial_plus_operation<_BinaryOperation, _Tp, _Tp>::value && is_arithmetic_v<_Tp>, int> = 0> +template <typename _DifferenceType, + typename _Tp, + typename _BinaryOperation, + typename _UnaryOperation, + typename _UnaryResult = invoke_result_t<_UnaryOperation, _DifferenceType>, + __enable_if_t<__desugars_to<__plus_tag, _BinaryOperation, _Tp, _UnaryResult>::value && is_arithmetic_v<_Tp> && + is_arithmetic_v<_UnaryResult>, + int> = 0> _LIBCPP_HIDE_FROM_ABI _Tp __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept { _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init) @@ -42,12 +45,14 @@ __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _Unar return __init; } -template < - typename _Size, - typename _Tp, - typename _BinaryOperation, - typename _UnaryOperation, - __enable_if_t<!(__is_trivial_plus_operation<_BinaryOperation, _Tp, _Tp>::value && is_arithmetic_v<_Tp>), int> = 0> +template <typename _Size, + typename _Tp, + typename _BinaryOperation, + typename _UnaryOperation, + typename _UnaryResult = invoke_result_t<_UnaryOperation, _Size>, + __enable_if_t<!(__desugars_to<__plus_tag, _BinaryOperation, _Tp, _UnaryResult>::value && + is_arithmetic_v<_Tp> && is_arithmetic_v<_UnaryResult>), + int> = 0> _LIBCPP_HIDE_FROM_ABI _Tp __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept { const _Size __block_size = __lane_size / sizeof(_Tp); @@ -97,7 +102,7 @@ template <class _ExecutionPolicy, class _Tp, class _BinaryOperation1, class _BinaryOperation2> -_LIBCPP_HIDE_FROM_ABI _Tp __pstl_transform_reduce( +_LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce( __cpu_backend_tag, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -106,32 +111,30 @@ _LIBCPP_HIDE_FROM_ABI _Tp __pstl_transform_reduce( _BinaryOperation1 __reduce, _BinaryOperation2 __transform) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator1>::value && - __has_random_access_iterator_category<_ForwardIterator2>::value) { - return std::__terminate_on_exception([&] { - return __par_backend::__parallel_transform_reduce( - __first1, - std::move(__last1), - [__first1, __first2, __transform](_ForwardIterator1 __iter) { - return __transform(*__iter, *(__first2 + (__iter - __first1))); - }, - std::move(__init), - std::move(__reduce), - [__first1, __first2, __reduce, __transform]( - _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) { - return std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, - __brick_first, - std::move(__brick_last), - __first2 + (__brick_first - __first1), - std::move(__brick_init), - std::move(__reduce), - std::move(__transform)); - }); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && + __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { + return __par_backend::__parallel_transform_reduce( + __first1, + std::move(__last1), + [__first1, __first2, __transform](_ForwardIterator1 __iter) { + return __transform(*__iter, *(__first2 + (__iter - __first1))); + }, + std::move(__init), + std::move(__reduce), + [__first1, __first2, __reduce, __transform]( + _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) { + return *std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, + __brick_first, + std::move(__brick_last), + __first2 + (__brick_first - __first1), + std::move(__brick_init), + std::move(__reduce), + std::move(__transform)); + }); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator1>::value && - __has_random_access_iterator_category<_ForwardIterator2>::value) { + __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && + __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { return std::__simd_transform_reduce( __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) { return __transform(__first1[__i], __first2[__i]); @@ -148,7 +151,7 @@ _LIBCPP_HIDE_FROM_ABI _Tp __pstl_transform_reduce( } template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation> -_LIBCPP_HIDE_FROM_ABI _Tp __pstl_transform_reduce( +_LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce( __cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, @@ -156,26 +159,26 @@ _LIBCPP_HIDE_FROM_ABI _Tp __pstl_transform_reduce( _BinaryOperation __reduce, _UnaryOperation __transform) { if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { - return std::__terminate_on_exception([&] { - return __par_backend::__parallel_transform_reduce( - std::move(__first), - std::move(__last), - [__transform](_ForwardIterator __iter) { return __transform(*__iter); }, - std::move(__init), - __reduce, - [__transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) { - return std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>( - __cpu_backend_tag{}, - std::move(__brick_first), - std::move(__brick_last), - std::move(__brick_init), - std::move(__reduce), - std::move(__transform)); - }); - }); + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { + return __par_backend::__parallel_transform_reduce( + std::move(__first), + std::move(__last), + [__transform](_ForwardIterator __iter) { return __transform(*__iter); }, + std::move(__init), + __reduce, + [__transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) { + auto __res = std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, + std::move(__brick_first), + std::move(__brick_last), + std::move(__brick_init), + std::move(__reduce), + std::move(__transform)); + _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); + return *std::move(__res); + }); } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __has_random_access_iterator_category<_ForwardIterator>::value) { + __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { return std::__simd_transform_reduce( __last - __first, std::move(__init), |