path: root/libcxx/include/__algorithm/sort.h
author     Dimitry Andric <dim@FreeBSD.org>  2023-02-11 12:38:04 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2023-02-11 12:38:11 +0000
commit     e3b557809604d036af6e00c60f012c2025b59a5e (patch)
tree       8a11ba2269a3b669601e2fd41145b174008f4da8 /libcxx/include/__algorithm/sort.h
parent     08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff)
Diffstat (limited to 'libcxx/include/__algorithm/sort.h')
-rw-r--r--  libcxx/include/__algorithm/sort.h | 644
1 file changed, 470 insertions(+), 174 deletions(-)
diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
index 1ca2f1b81712..a7d2d55a06f8 100644
--- a/libcxx/include/__algorithm/sort.h
+++ b/libcxx/include/__algorithm/sort.h
@@ -11,19 +11,29 @@
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
+#include <__algorithm/iter_swap.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/min_element.h>
#include <__algorithm/partial_sort.h>
#include <__algorithm/unwrap_iter.h>
-#include <__bits>
+#include <__assert>
+#include <__bit/blsr.h>
+#include <__bit/countl.h>
+#include <__bit/countr.h>
#include <__config>
#include <__debug>
#include <__debug_utils/randomize_range.h>
#include <__functional/operations.h>
#include <__functional/ranges_operations.h>
#include <__iterator/iterator_traits.h>
+#include <__memory/destruct_n.h>
+#include <__memory/unique_ptr.h>
+#include <__type_traits/conditional.h>
+#include <__type_traits/is_arithmetic.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
#include <climits>
-#include <memory>
+#include <cstdint>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -43,7 +53,7 @@ struct _WrapAlgPolicy {
using _Comp = _CompT;
_Comp& __comp;
- _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
_WrapAlgPolicy(_Comp& __c) : __comp(__c) {}
};
@@ -62,7 +72,7 @@ struct _UnwrapAlgPolicy {
using _AlgPolicy = _ClassicAlgPolicy;
using _Comp = _CompT;
- _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 static
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static
_Comp __get_comp(_Comp __comp) { return __comp; }
};
@@ -73,14 +83,15 @@ struct _UnwrapAlgPolicy<_WrapAlgPolicy<_Ts...> > {
using _AlgPolicy = typename _Wrapped::_AlgPolicy;
using _Comp = typename _Wrapped::_Comp;
- _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 static
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static
_Comp __get_comp(_Wrapped& __w) { return __w.__comp; }
};
// stable, 2-3 compares, 0-2 swaps
template <class _AlgPolicy, class _Compare, class _ForwardIterator>
-_LIBCPP_CONSTEXPR_AFTER_CXX11 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z,
+_LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z,
_Compare __c) {
using _Ops = _IterOps<_AlgPolicy>;
@@ -118,10 +129,10 @@ _LIBCPP_CONSTEXPR_AFTER_CXX11 unsigned __sort3(_ForwardIterator __x, _ForwardIte
// stable, 3-6 compares, 0-5 swaps
template <class _AlgPolicy, class _Compare, class _ForwardIterator>
+_LIBCPP_HIDE_FROM_ABI
unsigned __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4,
_Compare __c) {
- using _Ops = _IterOps<_AlgPolicy>;
-
+ using _Ops = _IterOps<_AlgPolicy>;
unsigned __r = std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
if (__c(*__x4, *__x3)) {
_Ops::iter_swap(__x3, __x4);
@@ -171,12 +182,12 @@ _LIBCPP_HIDDEN unsigned __sort5(_ForwardIterator __x1, _ForwardIterator __x2, _F
}
template <class _AlgPolicy, class _Compare, class _ForwardIterator>
-_LIBCPP_HIDDEN unsigned __sort5_wrap_policy(
+_LIBCPP_HIDE_FROM_ABI unsigned __sort5_wrap_policy(
_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _ForwardIterator __x5,
_Compare __c) {
using _WrappedComp = typename _WrapAlgPolicy<_AlgPolicy, _Compare>::type;
_WrappedComp __wrapped_comp(__c);
- return std::__sort5<_WrappedComp>(
+ return std::__sort5<_WrappedComp, _ForwardIterator>(
std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __wrapped_comp);
}
@@ -201,6 +212,13 @@ using __use_branchless_sort =
integral_constant<bool, __is_cpp17_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
+namespace __detail {
+
+// Size in bits for the bitset in use.
+enum { __block_size = sizeof(uint64_t) * 8 };
+
+} // namespace __detail
+
// Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
template <class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
@@ -231,8 +249,8 @@ template <class, class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
__sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
_Compare __c) {
- _VSTD::__cond_swap<_Compare>(__x2, __x3, __c);
- _VSTD::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
+ std::__cond_swap<_Compare>(__x2, __x3, __c);
+ std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
}
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
@@ -246,11 +264,11 @@ template <class, class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
__sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
_RandomAccessIterator __x4, _Compare __c) {
- _VSTD::__cond_swap<_Compare>(__x1, __x3, __c);
- _VSTD::__cond_swap<_Compare>(__x2, __x4, __c);
- _VSTD::__cond_swap<_Compare>(__x1, __x2, __c);
- _VSTD::__cond_swap<_Compare>(__x3, __x4, __c);
- _VSTD::__cond_swap<_Compare>(__x2, __x3, __c);
+ std::__cond_swap<_Compare>(__x1, __x3, __c);
+ std::__cond_swap<_Compare>(__x2, __x4, __c);
+ std::__cond_swap<_Compare>(__x1, __x2, __c);
+ std::__cond_swap<_Compare>(__x3, __x4, __c);
+ std::__cond_swap<_Compare>(__x2, __x3, __c);
}
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
@@ -260,16 +278,21 @@ __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2,
std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
}
-template <class, class _Compare, class _RandomAccessIterator>
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
-__sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
- _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) {
- _VSTD::__cond_swap<_Compare>(__x1, __x2, __c);
- _VSTD::__cond_swap<_Compare>(__x4, __x5, __c);
- _VSTD::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
- _VSTD::__cond_swap<_Compare>(__x2, __x5, __c);
- _VSTD::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
- _VSTD::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
+__sort5_maybe_branchless(
+ _RandomAccessIterator __x1,
+ _RandomAccessIterator __x2,
+ _RandomAccessIterator __x3,
+ _RandomAccessIterator __x4,
+ _RandomAccessIterator __x5,
+ _Compare __c) {
+ std::__cond_swap<_Compare>(__x1, __x2, __c);
+ std::__cond_swap<_Compare>(__x4, __x5, __c);
+ std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
+ std::__cond_swap<_Compare>(__x2, __x5, __c);
+ std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
+ std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
}
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
@@ -281,7 +304,8 @@ __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2,
// Assumes size > 0
template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
-_LIBCPP_CONSTEXPR_AFTER_CXX11 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last,
+_LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX14 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last,
_Compare __comp) {
_BidirectionalIterator __lm1 = __last;
for (--__lm1; __first != __lm1; ++__first) {
@@ -291,32 +315,48 @@ _LIBCPP_CONSTEXPR_AFTER_CXX11 void __selection_sort(_BidirectionalIterator __fir
}
}
+// Sort the iterator range [__first, __last) with the comparator __comp using
+// the insertion sort algorithm.
template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
+_LIBCPP_HIDE_FROM_ABI
void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
using _Ops = _IterOps<_AlgPolicy>;
typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
- if (__first != __last) {
- _BidirectionalIterator __i = __first;
- for (++__i; __i != __last; ++__i) {
- _BidirectionalIterator __j = __i;
- value_type __t(_Ops::__iter_move(__j));
- for (_BidirectionalIterator __k = __i; __k != __first && __comp(__t, *--__k); --__j)
+ if (__first == __last)
+ return;
+ _BidirectionalIterator __i = __first;
+ for (++__i; __i != __last; ++__i) {
+ _BidirectionalIterator __j = __i;
+ --__j;
+ if (__comp(*__i, *__j)) {
+ value_type __t(_Ops::__iter_move(__i));
+ _BidirectionalIterator __k = __j;
+ __j = __i;
+ do {
*__j = _Ops::__iter_move(__k);
- *__j = _VSTD::move(__t);
+ __j = __k;
+ } while (__j != __first && __comp(__t, *--__k));
+ *__j = std::move(__t);
}
}
}
+// Sort the iterator range [__first, __last) with the comparator __comp using
+// the insertion sort algorithm. Insertion sort has two loops, outer and inner.
+// The implementation below has no bounds check (it is unguarded) for the inner
+// loop. Assumes that there is an element at the position (__first - 1) and that
+// each element in the input range is greater than or equal to the element at
+// __first - 1.
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
+_LIBCPP_HIDE_FROM_ABI void
+__insertion_sort_unguarded(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
using _Ops = _IterOps<_AlgPolicy>;
-
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
- _RandomAccessIterator __j = __first + difference_type(2);
- std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), __j, __comp);
- for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
+ if (__first == __last)
+ return;
+ for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) {
+ _RandomAccessIterator __j = __i - difference_type(1);
if (__comp(*__i, *__j)) {
value_type __t(_Ops::__iter_move(__i));
_RandomAccessIterator __k = __j;
@@ -324,15 +364,14 @@ void __insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __l
do {
*__j = _Ops::__iter_move(__k);
__j = __k;
- } while (__j != __first && __comp(__t, *--__k));
- *__j = _VSTD::move(__t);
+ } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above.
+ *__j = std::move(__t);
}
- __j = __i;
}
}
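// A minimal sketch of the sentinel idea behind __insertion_sort_unguarded, using
// made-up names: because some element at position first - 1 is known to compare
// less-than-or-equal to everything in [first, last), the shifting loop can stop on
// the comparison alone, with no bounds check against the front of the range.

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

template <class It, class Compare>
void insertion_sort_unguarded(It first, It last, Compare comp) {
  if (first == last)
    return;
  for (It i = first + 1; i != last; ++i) {
    It j = i - 1;
    if (comp(*i, *j)) {
      auto t = std::move(*i);
      It k = j;
      j = i;
      do {
        *j = std::move(*k);
        j = k;
      } while (comp(t, *--k));  // the element before `first` acts as the guard
      *j = std::move(t);
    }
  }
}

int main() {
  // v[0] is the sentinel: nothing after it compares less than it, so the inner
  // loop can never walk past the front of the sorted subrange.
  std::vector<int> v = {0, 5, 3, 4, 1, 2};
  insertion_sort_unguarded(v.begin() + 1, v.end(), [](int a, int b) { return a < b; });
  assert(std::is_sorted(v.begin(), v.end()));
  return 0;
}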
template <class _WrappedComp, class _RandomAccessIterator>
-bool __insertion_sort_incomplete(
+_LIBCPP_HIDDEN bool __insertion_sort_incomplete(
_RandomAccessIterator __first, _RandomAccessIterator __last, _WrappedComp __wrapped_comp) {
using _Unwrap = _UnwrapAlgPolicy<_WrappedComp>;
using _AlgPolicy = typename _Unwrap::_AlgPolicy;
@@ -348,7 +387,7 @@ bool __insertion_sort_incomplete(
return true;
case 2:
if (__comp(*--__last, *__first))
- _IterOps<_AlgPolicy>::iter_swap(__first, __last);
+ _Ops::iter_swap(__first, __last);
return true;
case 3:
std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
@@ -377,7 +416,7 @@ bool __insertion_sort_incomplete(
*__j = _Ops::__iter_move(__k);
__j = __k;
} while (__j != __first && __comp(__t, *--__k));
- *__j = _VSTD::move(__t);
+ *__j = std::move(__t);
if (++__count == __limit)
return ++__i == __last;
}
@@ -387,6 +426,7 @@ bool __insertion_sort_incomplete(
}
template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
+_LIBCPP_HIDE_FROM_ABI
void __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __last1,
typename iterator_traits<_BidirectionalIterator>::value_type* __first2, _Compare __comp) {
using _Ops = _IterOps<_AlgPolicy>;
@@ -416,17 +456,336 @@ void __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterat
}
}
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __depth) {
+template <class _AlgPolicy, class _RandomAccessIterator>
+inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
+ _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) {
+ using _Ops = _IterOps<_AlgPolicy>;
+ typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+ // Swap one pair on each iteration as long as both bitsets have at least one
+ // element for swapping.
+ while (__left_bitset != 0 && __right_bitset != 0) {
+ difference_type __tz_left = __libcpp_ctz(__left_bitset);
+ __left_bitset = __libcpp_blsr(__left_bitset);
+ difference_type __tz_right = __libcpp_ctz(__right_bitset);
+ __right_bitset = __libcpp_blsr(__right_bitset);
+ _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
+ }
+}
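// The two bit helpers used above are internal (__libcpp_ctz returns the number of
// trailing zero bits, __libcpp_blsr clears the lowest set bit). A rough standalone
// equivalent of the swap loop, written against C++20 <bit> with assumed names:

#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>
#include <vector>

// Bit j of left_bits marks first[j] as "belongs on the right"; bit j of right_bits
// marks *(last - j) as "belongs on the left", where `last` is the last element
// (inclusive), matching the __lm1 convention used by the callers.
template <class It>
void swap_bitmap_pos(It first, It last, std::uint64_t& left_bits, std::uint64_t& right_bits) {
  while (left_bits != 0 && right_bits != 0) {
    int tz_left = std::countr_zero(left_bits);    // lowest marked offset on the left
    left_bits &= left_bits - 1;                   // clear it (the blsr operation)
    int tz_right = std::countr_zero(right_bits);  // lowest marked offset on the right
    right_bits &= right_bits - 1;
    std::iter_swap(first + tz_left, last - tz_right);
  }
}

int main() {
  // With pivot 5: offsets 0 and 2 on the left hold elements >= 5, and offsets 0, 1
  // and 2 counted from the back hold elements < 5.
  std::vector<int> v = {9, 1, 8, 2, 3, 4};
  std::uint64_t left_bits = 0b101, right_bits = 0b111;
  swap_bitmap_pos(v.begin(), v.end() - 1, left_bits, right_bits);
  assert((v == std::vector<int>{4, 1, 3, 2, 8, 9}));
  assert(left_bits == 0 && right_bits == 0b100);  // one candidate on the right is left over
  return 0;
}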
+
+template <class _Compare,
+ class _RandomAccessIterator,
+ class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
+inline _LIBCPP_HIDE_FROM_ABI void
+__populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) {
+ // Possible vectorization. With a proper "-march" flag, the following loop
+ // will be compiled into a set of SIMD instructions.
+ _RandomAccessIterator __iter = __first;
+ for (int __j = 0; __j < __detail::__block_size;) {
+ bool __comp_result = !__comp(*__iter, __pivot);
+ __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
+ __j++;
+ ++__iter;
+ }
+}
+
+template <class _Compare,
+ class _RandomAccessIterator,
+ class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
+inline _LIBCPP_HIDE_FROM_ABI void
+__populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) {
+ // Possible vectorization. With a proper "-march" flag, the following loop
+ // will be compiled into a set of SIMD instructions.
+ _RandomAccessIterator __iter = __lm1;
+ for (int __j = 0; __j < __detail::__block_size;) {
+ bool __comp_result = __comp(*__iter, __pivot);
+ __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
+ __j++;
+ --__iter;
+ }
+}
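// A minimal sketch of how one 64-element block is turned into a bitset, mirroring
// __populate_left_bitset for plain ints (the pivot value and the helper name are
// assumptions for the example). Bit j is set when block[j] does not compare less
// than the pivot, i.e. it belongs on the right side. The loop body is branch-free,
// which is what gives the compiler a chance to auto-vectorize it.

#include <cassert>
#include <cstdint>

inline std::uint64_t populate_left_bitset(const int* block, int pivot) {
  std::uint64_t bits = 0;
  for (int j = 0; j < 64; ++j) {
    bool goes_right = !(block[j] < pivot);
    bits |= static_cast<std::uint64_t>(goes_right) << j;
  }
  return bits;
}

int main() {
  int block[64];
  for (int j = 0; j < 64; ++j)
    block[j] = j;  // 0, 1, ..., 63
  // Elements 32..63 are >= the pivot, so exactly the upper 32 bits are set.
  assert(populate_left_bitset(block, 32) == 0xFFFFFFFF00000000ull);
  return 0;
}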
+
+template <class _AlgPolicy,
+ class _Compare,
+ class _RandomAccessIterator,
+ class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
+inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks(
+ _RandomAccessIterator& __first,
+ _RandomAccessIterator& __lm1,
+ _Compare __comp,
+ _ValueType& __pivot,
+ uint64_t& __left_bitset,
+ uint64_t& __right_bitset) {
+ typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+ difference_type __remaining_len = __lm1 - __first + 1;
+ difference_type __l_size;
+ difference_type __r_size;
+ if (__left_bitset == 0 && __right_bitset == 0) {
+ __l_size = __remaining_len / 2;
+ __r_size = __remaining_len - __l_size;
+ } else if (__left_bitset == 0) {
+ // We know at least one side is a full block.
+ __l_size = __remaining_len - __detail::__block_size;
+ __r_size = __detail::__block_size;
+ } else { // if (__right_bitset == 0)
+ __l_size = __detail::__block_size;
+ __r_size = __remaining_len - __detail::__block_size;
+ }
+ // Record the comparison outcomes for the elements currently on the left side.
+ if (__left_bitset == 0) {
+ _RandomAccessIterator __iter = __first;
+ for (int __j = 0; __j < __l_size; __j++) {
+ bool __comp_result = !__comp(*__iter, __pivot);
+ __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
+ ++__iter;
+ }
+ }
+ // Record the comparison outcomes for the elements currently on the right
+ // side.
+ if (__right_bitset == 0) {
+ _RandomAccessIterator __iter = __lm1;
+ for (int __j = 0; __j < __r_size; __j++) {
+ bool __comp_result = __comp(*__iter, __pivot);
+ __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
+ --__iter;
+ }
+ }
+ std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
+ __first += (__left_bitset == 0) ? __l_size : 0;
+ __lm1 -= (__right_bitset == 0) ? __r_size : 0;
+}
+
+template <class _AlgPolicy, class _RandomAccessIterator>
+inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
+ _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) {
+ using _Ops = _IterOps<_AlgPolicy>;
+ typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+ if (__left_bitset) {
+ // Swap within the left side. Need to find set positions in the reverse
+ // order.
+ while (__left_bitset != 0) {
+ difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
+ __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
+ _RandomAccessIterator __it = __first + __tz_left;
+ if (__it != __lm1) {
+ _Ops::iter_swap(__it, __lm1);
+ }
+ --__lm1;
+ }
+ __first = __lm1 + difference_type(1);
+ } else if (__right_bitset) {
+ // Swap within the right side. Need to find set positions in the reverse
+ // order.
+ while (__right_bitset != 0) {
+ difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
+ __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
+ _RandomAccessIterator __it = __lm1 - __tz_right;
+ if (__it != __first) {
+ _Ops::iter_swap(__it, __first);
+ }
+ ++__first;
+ }
+ }
+}
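// A rough standalone equivalent of the left-side branch above, with assumed names:
// leftover marked elements are visited from the highest offset down (63 minus the
// leading-zero count) and swapped to the back of the unprocessed region, which
// shrinks by one each time.

#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>
#include <vector>

template <class It>
It compact_left_leftovers(It first, It lm1, std::uint64_t left_bits) {
  while (left_bits != 0) {
    int idx = 63 - std::countl_zero(left_bits);   // highest marked offset
    left_bits &= (std::uint64_t{1} << idx) - 1;   // clear the bit just handled
    It it = first + idx;
    if (it != lm1)
      std::iter_swap(it, lm1);
    --lm1;
  }
  return lm1 + 1;  // boundary: everything from here on was marked "belongs right"
}

int main() {
  // With pivot 5, offsets 1 and 3 still hold elements that belong on the right.
  std::vector<int> v = {1, 9, 2, 8, 3, 4};
  std::uint64_t left_bits = (1u << 1) | (1u << 3);
  auto boundary = compact_left_leftovers(v.begin(), v.end() - 1, left_bits);
  assert((v == std::vector<int>{1, 3, 2, 4, 9, 8}));
  assert(boundary - v.begin() == 4);  // v[0..3] < 5, v[4..5] >= 5
  return 0;
}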
+
+// Partition [__first, __last) using the comparator __comp. *__first has the
+// chosen pivot. Elements that are equivalent are kept to the left of the
+// pivot. Returns the iterator for the pivot and a bool value which is true if
+// the provided range was already partitioned (no swaps were needed), false otherwise. We assume that the
+// length of the range is at least three elements.
+//
+// __bitset_partition uses bitsets for storing outcomes of the comparisons
+// between the pivot and other elements.
+template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
+_LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
+__bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
using _Ops = _IterOps<_AlgPolicy>;
+ typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
+ typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+ _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
+
+ _RandomAccessIterator __begin = __first;
+ value_type __pivot(_Ops::__iter_move(__first));
+ // Find the first element greater than the pivot.
+ if (__comp(__pivot, *(__last - difference_type(1)))) {
+ // Not guarded since we know the last element is greater than the pivot.
+ while (!__comp(__pivot, *++__first)) {
+ }
+ } else {
+ while (++__first < __last && !__comp(__pivot, *__first)) {
+ }
+ }
+ // Find the last element less than or equal to the pivot.
+ if (__first < __last) {
+ // It will always be guarded because __introsort will do the median-of-three
+ // before calling this.
+ while (__comp(__pivot, *--__last)) {
+ }
+ }
+ // If the first element greater than the pivot is at or after the
+ // last element less than or equal to the pivot, then we have covered the
+ // entire range without swapping elements. This implies the range is already
+ // partitioned.
+ bool __already_partitioned = __first >= __last;
+ if (!__already_partitioned) {
+ _Ops::iter_swap(__first, __last);
+ ++__first;
+ }
+
+ // In [__first, __last), __last is not inclusive. From now on, use __lm1 (last
+ // minus one) so that the range is inclusive on both sides.
+ _RandomAccessIterator __lm1 = __last - difference_type(1);
+ uint64_t __left_bitset = 0;
+ uint64_t __right_bitset = 0;
+
+ // Reminder: length = __lm1 - __first + 1.
+ while (__lm1 - __first >= 2 * __detail::__block_size - 1) {
+ // Record the comparison outcomes for the elements currently on the left
+ // side.
+ if (__left_bitset == 0)
+ std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset);
+ // Record the comparison outcomes for the elements currently on the right
+ // side.
+ if (__right_bitset == 0)
+ std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset);
+ // Swap the elements recorded to be the candidates for swapping in the
+ // bitsets.
+ std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
+ // Only advance the iterator if all the elements that need to be moved to
+ // the other side were moved.
+ __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
+ __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
+ }
+ // Now we have less than a block's worth of elements on at least one of the
+ // sides.
+ std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>(
+ __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset);
+ // At least one of the bitsets will be empty. For the non-empty one, we need to
+ // properly partition the elements that appear within that bitset.
+ std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset);
+
+ // Move the pivot to its correct position.
+ _RandomAccessIterator __pivot_pos = __first - difference_type(1);
+ if (__begin != __pivot_pos) {
+ *__begin = _Ops::__iter_move(__pivot_pos);
+ }
+ *__pivot_pos = std::move(__pivot);
+ return std::make_pair(__pivot_pos, __already_partitioned);
+}
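// A small checker for the postcondition the partition helpers in this file are
// expected to establish: the pivot ends up in its final sorted position, with
// nothing greater before it and nothing smaller after it. The helper name and the
// hand-made example below are assumptions, not part of the header.

#include <algorithm>
#include <cassert>
#include <functional>
#include <vector>

template <class It, class Compare>
bool is_valid_partition(It first, It pivot_pos, It last, Compare comp) {
  return std::none_of(first, pivot_pos, [&](const auto& x) { return comp(*pivot_pos, x); }) &&
         std::none_of(pivot_pos, last, [&](const auto& x) { return comp(x, *pivot_pos); });
}

int main() {
  std::vector<int> v = {1, 3, 2, 4, 9, 6, 5};
  // 4 sits at index 3 with only smaller values before it and larger ones after it.
  assert(is_valid_partition(v.begin(), v.begin() + 3, v.end(), std::less<int>()));
  assert(!is_valid_partition(v.begin(), v.begin() + 1, v.end(), std::less<int>()));
  return 0;
}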
+// Partition [__first, __last) using the comparator __comp. *__first has the
+// chosen pivot. Elements that are equivalent are kept to the right of the
+// pivot. Returns the iterator for the pivot and a bool value which is true if
+// the provided range was already partitioned (no swaps were needed), false otherwise. We assume that the
+// length of the range is at least three elements.
+template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
+_LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
+__partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
+ using _Ops = _IterOps<_AlgPolicy>;
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
- typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
- const difference_type __limit =
- is_trivially_copy_constructible<value_type>::value && is_trivially_copy_assignable<value_type>::value ? 30 : 6;
+ typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
+ _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
+ _RandomAccessIterator __begin = __first;
+ value_type __pivot(_Ops::__iter_move(__first));
+ // Find the first element greater than or equal to the pivot. It will always
+ // be guarded because __introsort will do the median-of-three before calling
+ // this.
+ while (__comp(*++__first, __pivot))
+ ;
+
+ // Find the last element less than the pivot.
+ if (__begin == __first - difference_type(1)) {
+ while (__first < __last && !__comp(*--__last, __pivot))
+ ;
+ } else {
+ // Guarded.
+ while (!__comp(*--__last, __pivot))
+ ;
+ }
+
+ // If the first element greater than or equal to the pivot is at or after the
+ // last element less than the pivot, then we have covered the entire range
+ // without swapping elements. This implies the range is already partitioned.
+ bool __already_partitioned = __first >= __last;
+ // Go through the remaining elements. Swap pairs of elements (one to the
+ // right of the pivot and the other to the left of the pivot) that are not on the
+ // correct side of the pivot.
+ while (__first < __last) {
+ _Ops::iter_swap(__first, __last);
+ while (__comp(*++__first, __pivot))
+ ;
+ while (!__comp(*--__last, __pivot))
+ ;
+ }
+ // Move the pivot to its correct position.
+ _RandomAccessIterator __pivot_pos = __first - difference_type(1);
+ if (__begin != __pivot_pos) {
+ *__begin = _Ops::__iter_move(__pivot_pos);
+ }
+ *__pivot_pos = std::move(__pivot);
+ return std::make_pair(__pivot_pos, __already_partitioned);
+}
+
+// Similar to the above function. Elements equivalent to the pivot are put to
+// the left of the pivot. Returns an iterator one past the pivot's final position.
+template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
+_LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
+__partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
+ using _Ops = _IterOps<_AlgPolicy>;
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+ typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
+ _RandomAccessIterator __begin = __first;
+ value_type __pivot(_Ops::__iter_move(__first));
+ if (__comp(__pivot, *(__last - difference_type(1)))) {
+ // Guarded.
+ while (!__comp(__pivot, *++__first)) {
+ }
+ } else {
+ while (++__first < __last && !__comp(__pivot, *__first)) {
+ }
+ }
+
+ if (__first < __last) {
+ // It will always be guarded because __introsort will do the
+ // median-of-three before calling this.
+ while (__comp(__pivot, *--__last)) {
+ }
+ }
+ while (__first < __last) {
+ _Ops::iter_swap(__first, __last);
+ while (!__comp(__pivot, *++__first))
+ ;
+ while (__comp(__pivot, *--__last))
+ ;
+ }
+ _RandomAccessIterator __pivot_pos = __first - difference_type(1);
+ if (__begin != __pivot_pos) {
+ *__begin = _Ops::__iter_move(__pivot_pos);
+ }
+ *__pivot_pos = std::move(__pivot);
+ return __first;
+}
+
+// The main sorting function. Implements introsort combined with other ideas:
+// - option of using block quicksort for partitioning,
+// - guarded and unguarded insertion sort for small lengths,
+// - Tukey's ninther technique for computing the pivot,
+// - a check for whether the range was already partitioned (no swaps were required).
+// The implementation is partly based on Orson Peters' pattern-defeating
+// quicksort, published at: <https://github.com/orlp/pdqsort>.
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _UseBitSetPartition>
+void __introsort(_RandomAccessIterator __first,
+ _RandomAccessIterator __last,
+ _Compare __comp,
+ typename iterator_traits<_RandomAccessIterator>::difference_type __depth,
+ bool __leftmost = true) {
+ using _Ops = _IterOps<_AlgPolicy>;
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+ using _Comp_ref = __comp_ref_type<_Compare>;
+ // Upper bound for using insertion sort for sorting.
+ _LIBCPP_CONSTEXPR difference_type __limit = 24;
+ // Lower bound for using Tukey's ninther technique for median computation.
+ _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128;
while (true) {
- __restart:
difference_type __len = __last - __first;
switch (__len) {
case 0:
@@ -434,7 +793,7 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C
return;
case 2:
if (__comp(*--__last, *__first))
- _IterOps<_AlgPolicy>::iter_swap(__first, __last);
+ _Ops::iter_swap(__first, __last);
return;
case 3:
std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
@@ -449,127 +808,60 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C
--__last, __comp);
return;
}
- if (__len <= __limit) {
- std::__insertion_sort_3<_AlgPolicy, _Compare>(__first, __last, __comp);
+ // Use insertion sort if the length of the range is below the specified limit.
+ if (__len < __limit) {
+ if (__leftmost) {
+ std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
+ } else {
+ std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp);
+ }
return;
}
- // __len > 5
if (__depth == 0) {
// Fallback to heap sort as Introsort suggests.
std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp);
return;
}
--__depth;
- _RandomAccessIterator __m = __first;
- _RandomAccessIterator __lm1 = __last;
- --__lm1;
- unsigned __n_swaps;
{
- difference_type __delta;
- if (__len >= 1000) {
- __delta = __len / 2;
- __m += __delta;
- __delta /= 2;
- __n_swaps = std::__sort5_wrap_policy<_AlgPolicy, _Compare>(
- __first, __first + __delta, __m, __m + __delta, __lm1, __comp);
+ difference_type __half_len = __len / 2;
+ // Use Tukey's ninther technique or median of 3 for pivot selection
+ // depending on the length of the range being sorted.
+ if (__len > __ninther_threshold) {
+ std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp);
+ std::__sort3<_AlgPolicy, _Compare>(
+ __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp);
+ std::__sort3<_AlgPolicy, _Compare>(
+ __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp);
+ std::__sort3<_AlgPolicy, _Compare>(
+ __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp);
+ _Ops::iter_swap(__first, __first + __half_len);
} else {
- __delta = __len / 2;
- __m += __delta;
- __n_swaps = std::__sort3<_AlgPolicy, _Compare>(__first, __m, __lm1, __comp);
+ std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp);
}
}
- // *__m is median
- // partition [__first, __m) < *__m and *__m <= [__m, __last)
- // (this inhibits tossing elements equivalent to __m around unnecessarily)
- _RandomAccessIterator __i = __first;
- _RandomAccessIterator __j = __lm1;
- // j points beyond range to be tested, *__m is known to be <= *__lm1
- // The search going up is known to be guarded but the search coming down isn't.
- // Prime the downward search with a guard.
- if (!__comp(*__i, *__m)) // if *__first == *__m
- {
- // *__first == *__m, *__first doesn't go in first part
- // manually guard downward moving __j against __i
- while (true) {
- if (__i == --__j) {
- // *__first == *__m, *__m <= all other elements
- // Parition instead into [__first, __i) == *__first and *__first < [__i, __last)
- ++__i; // __first + 1
- __j = __last;
- if (!__comp(*__first, *--__j)) // we need a guard if *__first == *(__last-1)
- {
- while (true) {
- if (__i == __j)
- return; // [__first, __last) all equivalent elements
- if (__comp(*__first, *__i)) {
- _Ops::iter_swap(__i, __j);
- ++__n_swaps;
- ++__i;
- break;
- }
- ++__i;
- }
- }
- // [__first, __i) == *__first and *__first < [__j, __last) and __j == __last - 1
- if (__i == __j)
- return;
- while (true) {
- while (!__comp(*__first, *__i))
- ++__i;
- while (__comp(*__first, *--__j))
- ;
- if (__i >= __j)
- break;
- _Ops::iter_swap(__i, __j);
- ++__n_swaps;
- ++__i;
- }
- // [__first, __i) == *__first and *__first < [__i, __last)
- // The first part is sorted, sort the second part
- // _VSTD::__sort<_Compare>(__i, __last, __comp);
- __first = __i;
- goto __restart;
- }
- if (__comp(*__j, *__m)) {
- _Ops::iter_swap(__i, __j);
- ++__n_swaps;
- break; // found guard for downward moving __j, now use unguarded partition
- }
- }
- }
- // It is known that *__i < *__m
- ++__i;
- // j points beyond range to be tested, *__m is known to be <= *__lm1
- // if not yet partitioned...
- if (__i < __j) {
- // known that *(__i - 1) < *__m
- // known that __i <= __m
- while (true) {
- // __m still guards upward moving __i
- while (__comp(*__i, *__m))
- ++__i;
- // It is now known that a guard exists for downward moving __j
- while (!__comp(*--__j, *__m))
- ;
- if (__i > __j)
- break;
- _Ops::iter_swap(__i, __j);
- ++__n_swaps;
- // It is known that __m != __j
- // If __m just moved, follow it
- if (__m == __i)
- __m = __j;
- ++__i;
- }
- }
- // [__first, __i) < *__m and *__m <= [__i, __last)
- if (__i != __m && __comp(*__m, *__i)) {
- _Ops::iter_swap(__i, __m);
- ++__n_swaps;
+ // The elements to the left of the current iterator range are already
+ // sorted. If the current iterator range to be sorted is not the
+ // leftmost part of the entire iterator range and the pivot is the same as
+ // the highest element in the range to the left, then we know that all
+ // the elements in the range [first, pivot] must be equal to the pivot,
+ // assuming the equal elements are put on the left side when
+ // partitioned. This also means that we do not need to sort the left
+ // side of the partition.
+ if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) {
+ __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>(
+ __first, __last, _Comp_ref(__comp));
+ continue;
}
+ // Use bitset partition only if asked for.
+ auto __ret =
+ _UseBitSetPartition
+ ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp)
+ : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp);
+ _RandomAccessIterator __i = __ret.first;
// [__first, __i) < *__i and *__i <= [__i+1, __last)
// If we were given a perfect partition, see if insertion sort is quick...
- if (__n_swaps == 0) {
+ if (__ret.second) {
using _WrappedComp = typename _WrapAlgPolicy<_AlgPolicy, _Compare>::type;
_WrappedComp __wrapped_comp(__comp);
bool __fs = std::__insertion_sort_incomplete<_WrappedComp>(__first, __i, __wrapped_comp);
@@ -585,14 +877,11 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C
}
}
}
- // sort smaller range with recursive call and larger with tail recursion elimination
- if (__i - __first < __last - __i) {
- std::__introsort<_AlgPolicy, _Compare>(__first, __i, __comp, __depth);
- __first = ++__i;
- } else {
- std::__introsort<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp, __depth);
- __last = __i;
- }
+ // Sort the left partition recursively and the right partition with tail recursion elimination.
+ std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>(
+ __first, __i, __comp, __depth, __leftmost);
+ __leftmost = false;
+ __first = ++__i;
}
}
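// A minimal non-destructive sketch of Tukey's ninther (the median of three medians
// of three), the pivot selection __introsort switches to above __ninther_threshold.
// The helper names here are assumptions; the header instead sorts the sampled
// triples in place with __sort3 and swaps the winner to the front of the range,
// but the nine sampled positions are the same.

#include <cassert>
#include <functional>
#include <vector>

template <class T, class Compare>
const T& median3(const T& a, const T& b, const T& c, Compare comp) {
  if (comp(a, b))
    return comp(b, c) ? b : (comp(a, c) ? c : a);
  return comp(a, c) ? a : (comp(b, c) ? c : b);
}

template <class It, class Compare>
auto ninther(It first, It last, Compare comp) {
  auto half = (last - first) / 2;
  auto m1 = median3(*first, *(first + half), *(last - 1), comp);
  auto m2 = median3(*(first + 1), *(first + (half - 1)), *(last - 2), comp);
  auto m3 = median3(*(first + 2), *(first + (half + 1)), *(last - 3), comp);
  return median3(m1, m2, m3, comp);  // returned by value (auto drops the reference)
}

int main() {
  std::vector<int> v(200);
  for (int i = 0; i < 200; ++i)
    v[i] = i;
  // On an already sorted range the ninther picks exactly the middle element,
  // which keeps the partition balanced.
  assert(ninther(v.begin(), v.end(), std::less<int>()) == 100);
  return 0;
}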
@@ -616,15 +905,22 @@ inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
}
template <class _WrappedComp, class _RandomAccessIterator>
-void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _WrappedComp __wrapped_comp) {
+_LIBCPP_HIDDEN void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _WrappedComp __wrapped_comp) {
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
- difference_type __depth_limit = 2 * __log2i(__last - __first);
+ difference_type __depth_limit = 2 * std::__log2i(__last - __first);
using _Unwrap = _UnwrapAlgPolicy<_WrappedComp>;
using _AlgPolicy = typename _Unwrap::_AlgPolicy;
using _Compare = typename _Unwrap::_Comp;
_Compare __comp = _Unwrap::__get_comp(__wrapped_comp);
- std::__introsort<_AlgPolicy, _Compare>(__first, __last, __comp, __depth_limit);
+ // Only use bitset partitioning for arithmetic types. We also check that the
+ // default comparator is in use so that we are sure that there are no
+ // branches in the comparator.
+ std::__introsort<_AlgPolicy,
+ _Compare,
+ _RandomAccessIterator,
+ __use_branchless_sort<_Compare, _RandomAccessIterator>::value>(
+ __first, __last, __comp, __depth_limit);
}
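// A rough sketch of the two decisions made in __sort, with made-up names. The
// depth limit is 2 * floor(log2(n)); once a recursion branch exhausts it, the
// algorithm falls back to heap sort. The bitset partition is only selected at
// compile time when the value type is arithmetic and the ordering is the default
// one, so that the comparison itself is branch-free; this is a simplification of
// the header's __use_branchless_sort, which also requires contiguous iterators.

#include <cassert>
#include <cstddef>
#include <functional>
#include <type_traits>

inline int log2i(std::size_t n) {
  int r = 0;
  while (n > 1) {
    ++r;
    n >>= 1;
  }
  return r;
}

struct CompareByMagnitude {
  bool operator()(int a, int b) const { return (a < 0 ? -a : a) < (b < 0 ? -b : b); }
};

template <class T, class Compare>
struct use_bitset_partition
    : std::integral_constant<bool,
                             std::is_arithmetic<T>::value &&
                                 (std::is_same<Compare, std::less<T> >::value ||
                                  std::is_same<Compare, std::less<> >::value)> {};

static_assert(use_bitset_partition<int, std::less<int> >::value, "default ordering: bitset partition");
static_assert(!use_bitset_partition<int, CompareByMagnitude>::value, "user comparator: scalar partition");

int main() {
  assert(2 * log2i(1024) == 20);  // introsort depth limit for n == 1024
  return 0;
}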
template <class _Compare, class _Tp>
@@ -672,11 +968,11 @@ extern template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<long do
extern template _LIBCPP_FUNC_VIS unsigned __sort5<__less<long double>&, long double*>(long double*, long double*, long double*, long double*, long double*, __less<long double>&);
template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
std::__debug_randomize_range<_AlgPolicy>(__first, __last);
- using _Comp_ref = typename __comp_ref_type<_Comp>::type;
+ using _Comp_ref = __comp_ref_type<_Comp>;
if (__libcpp_is_constant_evaluated()) {
std::__partial_sort<_AlgPolicy>(__first, __last, __last, __comp);
@@ -689,13 +985,13 @@ void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _C
}
template <class _RandomAccessIterator, class _Comp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
void sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
std::__sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
}
template <class _RandomAccessIterator>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
void sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
std::sort(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>());
}