aboutsummaryrefslogtreecommitdiff
path: root/lib/libc
diff options
context:
space:
mode:
authorDag-Erling Smørgrav <des@FreeBSD.org>2025-08-15 07:22:33 +0000
committerDag-Erling Smørgrav <des@FreeBSD.org>2025-08-15 07:23:03 +0000
commit5205b32de3fb7702e96b3991f5b1a61eee406d8b (patch)
tree4dd90017ebfd894f266a16e189750468a1c95474 /lib/libc
parentc992ac6213276f54d868f317cc5092f8aed4ff54 (diff)
Diffstat (limited to 'lib/libc')
-rw-r--r--lib/libc/stdlib/qsort.c13
-rw-r--r--lib/libc/tests/stdlib/Makefile1
-rw-r--r--lib/libc/tests/stdlib/qsort_bench.c113
3 files changed, 114 insertions, 13 deletions
diff --git a/lib/libc/stdlib/qsort.c b/lib/libc/stdlib/qsort.c
index e0b06494cf98..400124593d07 100644
--- a/lib/libc/stdlib/qsort.c
+++ b/lib/libc/stdlib/qsort.c
@@ -106,13 +106,11 @@ local_qsort(void *a, size_t n, size_t es, cmp_t *cmp, void *thunk)
char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
size_t d1, d2;
int cmp_result;
- int swap_cnt;
/* if there are less than 2 elements, then sorting is not needed */
if (__predict_false(n < 2))
return;
loop:
- swap_cnt = 0;
if (n < 7) {
for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
for (pl = pm;
@@ -141,7 +139,6 @@ loop:
for (;;) {
while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
if (cmp_result == 0) {
- swap_cnt = 1;
swapfunc(pa, pb, es);
pa += es;
}
@@ -149,7 +146,6 @@ loop:
}
while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
if (cmp_result == 0) {
- swap_cnt = 1;
swapfunc(pc, pd, es);
pd -= es;
}
@@ -158,18 +154,9 @@ loop:
if (pb > pc)
break;
swapfunc(pb, pc, es);
- swap_cnt = 1;
pb += es;
pc -= es;
}
- if (swap_cnt == 0) { /* Switch to insertion sort */
- for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
- for (pl = pm;
- pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
- pl -= es)
- swapfunc(pl, pl - es, es);
- return;
- }
pn = (char *)a + n * es;
d1 = MIN(pa - (char *)a, pb - pa);
diff --git a/lib/libc/tests/stdlib/Makefile b/lib/libc/tests/stdlib/Makefile
index 50726a5d8af6..9d84becfbd1f 100644
--- a/lib/libc/tests/stdlib/Makefile
+++ b/lib/libc/tests/stdlib/Makefile
@@ -14,6 +14,7 @@ ATF_TESTS_C+= qsort_b_test
ATF_TESTS_C+= qsort_r_compat_test
ATF_TESTS_C+= qsort_r_test
ATF_TESTS_C+= qsort_s_test
+ATF_TESTS_C+= qsort_bench
ATF_TESTS_C+= set_constraint_handler_s_test
ATF_TESTS_C+= strfmon_test
ATF_TESTS_C+= tsearch_test
diff --git a/lib/libc/tests/stdlib/qsort_bench.c b/lib/libc/tests/stdlib/qsort_bench.c
new file mode 100644
index 000000000000..5f2cfae40140
--- /dev/null
+++ b/lib/libc/tests/stdlib/qsort_bench.c
@@ -0,0 +1,113 @@
+/*-
+ * Copyright (c) 2025 Dag-Erling Smørgrav <des@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <atf-c.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+/*-
+ * Measures qsort(3) runtime with pathological input and verify that it
+ * stays close to N * log2(N).
+ *
+ * Thanks to Vivian Hussey for the proof of concept.
+ *
+ * The input we construct is similar to a sweep from 0 to N where each
+ * half, except for the first element, has been reversed; for instance,
+ * with N = 8, we get { 0, 3, 2, 1, 4, 8, 7, 6 }. This triggers a bug in
+ * the BSD qsort(3) where it will switch to insertion sort if the pivots
+ * are sorted.
+ *
+ * This article goes into more detail about the bug and its origin:
+ *
+ * https://www.raygard.net/2022/02/26/Re-engineering-a-qsort-part-3
+ *
+ * With this optimization (the `if (swap_cnt == 0)` block), qsort(3) needs
+ * roughly N * N / 4 comparisons to sort our pathological input. Without
+ * it, it needs only a little more than N * log2(N) comparisons.
+ */
+
+/* we stop testing once a single takes longer than this */
+#define MAXRUNSECS 10
+
+static bool debugging;
+
+static uintmax_t ncmp;
+
+static int
+intcmp(const void *a, const void *b)
+{
+ ncmp++;
+ return ((*(int *)a > *(int *)b) - (*(int *)a < *(int *)b));
+}
+
+static void
+qsort_bench(int log2n)
+{
+ uintmax_t n = 1LLU << log2n;
+ int *buf;
+
+ /* fill an array with a pathological pattern */
+ ATF_REQUIRE(buf = malloc(n * sizeof(*buf)));
+ buf[0] = 0;
+ buf[n / 2] = n / 2;
+ for (unsigned int i = 1; i < n / 2; i++) {
+ buf[i] = n / 2 - i;
+ buf[n / 2 + i] = n - i;
+ }
+
+ ncmp = 0;
+ qsort(buf, n, sizeof(*buf), intcmp);
+
+ /* check result and free array */
+ if (debugging) {
+ for (unsigned int i = 1; i < n; i++) {
+ ATF_REQUIRE_MSG(buf[i] > buf[i - 1],
+ "array is not sorted");
+ }
+ }
+ free(buf);
+
+ /* check that runtime does not exceed N² */
+ ATF_CHECK_MSG(ncmp / n < n,
+ "runtime %ju exceeds N² for N = %ju", ncmp, n);
+
+ /* check that runtime does not exceed N log N by much */
+ ATF_CHECK_MSG(ncmp / n <= log2n + 1,
+ "runtime %ju exceeds N log N for N = %ju", ncmp, n);
+}
+
+ATF_TC_WITHOUT_HEAD(qsort_bench);
+ATF_TC_BODY(qsort_bench, tc)
+{
+ struct timespec t0, t1;
+ uintmax_t tus;
+
+ for (int i = 10; i <= 30; i++) {
+ clock_gettime(CLOCK_UPTIME, &t0);
+ qsort_bench(i);
+ clock_gettime(CLOCK_UPTIME, &t1);
+ tus = t1.tv_sec * 1000000 + t1.tv_nsec / 1000;
+ tus -= t0.tv_sec * 1000000 + t0.tv_nsec / 1000;
+ if (debugging) {
+ fprintf(stderr, "N = 2^%d in %ju.%06jus\n",
+ i, tus / 1000000, tus % 1000000);
+ }
+ /* stop once an individual run exceeds our limit */
+ if (tus / 1000000 >= MAXRUNSECS)
+ break;
+ }
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ debugging = !getenv("__RUNNING_INSIDE_ATF_RUN") &&
+ isatty(STDERR_FILENO);
+ ATF_TP_ADD_TC(tp, qsort_bench);
+ return (atf_no_error());
+}