mirror of
https://github.com/opnsense/src.git
synced 2026-06-11 09:41:03 -04:00
libc: Drop incorrect qsort optimization
As pointed out in the PR and the article linked below, the switch to insertion sort in the BSD qsort code is based on a misunderstanding of Knuth's TAOCP and is actually a pessimization. As demonstrated by the added test, it is trivially easy to construct pathological input which results in quadratic runtime. Without that misguided optimization, the same input runs in nearly linearithmic time. https://www.raygard.net/2022/02/26/Re-engineering-a-qsort-part-3 PR: 287089 MFC after: 1 week Reviewed by: imp Differential Revision: https://reviews.freebsd.org/D51907
This commit is contained in:
parent
c992ac6213
commit
5205b32de3
3 changed files with 114 additions and 13 deletions
|
|
@ -106,13 +106,11 @@ local_qsort(void *a, size_t n, size_t es, cmp_t *cmp, void *thunk)
|
|||
char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
|
||||
size_t d1, d2;
|
||||
int cmp_result;
|
||||
int swap_cnt;
|
||||
|
||||
/* if there are less than 2 elements, then sorting is not needed */
|
||||
if (__predict_false(n < 2))
|
||||
return;
|
||||
loop:
|
||||
swap_cnt = 0;
|
||||
if (n < 7) {
|
||||
for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
|
||||
for (pl = pm;
|
||||
|
|
@ -141,7 +139,6 @@ loop:
|
|||
for (;;) {
|
||||
while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
|
||||
if (cmp_result == 0) {
|
||||
swap_cnt = 1;
|
||||
swapfunc(pa, pb, es);
|
||||
pa += es;
|
||||
}
|
||||
|
|
@ -149,7 +146,6 @@ loop:
|
|||
}
|
||||
while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
|
||||
if (cmp_result == 0) {
|
||||
swap_cnt = 1;
|
||||
swapfunc(pc, pd, es);
|
||||
pd -= es;
|
||||
}
|
||||
|
|
@ -158,18 +154,9 @@ loop:
|
|||
if (pb > pc)
|
||||
break;
|
||||
swapfunc(pb, pc, es);
|
||||
swap_cnt = 1;
|
||||
pb += es;
|
||||
pc -= es;
|
||||
}
|
||||
if (swap_cnt == 0) { /* Switch to insertion sort */
|
||||
for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
|
||||
for (pl = pm;
|
||||
pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
|
||||
pl -= es)
|
||||
swapfunc(pl, pl - es, es);
|
||||
return;
|
||||
}
|
||||
|
||||
pn = (char *)a + n * es;
|
||||
d1 = MIN(pa - (char *)a, pb - pa);
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ ATF_TESTS_C+= qsort_b_test
|
|||
ATF_TESTS_C+= qsort_r_compat_test
|
||||
ATF_TESTS_C+= qsort_r_test
|
||||
ATF_TESTS_C+= qsort_s_test
|
||||
ATF_TESTS_C+= qsort_bench
|
||||
ATF_TESTS_C+= set_constraint_handler_s_test
|
||||
ATF_TESTS_C+= strfmon_test
|
||||
ATF_TESTS_C+= tsearch_test
|
||||
|
|
|
|||
113
lib/libc/tests/stdlib/qsort_bench.c
Normal file
113
lib/libc/tests/stdlib/qsort_bench.c
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
/*-
|
||||
* Copyright (c) 2025 Dag-Erling Smørgrav <des@FreeBSD.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <atf-c.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*-
|
||||
* Measures qsort(3) runtime with pathological input and verify that it
|
||||
* stays close to N * log2(N).
|
||||
*
|
||||
* Thanks to Vivian Hussey for the proof of concept.
|
||||
*
|
||||
* The input we construct is similar to a sweep from 0 to N where each
|
||||
* half, except for the first element, has been reversed; for instance,
|
||||
* with N = 8, we get { 0, 3, 2, 1, 4, 8, 7, 6 }. This triggers a bug in
|
||||
* the BSD qsort(3) where it will switch to insertion sort if the pivots
|
||||
* are sorted.
|
||||
*
|
||||
* This article goes into more detail about the bug and its origin:
|
||||
*
|
||||
* https://www.raygard.net/2022/02/26/Re-engineering-a-qsort-part-3
|
||||
*
|
||||
* With this optimization (the `if (swap_cnt == 0)` block), qsort(3) needs
|
||||
* roughly N * N / 4 comparisons to sort our pathological input. Without
|
||||
* it, it needs only a little more than N * log2(N) comparisons.
|
||||
*/
|
||||
|
||||
/* we stop testing once a single takes longer than this */
|
||||
#define MAXRUNSECS 10
|
||||
|
||||
static bool debugging;
|
||||
|
||||
static uintmax_t ncmp;
|
||||
|
||||
static int
|
||||
intcmp(const void *a, const void *b)
|
||||
{
|
||||
ncmp++;
|
||||
return ((*(int *)a > *(int *)b) - (*(int *)a < *(int *)b));
|
||||
}
|
||||
|
||||
static void
|
||||
qsort_bench(int log2n)
|
||||
{
|
||||
uintmax_t n = 1LLU << log2n;
|
||||
int *buf;
|
||||
|
||||
/* fill an array with a pathological pattern */
|
||||
ATF_REQUIRE(buf = malloc(n * sizeof(*buf)));
|
||||
buf[0] = 0;
|
||||
buf[n / 2] = n / 2;
|
||||
for (unsigned int i = 1; i < n / 2; i++) {
|
||||
buf[i] = n / 2 - i;
|
||||
buf[n / 2 + i] = n - i;
|
||||
}
|
||||
|
||||
ncmp = 0;
|
||||
qsort(buf, n, sizeof(*buf), intcmp);
|
||||
|
||||
/* check result and free array */
|
||||
if (debugging) {
|
||||
for (unsigned int i = 1; i < n; i++) {
|
||||
ATF_REQUIRE_MSG(buf[i] > buf[i - 1],
|
||||
"array is not sorted");
|
||||
}
|
||||
}
|
||||
free(buf);
|
||||
|
||||
/* check that runtime does not exceed N² */
|
||||
ATF_CHECK_MSG(ncmp / n < n,
|
||||
"runtime %ju exceeds N² for N = %ju", ncmp, n);
|
||||
|
||||
/* check that runtime does not exceed N log N by much */
|
||||
ATF_CHECK_MSG(ncmp / n <= log2n + 1,
|
||||
"runtime %ju exceeds N log N for N = %ju", ncmp, n);
|
||||
}
|
||||
|
||||
ATF_TC_WITHOUT_HEAD(qsort_bench);
|
||||
ATF_TC_BODY(qsort_bench, tc)
|
||||
{
|
||||
struct timespec t0, t1;
|
||||
uintmax_t tus;
|
||||
|
||||
for (int i = 10; i <= 30; i++) {
|
||||
clock_gettime(CLOCK_UPTIME, &t0);
|
||||
qsort_bench(i);
|
||||
clock_gettime(CLOCK_UPTIME, &t1);
|
||||
tus = t1.tv_sec * 1000000 + t1.tv_nsec / 1000;
|
||||
tus -= t0.tv_sec * 1000000 + t0.tv_nsec / 1000;
|
||||
if (debugging) {
|
||||
fprintf(stderr, "N = 2^%d in %ju.%06jus\n",
|
||||
i, tus / 1000000, tus % 1000000);
|
||||
}
|
||||
/* stop once an individual run exceeds our limit */
|
||||
if (tus / 1000000 >= MAXRUNSECS)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ATF_TP_ADD_TCS(tp)
|
||||
{
|
||||
debugging = !getenv("__RUNNING_INSIDE_ATF_RUN") &&
|
||||
isatty(STDERR_FILENO);
|
||||
ATF_TP_ADD_TC(tp, qsort_bench);
|
||||
return (atf_no_error());
|
||||
}
|
||||
Loading…
Reference in a new issue