aboutsummaryrefslogtreecommitdiff
path: root/usr.bin/sort
diff options
context:
space:
mode:
authorConrad Meyer <cem@FreeBSD.org>2020-06-23 16:43:48 +0000
committerConrad Meyer <cem@FreeBSD.org>2020-06-23 16:43:48 +0000
commit9f7e5bdad1f350e68879e92c23601f0e3c2b3a3b (patch)
tree0ac834f481eb1aabea239caa18f62a4e5463c34e /usr.bin/sort
parent5f018c9147685536eb56456536a9ddaa993a308f (diff)
downloadsrc-9f7e5bdad1f350e68879e92c23601f0e3c2b3a3b.tar.gz
src-9f7e5bdad1f350e68879e92c23601f0e3c2b3a3b.zip
Notes
Diffstat (limited to 'usr.bin/sort')
-rw-r--r--usr.bin/sort/radixsort.c37
1 files changed, 33 insertions, 4 deletions
diff --git a/usr.bin/sort/radixsort.c b/usr.bin/sort/radixsort.c
index 8c43c61faf8c..4993566aeb77 100644
--- a/usr.bin/sort/radixsort.c
+++ b/usr.bin/sort/radixsort.c
@@ -258,14 +258,28 @@ add_leaf(struct sort_level *sl, struct sort_list_item *item)
static inline int
get_wc_index(struct sort_list_item *sli, size_t level)
{
+ const size_t wcfact = (MB_CUR_MAX == 1) ? 1 : sizeof(wchar_t);
const struct key_value *kv;
const struct bwstring *bws;
kv = get_key_from_keys_array(&sli->ka, 0);
bws = kv->k;
- if ((BWSLEN(bws) > level))
- return (unsigned char) BWS_GET(bws,level);
+ if ((BWSLEN(bws) * wcfact > level)) {
+ wchar_t res;
+
+ /*
+ * Sort wchar strings a byte at a time, rather than a single
+ * byte from each wchar.
+ */
+ res = (wchar_t)BWS_GET(bws, level / wcfact);
+ /* Sort most-significant byte first. */
+ if (level % wcfact < wcfact - 1)
+ res = (res >> (8 * (wcfact - 1 - (level % wcfact))));
+
+ return (res & 0xff);
+ }
+
return (-1);
}
@@ -317,6 +331,7 @@ free_sort_level(struct sort_level *sl)
static void
run_sort_level_next(struct sort_level *sl)
{
+ const size_t wcfact = (MB_CUR_MAX == 1) ? 1 : sizeof(wchar_t);
struct sort_level *slc;
size_t i, sln, tosort_num;
@@ -333,8 +348,16 @@ run_sort_level_next(struct sort_level *sl)
sort_left_dec(1);
goto end;
case (2):
+ /*
+ * Radixsort only processes a single byte at a time. In wchar
+ * mode, this can be a subset of the length of a character.
+ * list_coll_offset() offset is in units of wchar, not bytes.
+ * So to calculate the offset, we must divide by
+ * sizeof(wchar_t) and round down to the index of the first
+ * character this level references.
+ */
if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
- sl->level) > 0) {
+ sl->level / wcfact) > 0) {
sl->sorted[sl->start_position++] = sl->tosort[1];
sl->sorted[sl->start_position] = sl->tosort[0];
} else {
@@ -348,7 +371,13 @@ run_sort_level_next(struct sort_level *sl)
if (TINY_NODE(sl) || (sl->level > 15)) {
listcoll_t func;
- func = get_list_call_func(sl->level);
+ /*
+ * Collate comparison offset is in units of
+ * character-width, so we must divide the level (bytes)
+ * by operating character width (wchar_t or char). See
+ * longer comment above.
+ */
+ func = get_list_call_func(sl->level / wcfact);
sl->leaves = sl->tosort;
sl->leaves_num = sl->tosort_num;