diff options
| author | Christos Margiolis <christos@FreeBSD.org> | 2023-12-01 00:30:10 +0000 |
|---|---|---|
| committer | Christos Margiolis <christos@FreeBSD.org> | 2023-12-01 00:30:10 +0000 |
| commit | 3d44dce90a6946e2ef2ab30ffbf8e2930acf888b (patch) | |
| tree | 106662dff29bc4bb9f82828394703061712fcbea | |
| parent | f42518ff1250fcc76a0a1eed9f411edbbb172541 (diff) | |
| -rw-r--r-- | usr.bin/sort/bwstring.c | 144 | ||||
| -rw-r--r-- | usr.bin/sort/sort.1.in | 4 | ||||
| -rw-r--r-- | usr.bin/sort/tests/Makefile | 1 | ||||
| -rwxr-xr-x | usr.bin/sort/tests/sort_monthsort_test.sh | 159 |
4 files changed, 263 insertions, 45 deletions
diff --git a/usr.bin/sort/bwstring.c b/usr.bin/sort/bwstring.c index fc1b50cb78ac..b0c14e996b23 100644 --- a/usr.bin/sort/bwstring.c +++ b/usr.bin/sort/bwstring.c @@ -43,63 +43,114 @@ bool byte_sort; -static wchar_t **wmonths; -static char **cmonths; +struct wmonth { + wchar_t *mon; + wchar_t *ab; + wchar_t *alt; +}; -/* initialise months */ +struct cmonth { + char *mon; + char *ab; + char *alt; +}; + +static struct wmonth *wmonths; +static struct cmonth *cmonths; + +static int +populate_cmonth(char **field, const nl_item item, int idx) +{ + char *tmp, *m; + size_t i, len; + + tmp = nl_langinfo(item); + if (debug_sort) + printf("month[%d]=%s\n", idx, tmp); + if (*tmp == '\0') + return (0); + m = sort_strdup(tmp); + len = strlen(tmp); + for (i = 0; i < len; i++) + m[i] = toupper(m[i]); + *field = m; + + return (1); +} + +static int +populate_wmonth(wchar_t **field, const nl_item item, int idx) +{ + wchar_t *m; + char *tmp; + size_t i, len; + + tmp = nl_langinfo(item); + if (debug_sort) + printf("month[%d]=%s\n", idx, tmp); + if (*tmp == '\0') + return (0); + len = strlen(tmp); + m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); + if (mbstowcs(m, tmp, len) == ((size_t) - 1)) { + sort_free(m); + return (0); + } + m[len] = L'\0'; + for (i = 0; i < len; i++) + m[i] = towupper(m[i]); + *field = m; + + return (1); +} void initialise_months(void) { - const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, + const nl_item mon_item[12] = { MON_1, MON_2, MON_3, MON_4, + MON_5, MON_6, MON_7, MON_8, MON_9, MON_10, + MON_11, MON_12 }; + const nl_item ab_item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, ABMON_11, ABMON_12 }; - char *tmp; - size_t len; - + const nl_item alt_item[12] = { ALTMON_1, ALTMON_2, ALTMON_3, ALTMON_4, + ALTMON_5, ALTMON_6, ALTMON_7, ALTMON_8, ALTMON_9, ALTMON_10, + ALTMON_11, ALTMON_12 }; + int i; + + /* + * Handle all possible month formats: abbreviation, full name, + * standalone name (without case 
ending). + */ if (mb_cur_max == 1) { if (cmonths == NULL) { - char *m; - - cmonths = sort_malloc(sizeof(char*) * 12); - for (int i = 0; i < 12; i++) { - cmonths[i] = NULL; - tmp = nl_langinfo(item[i]); - if (debug_sort) - printf("month[%d]=%s\n", i, tmp); - if (*tmp == '\0') + cmonths = sort_malloc(sizeof(struct cmonth) * 12); + for (i = 0; i < 12; i++) { + if (!populate_cmonth(&cmonths[i].mon, + mon_item[i], i)) + continue; + if (!populate_cmonth(&cmonths[i].ab, + ab_item[i], i)) + continue; + if (!populate_cmonth(&cmonths[i].alt, + alt_item[i], i)) continue; - m = sort_strdup(tmp); - len = strlen(tmp); - for (unsigned int j = 0; j < len; j++) - m[j] = toupper(m[j]); - cmonths[i] = m; } } } else { if (wmonths == NULL) { - wchar_t *m; - - wmonths = sort_malloc(sizeof(wchar_t *) * 12); - for (int i = 0; i < 12; i++) { - wmonths[i] = NULL; - tmp = nl_langinfo(item[i]); - if (debug_sort) - printf("month[%d]=%s\n", i, tmp); - if (*tmp == '\0') + wmonths = sort_malloc(sizeof(struct wmonth) * 12); + for (i = 0; i < 12; i++) { + if (!populate_wmonth(&wmonths[i].mon, + mon_item[i], i)) continue; - len = strlen(tmp); - m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); - if (mbstowcs(m, tmp, len) == - ((size_t) - 1)) { - sort_free(m); + if (!populate_wmonth(&wmonths[i].ab, + ab_item[i], i)) + continue; + if (!populate_wmonth(&wmonths[i].alt, + alt_item[i], i)) continue; - } - m[len] = L'\0'; - for (unsigned int j = 0; j < len; j++) - m[j] = towupper(m[j]); - wmonths[i] = m; } } } @@ -754,8 +805,11 @@ bws_month_score(const struct bwstring *s0) ++s; for (int i = 11; i >= 0; --i) { - if (cmonths[i] && - (s == strstr(s, cmonths[i]))) + if (cmonths[i].mon && (s == strstr(s, cmonths[i].mon))) + return (i); + if (cmonths[i].ab && (s == strstr(s, cmonths[i].ab))) + return (i); + if (cmonths[i].alt && (s == strstr(s, cmonths[i].alt))) return (i); } @@ -769,7 +823,11 @@ bws_month_score(const struct bwstring *s0) ++s; for (int i = 11; i >= 0; --i) { - if (wmonths[i] && (s == wcsstr(s, 
wmonths[i]))) + if (wmonths[i].ab && (s == wcsstr(s, wmonths[i].ab))) + return (i); + if (wmonths[i].mon && (s == wcsstr(s, wmonths[i].mon))) + return (i); + if (wmonths[i].alt && (s == wcsstr(s, wmonths[i].alt))) return (i); } } diff --git a/usr.bin/sort/sort.1.in b/usr.bin/sort/sort.1.in index b9cd3c2b8121..80cc1dcb0282 100644 --- a/usr.bin/sort/sort.1.in +++ b/usr.bin/sort/sort.1.in @@ -30,7 +30,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd September 4, 2019 +.Dd November 30, 2023 .Dt SORT 1 .Os .Sh NAME @@ -179,7 +179,7 @@ options (human-readable). .It Fl i , Fl Fl ignore-nonprinting Ignore all non-printable characters. .It Fl M , Fl Fl month-sort , Fl Fl sort=month -Sort by month abbreviations. +Sort by month. Unknown strings are considered smaller than the month names. .It Fl n , Fl Fl numeric-sort , Fl Fl sort=numeric Sort fields numerically by arithmetic value. diff --git a/usr.bin/sort/tests/Makefile b/usr.bin/sort/tests/Makefile index 1982fd1cee0a..752dec06bbff 100644 --- a/usr.bin/sort/tests/Makefile +++ b/usr.bin/sort/tests/Makefile @@ -2,6 +2,7 @@ PACKAGE= tests NETBSD_ATF_TESTS_SH= sort_test +ATF_TESTS_SH= sort_monthsort_test ${PACKAGE}FILES+= d_any_char_dflag_out.txt ${PACKAGE}FILES+= d_any_char_fflag_out.txt diff --git a/usr.bin/sort/tests/sort_monthsort_test.sh b/usr.bin/sort/tests/sort_monthsort_test.sh new file mode 100755 index 000000000000..db42981fb107 --- /dev/null +++ b/usr.bin/sort/tests/sort_monthsort_test.sh @@ -0,0 +1,159 @@ +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2023 Christos Margiolis <christos@FreeBSD.org> +# + +get_months_fmt() +{ + rm -f in + for i in $(seq 12 1); do + printf "2000-%02d-01\n" ${i} | xargs -I{} \ + date -jf "%Y-%m-%d" {} "${1}" >>in + done +} + +atf_test_case monthsort_english +monthsort_english_head() +{ + atf_set "descr" "Test the -M flag with English months" +} +monthsort_english_body() +{ + export LC_TIME="en_US.UTF-8" + + cat 
>expout <<EOF +January +February +March +April +May +June +July +August +September +October +November +December +EOF + + # No need to test the rest of the formats (%b and %OB) as %b is a + # substring of %B and %OB is the same as %B. + get_months_fmt '+%B' + atf_check -o file:expout sort -M in +} + +atf_test_case monthsort_all_formats_greek +monthsort_all_formats_greek_head() +{ + atf_set "descr" "Test the -M flag with all possible Greek month formats" +} +monthsort_all_formats_greek_body() +{ + # Test with the Greek locale, since, unlike English, the + # abbreviation/full-name and standalone formats are different. + export LC_TIME="el_GR.UTF-8" + + # Abbreviation format (e.g Jan, Ιαν) + cat >expout <<EOF +Ιαν +Φεβ +Μαρ +Απρ +Μαΐ +Ιουν +Ιουλ +Αυγ +Σεπ +Οκτ +Νοε +Δεκ +EOF + get_months_fmt '+%b' + atf_check -o file:expout sort -M in + + # Full-name format (e.g January, Ιανουαρίου) + cat >expout <<EOF +Ιανουαρίου +Φεβρουαρίου +Μαρτίου +Απριλίου +Μαΐου +Ιουνίου +Ιουλίου +Αυγούστου +Σεπτεμβρίου +Οκτωβρίου +Νοεμβρίου +Δεκεμβρίου +EOF + get_months_fmt '+%B' + atf_check -o file:expout sort -M in + + # Standalone format (e.g January, Ιανουάριος) + cat >expout <<EOF +Ιανουάριος +Φεβρουάριος +Μάρτιος +Απρίλιος +Μάϊος +Ιούνιος +Ιούλιος +Αύγουστος +Σεπτέμβριος +Οκτώβριος +Νοέμβριος +Δεκέμβριος +EOF + get_months_fmt '+%OB' + atf_check -o file:expout sort -M in +} + +atf_test_case monthsort_mixed_formats_greek +monthsort_mixed_formats_greek_head() +{ + atf_set "descr" "Test the -M flag with mixed Greek month formats" +} +monthsort_mixed_formats_greek_body() +{ + export LC_TIME="el_GR.UTF-8" + + cat >in <<EOF +Δεκέμβριος +Νοεμβρίου +Οκτ +Σεπ +Αυγ +Ιούλιος +Ιουνίου +Μαΐου +Απριλίου +Μάρτιος +Φεβρουάριος +Ιανουάριος +EOF + + cat >expout <<EOF +Ιανουάριος +Φεβρουάριος +Μάρτιος +Απριλίου +Μαΐου +Ιουνίου +Ιούλιος +Αυγ +Σεπ +Οκτ +Νοεμβρίου +Δεκέμβριος +EOF + + atf_check -o file:expout sort -M in +} + +atf_init_test_cases() +{ + atf_add_test_case monthsort_english + atf_add_test_case 
monthsort_all_formats_greek + atf_add_test_case monthsort_mixed_formats_greek +} |
