aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristos Margiolis <christos@FreeBSD.org>2023-12-01 00:30:10 +0000
committerChristos Margiolis <christos@FreeBSD.org>2023-12-01 00:30:10 +0000
commit3d44dce90a6946e2ef2ab30ffbf8e2930acf888b (patch)
tree106662dff29bc4bb9f82828394703061712fcbea
parentf42518ff1250fcc76a0a1eed9f411edbbb172541 (diff)
-rw-r--r--usr.bin/sort/bwstring.c144
-rw-r--r--usr.bin/sort/sort.1.in4
-rw-r--r--usr.bin/sort/tests/Makefile1
-rwxr-xr-xusr.bin/sort/tests/sort_monthsort_test.sh159
4 files changed, 263 insertions, 45 deletions
diff --git a/usr.bin/sort/bwstring.c b/usr.bin/sort/bwstring.c
index fc1b50cb78ac..b0c14e996b23 100644
--- a/usr.bin/sort/bwstring.c
+++ b/usr.bin/sort/bwstring.c
@@ -43,63 +43,114 @@
bool byte_sort;
-static wchar_t **wmonths;
-static char **cmonths;
+struct wmonth {
+ wchar_t *mon;
+ wchar_t *ab;
+ wchar_t *alt;
+};
-/* initialise months */
+struct cmonth {
+ char *mon;
+ char *ab;
+ char *alt;
+};
+
+static struct wmonth *wmonths;
+static struct cmonth *cmonths;
+
+static int
+populate_cmonth(char **field, const nl_item item, int idx)
+{
+ char *tmp, *m;
+ size_t i, len;
+
+ tmp = nl_langinfo(item);
+ if (debug_sort)
+ printf("month[%d]=%s\n", idx, tmp);
+ if (*tmp == '\0')
+ return (0);
+ m = sort_strdup(tmp);
+ len = strlen(tmp);
+ for (i = 0; i < len; i++)
+ m[i] = toupper(m[i]);
+ *field = m;
+
+ return (1);
+}
+
+static int
+populate_wmonth(wchar_t **field, const nl_item item, int idx)
+{
+ wchar_t *m;
+ char *tmp;
+ size_t i, len;
+
+ tmp = nl_langinfo(item);
+ if (debug_sort)
+ printf("month[%d]=%s\n", idx, tmp);
+ if (*tmp == '\0')
+ return (0);
+ len = strlen(tmp);
+ m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
+ if (mbstowcs(m, tmp, len) == ((size_t) - 1)) {
+ sort_free(m);
+ return (0);
+ }
+ m[len] = L'\0';
+ for (i = 0; i < len; i++)
+ m[i] = towupper(m[i]);
+ *field = m;
+
+ return (1);
+}
void
initialise_months(void)
{
- const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
+ const nl_item mon_item[12] = { MON_1, MON_2, MON_3, MON_4,
+ MON_5, MON_6, MON_7, MON_8, MON_9, MON_10,
+ MON_11, MON_12 };
+ const nl_item ab_item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
ABMON_11, ABMON_12 };
- char *tmp;
- size_t len;
-
+ const nl_item alt_item[12] = { ALTMON_1, ALTMON_2, ALTMON_3, ALTMON_4,
+ ALTMON_5, ALTMON_6, ALTMON_7, ALTMON_8, ALTMON_9, ALTMON_10,
+ ALTMON_11, ALTMON_12 };
+ int i;
+
+ /*
+ * Handle all possible month formats: abbrevation, full name,
+ * standalone name (without case ending).
+ */
if (mb_cur_max == 1) {
if (cmonths == NULL) {
- char *m;
-
- cmonths = sort_malloc(sizeof(char*) * 12);
- for (int i = 0; i < 12; i++) {
- cmonths[i] = NULL;
- tmp = nl_langinfo(item[i]);
- if (debug_sort)
- printf("month[%d]=%s\n", i, tmp);
- if (*tmp == '\0')
+ cmonths = sort_malloc(sizeof(struct cmonth) * 12);
+ for (i = 0; i < 12; i++) {
+ if (!populate_cmonth(&cmonths[i].mon,
+ mon_item[i], i))
+ continue;
+ if (!populate_cmonth(&cmonths[i].ab,
+ ab_item[i], i))
+ continue;
+ if (!populate_cmonth(&cmonths[i].alt,
+ alt_item[i], i))
continue;
- m = sort_strdup(tmp);
- len = strlen(tmp);
- for (unsigned int j = 0; j < len; j++)
- m[j] = toupper(m[j]);
- cmonths[i] = m;
}
}
} else {
if (wmonths == NULL) {
- wchar_t *m;
-
- wmonths = sort_malloc(sizeof(wchar_t *) * 12);
- for (int i = 0; i < 12; i++) {
- wmonths[i] = NULL;
- tmp = nl_langinfo(item[i]);
- if (debug_sort)
- printf("month[%d]=%s\n", i, tmp);
- if (*tmp == '\0')
+ wmonths = sort_malloc(sizeof(struct wmonth) * 12);
+ for (i = 0; i < 12; i++) {
+ if (!populate_wmonth(&wmonths[i].mon,
+ mon_item[i], i))
continue;
- len = strlen(tmp);
- m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
- if (mbstowcs(m, tmp, len) ==
- ((size_t) - 1)) {
- sort_free(m);
+ if (!populate_wmonth(&wmonths[i].ab,
+ ab_item[i], i))
+ continue;
+ if (!populate_wmonth(&wmonths[i].alt,
+ alt_item[i], i))
continue;
- }
- m[len] = L'\0';
- for (unsigned int j = 0; j < len; j++)
- m[j] = towupper(m[j]);
- wmonths[i] = m;
}
}
}
@@ -754,8 +805,11 @@ bws_month_score(const struct bwstring *s0)
++s;
for (int i = 11; i >= 0; --i) {
- if (cmonths[i] &&
- (s == strstr(s, cmonths[i])))
+ if (cmonths[i].mon && (s == strstr(s, cmonths[i].mon)))
+ return (i);
+ if (cmonths[i].ab && (s == strstr(s, cmonths[i].ab)))
+ return (i);
+ if (cmonths[i].alt && (s == strstr(s, cmonths[i].alt)))
return (i);
}
@@ -769,7 +823,11 @@ bws_month_score(const struct bwstring *s0)
++s;
for (int i = 11; i >= 0; --i) {
- if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
+ if (wmonths[i].ab && (s == wcsstr(s, wmonths[i].ab)))
+ return (i);
+ if (wmonths[i].mon && (s == wcsstr(s, wmonths[i].mon)))
+ return (i);
+ if (wmonths[i].alt && (s == wcsstr(s, wmonths[i].alt)))
return (i);
}
}
diff --git a/usr.bin/sort/sort.1.in b/usr.bin/sort/sort.1.in
index b9cd3c2b8121..80cc1dcb0282 100644
--- a/usr.bin/sort/sort.1.in
+++ b/usr.bin/sort/sort.1.in
@@ -30,7 +30,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd September 4, 2019
+.Dd November 30, 2023
.Dt SORT 1
.Os
.Sh NAME
@@ -179,7 +179,7 @@ options (human-readable).
.It Fl i , Fl Fl ignore-nonprinting
Ignore all non-printable characters.
.It Fl M , Fl Fl month-sort , Fl Fl sort=month
-Sort by month abbreviations.
+Sort by month.
Unknown strings are considered smaller than the month names.
.It Fl n , Fl Fl numeric-sort , Fl Fl sort=numeric
Sort fields numerically by arithmetic value.
diff --git a/usr.bin/sort/tests/Makefile b/usr.bin/sort/tests/Makefile
index 1982fd1cee0a..752dec06bbff 100644
--- a/usr.bin/sort/tests/Makefile
+++ b/usr.bin/sort/tests/Makefile
@@ -2,6 +2,7 @@
PACKAGE= tests
NETBSD_ATF_TESTS_SH= sort_test
+ATF_TESTS_SH= sort_monthsort_test
${PACKAGE}FILES+= d_any_char_dflag_out.txt
${PACKAGE}FILES+= d_any_char_fflag_out.txt
diff --git a/usr.bin/sort/tests/sort_monthsort_test.sh b/usr.bin/sort/tests/sort_monthsort_test.sh
new file mode 100755
index 000000000000..db42981fb107
--- /dev/null
+++ b/usr.bin/sort/tests/sort_monthsort_test.sh
@@ -0,0 +1,159 @@
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2023 Christos Margiolis <christos@FreeBSD.org>
+#
+
+get_months_fmt()
+{
+ rm -f in
+ for i in $(seq 12 1); do
+ printf "2000-%02d-01\n" ${i} | xargs -I{} \
+ date -jf "%Y-%m-%d" {} "${1}" >>in
+ done
+}
+
+atf_test_case monthsort_english
+monthsort_english_head()
+{
+ atf_set "descr" "Test the -M flag with English months"
+}
+monthsort_english_body()
+{
+ export LC_TIME="en_US.UTF-8"
+
+ cat >expout <<EOF
+January
+February
+March
+April
+May
+June
+July
+August
+September
+October
+November
+December
+EOF
+
+ # No need to test the rest of the formats (%b and %OB) as %b is a
+ # substring of %B and %OB is the same as %B.
+ get_months_fmt '+%B'
+ atf_check -o file:expout sort -M in
+}
+
+atf_test_case monthsort_all_formats_greek
+monthsort_all_formats_greek_head()
+{
+ atf_set "descr" "Test the -M flag with all possible Greek month formats"
+}
+monthsort_all_formats_greek_body()
+{
+ # Test with the Greek locale, since, unlike English, the
+ # abbreviation/full-name and standalone formats are different.
+ export LC_TIME="el_GR.UTF-8"
+
+ # Abbreviation format (e.g Jan, Ιαν)
+ cat >expout <<EOF
+Ιαν
+Φεβ
+Μαρ
+Απρ
+Μαΐ
+Ιουν
+Ιουλ
+Αυγ
+Σεπ
+Οκτ
+Νοε
+Δεκ
+EOF
+ get_months_fmt '+%b'
+ atf_check -o file:expout sort -M in
+
+ # Full-name format (e.g January, Ιανουαρίου)
+ cat >expout <<EOF
+Ιανουαρίου
+Φεβρουαρίου
+Μαρτίου
+Απριλίου
+Μαΐου
+Ιουνίου
+Ιουλίου
+Αυγούστου
+Σεπτεμβρίου
+Οκτωβρίου
+Νοεμβρίου
+Δεκεμβρίου
+EOF
+ get_months_fmt '+%B'
+ atf_check -o file:expout sort -M in
+
+ # Standalone format (e.g January, Ιανουάριος)
+ cat >expout <<EOF
+Ιανουάριος
+Φεβρουάριος
+Μάρτιος
+Απρίλιος
+Μάϊος
+Ιούνιος
+Ιούλιος
+Αύγουστος
+Σεπτέμβριος
+Οκτώβριος
+Νοέμβριος
+Δεκέμβριος
+EOF
+ get_months_fmt '+%OB'
+ atf_check -o file:expout sort -M in
+}
+
+atf_test_case monthsort_mixed_formats_greek
+monthsort_mixed_formats_greek_head()
+{
+ atf_set "descr" "Test the -M flag with mixed Greek month formats"
+}
+monthsort_mixed_formats_greek_body()
+{
+ export LC_TIME="el_GR.UTF-8"
+
+ cat >in <<EOF
+Δεκέμβριος
+Νοεμβρίου
+Οκτ
+Σεπ
+Αυγ
+Ιούλιος
+Ιουνίου
+Μαΐου
+Απριλίου
+Μάρτιος
+Φεβρουάριος
+Ιανουάριος
+EOF
+
+ cat >expout <<EOF
+Ιανουάριος
+Φεβρουάριος
+Μάρτιος
+Απριλίου
+Μαΐου
+Ιουνίου
+Ιούλιος
+Αυγ
+Σεπ
+Οκτ
+Νοεμβρίου
+Δεκέμβριος
+EOF
+
+ atf_check -o file:expout sort -M in
+}
+
+atf_init_test_cases()
+{
+ atf_add_test_case monthsort_english
+ atf_add_test_case monthsort_all_formats_greek
+ atf_add_test_case monthsort_mixed_formats_greek
+}