summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lib/libc/regex/engine.c 46
-rw-r--r-- lib/libc/tests/regex/Makefile 3
-rwxr-xr-x lib/libc/tests/regex/multibyte.sh 35
3 files changed, 82 insertions, 2 deletions
diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c
index a17629d8cb782..a25bfa08ede77 100644
--- a/lib/libc/regex/engine.c
+++ b/lib/libc/regex/engine.c
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
*/
#ifdef SNAMES
+#define stepback sstepback
#define matcher smatcher
#define walk swalk
#define dissect sdissect
@@ -58,6 +59,7 @@ __FBSDID("$FreeBSD$");
#define match smat
#endif
#ifdef LNAMES
+#define stepback lstepback
#define matcher lmatcher
#define walk lwalk
#define dissect ldissect
@@ -68,6 +70,7 @@ __FBSDID("$FreeBSD$");
#define match lmat
#endif
#ifdef MNAMES
+#define stepback mstepback
#define matcher mmatcher
#define walk mwalk
#define dissect mdissect
@@ -142,6 +145,39 @@ static const char *pchar(int ch);
#endif
/*
+ * Step back nchar characters from cur in the multibyte string beginning at
+ * start.  Returns the new position, or NULL if that would precede start or
+ * no valid character is found ending at the current position.
+ */
+static const char *
+stepback(const char *start, const char *cur, int nchar)
+{
+	const char *ret;
+	int wc, mbc;
+	mbstate_t mbs;
+	size_t clen;
+
+	/* Compare distances; forming a pointer before start is undefined. */
+	if (MB_CUR_MAX == 1)
+		return ((cur - start) >= nchar ? cur - nchar : NULL);
+	ret = cur;
+	for (wc = nchar; wc > 0; wc--) {
+		for (mbc = 1; mbc <= MB_CUR_MAX; mbc++) {
+			if ((ret - start) < mbc)
+				return (NULL);
+			memset(&mbs, 0, sizeof(mbs));
+			clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
+			if (clen != (size_t)-1 && clen != (size_t)-2)
+				break;
+		}
+		if (mbc > MB_CUR_MAX)
+			return (NULL);
+		ret -= mbc;
+	}
+	return (ret);
+}
+
+/*
- matcher - the actual matching engine
== static int matcher(struct re_guts *g, const char *string, \
== size_t nmatch, regmatch_t pmatch[], int eflags);
@@ -244,8 +280,13 @@ matcher(struct re_guts *g,
ZAPSTATE(&m->mbs);
/* Adjust start according to moffset, to speed things up */
- if (dp != NULL && g->moffset > -1)
- start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
+ if (dp != NULL && g->moffset > -1) {
+ const char *nstart;
+
+ nstart = stepback(start, dp, g->moffset);
+ if (nstart != NULL)
+ start = nstart;
+ }
SP("mloop", m->st, *start);
@@ -1083,6 +1124,7 @@ pchar(int ch)
#endif
#endif
+#undef stepback
#undef matcher
#undef walk
#undef dissect
diff --git a/lib/libc/tests/regex/Makefile b/lib/libc/tests/regex/Makefile
index 9a940a0239eef..8c1c5d06961f7 100644
--- a/lib/libc/tests/regex/Makefile
+++ b/lib/libc/tests/regex/Makefile
@@ -2,6 +2,9 @@
PACKAGE= tests
+# local test cases
+ATF_TESTS_SH+= multibyte
+
.include "Makefile.inc"
.include "${.CURDIR:H}/Makefile.netbsd-tests"
.include <bsd.test.mk>
diff --git a/lib/libc/tests/regex/multibyte.sh b/lib/libc/tests/regex/multibyte.sh
new file mode 100755
index 0000000000000..24d3abc0ca8e5
--- /dev/null
+++ b/lib/libc/tests/regex/multibyte.sh
@@ -0,0 +1,35 @@
+# $FreeBSD$
+
+atf_test_case multibyte
+multibyte_head()
+{
+	atf_set descr 'Check matching multibyte characters (PR153502)'
+}
+
+# Feed sed one unterminated line per case with LC_CTYPE set to C.UTF-8 and
+# require the given script to print that line back unchanged.
+multibyte_body()
+{
+	export LC_CTYPE="C.UTF-8"
+	# Pairs of: input text, sed script expected to select that text.
+	set -- \
+	    'é' '/^.$/p' \
+	    'éé' '/^..$/p' \
+	    'aéa' '/a.a/p' \
+	    'aéa' '/a.*a/p' \
+	    'aaéaa' '/aa.aa/p' \
+	    'aéaéa' '/a.a.a/p' \
+	    'éa' '/.a/p' \
+	    'aéaa' '/a.aa/p' \
+	    'éaé' '/.a./p'
+	while [ $# -gt 0 ]; do
+		printf '%s' "$1" | atf_check -o "inline:$1" sed -ne "$2"
+		shift 2
+	done
+}
+
+# Register the multibyte test case.
+atf_init_test_cases()
+{
+	atf_add_test_case multibyte
+}