about | summary | refs | log | tree | commit | diff
path: root/lib/libc
diff options
context:
space:
mode:
author Yuri Pankov <yuripv@FreeBSD.org> 2018-12-12 04:23:00 +0000
committer Yuri Pankov <yuripv@FreeBSD.org> 2018-12-12 04:23:00 +0000
commit 547bc083d614f3639f5632d9e39d79e828519318 (patch)
tree c56e6d9adcf5c64c8748348d13fd59f0566b4511 /lib/libc
parent 7bdc329113b8ae853baea30dcf62f6ee3a897ccd (diff)
Notes
Diffstat (limited to 'lib/libc')
-rw-r--r-- lib/libc/regex/regcomp.c 14
-rw-r--r-- lib/libc/regex/regex2.h 11
-rw-r--r-- lib/libc/regex/utils.h 4
-rwxr-xr-x lib/libc/tests/regex/multibyte.sh 26
4 files changed, 44 insertions, 11 deletions
diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c
index 586621c5a745..d815382cb301 100644
--- a/lib/libc/regex/regcomp.c
+++ b/lib/libc/regex/regcomp.c
@@ -1841,21 +1841,29 @@ computejumps(struct parse *p, struct re_guts *g)
{
int ch;
int mindex;
+ int cmin, cmax;
+
+ /*
+ * For UTF-8 we process only the first 128 characters corresponding to
+ * the POSIX locale.
+ */
+ cmin = MB_CUR_MAX == 1 ? CHAR_MIN : 0;
+ cmax = MB_CUR_MAX == 1 ? CHAR_MAX : 127;
/* Avoid making errors worse */
if (p->error != 0)
return;
- g->charjump = (int*) malloc((NC + 1) * sizeof(int));
+ g->charjump = (int *)malloc((cmax - cmin + 1) * sizeof(int));
if (g->charjump == NULL) /* Not a fatal error */
return;
/* Adjust for signed chars, if necessary */
- g->charjump = &g->charjump[-(CHAR_MIN)];
+ g->charjump = &g->charjump[-(cmin)];
/* If the character does not exist in the pattern, the jump
* is equal to the number of characters in the pattern.
*/
- for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
+ for (ch = cmin; ch < cmax + 1; ch++)
g->charjump[ch] = g->mlen;
/* If the character does exist, compute the jump that would
diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h
index a7c45683229c..a1a37172a55b 100644
--- a/lib/libc/regex/regex2.h
+++ b/lib/libc/regex/regex2.h
@@ -113,7 +113,7 @@ typedef struct {
wint_t max;
} crange;
typedef struct {
- unsigned char bmp[NC / 8];
+ unsigned char bmp[NC_MAX / 8];
wctype_t *types;
unsigned int ntypes;
wint_t *wides;
@@ -133,9 +133,14 @@ CHIN1(cset *cs, wint_t ch)
if (ch < NC)
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
cs->invert);
- for (i = 0; i < cs->nwides; i++)
- if (ch == cs->wides[i])
+ for (i = 0; i < cs->nwides; i++) {
+ if (cs->icase) {
+ if (ch == towlower(cs->wides[i]) ||
+ ch == towupper(cs->wides[i]))
+ return (!cs->invert);
+ } else if (ch == cs->wides[i])
return (!cs->invert);
+ }
for (i = 0; i < cs->nranges; i++)
if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max)
return (!cs->invert);
diff --git a/lib/libc/regex/utils.h b/lib/libc/regex/utils.h
index b2dba2ee4e93..72f2286a0260 100644
--- a/lib/libc/regex/utils.h
+++ b/lib/libc/regex/utils.h
@@ -39,7 +39,9 @@
/* utility definitions */
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
#define INFINITY (DUPMAX + 1)
-#define NC (CHAR_MAX - CHAR_MIN + 1)
+
+#define NC_MAX (CHAR_MAX - CHAR_MIN + 1)
+#define NC ((MB_CUR_MAX) == 1 ? (NC_MAX) : (128))
typedef unsigned char uch;
/* switch off assertions (if not already off) if no REDEBUG */
diff --git a/lib/libc/tests/regex/multibyte.sh b/lib/libc/tests/regex/multibyte.sh
index 24d3abc0ca8e..3ab02118f623 100755
--- a/lib/libc/tests/regex/multibyte.sh
+++ b/lib/libc/tests/regex/multibyte.sh
@@ -1,11 +1,11 @@
# $FreeBSD$
-atf_test_case multibyte
-multibyte_head()
+atf_test_case bmpat
+bmpat_head()
{
atf_set "descr" "Check matching multibyte characters (PR153502)"
}
-multibyte_body()
+bmpat_body()
{
export LC_CTYPE="C.UTF-8"
@@ -29,7 +29,25 @@ multibyte_body()
sed -ne '/.a./p'
}
+atf_test_case icase
+icase_head()
+{
+ atf_set "descr" "Check case-insensitive matching for characters 128-255"
+}
+icase_body()
+{
+ export LC_CTYPE="C.UTF-8"
+
+ a=$(printf '\302\265\n') # U+00B5
+ b=$(printf '\316\234\n') # U+039C
+ c=$(printf '\316\274\n') # U+03BC
+
+ echo $b | atf_check -o "inline:$b\n" sed -ne "/$a/Ip"
+ echo $c | atf_check -o "inline:$c\n" sed -ne "/$a/Ip"
+}
+
atf_init_test_cases()
{
- atf_add_test_case multibyte
+ atf_add_test_case bmpat
+ atf_add_test_case icase
}