From 547bc083d614f3639f5632d9e39d79e828519318 Mon Sep 17 00:00:00 2001 From: Yuri Pankov Date: Wed, 12 Dec 2018 04:23:00 +0000 Subject: regcomp: reduce size of bitmap for multibyte locales This fixes the obscure endless loop seen with case-insensitive patterns containing characters in 128-255 range; originally found running GNU grep test suite. Our regex implementation being kludgy translates the characters in case-insensitive pattern to bracket expression containing both cases for the character and doesn't correctly handle the case when original character is in bitmap and the other case is not, falling into the endless loop going through in p_bracket(), ordinary(), and bothcases(). Reducing the bitmap to 0-127 range for multibyte locales solves this as none of these characters have other case mapping outside of bitmap. We are also safe in the case when the original character outside of bitmap has other case mapping in the bitmap (there are several of those in our current ctype maps having unidirectional mapping into bitmap). Reviewed by: bapt, kevans, pfg Differential revision: https://reviews.freebsd.org/D18302 --- lib/libc/regex/regex2.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'lib/libc/regex/regex2.h') diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h index a7c45683229c..a1a37172a55b 100644 --- a/lib/libc/regex/regex2.h +++ b/lib/libc/regex/regex2.h @@ -113,7 +113,7 @@ typedef struct { wint_t max; } crange; typedef struct { - unsigned char bmp[NC / 8]; + unsigned char bmp[NC_MAX / 8]; wctype_t *types; unsigned int ntypes; wint_t *wides; @@ -133,9 +133,14 @@ CHIN1(cset *cs, wint_t ch) if (ch < NC) return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^ cs->invert); - for (i = 0; i < cs->nwides; i++) - if (ch == cs->wides[i]) + for (i = 0; i < cs->nwides; i++) { + if (cs->icase) { + if (ch == towlower(cs->wides[i]) || + ch == towupper(cs->wides[i])) + return (!cs->invert); + } else if (ch == cs->wides[i]) return (!cs->invert); + } for (i = 0; i < cs->nranges; i++) if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max) return (!cs->invert); -- cgit v1.2.3