summaryrefslogtreecommitdiff
path: root/gnu/usr.bin/grep/search.c
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/usr.bin/grep/search.c')
-rw-r--r--gnu/usr.bin/grep/search.c95
1 files changed, 68 insertions, 27 deletions
diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c
index b2514a97ab05..1ea421741a45 100644
--- a/gnu/usr.bin/grep/search.c
+++ b/gnu/usr.bin/grep/search.c
@@ -73,17 +73,22 @@ static kwset_t kwset;
static int kwset_exact_matches;
#if defined(MBS_SUPPORT)
-static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+static char* check_multibyte_string PARAMS ((char const *buf, size_t size,
+ struct mb_cache *,
+ char const *orig_buf));
#endif
static void kwsinit PARAMS ((void));
static void kwsmusts PARAMS ((void));
static void Gcompile PARAMS ((char const *, size_t));
static void Ecompile PARAMS ((char const *, size_t));
-static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
+static size_t EGexecute PARAMS ((char const *, size_t, struct mb_cache *,
+ size_t *, int ));
static void Fcompile PARAMS ((char const *, size_t));
-static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
+static size_t Fexecute PARAMS ((char const *, size_t, struct mb_cache *,
+ size_t *, int));
static void Pcompile PARAMS ((char const *, size_t ));
-static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
+static size_t Pexecute PARAMS ((char const *, size_t, struct mb_cache *,
+ size_t *, int));
void
dfaerror (char const *mesg)
@@ -149,35 +154,66 @@ kwsmusts (void)
are not singlebyte character nor the first byte of a multibyte
character. Caller must free the array. */
static char*
-check_multibyte_string(char const *buf, size_t size)
+check_multibyte_string(char const *buf, size_t size, struct mb_cache *mb_cache,
+ char const *orig_buf)
{
char *mb_properties = xmalloc(size);
mbstate_t cur_state;
wchar_t wc;
int i;
memset(&cur_state, 0, sizeof(mbstate_t));
- memset(mb_properties, 0, sizeof(char)*size);
- for (i = 0; i < size ;)
- {
- size_t mbclen;
- mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+ if (mb_cache && mb_cache->mblen_buf &&
+ orig_buf > mb_cache->orig_buf &&
+ (orig_buf + size) <= (mb_cache->orig_buf + mb_cache->len))
+ {
+ /* The cache can help us. */
+ memcpy (mb_properties,
+ mb_cache->mblen_buf + (orig_buf - mb_cache->orig_buf),
+ size);
+
+ }
+ else
+ {
+ memset(mb_properties, 0, sizeof(char)*size);
+ for (i = 0; i < size ;)
{
- /* An invalid sequence, or a truncated multibyte character.
- We treat it as a singlebyte character. */
- mbclen = 1;
+ size_t mbclen;
+ mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
+
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+ {
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a singlebyte character. */
+ mbclen = 1;
+ }
+ else if (match_icase)
+ {
+ if (iswupper((wint_t)wc))
+ {
+ wc = towlower((wint_t)wc);
+ wcrtomb(buf + i, wc, &cur_state);
+ }
+ }
+ mb_properties[i] = mbclen;
+ i += mbclen;
}
- else if (match_icase)
+
+ /* Now populate the cache. */
+ if (mb_cache)
{
- if (iswupper((wint_t)wc))
+ if (mb_cache->wcs_buf)
{
- wc = towlower((wint_t)wc);
- wcrtomb(buf + i, wc, &cur_state);
+ free (mb_cache->wcs_buf);
+ mb_cache->wcs_buf = NULL;
}
+ if (mb_cache->mblen_buf)
+ free (mb_cache->mblen_buf);
+ mb_cache->len = size;
+ mb_cache->orig_buf = orig_buf;
+ mb_cache->mblen_buf = xmalloc (size);
+ memcpy (mb_cache->mblen_buf, mb_properties, size);
}
- mb_properties[i] = mbclen;
- i += mbclen;
}
return mb_properties;
@@ -344,9 +380,11 @@ Ecompile (char const *pattern, size_t size)
}
static size_t
-EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
+EGexecute (char const *buf, size_t size, struct mb_cache *mb_cache,
+ size_t *match_size, int exact)
{
register char const *buflim, *beg, *end;
+ char const *orig_buf = buf;
char eol = eolbyte;
int backref, start, len;
struct kwsmatch kwsm;
@@ -362,7 +400,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
buf = case_buf;
}
if (kwset)
- mb_properties = check_multibyte_string(buf, size);
+ mb_properties = check_multibyte_string(buf, size, mb_cache, orig_buf);
}
#endif /* MBS_SUPPORT */
@@ -391,13 +429,13 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
--beg;
if (kwsm.index < kwset_exact_matches)
goto success_in_beg_and_end;
- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ if (dfaexec (&dfa, beg, end - beg, &backref, mb_cache) == (size_t) -1)
continue;
}
else
{
/* No good fixed strings; start with DFA. */
- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref, mb_cache);
if (offset == (size_t) -1)
break;
/* Narrow down to the line we've found. */
@@ -525,9 +563,11 @@ Fcompile (char const *pattern, size_t size)
}
static size_t
-Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
+Fexecute (char const *buf, size_t size, struct mb_cache *mb_cache,
+ size_t *match_size, int exact)
{
register char const *beg, *try, *end;
+ char const *orig_buf = buf;
register size_t len;
char eol = eolbyte;
struct kwsmatch kwsmatch;
@@ -542,7 +582,7 @@ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
memcpy(case_buf, buf, size);
buf = case_buf;
}
- mb_properties = check_multibyte_string(buf, size);
+ mb_properties = check_multibyte_string(buf, size, mb_cache, orig_buf);
}
#endif /* MBS_SUPPORT */
@@ -707,7 +747,8 @@ Pcompile (char const *pattern, size_t size)
}
static size_t
-Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
+Pexecute (char const *buf, size_t size, struct mb_cache *mb_cache,
+ size_t *match_size, int exact)
{
#if !HAVE_LIBPCRE
abort ();