summaryrefslogtreecommitdiff
path: root/usr.bin/grep
diff options
context:
space:
mode:
authorKyle Evans <kevans@FreeBSD.org>2018-05-04 03:13:25 +0000
committerKyle Evans <kevans@FreeBSD.org>2018-05-04 03:13:25 +0000
commita2584d1b34c54485330d2bba1983b708e52dc6d4 (patch)
tree3bee28befdffadd2a48ebf6042fd311d8bf142e2 /usr.bin/grep
parent51688c129fa2766deb9fff2ba702bb3c3bd1889c (diff)
downloadsrc-test-a2584d1b34c54485330d2bba1983b708e52dc6d4.tar.gz
src-test-a2584d1b34c54485330d2bba1983b708e52dc6d4.zip
bsdgrep: annihilate our in-tree TRE, previously disabled by default
It was an old TRE that had plenty of bugs and no performance gain over regex(3). I disabled it by default in r323615, and there was some confusion about what the knob does- likely due to poor naming on my part- to the tune of "well, it sounds like it should speed things up" (mentioned by multiple people). To compound this, I have no intention of maintaining a second regex implementation. If someone would like to step up and volunteer to maintain a lean-and-mean implementation for grep, this is OK, but we have very few volunteers to maintain even our primary regex implementation.
Notes
Notes: svn path=/head/; revision=333236
Diffstat (limited to 'usr.bin/grep')
-rw-r--r--usr.bin/grep/Makefile9
-rw-r--r--usr.bin/grep/grep.c18
-rw-r--r--usr.bin/grep/grep.h7
-rw-r--r--usr.bin/grep/regex/fastmatch.c170
-rw-r--r--usr.bin/grep/regex/fastmatch.h95
-rw-r--r--usr.bin/grep/regex/glue.h67
-rw-r--r--usr.bin/grep/regex/hashtable.c270
-rw-r--r--usr.bin/grep/regex/hashtable.h35
-rw-r--r--usr.bin/grep/regex/tre-compile.c101
-rw-r--r--usr.bin/grep/regex/tre-fastmatch.c1070
-rw-r--r--usr.bin/grep/regex/tre-fastmatch.h21
-rw-r--r--usr.bin/grep/util.c17
12 files changed, 2 insertions, 1878 deletions
diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile
index 9fd5d55bf4ea4..9eec5877e348b 100644
--- a/usr.bin/grep/Makefile
+++ b/usr.bin/grep/Makefile
@@ -17,15 +17,6 @@ bsdgrep.1: grep.1
.endif
SRCS= file.c grep.c queue.c util.c
-.if ${MK_BSD_GREP_FASTMATCH} == "yes"
-# Extra files ported backported for some regex improvements
-.PATH: ${.CURDIR}/regex
-SRCS+= fastmatch.c hashtable.c tre-compile.c tre-fastmatch.c
-CFLAGS+=-I${.CURDIR}/regex
-.else
-CFLAGS+= -DWITHOUT_FASTMATCH
-.endif
-
SCRIPTS= zgrep.sh
LINKS= ${BINDIR}/zgrep ${BINDIR}/zfgrep \
${BINDIR}/zgrep ${BINDIR}/zegrep \
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 7d9dd2481fd86..827c6b987a0f6 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -51,9 +51,6 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
-#ifndef WITHOUT_FASTMATCH
-#include "fastmatch.h"
-#endif
#include "grep.h"
#ifndef WITHOUT_NLS
@@ -96,9 +93,6 @@ unsigned int patterns;
static unsigned int pattern_sz;
struct pat *pattern;
regex_t *r_pattern;
-#ifndef WITHOUT_FASTMATCH
-fastmatch_t *fg_pattern;
-#endif
/* Filename exclusion/inclusion patterns */
unsigned int fpatterns, dpatterns;
@@ -712,9 +706,6 @@ main(int argc, char *argv[])
usage();
}
-#ifndef WITHOUT_FASTMATCH
- fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
-#endif
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
/* Don't process any patterns if we have a blank one */
@@ -725,15 +716,6 @@ main(int argc, char *argv[])
#endif
/* Check if cheating is allowed (always is for fgrep). */
for (i = 0; i < patterns; ++i) {
-#ifndef WITHOUT_FASTMATCH
- /*
- * Attempt compilation with fastmatch regex and
- * fallback to regex(3) if it fails.
- */
- if (fastncomp(&fg_pattern[i], pattern[i].pat,
- pattern[i].len, cflags) == 0)
- continue;
-#endif
c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
if (c != 0) {
regerror(c, &r_pattern[i], re_error,
diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h
index 5ddd4d01b9e95..41a8a812309fe 100644
--- a/usr.bin/grep/grep.h
+++ b/usr.bin/grep/grep.h
@@ -38,10 +38,6 @@
#include <stdio.h>
#include <zlib.h>
-#ifndef WITHOUT_FASTMATCH
-#include "fastmatch.h"
-#endif
-
#ifdef WITHOUT_NLS
#define getstr(n) errstr[n]
#else
@@ -131,9 +127,6 @@ extern unsigned int dpatterns, fpatterns, patterns;
extern struct pat *pattern;
extern struct epat *dpattern, *fpattern;
extern regex_t *er_pattern, *r_pattern;
-#ifndef WITHOUT_FASTMATCH
-extern fastmatch_t *fg_pattern;
-#endif
/* For regex errors */
#define RE_ERROR_BUF 512
diff --git a/usr.bin/grep/regex/fastmatch.c b/usr.bin/grep/regex/fastmatch.c
deleted file mode 100644
index 6e3e3f8055a97..0000000000000
--- a/usr.bin/grep/regex/fastmatch.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/* $FreeBSD$ */
-
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "glue.h"
-
-#include <errno.h>
-#include <fastmatch.h>
-#include <regex.h>
-#include <string.h>
-
-#include "tre-fastmatch.h"
-
-int
-tre_fixncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
-{
- int ret;
- tre_char_t *wregex;
- size_t wlen;
-
- if (n != 0)
- {
- ret = tre_convert_pattern(regex, n, &wregex, &wlen);
- if (ret != REG_OK)
- return ret;
- else
- ret = tre_compile_literal(preg, wregex, wlen, cflags);
- tre_free_pattern(wregex);
- return ret;
- }
- else
- return tre_compile_literal(preg, NULL, 0, cflags);
-}
-
-int
-tre_fastncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
-{
- int ret;
- tre_char_t *wregex;
- size_t wlen;
-
- if (n != 0)
- {
- ret = tre_convert_pattern(regex, n, &wregex, &wlen);
- if (ret != REG_OK)
- return ret;
- else
- ret = (cflags & REG_LITERAL)
- ? tre_compile_literal(preg, wregex, wlen, cflags)
- : tre_compile_fast(preg, wregex, wlen, cflags);
- tre_free_pattern(wregex);
- return ret;
- }
- else
- return tre_compile_literal(preg, NULL, 0, cflags);
-}
-
-
-int
-tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags)
-{
- return tre_fixncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
-}
-
-int
-tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags)
-{
- return tre_fastncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
-}
-
-int
-tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
-{
- return tre_compile_literal(preg, regex, n, cflags);
-}
-
-int
-tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
-{
- return (cflags & REG_LITERAL) ?
- tre_compile_literal(preg, regex, n, cflags) :
- tre_compile_fast(preg, regex, n, cflags);
-}
-
-int
-tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
-{
- return tre_fixwncomp(preg, regex, regex ? tre_strlen(regex) : 0, cflags);
-}
-
-int
-tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
-{
- return tre_fastwncomp(preg, regex, regex ? tre_strlen(regex) : 0, cflags);
-}
-
-void
-tre_fastfree(fastmatch_t *preg)
-{
- tre_free_fast(preg);
-}
-
-int
-tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
- size_t nmatch, regmatch_t pmatch[], int eflags)
-{
- tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
-
- if (eflags & REG_STARTEND)
- CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
- type, nmatch, pmatch, eflags));
- else
- return tre_match_fast(preg, string, len, type, nmatch,
- pmatch, eflags);
-}
-
-int
-tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags)
-{
- return tre_fastnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
-}
-
-int
-tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
- size_t nmatch, regmatch_t pmatch[], int eflags)
-{
- tre_str_type_t type = STR_WIDE;
-
- if (eflags & REG_STARTEND)
- CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
- type, nmatch, pmatch, eflags));
- else
- return tre_match_fast(preg, string, len, type, nmatch,
- pmatch, eflags);
-}
-
-int
-tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
- size_t nmatch, regmatch_t pmatch[], int eflags)
-{
- return tre_fastwnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
-}
-
diff --git a/usr.bin/grep/regex/fastmatch.h b/usr.bin/grep/regex/fastmatch.h
deleted file mode 100644
index 15178a02fa680..0000000000000
--- a/usr.bin/grep/regex/fastmatch.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* $FreeBSD$ */
-
-#ifndef FASTMATCH_H
-#define FASTMATCH_H 1
-
-#include <limits.h>
-#include <regex.h>
-#include <stdbool.h>
-#include <wchar.h>
-
-typedef struct {
- size_t wlen;
- size_t len;
- wchar_t *wpattern;
- bool *wescmap;
- unsigned int qsBc[UCHAR_MAX + 1];
- unsigned int *bmGs;
- char *pattern;
- bool *escmap;
- unsigned int defBc;
- void *qsBc_table;
- unsigned int *sbmGs;
- const char *re_endp;
-
- /* flags */
- bool hasdot;
- bool bol;
- bool eol;
- bool word;
- bool icase;
- bool newline;
- bool nosub;
- bool matchall;
- bool reversed;
-} fastmatch_t;
-
-extern int
-tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags);
-
-extern int
-tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags);
-
-extern int
-tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags);
-
-extern void
-tre_fastfree(fastmatch_t *preg);
-
-extern int
-tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
-
-extern int
-tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
-
-extern int
-tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
- size_t nmatch, regmatch_t pmatch[], int eflags);
-
-/* Versions with a maximum length argument and therefore the capability to
- handle null characters in the middle of the strings. */
-extern int
-tre_fixncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
-
-extern int
-tre_fastncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
-
-extern int
-tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
- size_t nmatch, regmatch_t pmatch[], int eflags);
-
-extern int
-tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
-
-extern int
-tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
-
-extern int
-tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
- size_t nmatch, regmatch_t pmatch[], int eflags);
-
-#define fixncomp tre_fixncomp
-#define fastncomp tre_fastncomp
-#define fixcomp tre_fixcomp
-#define fastcomp tre_fastcomp
-#define fixwncomp tre_fixwncomp
-#define fastwncomp tre_fastwncomp
-#define fixwcomp tre_fixwcomp
-#define fastwcomp tre_fastwcomp
-#define fastfree tre_fastfree
-#define fastnexec tre_fastnexec
-#define fastexec tre_fastexec
-#define fastwnexec tre_fastwnexec
-#define fastwexec tre_fastwexec
-#endif /* FASTMATCH_H */
diff --git a/usr.bin/grep/regex/glue.h b/usr.bin/grep/regex/glue.h
deleted file mode 100644
index 0c54e98bd79b3..0000000000000
--- a/usr.bin/grep/regex/glue.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* $FreeBSD$ */
-
-#ifndef GLUE_H
-#define GLUE_H
-
-#include <limits.h>
-#undef RE_DUP_MAX
-#include <regex.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#define TRE_WCHAR 1
-#define TRE_MULTIBYTE 1
-#define HAVE_MBSTATE_T 1
-
-#define TRE_CHAR(n) L##n
-#define CHF "%lc"
-
-#define tre_char_t wchar_t
-#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
-#define tre_strlen wcslen
-#define tre_isspace iswspace
-#define tre_isalnum iswalnum
-
-#define REG_OK 0
-#define REG_LITERAL 0020
-#define REG_WORD 0100
-#define REG_GNU 0400
-
-#define TRE_MB_CUR_MAX MB_CUR_MAX
-
-#ifndef _GREP_DEBUG
-#define DPRINT(msg)
-#else
-#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
-#endif
-
-#define MIN(a,b) ((a > b) ? (b) : (a))
-#define MAX(a,b) ((a > b) ? (a) : (b))
-
-typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
-
-#define CALL_WITH_OFFSET(fn) \
- do \
- { \
- size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so); \
- size_t offset = pmatch[0].rm_so; \
- int ret; \
- \
- if ((long long)pmatch[0].rm_eo - pmatch[0].rm_so < 0) \
- return REG_NOMATCH; \
- ret = fn; \
- for (unsigned i = 0; (!preg->nosub && (i < nmatch)); i++) \
- { \
- pmatch[i].rm_so += offset; \
- pmatch[i].rm_eo += offset; \
- } \
- return ret; \
- } while (0 /*CONSTCOND*/)
-
-int
-tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
- size_t *wn);
-
-void
-tre_free_pattern(tre_char_t *wregex);
-#endif
diff --git a/usr.bin/grep/regex/hashtable.c b/usr.bin/grep/regex/hashtable.c
deleted file mode 100644
index 1767d0a16d661..0000000000000
--- a/usr.bin/grep/regex/hashtable.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/* $FreeBSD$ */
-
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "glue.h"
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "hashtable.h"
-
-
-/*
- * Return a 32-bit hash of the given buffer. The init
- * value should be 0, or the previous hash value to extend
- * the previous hash.
- */
-static uint32_t
-hash32_buf(const void *buf, size_t len, uint32_t hash)
-{
- const unsigned char *p = buf;
-
- while (len--)
- hash = HASHSTEP(hash, *p++);
-
- return hash;
-}
-
-/*
- * Initializes a hash table that can hold table_size number of entries,
- * each of which has a key of key_size bytes and a value of value_size
- * bytes. On successful allocation returns a pointer to the hash table.
- * Otherwise, returns NULL and sets errno to indicate the error.
- */
-hashtable
-*hashtable_init(size_t table_size, size_t key_size, size_t value_size)
-{
- hashtable *tbl;
-
- DPRINT(("hashtable_init: table_size %zu, key_size %zu, value_size %zu\n",
- table_size, key_size, value_size));
-
- tbl = malloc(sizeof(hashtable));
- if (tbl == NULL)
- goto mem1;
-
- tbl->entries = calloc(sizeof(hashtable_entry *), table_size);
- if (tbl->entries == NULL)
- goto mem2;
-
- tbl->table_size = table_size;
- tbl->usage = 0;
- tbl->key_size = key_size;
- tbl->value_size = value_size;
-
- return (tbl);
-
-mem2:
- free(tbl);
-mem1:
- DPRINT(("hashtable_init: allocation failed\n"));
- errno = ENOMEM;
- return (NULL);
-}
-
-/*
- * Places the key-value pair to the hashtable tbl.
- * Returns:
- * HASH_OK: if the key was not present in the hash table yet
- * but the kay-value pair has been successfully added.
- * HASH_UPDATED: if the value for the key has been updated with the
- * new value.
- * HASH_FULL: if the hash table is full and the entry could not
- * be added.
- * HASH_FAIL: if an error has occurred and errno has been set to
- * indicate the error.
- */
-int
-hashtable_put(hashtable *tbl, const void *key, const void *value)
-{
- uint32_t hash = 0;
-
- if (tbl->table_size == tbl->usage)
- {
- DPRINT(("hashtable_put: hashtable is full\n"));
- return (HASH_FULL);
- }
-
- hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;
- DPRINT(("hashtable_put: calculated hash %" PRIu32 "\n", hash));
-
- /*
- * On hash collision entries are inserted at the next free space,
- * so we have to increase the index until we either find an entry
- * with the same key (and update it) or we find a free space.
- */
- for(;;)
- {
- if (tbl->entries[hash] == NULL)
- break;
- else if (memcmp(tbl->entries[hash]->key, key, tbl->key_size) == 0)
- {
- memcpy(tbl->entries[hash]->value, value, tbl->value_size);
- DPRINT(("hashtable_put: effective location is %" PRIu32
- ", entry updated\n", hash));
- return (HASH_UPDATED);
- }
- if (++hash == tbl->table_size)
- hash = 0;
- }
-
- DPRINT(("hashtable_put: effective location is %" PRIu32 "\n", hash));
-
- tbl->entries[hash] = malloc(sizeof(hashtable_entry));
- if (tbl->entries[hash] == NULL)
- {
- errno = ENOMEM;
- goto mem1;
- }
-
- tbl->entries[hash]->key = malloc(tbl->key_size);
- if (tbl->entries[hash]->key == NULL)
- {
- errno = ENOMEM;
- goto mem2;
- }
-
- tbl->entries[hash]->value = malloc(tbl->value_size);
- if (tbl->entries[hash]->value == NULL)
- {
- errno = ENOMEM;
- goto mem3;
- }
-
- memcpy(tbl->entries[hash]->key, key, tbl->key_size);
- memcpy(tbl->entries[hash]->value, value, tbl->value_size);
- tbl->usage++;
-
- DPRINT(("hashtable_put: entry successfully inserted\n"));
-
- return (HASH_OK);
-
-mem3:
- free(tbl->entries[hash]->key);
-mem2:
- free(tbl->entries[hash]);
-mem1:
- DPRINT(("hashtable_put: insertion failed\n"));
- return (HASH_FAIL);
-}
-
-static hashtable_entry
-**hashtable_lookup(const hashtable *tbl, const void *key)
-{
- uint32_t hash = 0;
-
- hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;
-
- for (;;)
- {
- if (tbl->entries[hash] == NULL)
- return (NULL);
- else if (memcmp(key, tbl->entries[hash]->key, tbl->key_size) == 0)
- {
- DPRINT(("hashtable_lookup: entry found at location %" PRIu32 "\n", hash));
- return (&tbl->entries[hash]);
- }
-
- if (++hash == tbl->table_size)
- hash = 0;
- }
-}
-
-/*
- * Retrieves the value for key from the hash table tbl and places
- * it to the space indicated by the value argument.
- * Returns HASH_OK if the value has been found and retrieved or
- * HASH_NOTFOUND otherwise.
- */
-int
-hashtable_get(hashtable *tbl, const void *key, void *value)
-{
- hashtable_entry **entry;
-
- entry = hashtable_lookup(tbl, key);
- if (entry == NULL)
- {
- DPRINT(("hashtable_get: entry is not available in the hashtable\n"));
- return (HASH_NOTFOUND);
- }
-
- memcpy(value, (*entry)->value, tbl->value_size);
- DPRINT(("hashtable_get: entry successfully copied into output buffer\n"));
- return (HASH_OK);
-}
-
-/*
- * Removes the entry with the specifified key from the hash table
- * tbl. Returns HASH_OK if the entry has been found and removed
- * or HASH_NOTFOUND otherwise.
- */
-int
-hashtable_remove(hashtable *tbl, const void *key)
-{
- hashtable_entry **entry;
-
- entry = hashtable_lookup(tbl, key);
- if (entry == NULL)
- {
- DPRINT(("hashtable_remove: entry is not available in the hashtable\n"));
- return (HASH_NOTFOUND);
- }
-
- free((*entry)->key);
- free((*entry)->value);
- free(*entry);
- *entry = NULL;
-
- tbl->usage--;
- DPRINT(("hashtable_remove: entry successfully removed\n"));
- return (HASH_OK);
-}
-
-/*
- * Frees the resources associated with the hash table tbl.
- */
-void
-hashtable_free(hashtable *tbl)
-{
- if (tbl == NULL)
- return;
-
- for (unsigned int i = 0; i < tbl->table_size; i++)
- if ((tbl->entries[i] != NULL))
- {
- free(tbl->entries[i]->key);
- free(tbl->entries[i]->value);
- }
-
- free(tbl->entries);
- DPRINT(("hashtable_free: resources are successfully freed\n"));
-}
diff --git a/usr.bin/grep/regex/hashtable.h b/usr.bin/grep/regex/hashtable.h
deleted file mode 100644
index f8b1daa510b6c..0000000000000
--- a/usr.bin/grep/regex/hashtable.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* $FreeBSD$ */
-
-#ifndef HASHTABLE_H
-#define HASHTABLE_H 1
-
-#include <sys/types.h>
-
-#define HASH_OK 0
-#define HASH_UPDATED 1
-#define HASH_FAIL 2
-#define HASH_FULL 3
-#define HASH_NOTFOUND 4
-
-#define HASHSTEP(x,c) (((x << 5) + x) + (c))
-
-typedef struct {
- void *key;
- void *value;
-} hashtable_entry;
-
-typedef struct {
- size_t key_size;
- size_t table_size;
- size_t usage;
- size_t value_size;
- hashtable_entry **entries;
-} hashtable;
-
-void hashtable_free(hashtable *);
-int hashtable_get(hashtable *, const void *, void *);
-hashtable *hashtable_init(size_t, size_t, size_t);
-int hashtable_put(hashtable *, const void *, const void *);
-int hashtable_remove(hashtable *, const void *);
-
-#endif /* HASHTABLE.H */
diff --git a/usr.bin/grep/regex/tre-compile.c b/usr.bin/grep/regex/tre-compile.c
deleted file mode 100644
index dbe315382aba4..0000000000000
--- a/usr.bin/grep/regex/tre-compile.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* $FreeBSD$ */
-
-#include "glue.h"
-
-#include <stdio.h>
-#include <assert.h>
-#include <errno.h>
-#include <regex.h>
-#include <string.h>
-#include <wchar.h>
-
-int
-tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
- size_t *wn)
-{
-#if TRE_WCHAR
- tre_char_t *wregex;
- size_t wlen;
-
- wregex = malloc(sizeof(tre_char_t) * (n + 1));
- if (wregex == NULL)
- return REG_ESPACE;
-
- /* If the current locale uses the standard single byte encoding of
- characters, we don't do a multibyte string conversion. If we did,
- many applications which use the default locale would break since
- the default "C" locale uses the 7-bit ASCII character set, and
- all characters with the eighth bit set would be considered invalid. */
-#if TRE_MULTIBYTE
- if (TRE_MB_CUR_MAX == 1)
-#endif /* TRE_MULTIBYTE */
- {
- unsigned int i;
- const unsigned char *str = (const unsigned char *)regex;
- tre_char_t *wstr = wregex;
-
- for (i = 0; i < n; i++)
- *(wstr++) = *(str++);
- wlen = n;
- }
-#if TRE_MULTIBYTE
- else
- {
- int consumed;
- tre_char_t *wcptr = wregex;
-#ifdef HAVE_MBSTATE_T
- mbstate_t state;
- memset(&state, '\0', sizeof(state));
-#endif /* HAVE_MBSTATE_T */
- while (n > 0)
- {
- consumed = tre_mbrtowc(wcptr, regex, n, &state);
-
- switch (consumed)
- {
- case 0:
- if (*regex == '\0')
- consumed = 1;
- else
- {
- free(wregex);
- return REG_BADPAT;
- }
- break;
- case -1:
- DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
- free(wregex);
- return REG_BADPAT;
- case -2:
- /* The last character wasn't complete. Let's not call it a
- fatal error. */
- consumed = n;
- break;
- }
- regex += consumed;
- n -= consumed;
- wcptr++;
- }
- wlen = wcptr - wregex;
- }
-#endif /* TRE_MULTIBYTE */
- wregex[wlen] = L'\0';
- *w = wregex;
- *wn = wlen;
- return REG_OK;
-#else /* !TRE_WCHAR */
- {
- *w = (tre_char_t * const *)regex;
- *wn = n;
- return REG_OK;
- }
-#endif /* !TRE_WCHAR */
-}
-
-void
-tre_free_pattern(tre_char_t *wregex)
-{
-#if TRE_WCHAR
- free(wregex);
-#endif
-}
diff --git a/usr.bin/grep/regex/tre-fastmatch.c b/usr.bin/grep/regex/tre-fastmatch.c
deleted file mode 100644
index c28645602fa46..0000000000000
--- a/usr.bin/grep/regex/tre-fastmatch.c
+++ /dev/null
@@ -1,1070 +0,0 @@
-/* $FreeBSD$ */
-
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
- * Copyright (C) 2008-2011 Gabor Kovesdan <gabor@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "glue.h"
-
-#include <ctype.h>
-#include <limits.h>
-#include <regex.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-#ifdef TRE_WCHAR
-#include <wchar.h>
-#include <wctype.h>
-#endif
-
-#include "hashtable.h"
-#include "tre-fastmatch.h"
-
-static int fastcmp(const fastmatch_t *fg, const void *data,
- tre_str_type_t type);
-
-/*
- * Clean up if pattern compilation fails.
- */
-#define FAIL_COMP(errcode) \
- { \
- if (fg->pattern) \
- free(fg->pattern); \
- if (fg->wpattern) \
- free(fg->wpattern); \
- if (fg->qsBc_table) \
- hashtable_free(fg->qsBc_table); \
- fg = NULL; \
- return errcode; \
- }
-
-/*
- * Skips n characters in the input string and assigns the start
- * address to startptr. Note: as per IEEE Std 1003.1-2008
- * matching is based on bit pattern not character representations
- * so we can handle MB strings as byte sequences just like
- * SB strings.
- */
-#define SKIP_CHARS(n) \
- switch (type) \
- { \
- case STR_WIDE: \
- startptr = str_wide + n; \
- break; \
- default: \
- startptr = str_byte + n; \
- }
-
-/*
- * Converts the wide string pattern to SB/MB string and stores
- * it in fg->pattern. Sets fg->len to the byte length of the
- * converted string.
- */
-#define STORE_MBS_PAT \
- { \
- size_t siz; \
- \
- siz = wcstombs(NULL, fg->wpattern, 0); \
- if (siz == (size_t)-1) \
- return REG_BADPAT; \
- fg->len = siz; \
- fg->pattern = malloc(siz + 1); \
- if (fg->pattern == NULL) \
- return REG_ESPACE; \
- wcstombs(fg->pattern, fg->wpattern, siz); \
- fg->pattern[siz] = '\0'; \
- } \
-
-#define CONV_MBS_PAT(src, dest, destsz) \
- { \
- destsz = wcstombs(NULL, src, 0); \
- if (destsz == (size_t)-1) \
- return REG_BADPAT; \
- dest = malloc(destsz + 1); \
- if (dest == NULL) \
- return REG_ESPACE; \
- wcstombs(dest, src, destsz); \
- dest[destsz] = '\0'; \
- } \
-
-#define IS_OUT_OF_BOUNDS \
- ((!fg->reversed \
- ? ((type == STR_WIDE) ? ((j + fg->wlen) > len) \
- : ((j + fg->len) > len)) \
- : (j < 0)))
-
-/*
- * Checks whether the new position after shifting in the input string
- * is out of the bounds and break out from the loop if so.
- */
-#define CHECKBOUNDS \
- if (IS_OUT_OF_BOUNDS) \
- break; \
-
-/*
- * Shifts in the input string after a mismatch. The position of the
- * mismatch is stored in the mismatch variable.
- */
-#define SHIFT \
- CHECKBOUNDS; \
- \
- { \
- int r = -1; \
- unsigned int bc = 0, gs = 0, ts; \
- \
- switch (type) \
- { \
- case STR_WIDE: \
- if (!fg->hasdot) \
- { \
- if (u != 0 && (unsigned)mismatch == fg->wlen - 1 - shift) \
- mismatch -= u; \
- v = fg->wlen - 1 - mismatch; \
- r = hashtable_get(fg->qsBc_table, \
- &str_wide[!fg->reversed ? (size_t)j + fg->wlen \
- : (size_t)j - 1], &bc); \
- gs = fg->bmGs[mismatch]; \
- } \
- bc = (r == HASH_OK) ? bc : fg->defBc; \
- DPRINT(("tre_fast_match: mismatch on character" CHF ", " \
- "BC %d, GS %d\n", \
- ((const tre_char_t *)startptr)[mismatch + 1], \
- bc, gs)); \
- break; \
- default: \
- if (!fg->hasdot) \
- { \
- if (u != 0 && (unsigned)mismatch == fg->len - 1 - shift) \
- mismatch -= u; \
- v = fg->len - 1 - mismatch; \
- gs = fg->sbmGs[mismatch]; \
- } \
- bc = fg->qsBc[((const unsigned char *)str_byte) \
- [!fg->reversed ? (size_t)j + fg->len \
- : (size_t)j - 1]]; \
- DPRINT(("tre_fast_match: mismatch on character %c, " \
- "BC %d, GS %d\n", \
- ((const unsigned char *)startptr)[mismatch + 1], \
- bc, gs)); \
- } \
- if (fg->hasdot) \
- shift = bc; \
- else \
- { \
- ts = (u >= v) ? (u - v) : 0; \
- shift = MAX(ts, bc); \
- shift = MAX(shift, gs); \
- if (shift == gs) \
- u = MIN((type == STR_WIDE ? fg->wlen : fg->len) - shift, v); \
- else \
- { \
- if (ts < bc) \
- shift = MAX(shift, u + 1); \
- u = 0; \
- } \
- } \
- DPRINT(("tre_fast_match: shifting %u characters\n", shift)); \
- j = !fg->reversed ? j + shift : j - shift; \
- }
-
-/*
- * Normal Quick Search would require a shift based on the position the
- * next character after the comparison is within the pattern. With
- * wildcards, the position of the last dot effects the maximum shift
- * distance.
- * The closer to the end the wild card is the slower the search.
- *
- * Examples:
- * Pattern Max shift
- * ------- ---------
- * this 5
- * .his 4
- * t.is 3
- * th.s 2
- * thi. 1
- */
-
-/*
- * Fills in the bad character shift array for SB/MB strings.
- */
-#define FILL_QSBC \
- if (fg->reversed) \
- { \
- _FILL_QSBC_REVERSED \
- } \
- else \
- { \
- _FILL_QSBC \
- }
-
-#define _FILL_QSBC \
- for (unsigned int i = 0; i <= UCHAR_MAX; i++) \
- fg->qsBc[i] = fg->len - hasdot; \
- for (unsigned int i = hasdot + 1; i < fg->len; i++) \
- { \
- fg->qsBc[(unsigned char)fg->pattern[i]] = fg->len - i; \
- DPRINT(("BC shift for char %c is %zu\n", fg->pattern[i], \
- fg->len - i)); \
- if (fg->icase) \
- { \
- char c = islower((unsigned char)fg->pattern[i]) ? \
- toupper((unsigned char)fg->pattern[i]) : \
- tolower((unsigned char)fg->pattern[i]); \
- fg->qsBc[(unsigned char)c] = fg->len - i; \
- DPRINT(("BC shift for char %c is %zu\n", c, fg->len - i)); \
- } \
- }
-
-#define _FILL_QSBC_REVERSED \
- for (unsigned int i = 0; i <= UCHAR_MAX; i++) \
- fg->qsBc[i] = firstdot + 1; \
- for (int i = firstdot - 1; i >= 0; i--) \
- { \
- fg->qsBc[(unsigned char)fg->pattern[i]] = i + 1; \
- DPRINT(("Reverse BC shift for char %c is %d\n", fg->pattern[i], \
- i + 1)); \
- if (fg->icase) \
- { \
- char c = islower((unsigned char)fg->pattern[i]) ? \
- toupper((unsigned char)fg->pattern[i]) : \
- tolower((unsigned char)fg->pattern[i]); \
- fg->qsBc[(unsigned char)c] = i + 1; \
- DPRINT(("Reverse BC shift for char %c is %d\n", c, i + 1)); \
- } \
- }
-
-/*
- * Fills in the bad character shifts into a hastable for wide strings.
- * With wide characters it is not possible any more to use a normal
- * array because there are too many characters and we could not
- * provide enough memory. Fortunately, we only have to store distinct
- * values for so many characters as the number of distinct characters
- * in the pattern, so we can store them in a hashtable and store a
- * default shift value for the rest.
- */
-#define FILL_QSBC_WIDE \
- if (fg->reversed) \
- { \
- _FILL_QSBC_WIDE_REVERSED \
- } \
- else \
- { \
- _FILL_QSBC_WIDE \
- }
-
-#define _FILL_QSBC_WIDE \
- /* Adjust the shift based on location of the last dot ('.'). */ \
- fg->defBc = fg->wlen - whasdot; \
- \
- /* Preprocess pattern. */ \
- fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4), \
- sizeof(tre_char_t), sizeof(int)); \
- if (!fg->qsBc_table) \
- FAIL_COMP(REG_ESPACE); \
- for (unsigned int i = whasdot + 1; i < fg->wlen; i++) \
- { \
- int k = fg->wlen - i; \
- int r; \
- \
- r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k); \
- if ((r == HASH_FAIL) || (r == HASH_FULL)) \
- FAIL_COMP(REG_ESPACE); \
- DPRINT(("BC shift for wide char " CHF " is %d\n", fg->wpattern[i],\
- k)); \
- if (fg->icase) \
- { \
- tre_char_t wc = iswlower(fg->wpattern[i]) ? \
- towupper(fg->wpattern[i]) : towlower(fg->wpattern[i]); \
- r = hashtable_put(fg->qsBc_table, &wc, &k); \
- if ((r == HASH_FAIL) || (r == HASH_FULL)) \
- FAIL_COMP(REG_ESPACE); \
- DPRINT(("BC shift for wide char " CHF " is %d\n", wc, k)); \
- } \
- }
-
-#define _FILL_QSBC_WIDE_REVERSED \
- /* Adjust the shift based on location of the last dot ('.'). */ \
- fg->defBc = (size_t)wfirstdot; \
- \
- /* Preprocess pattern. */ \
- fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4), \
- sizeof(tre_char_t), sizeof(int)); \
- if (!fg->qsBc_table) \
- FAIL_COMP(REG_ESPACE); \
- for (int i = wfirstdot - 1; i >= 0; i--) \
- { \
- int k = i + 1; \
- int r; \
- \
- r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k); \
- if ((r == HASH_FAIL) || (r == HASH_FULL)) \
- FAIL_COMP(REG_ESPACE); \
- DPRINT(("Reverse BC shift for wide char " CHF " is %d\n", \
- fg->wpattern[i], k)); \
- if (fg->icase) \
- { \
- tre_char_t wc = iswlower(fg->wpattern[i]) ? \
- towupper(fg->wpattern[i]) : towlower(fg->wpattern[i]); \
- r = hashtable_put(fg->qsBc_table, &wc, &k); \
- if ((r == HASH_FAIL) || (r == HASH_FULL)) \
- FAIL_COMP(REG_ESPACE); \
- DPRINT(("Reverse BC shift for wide char " CHF " is %d\n", wc, \
- k)); \
- } \
- }
-
-#ifdef _GREP_DEBUG
-#define DPRINT_BMGS(len, fmt_str, sh) \
- for (unsigned int i = 0; i < len; i++) \
- DPRINT((fmt_str, i, sh[i]));
-#else
-#define DPRINT_BMGS(len, fmt_str, sh) \
- do { } while(/*CONSTCOND*/0)
-#endif
-
-/*
- * Fills in the good suffix table for SB/MB strings.
- */
-#define FILL_BMGS \
- if (fg->len > 0 && !fg->hasdot) \
- { \
- fg->sbmGs = malloc(fg->len * sizeof(*fg->sbmGs)); \
- if (!fg->sbmGs) \
- return REG_ESPACE; \
- if (fg->len == 1) \
- fg->sbmGs[0] = 1; \
- else \
- _FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false); \
- DPRINT_BMGS(fg->len, "GS shift for pos %d is %d\n", fg->sbmGs); \
- }
-
-/*
- * Fills in the good suffix table for wide strings.
- */
-#define FILL_BMGS_WIDE \
- if (fg->wlen > 0 && !fg->hasdot) \
- { \
- fg->bmGs = malloc(fg->wlen * sizeof(*fg->bmGs)); \
- if (!fg->bmGs) \
- return REG_ESPACE; \
- if (fg->wlen == 1) \
- fg->bmGs[0] = 1; \
- else \
- _FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true); \
- DPRINT_BMGS(fg->wlen, "GS shift (wide) for pos %d is %d\n", \
- fg->bmGs); \
- }
-
-#define _FILL_BMGS(arr, pat, plen, wide) \
- { \
- char *p; \
- tre_char_t *wp; \
- \
- if (wide) \
- { \
- if (fg->icase) \
- { \
- wp = malloc(plen * sizeof(tre_char_t)); \
- if (wp == NULL) \
- return REG_ESPACE; \
- for (unsigned int i = 0; i < plen; i++) \
- wp[i] = towlower(pat[i]); \
- _CALC_BMGS(arr, wp, plen); \
- free(wp); \
- } \
- else \
- _CALC_BMGS(arr, pat, plen); \
- } \
- else \
- { \
- if (fg->icase) \
- { \
- p = malloc(plen); \
- if (p == NULL) \
- return REG_ESPACE; \
- for (unsigned int i = 0; i < plen; i++) \
- p[i] = tolower((unsigned char)pat[i]); \
- _CALC_BMGS(arr, p, plen); \
- free(p); \
- } \
- else \
- _CALC_BMGS(arr, pat, plen); \
- } \
- }
-
-#define _CALC_BMGS(arr, pat, plen) \
- { \
- int f = 0, g; \
- \
- int *suff = malloc(plen * sizeof(int)); \
- if (suff == NULL) \
- return REG_ESPACE; \
- \
- suff[plen - 1] = plen; \
- g = plen - 1; \
- for (int i = plen - 2; i >= 0; i--) \
- { \
- if (i > g && suff[i + plen - 1 - f] < i - g) \
- suff[i] = suff[i + plen - 1 - f]; \
- else \
- { \
- if (i < g) \
- g = i; \
- f = i; \
- while (g >= 0 && pat[g] == pat[g + plen - 1 - f]) \
- g--; \
- suff[i] = f - g; \
- } \
- } \
- \
- for (unsigned int i = 0; i < plen; i++) \
- arr[i] = plen; \
- g = 0; \
- for (int i = plen - 1; i >= 0; i--) \
- if (suff[i] == i + 1) \
- for(; (unsigned long)g < plen - 1 - i; g++) \
- if (arr[g] == plen) \
- arr[g] = plen - 1 - i; \
- for (unsigned int i = 0; i <= plen - 2; i++) \
- arr[plen - 1 - suff[i]] = plen - 1 - i; \
- \
- free(suff); \
- }
-
-/*
- * Copies the pattern pat having length n to p and stores
- * the size in l.
- */
-#define SAVE_PATTERN(src, srclen, dst, dstlen) \
- dstlen = srclen; \
- dst = malloc((dstlen + 1) * sizeof(tre_char_t)); \
- if (dst == NULL) \
- return REG_ESPACE; \
- if (dstlen > 0) \
- memcpy(dst, src, dstlen * sizeof(tre_char_t)); \
- dst[dstlen] = TRE_CHAR('\0');
-
-/*
- * Initializes pattern compiling.
- */
-#define INIT_COMP \
- /* Initialize. */ \
- memset(fg, 0, sizeof(*fg)); \
- fg->icase = (cflags & REG_ICASE); \
- fg->word = (cflags & REG_WORD); \
- fg->newline = (cflags & REG_NEWLINE); \
- fg->nosub = (cflags & REG_NOSUB); \
- \
- /* Cannot handle REG_ICASE with MB string */ \
- if (fg->icase && (TRE_MB_CUR_MAX > 1) && n > 0) \
- { \
- DPRINT(("Cannot use fast matcher for MBS with REG_ICASE\n")); \
- return REG_BADPAT; \
- }
-
-/*
- * Checks whether we have a 0-length pattern that will match
- * anything. If literal is set to false, the EOL anchor is also
- * taken into account.
- */
-#define CHECK_MATCHALL(literal) \
- if (!literal && n == 1 && pat[0] == TRE_CHAR('$')) \
- { \
- n--; \
- fg->eol = true; \
- } \
- \
- if (n == 0) \
- { \
- fg->matchall = true; \
- fg->pattern = malloc(sizeof(char)); \
- if (!fg->pattern) \
- FAIL_COMP(REG_ESPACE); \
- fg->pattern[0] = '\0'; \
- fg->wpattern = malloc(sizeof(tre_char_t)); \
- if (!fg->wpattern) \
- FAIL_COMP(REG_ESPACE); \
- fg->wpattern[0] = TRE_CHAR('\0'); \
- DPRINT(("Matching every input\n")); \
- return REG_OK; \
- }
-
-/*
- * Returns: REG_OK on success, error code otherwise
- */
-int
-tre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
- int cflags)
-{
- size_t hasdot = 0, whasdot = 0;
- ssize_t firstdot = -1, wfirstdot = -1;
-
- INIT_COMP;
-
- CHECK_MATCHALL(true);
-
- /* Cannot handle word boundaries with MB string */
- if (fg->word && (TRE_MB_CUR_MAX > 1))
- return REG_BADPAT;
-
-#ifdef TRE_WCHAR
- SAVE_PATTERN(pat, n, fg->wpattern, fg->wlen);
- STORE_MBS_PAT;
-#else
- SAVE_PATTERN(pat, n, fg->pattern, fg->len);
-#endif
-
- DPRINT(("tre_compile_literal: pattern: %s, len %zu, icase: %c, word: %c, "
- "newline %c\n", fg->pattern, fg->len, fg->icase ? 'y' : 'n',
- fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n'));
-
- FILL_QSBC;
- FILL_BMGS;
-#ifdef TRE_WCHAR
- FILL_QSBC_WIDE;
- FILL_BMGS_WIDE;
-#endif
-
- return REG_OK;
-}
-
-/*
- * Returns: REG_OK on success, error code otherwise
- */
-int
-tre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
- int cflags)
-{
- tre_char_t *tmp;
- size_t pos = 0, hasdot = 0, whasdot = 0;
- ssize_t firstdot = -1, wfirstdot = -1;
- bool escaped = false;
- bool *_escmap = NULL;
-
- INIT_COMP;
-
- /* Remove beginning-of-line character ('^'). */
- if (pat[0] == TRE_CHAR('^'))
- {
- fg->bol = true;
- n--;
- pat++;
- }
-
- CHECK_MATCHALL(false);
-
- /* Handle word-boundary matching when GNU extensions are enabled */
- if ((cflags & REG_GNU) && (n >= 14) &&
- (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
- (memcmp(pat + n - 7, TRE_CHAR("[[:>:]]"),
- 7 * sizeof(tre_char_t)) == 0))
- {
- n -= 14;
- pat += 7;
- fg->word = true;
- }
-
- /* Cannot handle word boundaries with MB string */
- if (fg->word && (TRE_MB_CUR_MAX > 1))
- return REG_BADPAT;
-
- tmp = malloc((n + 1) * sizeof(tre_char_t));
- if (tmp == NULL)
- return REG_ESPACE;
-
-/* Copies the char into the stored pattern and skips to the next char. */
-#define STORE_CHAR \
- do \
- { \
- tmp[pos++] = pat[i]; \
- escaped = false; \
- continue; \
- } while (0)
-
- /* Traverse the input pattern for processing */
- for (unsigned int i = 0; i < n; i++)
- {
- switch (pat[i])
- {
- case TRE_CHAR('\\'):
- if (escaped)
- STORE_CHAR;
- else if (i == n - 1)
- goto badpat;
- else
- escaped = true;
- continue;
- case TRE_CHAR('['):
- if (escaped)
- STORE_CHAR;
- else
- goto badpat;
- continue;
- case TRE_CHAR('*'):
- if (escaped || (!(cflags & REG_EXTENDED) && (i == 0)))
- STORE_CHAR;
- else
- goto badpat;
- continue;
- case TRE_CHAR('+'):
- case TRE_CHAR('?'):
- if ((cflags & REG_EXTENDED) && (i == 0))
- goto badpat;
- else if ((cflags & REG_EXTENDED) ^ !escaped)
- STORE_CHAR;
- else
- goto badpat;
- continue;
- case TRE_CHAR('.'):
- if (escaped)
- {
- if (!_escmap)
- _escmap = calloc(n, sizeof(bool));
- if (!_escmap)
- {
- free(tmp);
- return REG_ESPACE;
- }
- _escmap[i] = true;
- STORE_CHAR;
- }
- else
- {
- whasdot = i;
- if (wfirstdot == -1)
- wfirstdot = i;
- STORE_CHAR;
- }
- continue;
- case TRE_CHAR('^'):
- STORE_CHAR;
- continue;
- case TRE_CHAR('$'):
- if (!escaped && (i == n - 1))
- fg->eol = true;
- else
- STORE_CHAR;
- continue;
- case TRE_CHAR('('):
- if ((cflags & REG_EXTENDED) ^ escaped)
- goto badpat;
- else
- STORE_CHAR;
- continue;
- case TRE_CHAR('{'):
- if (!(cflags & REG_EXTENDED) ^ escaped)
- STORE_CHAR;
- else if (!(cflags & REG_EXTENDED) && (i == 0))
- STORE_CHAR;
- else if ((cflags & REG_EXTENDED) && (i == 0))
- continue;
- else
- goto badpat;
- continue;
- case TRE_CHAR('|'):
- if ((cflags & REG_EXTENDED) ^ escaped)
- goto badpat;
- else
- STORE_CHAR;
- continue;
- default:
- if (escaped)
- goto badpat;
- else
- STORE_CHAR;
- continue;
- }
- continue;
-badpat:
- free(tmp);
- DPRINT(("tre_compile_fast: compilation of pattern failed, falling"
- "back to NFA\n"));
- return REG_BADPAT;
- }
-
- fg->hasdot = wfirstdot > -1;
-
- /*
- * The pattern has been processed and copied to tmp as a literal string
- * with escapes, anchors (^$) and the word boundary match character
- * classes stripped out.
- */
-#ifdef TRE_WCHAR
- SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen);
- fg->wescmap = _escmap;
- STORE_MBS_PAT;
-
- /*
- * The position of dots and escaped dots is different in the MB string
- * than in to the wide string so traverse the converted string, as well,
- * to store these positions.
- */
- if (fg->hasdot || (fg->wescmap != NULL))
- {
- if (fg->wescmap != NULL)
- {
- fg->escmap = calloc(fg->len, sizeof(bool));
- if (fg->escmap == NULL)
- {
- tre_free_fast(fg);
- return REG_ESPACE;
- }
- }
-
- escaped = false;
- char *_checkpat = NULL;
- size_t _checklen = 0;
- unsigned int escofs = 0;
- /*
- * Make a copy here of the original pattern, because fg->pattern has
- * already been stripped of all escape sequences in the above processing.
- * This is necessary if we wish to later treat fg->escmap as an actual,
- * functional replacement of fg->wescmap.
- */
- CONV_MBS_PAT(pat, _checkpat, _checklen);
- for (unsigned int i = 0; i < n; i++)
- if (_checkpat[i] == '\\')
- {
- escaped = !escaped;
- if (escaped)
- ++escofs;
- }
- else if (_checkpat[i] == '.' && fg->escmap != NULL && escaped)
- {
- fg->escmap[i - escofs] = true;
- escaped = false;
- }
- else if (_checkpat[i] == '.' && !escaped)
- {
- hasdot = i;
- if (firstdot == -1)
- firstdot = i;
- }
- else
- escaped = false;
- free(_checkpat);
- }
-#else
- SAVE_PATTERN(tmp, pos, fg->pattern, fg->len);
- fg->escmap = _escmap;
-#endif
-
- free(tmp);
-
- DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, "
- "icase: %c, word: %c, newline %c\n", fg->pattern, fg->len,
- fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n',
- fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',
- fg->newline ? 'y' : 'n'));
-
- /* Check whether reverse QS algorithm is more efficient */
- if ((wfirstdot > -1) && (fg->wlen - whasdot + 1 < (size_t)wfirstdot) &&
- fg->nosub)
- {
- fg->reversed = true;
- DPRINT(("tre_compile_fast: using reverse QS algorithm\n"));
- }
-
- FILL_QSBC;
- FILL_BMGS;
-#ifdef TRE_WCHAR
- FILL_QSBC_WIDE;
- FILL_BMGS_WIDE;
-#endif
-
- return REG_OK;
-}
-
-#define _SHIFT_ONE \
- { \
- shift = 1; \
- j = !fg->reversed ? j + shift : j - shift; \
- continue; \
- }
-
-#define _BBOUND_COND \
- ((type == STR_WIDE) ? \
- ((j == 0) || !(tre_isalnum(str_wide[j - 1]) || \
- (str_wide[j - 1] == TRE_CHAR('_')))) : \
- ((j == 0) || !(tre_isalnum(str_byte[j - 1]) || \
- (str_byte[j - 1] == '_'))))
-
-#define _EBOUND_COND \
- ((type == STR_WIDE) ? \
- ((j + fg->wlen == len) || !(tre_isalnum(str_wide[j + fg->wlen]) || \
- (str_wide[j + fg->wlen] == TRE_CHAR('_')))) : \
- ((j + fg->len == len) || !(tre_isalnum(str_byte[j + fg->len]) || \
- (str_byte[j + fg->len] == '_'))))
-
-/*
- * Condition to check whether the match on position j is on a
- * word boundary.
- */
-#define IS_ON_WORD_BOUNDARY \
- (_BBOUND_COND && _EBOUND_COND)
-
-/*
- * Checks word boundary and shifts one if match is not on a
- * boundary.
- */
-#define CHECK_WORD_BOUNDARY \
- if (!IS_ON_WORD_BOUNDARY) \
- _SHIFT_ONE;
-
-#define _BOL_COND \
- ((j == 0) || ((type == STR_WIDE) ? (str_wide[j - 1] == TRE_CHAR('\n'))\
- : (str_byte[j - 1] == '\n')))
-
-/*
- * Checks BOL anchor and shifts one if match is not on a
- * boundary.
- */
-#define CHECK_BOL_ANCHOR \
- if (!_BOL_COND) \
- _SHIFT_ONE;
-
-#define _EOL_COND \
- ((type == STR_WIDE) \
- ? ((j + fg->wlen == len) || \
- (str_wide[j + fg->wlen] == TRE_CHAR('\n'))) \
- : ((j + fg->len == len) || (str_byte[j + fg->wlen] == '\n')))
-
-/*
- * Checks EOL anchor and shifts one if match is not on a
- * boundary.
- */
-#define CHECK_EOL_ANCHOR \
- if (!_EOL_COND) \
- _SHIFT_ONE;
-
-/*
- * Executes matching of the precompiled pattern on the input string.
- * Returns REG_OK or REG_NOMATCH depending on if we find a match or not.
- */
-int
-tre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
- tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags)
-{
- unsigned int shift, u = 0, v = 0;
- ssize_t j = 0;
- int ret = REG_NOMATCH;
- int mismatch;
- const char *str_byte = data;
- const void *startptr = NULL;
- const tre_char_t *str_wide = data;
-
- /* Calculate length if unspecified. */
- if (len == (size_t)-1)
- switch (type)
- {
- case STR_WIDE:
- len = tre_strlen(str_wide);
- break;
- default:
- len = strlen(str_byte);
- break;
- }
-
- /* Shortcut for empty pattern */
- if (fg->matchall)
- {
- if (!fg->nosub && nmatch >= 1)
- {
- pmatch[0].rm_so = 0;
- pmatch[0].rm_eo = len;
- }
- if (fg->bol && fg->eol)
- return (len == 0) ? REG_OK : REG_NOMATCH;
- else
- return REG_OK;
- }
-
- /* No point in going farther if we do not have enough data. */
- switch (type)
- {
- case STR_WIDE:
- if (len < fg->wlen)
- return ret;
- shift = fg->wlen;
- break;
- default:
- if (len < fg->len)
- return ret;
- shift = fg->len;
- }
-
- /*
- * REG_NOTBOL means not anchoring ^ to the beginning of the line, so we
- * can shift one because there can't be a match at the beginning.
- */
- if (fg->bol && (eflags & REG_NOTBOL))
- j = 1;
-
- /*
- * Like above, we cannot have a match at the very end when anchoring to
- * the end and REG_NOTEOL is specified.
- */
- if (fg->eol && (eflags & REG_NOTEOL))
- len--;
-
- if (fg->reversed)
- j = len - (type == STR_WIDE ? fg->wlen : fg->len);
-
-
- /* Only try once at the beginning or ending of the line. */
- if ((fg->bol || fg->eol) && !fg->newline && !(eflags & REG_NOTBOL) &&
- !(eflags & REG_NOTEOL))
- {
- /* Simple text comparison. */
- if (!((fg->bol && fg->eol) &&
- (type == STR_WIDE ? (len != fg->wlen) : (len != fg->len))))
- {
- /* Determine where in data to start search at. */
- j = fg->eol ? len - (type == STR_WIDE ? fg->wlen : fg->len) : 0;
- SKIP_CHARS(j);
- mismatch = fastcmp(fg, startptr, type);
- if (mismatch == REG_OK)
- {
- if (fg->word && !IS_ON_WORD_BOUNDARY)
- return ret;
- if (!fg->nosub && nmatch >= 1)
- {
- pmatch[0].rm_so = j;
- pmatch[0].rm_eo = j + (type == STR_WIDE ? fg->wlen : fg->len);
- }
- return REG_OK;
- }
- }
- }
- else
- {
- /* Quick Search / Turbo Boyer-Moore algorithm. */
- do
- {
- SKIP_CHARS(j);
- mismatch = fastcmp(fg, startptr, type);
- if (mismatch == REG_OK)
- {
- if (fg->word)
- CHECK_WORD_BOUNDARY;
- if (fg->bol)
- CHECK_BOL_ANCHOR;
- if (fg->eol)
- CHECK_EOL_ANCHOR;
- if (!fg->nosub && nmatch >= 1)
- {
- pmatch[0].rm_so = j;
- pmatch[0].rm_eo = j + ((type == STR_WIDE) ? fg->wlen : fg->len);
- }
- return REG_OK;
- }
- else if (mismatch > 0)
- return mismatch;
- mismatch = -mismatch - 1;
- SHIFT;
- } while (!IS_OUT_OF_BOUNDS);
- }
- return ret;
-}
-
-/*
- * Frees the resources that were allocated when the pattern was compiled.
- */
-void
-tre_free_fast(fastmatch_t *fg)
-{
-
- DPRINT(("tre_fast_free: freeing structures for pattern %s\n",
- fg->pattern));
-
-#ifdef TRE_WCHAR
- hashtable_free(fg->qsBc_table);
- if (!fg->hasdot)
- free(fg->bmGs);
- if (fg->wescmap)
- free(fg->wescmap);
- free(fg->wpattern);
-#endif
- if (!fg->hasdot)
- free(fg->sbmGs);
- if (fg->escmap)
- free(fg->escmap);
- free(fg->pattern);
-}
-
-/*
- * Returns: -(i + 1) on failure (position that it failed with minus sign)
- * error code on error
- * REG_OK on success
- */
-static inline int
-fastcmp(const fastmatch_t *fg, const void *data, tre_str_type_t type)
-{
- const char *str_byte = data;
- const char *pat_byte = fg->pattern;
- const tre_char_t *str_wide = data;
- const tre_char_t *pat_wide = fg->wpattern;
- const bool *escmap = (type == STR_WIDE) ? fg->wescmap : fg->escmap;
- size_t len = (type == STR_WIDE) ? fg->wlen : fg->len;
- int ret = REG_OK;
-
- /* Compare the pattern and the input char-by-char from the last position. */
- for (int i = len - 1; i >= 0; i--) {
- switch (type)
- {
- case STR_WIDE:
-
- /* Check dot */
- if (fg->hasdot && pat_wide[i] == TRE_CHAR('.') &&
- (!escmap || !escmap[i]) &&
- (!fg->newline || (str_wide[i] != TRE_CHAR('\n'))))
- continue;
-
- /* Compare */
- if (fg->icase ? (towlower(pat_wide[i]) == towlower(str_wide[i]))
- : (pat_wide[i] == str_wide[i]))
- continue;
- break;
- default:
- /* Check dot */
- if (fg->hasdot && pat_byte[i] == '.' &&
- (!escmap || !escmap[i]) &&
- (!fg->newline || (str_byte[i] != '\n')))
- continue;
-
- /* Compare */
- if (fg->icase ? (tolower((unsigned char)pat_byte[i]) == tolower((unsigned char)str_byte[i]))
- : (pat_byte[i] == str_byte[i]))
- continue;
- }
- DPRINT(("fastcmp: mismatch at position %d\n", i));
- ret = -(i + 1);
- break;
- }
- return ret;
-}
diff --git a/usr.bin/grep/regex/tre-fastmatch.h b/usr.bin/grep/regex/tre-fastmatch.h
deleted file mode 100644
index f72397c58c339..0000000000000
--- a/usr.bin/grep/regex/tre-fastmatch.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* $FreeBSD$ */
-
-#ifndef TRE_FASTMATCH_H
-#define TRE_FASTMATCH_H 1
-
-#include <fastmatch.h>
-#include <hashtable.h>
-#include <limits.h>
-#include <regex.h>
-#include <stdbool.h>
-
-#include "hashtable.h"
-
-int tre_compile_literal(fastmatch_t *preg, const tre_char_t *regex,
- size_t, int);
-int tre_compile_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);
-int tre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
- tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags);
-void tre_free_fast(fastmatch_t *preg);
-
-#endif /* TRE_FASTMATCH_H */
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index 695853a6be2f1..e53558e5e0bac 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -52,9 +52,6 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include <wctype.h>
-#ifndef WITHOUT_FASTMATCH
-#include "fastmatch.h"
-#endif
#include "grep.h"
static bool first_match = true;
@@ -512,14 +509,8 @@ procline(struct parsec *pc)
r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
else
#endif
-#ifndef WITHOUT_FASTMATCH
- if (fg_pattern[i].pattern)
- r = fastexec(&fg_pattern[i],
- pc->ln.dat, 1, &pmatch, leflags);
- else
-#endif
- r = regexec(&r_pattern[i], pc->ln.dat, 1,
- &pmatch, leflags);
+ r = regexec(&r_pattern[i], pc->ln.dat, 1, &pmatch,
+ leflags);
if (r != 0)
continue;
/* Check for full match */
@@ -527,11 +518,7 @@ procline(struct parsec *pc)
(size_t)pmatch.rm_eo != pc->ln.len))
continue;
/* Check for whole word match */
-#ifndef WITHOUT_FASTMATCH
- if (wflag || fg_pattern[i].word) {
-#else
if (wflag) {
-#endif
wbegin = wend = L' ';
if (pmatch.rm_so != 0 &&
sscanf(&pc->ln.dat[pmatch.rm_so - 1],