diff options
Diffstat (limited to 'gnu/usr.bin/grep/dfa.c')
-rw-r--r-- | gnu/usr.bin/grep/dfa.c | 247 |
1 files changed, 93 insertions, 154 deletions
diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c index 40a926b35d874..368348dc17630 100644 --- a/gnu/usr.bin/grep/dfa.c +++ b/gnu/usr.bin/grep/dfa.c @@ -77,6 +77,16 @@ extern void free(); #define ISCNTRL(C) (isascii(C) && iscntrl(C)) #endif +/* ISASCIIDIGIT differs from ISDIGIT, as follows: + - Its arg may be any int or unsigned int; it need not be an unsigned char. + - It's guaranteed to evaluate its argument exactly once. + - It's typically faster. + Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that + only '0' through '9' are digits. Prefer ISASCIIDIGIT to ISDIGIT unless + it's important to use the locale's definition of `digit' even when the + host does not conform to Posix. */ +#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9) + /* If we (don't) have I18N. */ /* glibc defines _ */ #ifndef _ @@ -145,9 +155,7 @@ static char **addlists PARAMS ((char **old, char **new)); static char **inboth PARAMS ((char **left, char **right)); static ptr_t -xcalloc(n, s) - size_t n; - size_t s; +xcalloc (size_t n, size_t s) { ptr_t r = calloc(n, s); @@ -157,8 +165,7 @@ xcalloc(n, s) } static ptr_t -xmalloc(n) - size_t n; +xmalloc (size_t n) { ptr_t r = malloc(n); @@ -169,9 +176,7 @@ xmalloc(n) } static ptr_t -xrealloc(p, n) - ptr_t p; - size_t n; +xrealloc (ptr_t p, size_t n) { ptr_t r = realloc(p, n); @@ -197,8 +202,7 @@ xrealloc(p, n) #ifdef DEBUG static void -prtok(t) - token t; +prtok (token t) { char *s; @@ -236,33 +240,25 @@ prtok(t) /* Stuff pertaining to charclasses. */ static int -tstbit(b, c) - int b; - charclass c; +tstbit (int b, charclass c) { return c[b / INTBITS] & 1 << b % INTBITS; } static void -setbit(b, c) - int b; - charclass c; +setbit (int b, charclass c) { c[b / INTBITS] |= 1 << b % INTBITS; } static void -clrbit(b, c) - int b; - charclass c; +clrbit (int b, charclass c) { c[b / INTBITS] &= ~(1 << b % INTBITS); } static void -copyset(src, dst) - charclass src; - charclass dst; +copyset (charclass src, charclass dst) { int i; @@ -271,8 +267,7 @@ copyset(src, dst) } static void -zeroset(s) - charclass s; +zeroset (charclass s) { int i; @@ -281,8 +276,7 @@ zeroset(s) } static void -notset(s) - charclass s; +notset (charclass s) { int i; @@ -291,9 +285,7 @@ notset(s) } static int -equal(s1, s2) - charclass s1; - charclass s2; +equal (charclass s1, charclass s2) { int i; @@ -308,8 +300,7 @@ static struct dfa *dfa; /* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ static int -charclass_index(s) - charclass s; +charclass_index (charclass s) { int i; @@ -333,10 +324,7 @@ static unsigned char eolbyte; /* Entry point to set syntax options. */ void -dfasyntax(bits, fold, eol) - reg_syntax_t bits; - int fold; - int eol; +dfasyntax (reg_syntax_t bits, int fold, int eol) { syntax_bits_set = 1; syntax_bits = bits; @@ -362,10 +350,12 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. */ #define FETCH(c, eoferr) \ { \ if (! lexleft) \ - if (eoferr != 0) \ - dfaerror(eoferr); \ - else \ - return lasttok = END; \ + { \ + if (eoferr != 0) \ + dfaerror (eoferr); \ + else \ + return lasttok = END; \ + } \ (c) = (unsigned char) *lexptr++; \ --lexleft; \ } @@ -388,8 +378,8 @@ FUNC(is_print, ISPRINT) FUNC(is_graph, ISGRAPH) FUNC(is_cntrl, ISCNTRL) -static int is_blank(c) -int c; +static int +is_blank (int c) { return (c == ' ' || c == '\t'); } @@ -420,8 +410,7 @@ static struct { #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_') static int -looking_at(s) - const char *s; +looking_at (char const *s) { size_t len; @@ -432,12 +421,14 @@ looking_at(s) } static token -lex() +lex (void) { token c, c1, c2; int backslash = 0, invert; charclass ccl; int i; + char lo[2]; + char hi[2]; /* Basic plan: We fetch a character. If it's a backslash, we set the backslash flag and go through the loop again. @@ -570,10 +561,10 @@ lex() int lo = -1, hi = -1; char const *p = lexptr; char const *lim = p + lexleft; - for (; p != lim && ISDIGIT (*p); p++) + for (; p != lim && ISASCIIDIGIT (*p); p++) lo = (lo < 0 ? 0 : lo * 10) + *p - '0'; if (p != lim && *p == ',') - while (++p != lim && ISDIGIT (*p)) + while (++p != lim && ISASCIIDIGIT (*p)) hi = (hi < 0 ? 0 : hi * 10) + *p - '0'; else hi = lo; @@ -588,13 +579,13 @@ lex() {M,} - minimum count, maximum is infinity {M,N} - M through N */ FETCH(c, _("unfinished repeat count")); - if (ISDIGIT(c)) + if (ISASCIIDIGIT (c)) { minrep = c - '0'; for (;;) { FETCH(c, _("unfinished repeat count")); - if (!ISDIGIT(c)) + if (! ISASCIIDIGIT (c)) break; minrep = 10 * minrep + c - '0'; } @@ -604,7 +595,7 @@ lex() if (c == ',') { FETCH (c, _("unfinished repeat count")); - if (! ISDIGIT (c)) + if (! ISASCIIDIGIT (c)) maxrep = -1; else { @@ -612,7 +603,7 @@ lex() for (;;) { FETCH (c, _("unfinished repeat count")); - if (! ISDIGIT (c)) + if (! ISASCIIDIGIT (c)) break; maxrep = 10 * maxrep + c - '0'; } @@ -751,16 +742,26 @@ lex() } else c2 = c; - while (c <= c2) + + lo[0] = c; lo[1] = '\0'; + hi[0] = c2; hi[1] = '\0'; + for (c = 0; c < NOTCHAR; c++) { - setbit(c, ccl); - if (case_fold) - if (ISUPPER(c)) - setbit(tolower(c), ccl); - else if (ISLOWER(c)) - setbit(toupper(c), ccl); - ++c; + char ch[2]; + ch[0] = c; ch[1] = '\0'; + if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0) + { + setbit (c, ccl); + if (case_fold) + { + if (ISUPPER (c)) + setbit (tolower (c), ccl); + else if (ISLOWER (c)) + setbit (toupper (c), ccl); + } + } } + skip: ; } @@ -809,8 +810,7 @@ static int depth; /* Current depth of a hypothetical stack /* Add the given token to the parse tree, maintaining the depth count and updating the maximum depth if necessary. */ static void -addtok(t) - token t; +addtok (token t) { REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex); dfa->tokens[dfa->tindex++] = t; @@ -869,7 +869,7 @@ addtok(t) The parser builds a parse tree in postfix form in an array of tokens. */ static void -atom() +atom (void) { if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD @@ -892,8 +892,7 @@ atom() /* Return the number of tokens in the given subexpression. */ static int -nsubtoks(tindex) -int tindex; +nsubtoks (int tindex) { int ntoks1; @@ -915,8 +914,7 @@ int tindex; /* Copy the given subexpression to the top of the tree. */ static void -copytoks(tindex, ntokens) - int tindex, ntokens; +copytoks (int tindex, int ntokens) { int i; @@ -925,7 +923,7 @@ copytoks(tindex, ntokens) } static void -closure() +closure (void) { int tindex, ntokens, i; @@ -960,7 +958,7 @@ closure() } static void -branch() +branch (void) { closure(); while (tok != RPAREN && tok != OR && tok >= 0) @@ -971,8 +969,7 @@ branch() } static void -regexp(toplevel) - int toplevel; +regexp (int toplevel) { branch(); while (tok == OR) @@ -990,11 +987,7 @@ regexp(toplevel) length of the string, so s can include NUL characters. D is a pointer to the struct dfa to parse into. */ void -dfaparse(s, len, d) - char *s; - size_t len; - struct dfa *d; - +dfaparse (char *s, size_t len, struct dfa *d) { dfa = d; lexstart = lexptr = s; @@ -1027,9 +1020,7 @@ dfaparse(s, len, d) /* Copy one set to another; the destination must be large enough. */ static void -copy(src, dst) - position_set *src; - position_set *dst; +copy (position_set *src, position_set *dst) { int i; @@ -1043,9 +1034,7 @@ copy(src, dst) the same index then their constraints are logically or'd together. S->elems must point to an array large enough to hold the resulting set. */ static void -insert(p, s) - position p; - position_set *s; +insert (position p, position_set *s) { int i; position t1, t2; @@ -1070,10 +1059,7 @@ insert(p, s) /* Merge two sets of positions into a third. The result is exactly as if the positions of both sets were inserted into an initially empty set. */ static void -merge(s1, s2, m) - position_set *s1; - position_set *s2; - position_set *m; +merge (position_set *s1, position_set *s2, position_set *m) { int i = 0, j = 0; @@ -1096,9 +1082,7 @@ merge(s1, s2, m) /* Delete a position from a set. */ static void -delete(p, s) - position p; - position_set *s; +delete (position p, position_set *s) { int i; @@ -1115,11 +1099,7 @@ delete(p, s) state. Newline and letter tell whether we got here on a newline or letter, respectively. */ static int -state_index(d, s, newline, letter) - struct dfa *d; - position_set *s; - int newline; - int letter; +state_index (struct dfa *d, position_set *s, int newline, int letter) { int hash = 0; int constraint; @@ -1184,12 +1164,8 @@ state_index(d, s, newline, letter) that position with the elements of its follow labeled with an appropriate constraint. Repeat exhaustively until no funny positions are left. S->elems must be large enough to hold the result. */ -static void epsclosure PARAMS ((position_set *s, struct dfa *d)); - static void -epsclosure(s, d) - position_set *s; - struct dfa *d; +epsclosure (position_set *s, struct dfa *d) { int i, j; int *visited; @@ -1301,9 +1277,7 @@ epsclosure(s, d) scheme; the number of elements in each set deeper in the stack can be used to determine the address of a particular set's array. */ void -dfaanalyze(d, searchflag) - struct dfa *d; - int searchflag; +dfaanalyze (struct dfa *d, int searchflag) { int *nullable; /* Nullable stack. */ int *nfirstpos; /* Element count stack for firstpos sets. */ @@ -1564,10 +1538,7 @@ dfaanalyze(d, searchflag) create a new group labeled with the characters of C and insert this position in that group. */ void -dfastate(s, d, trans) - int s; - struct dfa *d; - int trans[]; +dfastate (int s, struct dfa *d, int trans[]) { position_set grps[NOTCHAR]; /* As many as will ever be needed. */ charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */ @@ -1809,9 +1780,7 @@ dfastate(s, d, trans) TODO: Improve this comment, get rid of the unnecessary redundancy. */ static void -build_state(s, d) - int s; - struct dfa *d; +build_state (int s, struct dfa *d) { int *trans; /* The new transition table. */ int i; @@ -1887,8 +1856,7 @@ build_state(s, d) } static void -build_state_zero(d) - struct dfa *d; +build_state_zero (struct dfa *d) { d->tralloc = 1; d->trcount = 0; @@ -1914,13 +1882,8 @@ build_state_zero(d) match needs to be verified by a backtracking matcher. Otherwise we store a 0 in *backref. */ char * -dfaexec(d, begin, end, newline, count, backref) - struct dfa *d; - char *begin; - char *end; - int newline; - int *count; - int *backref; +dfaexec (struct dfa *d, char *begin, char *end, + int newline, int *count, int *backref) { register int s, s1, tmp; /* Current state. */ register unsigned char *p; /* Current input character. */ @@ -2001,8 +1964,7 @@ dfaexec(d, begin, end, newline, count, backref) /* Initialize the components of a dfa that the other routines don't initialize for themselves. */ void -dfainit(d) - struct dfa *d; +dfainit (struct dfa *d) { d->calloc = 1; MALLOC(d->charclasses, charclass, d->calloc); @@ -2020,11 +1982,7 @@ dfainit(d) /* Parse and analyze a single string of the given length. */ void -dfacomp(s, len, d, searchflag) - char *s; - size_t len; - struct dfa *d; - int searchflag; +dfacomp (char *s, size_t len, struct dfa *d, int searchflag) { if (case_fold) /* dummy folding in service of dfamust() */ { @@ -2063,8 +2021,7 @@ dfacomp(s, len, d, searchflag) /* Free the storage held by the components of a dfa. */ void -dfafree(d) - struct dfa *d; +dfafree (struct dfa *d) { int i; struct dfamust *dm, *ndm; @@ -2176,9 +2133,7 @@ dfafree(d) 'psi|epsilon' is likelier)? */ static char * -icatalloc(old, new) - char *old; - char *new; +icatalloc (char *old, char *new) { char *result; size_t oldsize, newsize; @@ -2199,16 +2154,13 @@ icatalloc(old, new) } static char * -icpyalloc(string) - char *string; +icpyalloc (char *string) { return icatalloc((char *) NULL, string); } static char * -istrstr(lookin, lookfor) - char *lookin; - char *lookfor; +istrstr (char *lookin, char *lookfor) { char *cp; size_t len; @@ -2221,16 +2173,14 @@ istrstr(lookin, lookfor) } static void -ifree(cp) - char *cp; +ifree (char *cp) { if (cp != NULL) free(cp); } static void -freelist(cpp) - char **cpp; +freelist (char **cpp) { int i; @@ -2244,10 +2194,7 @@ freelist(cpp) } static char ** -enlist(cpp, new, len) - char **cpp; - char *new; - size_t len; +enlist (char **cpp, char *new, size_t len) { int i, j; @@ -2292,9 +2239,7 @@ enlist(cpp, new, len) list of their distinct common substrings. Return NULL if something seems wild. */ static char ** -comsubs(left, right) - char *left; - char *right; +comsubs (char *left, char *right) { char **cpp; char *lcp; @@ -2328,9 +2273,7 @@ comsubs(left, right) } static char ** -addlists(old, new) -char **old; -char **new; +addlists (char **old, char **new) { int i; @@ -2348,9 +2291,7 @@ char **new; /* Given two lists of substrings, return a new list giving substrings common to both. */ static char ** -inboth(left, right) - char **left; - char **right; +inboth (char **left, char **right) { char **both; char **temp; @@ -2391,16 +2332,14 @@ typedef struct } must; static void -resetmust(mp) -must *mp; +resetmust (must *mp) { mp->left[0] = mp->right[0] = mp->is[0] = '\0'; freelist(mp->in); } static void -dfamust(dfa) -struct dfa *dfa; +dfamust (struct dfa *dfa) { must *musts; must *mp; |