summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jung-uk Kim <jkim@FreeBSD.org> 2013-05-16 23:23:37 +0000
committer Jung-uk Kim <jkim@FreeBSD.org> 2013-05-16 23:23:37 +0000
commit 825c908da4b8bbcc99f7db3f94b9fb11b0938476 (patch)
tree 85f93809ea3f8e552387079a13d6de4a55641504
parent f90f2cd13ef160917740165ec7fe34eb4d7f1b14 (diff)
Notes
-rw-r--r-- posix/regex.h 593
-rw-r--r-- regcomp.c 903
-rw-r--r-- regex.c 39
-rw-r--r-- regex.h 600
-rw-r--r-- regex_internal.c 684
-rw-r--r-- regex_internal.h 209
-rw-r--r-- regexec.c 943
7 files changed, 1958 insertions, 2013 deletions
diff --git a/posix/regex.h b/posix/regex.h
deleted file mode 100644
index b2d9a62fec97..000000000000
--- a/posix/regex.h
+++ /dev/null
@@ -1,593 +0,0 @@
-/* Definitions for data structures and routines for the regular
- expression library.
- Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef _REGEX_H
-#define _REGEX_H 1
-
-#include <sys/types.h>
-
-/* Allow the use in C++ code. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* POSIX says that <sys/types.h> must be included (by the caller) before
- <regex.h>. */
-
-#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
-/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
- should be there. */
-# include <stddef.h>
-#endif
-
-/* The following two types have to be signed and unsigned integer type
- wide enough to hold a value of a pointer. For most ANSI compilers
- ptrdiff_t and size_t should be likely OK. Still size of these two
- types is 2 for Microsoft C. Ugh... */
-typedef long int s_reg_t;
-typedef unsigned long int active_reg_t;
-
-/* The following bits are used to determine the regexp syntax we
- recognize. The set/not-set meanings are chosen so that Emacs syntax
- remains the value 0. The bits are given in alphabetical order, and
- the definitions shifted by one from the previous bit; thus, when we
- add or remove a bit, only one other definition need change. */
-typedef unsigned long int reg_syntax_t;
-
-/* If this bit is not set, then \ inside a bracket expression is literal.
- If set, then such a \ quotes the following character. */
-#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
-
-/* If this bit is not set, then + and ? are operators, and \+ and \? are
- literals.
- If set, then \+ and \? are operators and + and ? are literals. */
-#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
-
-/* If this bit is set, then character classes are supported. They are:
- [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
- [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
- If not set, then character classes are not supported. */
-#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
-
-/* If this bit is set, then ^ and $ are always anchors (outside bracket
- expressions, of course).
- If this bit is not set, then it depends:
- ^ is an anchor if it is at the beginning of a regular
- expression or after an open-group or an alternation operator;
- $ is an anchor if it is at the end of a regular expression, or
- before a close-group or an alternation operator.
-
- This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
- POSIX draft 11.2 says that * etc. in leading positions is undefined.
- We already implemented a previous draft which made those constructs
- invalid, though, so we haven't changed the code back. */
-#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
-
-/* If this bit is set, then special characters are always special
- regardless of where they are in the pattern.
- If this bit is not set, then special characters are special only in
- some contexts; otherwise they are ordinary. Specifically,
- * + ? and intervals are only special when not after the beginning,
- open-group, or alternation operator. */
-#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
-
-/* If this bit is set, then *, +, ?, and { cannot be first in an re or
- immediately after an alternation or begin-group operator. */
-#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
-
-/* If this bit is set, then . matches newline.
- If not set, then it doesn't. */
-#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
-
-/* If this bit is set, then . doesn't match NUL.
- If not set, then it does. */
-#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
-
-/* If this bit is set, nonmatching lists [^...] do not match newline.
- If not set, they do. */
-#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
-
-/* If this bit is set, either \{...\} or {...} defines an
- interval, depending on RE_NO_BK_BRACES.
- If not set, \{, \}, {, and } are literals. */
-#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
-
-/* If this bit is set, +, ? and | aren't recognized as operators.
- If not set, they are. */
-#define RE_LIMITED_OPS (RE_INTERVALS << 1)
-
-/* If this bit is set, newline is an alternation operator.
- If not set, newline is literal. */
-#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
-
-/* If this bit is set, then `{...}' defines an interval, and \{ and \}
- are literals.
- If not set, then `\{...\}' defines an interval. */
-#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
-
-/* If this bit is set, (...) defines a group, and \( and \) are literals.
- If not set, \(...\) defines a group, and ( and ) are literals. */
-#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
-
-/* If this bit is set, then \<digit> matches <digit>.
- If not set, then \<digit> is a back-reference. */
-#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
-
-/* If this bit is set, then | is an alternation operator, and \| is literal.
- If not set, then \| is an alternation operator, and | is literal. */
-#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
-
-/* If this bit is set, then an ending range point collating higher
- than the starting range point, as in [z-a], is invalid.
- If not set, then when ending range point collates higher than the
- starting range point, the range is ignored. */
-#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
-
-/* If this bit is set, then an unmatched ) is ordinary.
- If not set, then an unmatched ) is invalid. */
-#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
-
-/* If this bit is set, succeed as soon as we match the whole pattern,
- without further backtracking. */
-#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
-
-/* If this bit is set, do not process the GNU regex operators.
- If not set, then the GNU regex operators are recognized. */
-#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
-
-/* If this bit is set, turn on internal regex debugging.
- If not set, and debugging was on, turn it off.
- This only works if regex.c is compiled -DDEBUG.
- We define this bit always, so that all that's needed to turn on
- debugging is to recompile regex.c; the calling code can always have
- this bit set, and it won't affect anything in the normal case. */
-#define RE_DEBUG (RE_NO_GNU_OPS << 1)
-
-/* If this bit is set, a syntactically invalid interval is treated as
- a string of ordinary characters. For example, the ERE 'a{1' is
- treated as 'a\{1'. */
-#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
-
-/* If this bit is set, then ignore case when matching.
- If not set, then case is significant. */
-#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
-
-/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
- for ^, because it is difficult to scan the regex backwards to find
- whether ^ should be special. */
-#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
-
-/* If this bit is set, then \{ cannot be first in an bre or
- immediately after an alternation or begin-group operator. */
-#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
-
-/* If this bit is set, then no_sub will be set to 1 during
- re_compile_pattern. */
-#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
-
-/* This global variable defines the particular regexp syntax to use (for
- some interfaces). When a regexp is compiled, the syntax used is
- stored in the pattern buffer, so changing this does not affect
- already-compiled regexps. */
-extern reg_syntax_t re_syntax_options;
-
-/* Define combinations of the above bits for the standard possibilities.
- (The [[[ comments delimit what gets put into the Texinfo file, so
- don't delete them!) */
-/* [[[begin syntaxes]]] */
-#define RE_SYNTAX_EMACS 0
-
-#define RE_SYNTAX_AWK \
- (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
- | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
-
-#define RE_SYNTAX_GNU_AWK \
- ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
- & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
- | RE_CONTEXT_INVALID_OPS ))
-
-#define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
- | RE_INTERVALS | RE_NO_GNU_OPS)
-
-#define RE_SYNTAX_GREP \
- (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
- | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
- | RE_NEWLINE_ALT)
-
-#define RE_SYNTAX_EGREP \
- (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
- | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
- | RE_NO_BK_VBAR)
-
-#define RE_SYNTAX_POSIX_EGREP \
- (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
- | RE_INVALID_INTERVAL_ORD)
-
-/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
-#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
-
-#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
-
-/* Syntax bits common to both basic and extended POSIX regex syntax. */
-#define _RE_SYNTAX_POSIX_COMMON \
- (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
- | RE_INTERVALS | RE_NO_EMPTY_RANGES)
-
-#define RE_SYNTAX_POSIX_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
-
-/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
- RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
- isn't minimal, since other operators, such as \`, aren't disabled. */
-#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
-
-#define RE_SYNTAX_POSIX_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
- | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
-/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
- removed and RE_NO_BK_REFS is added. */
-#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
-/* [[[end syntaxes]]] */
-
-/* Maximum number of duplicates an interval can allow. Some systems
- (erroneously) define this in other header files, but we want our
- value, so remove any previous define. */
-#ifdef RE_DUP_MAX
-# undef RE_DUP_MAX
-#endif
-/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
-#define RE_DUP_MAX (0x7fff)
-
-
-/* POSIX `cflags' bits (i.e., information for `regcomp'). */
-
-/* If this bit is set, then use extended regular expression syntax.
- If not set, then use basic regular expression syntax. */
-#define REG_EXTENDED 1
-
-/* If this bit is set, then ignore case when matching.
- If not set, then case is significant. */
-#define REG_ICASE (REG_EXTENDED << 1)
-
-/* If this bit is set, then anchors do not match at newline
- characters in the string.
- If not set, then anchors do match at newlines. */
-#define REG_NEWLINE (REG_ICASE << 1)
-
-/* If this bit is set, then report only success or fail in regexec.
- If not set, then returns differ between not matching and errors. */
-#define REG_NOSUB (REG_NEWLINE << 1)
-
-
-/* POSIX `eflags' bits (i.e., information for regexec). */
-
-/* If this bit is set, then the beginning-of-line operator doesn't match
- the beginning of the string (presumably because it's not the
- beginning of a line).
- If not set, then the beginning-of-line operator does match the
- beginning of the string. */
-#define REG_NOTBOL 1
-
-/* Like REG_NOTBOL, except for the end-of-line. */
-#define REG_NOTEOL (1 << 1)
-
-/* Use PMATCH[0] to delimit the start and end of the search in the
- buffer. */
-#define REG_STARTEND (1 << 2)
-
-
-/* If any error codes are removed, changed, or added, update the
- `re_error_msg' table in regex.c. */
-typedef enum
-{
-#ifdef _XOPEN_SOURCE
- REG_ENOSYS = -1, /* This will never happen for this implementation. */
-#endif
-
- REG_NOERROR = 0, /* Success. */
- REG_NOMATCH, /* Didn't find a match (for regexec). */
-
- /* POSIX regcomp return error codes. (In the order listed in the
- standard.) */
- REG_BADPAT, /* Invalid pattern. */
- REG_ECOLLATE, /* Inalid collating element. */
- REG_ECTYPE, /* Invalid character class name. */
- REG_EESCAPE, /* Trailing backslash. */
- REG_ESUBREG, /* Invalid back reference. */
- REG_EBRACK, /* Unmatched left bracket. */
- REG_EPAREN, /* Parenthesis imbalance. */
- REG_EBRACE, /* Unmatched \{. */
- REG_BADBR, /* Invalid contents of \{\}. */
- REG_ERANGE, /* Invalid range end. */
- REG_ESPACE, /* Ran out of memory. */
- REG_BADRPT, /* No preceding re for repetition op. */
-
- /* Error codes we've added. */
- REG_EEND, /* Premature end. */
- REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
- REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
-} reg_errcode_t;
-
-/* This data structure represents a compiled pattern. Before calling
- the pattern compiler, the fields `buffer', `allocated', `fastmap',
- `translate', and `no_sub' can be set. After the pattern has been
- compiled, the `re_nsub' field is available. All other fields are
- private to the regex routines. */
-
-#ifndef RE_TRANSLATE_TYPE
-# define RE_TRANSLATE_TYPE char *
-#endif
-
-struct re_pattern_buffer
-{
-/* [[[begin pattern_buffer]]] */
- /* Space that holds the compiled pattern. It is declared as
- `unsigned char *' because its elements are
- sometimes used as array indexes. */
- unsigned char *buffer;
-
- /* Number of bytes to which `buffer' points. */
- unsigned long int allocated;
-
- /* Number of bytes actually used in `buffer'. */
- unsigned long int used;
-
- /* Syntax setting with which the pattern was compiled. */
- reg_syntax_t syntax;
-
- /* Pointer to a fastmap, if any, otherwise zero. re_search uses
- the fastmap, if there is one, to skip over impossible
- starting points for matches. */
- char *fastmap;
-
- /* Either a translate table to apply to all characters before
- comparing them, or zero for no translation. The translation
- is applied to a pattern when it is compiled and to a string
- when it is matched. */
- RE_TRANSLATE_TYPE translate;
-
- /* Number of subexpressions found by the compiler. */
- size_t re_nsub;
-
- /* Zero if this pattern cannot match the empty string, one else.
- Well, in truth it's used only in `re_search_2', to see
- whether or not we should use the fastmap, so we don't set
- this absolutely perfectly; see `re_compile_fastmap' (the
- `duplicate' case). */
- unsigned can_be_null : 1;
-
- /* If REGS_UNALLOCATED, allocate space in the `regs' structure
- for `max (RE_NREGS, re_nsub + 1)' groups.
- If REGS_REALLOCATE, reallocate space if necessary.
- If REGS_FIXED, use what's there. */
-#define REGS_UNALLOCATED 0
-#define REGS_REALLOCATE 1
-#define REGS_FIXED 2
- unsigned regs_allocated : 2;
-
- /* Set to zero when `regex_compile' compiles a pattern; set to one
- by `re_compile_fastmap' if it updates the fastmap. */
- unsigned fastmap_accurate : 1;
-
- /* If set, `re_match_2' does not return information about
- subexpressions. */
- unsigned no_sub : 1;
-
- /* If set, a beginning-of-line anchor doesn't match at the
- beginning of the string. */
- unsigned not_bol : 1;
-
- /* Similarly for an end-of-line anchor. */
- unsigned not_eol : 1;
-
- /* If true, an anchor at a newline matches. */
- unsigned newline_anchor : 1;
-
-/* [[[end pattern_buffer]]] */
-};
-
-typedef struct re_pattern_buffer regex_t;
-
-/* Type for byte offsets within the string. POSIX mandates this. */
-typedef int regoff_t;
-
-
-/* This is the structure we store register match data in. See
- regex.texinfo for a full description of what registers match. */
-struct re_registers
-{
- unsigned num_regs;
- regoff_t *start;
- regoff_t *end;
-};
-
-
-/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
- `re_match_2' returns information about at least this many registers
- the first time a `regs' structure is passed. */
-#ifndef RE_NREGS
-# define RE_NREGS 30
-#endif
-
-
-/* POSIX specification for registers. Aside from the different names than
- `re_registers', POSIX uses an array of structures, instead of a
- structure of arrays. */
-typedef struct
-{
- regoff_t rm_so; /* Byte offset from string's start to substring's start. */
- regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
-} regmatch_t;
-
-/* Declarations for routines. */
-
-/* To avoid duplicating every routine declaration -- once with a
- prototype (if we are ANSI), and once without (if we aren't) -- we
- use the following macro to declare argument types. This
- unfortunately clutters up the declarations a bit, but I think it's
- worth it. */
-
-#if __STDC__
-
-# define _RE_ARGS(args) args
-
-#else /* not __STDC__ */
-
-# define _RE_ARGS(args) ()
-
-#endif /* not __STDC__ */
-
-/* Sets the current default syntax to SYNTAX, and return the old syntax.
- You can also simply assign to the `re_syntax_options' variable. */
-extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
-
-/* Compile the regular expression PATTERN, with length LENGTH
- and syntax given by the global `re_syntax_options', into the buffer
- BUFFER. Return NULL if successful, and an error string if not. */
-extern const char *re_compile_pattern
- _RE_ARGS ((const char *pattern, size_t length,
- struct re_pattern_buffer *buffer));
-
-
-/* Compile a fastmap for the compiled pattern in BUFFER; used to
- accelerate searches. Return 0 if successful and -2 if was an
- internal error. */
-extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
-
-
-/* Search in the string STRING (with length LENGTH) for the pattern
- compiled into BUFFER. Start searching at position START, for RANGE
- characters. Return the starting position of the match, -1 for no
- match, or -2 for an internal error. Also return register
- information in REGS (if REGS and BUFFER->no_sub are nonzero). */
-extern int re_search
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
- int length, int start, int range, struct re_registers *regs));
-
-
-/* Like `re_search', but search in the concatenation of STRING1 and
- STRING2. Also, stop searching at index START + STOP. */
-extern int re_search_2
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, int range, struct re_registers *regs, int stop));
-
-
-/* Like `re_search', but return how many characters in STRING the regexp
- in BUFFER matched, starting at position START. */
-extern int re_match
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
- int length, int start, struct re_registers *regs));
-
-
-/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
-extern int re_match_2
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, struct re_registers *regs, int stop));
-
-
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
- ENDS. Subsequent matches using BUFFER and REGS will use this memory
- for recording register information. STARTS and ENDS must be
- allocated with malloc, and must each be at least `NUM_REGS * sizeof
- (regoff_t)' bytes long.
-
- If NUM_REGS == 0, then subsequent matches should allocate their own
- register data.
-
- Unless this function is called, the first search or match using
- PATTERN_BUFFER will allocate its own register data, without
- freeing the old data. */
-extern void re_set_registers
- _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
- unsigned num_regs, regoff_t *starts, regoff_t *ends));
-
-#if defined _REGEX_RE_COMP || defined _LIBC
-# ifndef _CRAY
-/* 4.2 bsd compatibility. */
-extern char *re_comp _RE_ARGS ((const char *));
-extern int re_exec _RE_ARGS ((const char *));
-# endif
-#endif
-
-/* GCC 2.95 and later have "__restrict"; C99 compilers have
- "restrict", and "configure" may have defined "restrict". */
-#ifndef __restrict
-# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
-# if defined restrict || 199901L <= __STDC_VERSION__
-# define __restrict restrict
-# else
-# define __restrict
-# endif
-# endif
-#endif
-/* gcc 3.1 and up support the [restrict] syntax. */
-#ifndef __restrict_arr
-# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
-# define __restrict_arr __restrict
-# else
-# define __restrict_arr
-# endif
-#endif
-
-/* POSIX compatibility. */
-extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
- const char *__restrict __pattern,
- int __cflags));
-
-extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
- const char *__restrict __string, size_t __nmatch,
- regmatch_t __pmatch[__restrict_arr],
- int __eflags));
-
-extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
- char *__errbuf, size_t __errbuf_size));
-
-extern void regfree _RE_ARGS ((regex_t *__preg));
-
-
-#ifdef __cplusplus
-}
-#endif /* C++ */
-
-#endif /* regex.h */
-
-/*
-Local variables:
-make-backup-files: t
-version-control: t
-trim-versions-without-asking: nil
-End:
-*/
diff --git a/regcomp.c b/regcomp.c
index 68e2bdab92d1..e85b2351459c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002-2007,2009,2010,2011,2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -14,17 +14,15 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
- int length, reg_syntax_t syntax);
+ size_t length, reg_syntax_t syntax);
static void re_compile_fastmap_iter (regex_t *bufp,
const re_dfastate_t *init_state,
char *fastmap);
-static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
-static void init_word_char (re_dfa_t *dfa);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
#ifdef RE_ENABLE_I18N
static void free_charset (re_charset_t *cset);
#endif /* RE_ENABLE_I18N */
@@ -34,7 +32,6 @@ static reg_errcode_t create_initial_state (re_dfa_t *dfa);
static void optimize_utf8 (re_dfa_t *dfa);
#endif
static reg_errcode_t analyze (regex_t *preg);
-static reg_errcode_t create_initial_state (re_dfa_t *dfa);
static reg_errcode_t preorder (bin_tree_t *root,
reg_errcode_t (fn (void *, bin_tree_t *)),
void *extra);
@@ -48,12 +45,8 @@ static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
-static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
- int top_clone_node, int root_node,
- unsigned int constraint);
-static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
- unsigned int constraint);
-static int search_duplicated_node (re_dfa_t *dfa, int org_node,
+static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
+static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
@@ -61,12 +54,8 @@ static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
static int fetch_number (re_string_t *input, re_token_t *token,
reg_syntax_t syntax);
-static void fetch_token (re_token_t *result, re_string_t *input,
- reg_syntax_t syntax);
static int peek_token (re_token_t *token, re_string_t *input,
- reg_syntax_t syntax);
-static int peek_token_bracket (re_token_t *token, re_string_t *input,
- reg_syntax_t syntax);
+ reg_syntax_t syntax) internal_function;
static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
reg_syntax_t syntax, reg_errcode_t *err);
static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
@@ -96,45 +85,27 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
re_string_t *regexp,
re_token_t *token);
-#ifndef _LIBC
-# ifdef RE_ENABLE_I18N
-static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
- re_charset_t *mbcset, int *range_alloc,
- bracket_elem_t *start_elem,
- bracket_elem_t *end_elem);
-static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
- re_charset_t *mbcset,
- int *coll_sym_alloc,
- const unsigned char *name);
-# else /* not RE_ENABLE_I18N */
-static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
- bracket_elem_t *start_elem,
- bracket_elem_t *end_elem);
-static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
- const unsigned char *name);
-# endif /* not RE_ENABLE_I18N */
-#endif /* not _LIBC */
#ifdef RE_ENABLE_I18N
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
re_charset_t *mbcset,
int *equiv_class_alloc,
const unsigned char *name);
-static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans,
- re_bitset_ptr_t sbcset,
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+ bitset_t sbcset,
re_charset_t *mbcset,
int *char_class_alloc,
const unsigned char *class_name,
reg_syntax_t syntax);
#else /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
const unsigned char *name);
-static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans,
- re_bitset_ptr_t sbcset,
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+ bitset_t sbcset,
const unsigned char *class_name,
reg_syntax_t syntax);
#endif /* not RE_ENABLE_I18N */
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
- unsigned RE_TRANSLATE_TYPE trans,
+ RE_TRANSLATE_TYPE trans,
const unsigned char *class_name,
const unsigned char *extra,
int non_match, reg_errcode_t *err);
@@ -327,10 +298,8 @@ re_set_fastmap (char *fastmap, int icase, int ch)
Compile fastmap for the initial_state INIT_STATE. */
static void
-re_compile_fastmap_iter (bufp, init_state, fastmap)
- regex_t *bufp;
- const re_dfastate_t *init_state;
- char *fastmap;
+re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
+ char *fastmap)
{
re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
int node_cnt;
@@ -356,9 +325,9 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
&& dfa->nodes[node].type == CHARACTER
&& dfa->nodes[node].mb_partial)
*p++ = dfa->nodes[node].opr.c;
- memset (&state, 0, sizeof (state));
- if (mbrtowc (&wc, (const char *) buf, p - buf,
- &state) == p - buf
+ memset (&state, '\0', sizeof (state));
+ if (__mbrtowc (&wc, (const char *) buf, p - buf,
+ &state) == p - buf
&& (__wcrtomb ((char *) buf, towlower (wc), &state)
!= (size_t) -1))
re_set_fastmap (fastmap, 0, buf[0]);
@@ -367,56 +336,78 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
}
else if (type == SIMPLE_BRACKET)
{
- int i, j, ch;
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
- re_set_fastmap (fastmap, icase, ch);
+ int i, ch;
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ {
+ int j;
+ bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ if (w & ((bitset_word_t) 1 << j))
+ re_set_fastmap (fastmap, icase, ch);
+ }
}
#ifdef RE_ENABLE_I18N
else if (type == COMPLEX_BRACKET)
{
- int i;
re_charset_t *cset = dfa->nodes[node].opr.mbcset;
- if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
- || cset->nranges || cset->nchar_classes)
- {
+ int i;
+
# ifdef _LIBC
- if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+ /* See if we have to try all bytes which start multiple collation
+ elements.
+ e.g. In da_DK, we want to catch 'a' since "aa" is a valid
+ collation element, and don't catch 'b' since 'b' is
+ the only collation element which starts from 'b' (and
+ it is caught by SIMPLE_BRACKET). */
+ if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
+ && (cset->ncoll_syms || cset->nranges))
{
- /* In this case we want to catch the bytes which are
- the first byte of any collation elements.
- e.g. In da_DK, we want to catch 'a' since "aa"
- is a valid collation element, and don't catch
- 'b' since 'b' is the only collation element
- which starts from 'b'. */
- int j, ch;
const int32_t *table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (table[ch] < 0)
- re_set_fastmap (fastmap, icase, ch);
+ for (i = 0; i < SBC_MAX; ++i)
+ if (table[i] < 0)
+ re_set_fastmap (fastmap, icase, i);
}
-# else
- if (dfa->mb_cur_max > 1)
- for (i = 0; i < SBC_MAX; ++i)
- if (__btowc (i) == WEOF)
- re_set_fastmap (fastmap, icase, i);
-# endif /* not _LIBC */
+# endif /* _LIBC */
+
+ /* See if we have to start the match at all multibyte characters,
+ i.e. where we would not find an invalid sequence. This only
+ applies to multibyte character sets; for single byte character
+ sets, the SIMPLE_BRACKET again suffices. */
+ if (dfa->mb_cur_max > 1
+ && (cset->nchar_classes || cset->non_match || cset->nranges
+# ifdef _LIBC
+ || cset->nequiv_classes
+# endif /* _LIBC */
+ ))
+ {
+ unsigned char c = 0;
+ do
+ {
+ mbstate_t mbs;
+ memset (&mbs, 0, sizeof (mbs));
+ if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
+ re_set_fastmap (fastmap, false, (int) c);
+ }
+ while (++c != 0);
}
- for (i = 0; i < cset->nmbchars; ++i)
+
+ else
{
- char buf[256];
- mbstate_t state;
- memset (&state, '\0', sizeof (state));
- if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
- re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
- if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ /* ... Else catch all bytes which can start the mbchars. */
+ for (i = 0; i < cset->nmbchars; ++i)
{
- if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
- != (size_t) -1)
- re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+ char buf[256];
+ mbstate_t state;
+ memset (&state, '\0', sizeof (state));
+ if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+ re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+ if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ {
+ if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+ }
}
}
}
@@ -536,8 +527,8 @@ weak_alias (__regcomp, regcomp)
size_t
regerror (errcode, preg, errbuf, errbuf_size)
int errcode;
- const regex_t *preg;
- char *errbuf;
+ const regex_t *__restrict preg;
+ char *__restrict errbuf;
size_t errbuf_size;
{
const char *msg;
@@ -583,14 +574,10 @@ weak_alias (__regerror, regerror)
UTF-8 is used. Otherwise we would allocate memory just to initialize
it the same all the time. UTF-8 is the preferred encoding so this is
a worthwhile optimization. */
-static const bitset utf8_sb_map =
+static const bitset_t utf8_sb_map =
{
/* Set the first 128 bits. */
-# if UINT_MAX == 0xffffffff
- 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
-# else
-# error "Add case for new unsigned int size"
-# endif
+ [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
};
#endif
@@ -627,7 +614,7 @@ free_dfa_content (re_dfa_t *dfa)
re_dfastate_t *state = entry->array[j];
free_state (state);
}
- re_free (entry->array);
+ re_free (entry->array);
}
re_free (dfa->state_table);
#ifdef RE_ENABLE_I18N
@@ -739,11 +726,8 @@ libc_freeres_fn (free_mem)
SYNTAX indicate regular expression's syntax. */
static reg_errcode_t
-re_compile_internal (preg, pattern, length, syntax)
- regex_t *preg;
- const char * pattern;
- int length;
- reg_syntax_t syntax;
+re_compile_internal (regex_t *preg, const char * pattern, size_t length,
+ reg_syntax_t syntax)
{
reg_errcode_t err = REG_NOERROR;
re_dfa_t *dfa;
@@ -783,10 +767,13 @@ re_compile_internal (preg, pattern, length, syntax)
return err;
}
#ifdef DEBUG
+ /* Note: length+1 will not overflow since it is checked in init_dfa. */
dfa->re_str = re_malloc (char, length + 1);
strncpy (dfa->re_str, pattern, length + 1);
#endif
+ __libc_lock_init (dfa->lock);
+
err = re_string_construct (&regexp, pattern, length, preg->translate,
syntax & RE_ICASE, dfa);
if (BE (err != REG_NOERROR, 0))
@@ -838,11 +825,9 @@ re_compile_internal (preg, pattern, length, syntax)
as the initial length of some arrays. */
static reg_errcode_t
-init_dfa (dfa, pat_len)
- re_dfa_t *dfa;
- int pat_len;
+init_dfa (re_dfa_t *dfa, size_t pat_len)
{
- int table_size;
+ unsigned int table_size;
#ifndef _LIBC
char *codeset_name;
#endif
@@ -852,13 +837,15 @@ init_dfa (dfa, pat_len)
/* Force allocation of str_tree_storage the first time. */
dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+ /* Avoid overflows. */
+ if (pat_len == SIZE_MAX)
+ return REG_ESPACE;
+
dfa->nodes_alloc = pat_len + 1;
dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
- dfa->states_alloc = pat_len + 1;
-
/* table_size = 2 ^ ceil(log pat_len) */
- for (table_size = 1; table_size > 0; table_size <<= 1)
+ for (table_size = 1; ; table_size <<= 1)
if (table_size > pat_len)
break;
@@ -905,22 +892,19 @@ init_dfa (dfa, pat_len)
{
int i, j, ch;
- dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+ dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
if (BE (dfa->sb_char == NULL, 0))
return REG_ESPACE;
- /* Clear all bits by, then set those corresponding to single
- byte chars. */
- bitset_empty (dfa->sb_char);
-
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
+ /* Set the bits corresponding to single byte chars. */
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
{
- wchar_t wch = __btowc (ch);
+ wint_t wch = __btowc (ch);
if (wch != WEOF)
- dfa->sb_char[i] |= 1 << j;
+ dfa->sb_char[i] |= (bitset_word_t) 1 << j;
# ifndef _LIBC
- if (isascii (ch) && wch != (wchar_t) ch)
+ if (isascii (ch) && wch != ch)
dfa->map_notascii = 1;
# endif
}
@@ -938,22 +922,53 @@ init_dfa (dfa, pat_len)
character used by some operators like "\<", "\>", etc. */
static void
-init_word_char (dfa)
- re_dfa_t *dfa;
+internal_function
+init_word_char (re_dfa_t *dfa)
{
- int i, j, ch;
dfa->word_ops_used = 1;
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
+ int i = 0;
+ int ch = 0;
+ if (BE (dfa->map_notascii == 0, 1))
+ {
+ if (sizeof (dfa->word_char[0]) == 8)
+ {
+ /* The extra temporaries here avoid "implicitly truncated"
+ warnings in the case when this is dead code, i.e. 32-bit. */
+ const uint64_t wc0 = UINT64_C (0x03ff000000000000);
+ const uint64_t wc1 = UINT64_C (0x07fffffe87fffffe);
+ dfa->word_char[0] = wc0;
+ dfa->word_char[1] = wc1;
+ i = 2;
+ }
+ else if (sizeof (dfa->word_char[0]) == 4)
+ {
+ dfa->word_char[0] = UINT32_C (0x00000000);
+ dfa->word_char[1] = UINT32_C (0x03ff0000);
+ dfa->word_char[2] = UINT32_C (0x87fffffe);
+ dfa->word_char[3] = UINT32_C (0x07fffffe);
+ i = 4;
+ }
+ else
+ abort ();
+ ch = 128;
+
+ if (BE (dfa->is_utf8, 1))
+ {
+ memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
+ return;
+ }
+ }
+
+ for (; i < BITSET_WORDS; ++i)
+ for (int j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
- dfa->word_char[i] |= 1 << j;
+ dfa->word_char[i] |= (bitset_word_t) 1 << j;
}
/* Free the work area which is only used while compiling. */
static void
-free_workarea_compile (preg)
- regex_t *preg;
+free_workarea_compile (regex_t *preg)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
bin_tree_storage_t *storage, *next;
@@ -972,8 +987,7 @@ free_workarea_compile (preg)
/* Create initial states for all contexts. */
static reg_errcode_t
-create_initial_state (dfa)
- re_dfa_t *dfa;
+create_initial_state (re_dfa_t *dfa)
{
int first, i;
reg_errcode_t err;
@@ -1016,7 +1030,11 @@ create_initial_state (dfa)
int dest_idx = dfa->edests[node_idx].elems[0];
if (!re_node_set_contains (&init_nodes, dest_idx))
{
- re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+ reg_errcode_t err = re_node_set_merge (&init_nodes,
+ dfa->eclosures
+ + dest_idx);
+ if (err != REG_NOERROR)
+ return err;
i = 0;
}
}
@@ -1055,8 +1073,7 @@ create_initial_state (dfa)
DFA nodes where needed. */
static void
-optimize_utf8 (dfa)
- re_dfa_t *dfa;
+optimize_utf8 (re_dfa_t *dfa)
{
int node, i, mb_chars = 0, has_period = 0;
@@ -1068,7 +1085,7 @@ optimize_utf8 (dfa)
mb_chars = 1;
break;
case ANCHOR:
- switch (dfa->nodes[node].opr.idx)
+ switch (dfa->nodes[node].opr.ctx_type)
{
case LINE_FIRST:
case LINE_LAST:
@@ -1076,13 +1093,15 @@ optimize_utf8 (dfa)
case BUF_LAST:
break;
default:
- /* Word anchors etc. cannot be handled. */
+ /* Word anchors etc. cannot be handled. It's okay to test
+ opr.ctx_type since constraints (for all DFA nodes) are
+ created by ORing one or more opr.ctx_type values. */
return;
}
break;
case OP_PERIOD:
- has_period = 1;
- break;
+ has_period = 1;
+ break;
case OP_BACK_REF:
case OP_ALT:
case END_OF_RE:
@@ -1093,8 +1112,9 @@ optimize_utf8 (dfa)
case COMPLEX_BRACKET:
return;
case SIMPLE_BRACKET:
- /* Just double check. */
- for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
+ /* Just double check. The non-ASCII range starts at 0x80. */
+ assert (0x80 % BITSET_WORD_BITS == 0);
+ for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
if (dfa->nodes[node].opr.sbcset[i])
return;
break;
@@ -1123,8 +1143,7 @@ optimize_utf8 (dfa)
"eclosure", and "inveclosure". */
static reg_errcode_t
-analyze (preg)
- regex_t *preg;
+analyze (regex_t *preg)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
reg_errcode_t ret;
@@ -1176,7 +1195,7 @@ analyze (preg)
{
dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
if (BE (dfa->inveclosures == NULL, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
ret = calc_inveclosure (dfa);
}
@@ -1187,10 +1206,8 @@ analyze (preg)
implement parse tree visits. Instead, we use parent pointers and
some hairy code in these two functions. */
static reg_errcode_t
-postorder (root, fn, extra)
- bin_tree_t *root;
- reg_errcode_t (fn (void *, bin_tree_t *));
- void *extra;
+postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra)
{
bin_tree_t *node, *prev;
@@ -1200,16 +1217,16 @@ postorder (root, fn, extra)
if that's the only child). */
while (node->left || node->right)
if (node->left)
- node = node->left;
- else
- node = node->right;
+ node = node->left;
+ else
+ node = node->right;
do
{
reg_errcode_t err = fn (extra, node);
if (BE (err != REG_NOERROR, 0))
return err;
- if (node->parent == NULL)
+ if (node->parent == NULL)
return REG_NOERROR;
prev = node;
node = node->parent;
@@ -1221,10 +1238,8 @@ postorder (root, fn, extra)
}
static reg_errcode_t
-preorder (root, fn, extra)
- bin_tree_t *root;
- reg_errcode_t (fn (void *, bin_tree_t *));
- void *extra;
+preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra)
{
bin_tree_t *node;
@@ -1245,7 +1260,7 @@ preorder (root, fn, extra)
prev = node;
node = node->parent;
if (!node)
- return REG_NOERROR;
+ return REG_NOERROR;
}
node = node->right;
}
@@ -1256,9 +1271,7 @@ preorder (root, fn, extra)
re_search_internal to map the inner one's opr.idx to this one's. Adjust
backreferences as well. Requires a preorder visit. */
static reg_errcode_t
-optimize_subexps (extra, node)
- void *extra;
- bin_tree_t *node;
+optimize_subexps (void *extra, bin_tree_t *node)
{
re_dfa_t *dfa = (re_dfa_t *) extra;
@@ -1270,17 +1283,17 @@ optimize_subexps (extra, node)
}
else if (node->token.type == SUBEXP
- && node->left && node->left->token.type == SUBEXP)
+ && node->left && node->left->token.type == SUBEXP)
{
int other_idx = node->left->token.opr.idx;
node->left = node->left->left;
if (node->left)
- node->left->parent = node;
+ node->left->parent = node;
dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
- if (other_idx < 8 * sizeof (dfa->used_bkref_map))
- dfa->used_bkref_map &= ~(1 << other_idx);
+ if (other_idx < BITSET_WORD_BITS)
+ dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
}
return REG_NOERROR;
@@ -1289,9 +1302,7 @@ optimize_subexps (extra, node)
/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
static reg_errcode_t
-lower_subexps (extra, node)
- void *extra;
- bin_tree_t *node;
+lower_subexps (void *extra, bin_tree_t *node)
{
regex_t *preg = (regex_t *) extra;
reg_errcode_t err = REG_NOERROR;
@@ -1313,10 +1324,7 @@ lower_subexps (extra, node)
}
static bin_tree_t *
-lower_subexp (err, preg, node)
- reg_errcode_t *err;
- regex_t *preg;
- bin_tree_t *node;
+lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
bin_tree_t *body = node->left;
@@ -1328,8 +1336,9 @@ lower_subexp (err, preg, node)
very common, so we do not lose much. An example that triggers
this case is the sed "script" /\(\)/x. */
&& node->left != NULL
- && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
- || !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
+ && (node->token.opr.idx >= BITSET_WORD_BITS
+ || !(dfa->used_bkref_map
+ & ((bitset_word_t) 1 << node->token.opr.idx))))
return node->left;
/* Convert the SUBEXP node to the concatenation of an
@@ -1352,9 +1361,7 @@ lower_subexp (err, preg, node)
/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
nodes. Requires a postorder visit. */
static reg_errcode_t
-calc_first (extra, node)
- void *extra;
- bin_tree_t *node;
+calc_first (void *extra, bin_tree_t *node)
{
re_dfa_t *dfa = (re_dfa_t *) extra;
if (node->token.type == CONCAT)
@@ -1367,16 +1374,16 @@ calc_first (extra, node)
node->first = node;
node->node_idx = re_dfa_add_node (dfa, node->token);
if (BE (node->node_idx == -1, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
+ if (node->token.type == ANCHOR)
+ dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
}
return REG_NOERROR;
}
/* Pass 2: compute NEXT on the tree. Preorder visit. */
static reg_errcode_t
-calc_next (extra, node)
- void *extra;
- bin_tree_t *node;
+calc_next (void *extra, bin_tree_t *node)
{
switch (node->token.type)
{
@@ -1391,7 +1398,7 @@ calc_next (extra, node)
if (node->left)
node->left->next = node->next;
if (node->right)
- node->right->next = node->next;
+ node->right->next = node->next;
break;
}
return REG_NOERROR;
@@ -1399,9 +1406,7 @@ calc_next (extra, node)
/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
static reg_errcode_t
-link_nfa_nodes (extra, node)
- void *extra;
- bin_tree_t *node;
+link_nfa_nodes (void *extra, bin_tree_t *node)
{
re_dfa_t *dfa = (re_dfa_t *) extra;
int idx = node->node_idx;
@@ -1444,7 +1449,7 @@ link_nfa_nodes (extra, node)
case OP_BACK_REF:
dfa->nexts[idx] = node->next->node_idx;
if (node->token.type == OP_BACK_REF)
- re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
+ err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
break;
default:
@@ -1461,13 +1466,10 @@ link_nfa_nodes (extra, node)
to their own constraint. */
static reg_errcode_t
-duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
- init_constraint)
- re_dfa_t *dfa;
- int top_org_node, top_clone_node, root_node;
- unsigned int init_constraint;
+internal_function
+duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
+ int root_node, unsigned int init_constraint)
{
- reg_errcode_t err;
int org_node, clone_node, ret;
unsigned int constraint = init_constraint;
for (org_node = top_org_node, clone_node = top_clone_node;;)
@@ -1481,9 +1483,9 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
edests of the back reference. */
org_dest = dfa->nexts[org_node];
re_node_set_empty (dfa->edests + clone_node);
- err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
dfa->nexts[clone_node] = dfa->nexts[org_node];
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
@@ -1503,25 +1505,20 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
destination. */
org_dest = dfa->edests[org_node].elems[0];
re_node_set_empty (dfa->edests + clone_node);
- if (dfa->nodes[org_node].type == ANCHOR)
+ /* If the node is root_node itself, it means the epsilon closure
+ has a loop. Then tie it to the destination of the root_node. */
+ if (org_node == root_node && clone_node != org_node)
{
- /* In case of the node has another constraint, append it. */
- if (org_node == root_node && clone_node != org_node)
- {
- /* ...but if the node is root_node itself, it means the
- epsilon closure have a loop, then tie it to the
- destination of the root_node. */
- ret = re_node_set_insert (dfa->edests + clone_node,
- org_dest);
- if (BE (ret < 0, 0))
- return REG_ESPACE;
- break;
- }
- constraint |= dfa->nodes[org_node].opr.ctx_type;
+ ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ break;
}
- err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
+ /* In case the node has another constraint, add it. */
+ constraint |= dfa->nodes[org_node].constraint;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
@@ -1536,10 +1533,11 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
clone_dest = search_duplicated_node (dfa, org_dest, constraint);
if (clone_dest == -1)
{
- /* There are no such a duplicated node, create a new one. */
- err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
+ /* There is no such duplicated node, create a new one. */
+ reg_errcode_t err;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
@@ -1550,7 +1548,7 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
}
else
{
- /* There are a duplicated node which satisfy the constraint,
+ /* There is a duplicated node which satisfies the constraint,
use it to avoid infinite loop. */
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
@@ -1558,9 +1556,9 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
}
org_dest = dfa->edests[org_node].elems[1];
- err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
@@ -1575,10 +1573,8 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
satisfies the constraint CONSTRAINT. */
static int
-search_duplicated_node (dfa, org_node, constraint)
- re_dfa_t *dfa;
- int org_node;
- unsigned int constraint;
+search_duplicated_node (const re_dfa_t *dfa, int org_node,
+ unsigned int constraint)
{
int idx;
for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
@@ -1591,32 +1587,27 @@ search_duplicated_node (dfa, org_node, constraint)
}
/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
- The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
- otherwise return the error code. */
+ Return the index of the new node, or -1 if insufficient storage is
+ available. */
-static reg_errcode_t
-duplicate_node (new_idx, dfa, org_idx, constraint)
- re_dfa_t *dfa;
- int *new_idx, org_idx;
- unsigned int constraint;
+static int
+duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
{
int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
- if (BE (dup_idx == -1, 0))
- return REG_ESPACE;
- dfa->nodes[dup_idx].constraint = constraint;
- if (dfa->nodes[org_idx].type == ANCHOR)
- dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
- dfa->nodes[dup_idx].duplicated = 1;
-
- /* Store the index of the original node. */
- dfa->org_indices[dup_idx] = org_idx;
- *new_idx = dup_idx;
- return REG_NOERROR;
+ if (BE (dup_idx != -1, 1))
+ {
+ dfa->nodes[dup_idx].constraint = constraint;
+ dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
+ dfa->nodes[dup_idx].duplicated = 1;
+
+ /* Store the index of the original node. */
+ dfa->org_indices[dup_idx] = org_idx;
+ }
+ return dup_idx;
}
static reg_errcode_t
-calc_inveclosure (dfa)
- re_dfa_t *dfa;
+calc_inveclosure (re_dfa_t *dfa)
{
int src, idx, ret;
for (idx = 0; idx < dfa->nodes_len; ++idx)
@@ -1639,8 +1630,7 @@ calc_inveclosure (dfa)
/* Calculate "eclosure" for all the nodes in DFA. */
static reg_errcode_t
-calc_eclosure (dfa)
- re_dfa_t *dfa;
+calc_eclosure (re_dfa_t *dfa)
{
int node_idx, incomplete;
#ifdef DEBUG
@@ -1684,16 +1674,13 @@ calc_eclosure (dfa)
/* Calculate epsilon closure of NODE. */
static reg_errcode_t
-calc_eclosure_iter (new_set, dfa, node, root)
- re_node_set *new_set;
- re_dfa_t *dfa;
- int node, root;
+calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
{
reg_errcode_t err;
- unsigned int constraint;
- int i, incomplete;
+ int i;
re_node_set eclosure;
- incomplete = 0;
+ int ret;
+ int incomplete = 0;
err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
if (BE (err != REG_NOERROR, 0))
return err;
@@ -1702,17 +1689,14 @@ calc_eclosure_iter (new_set, dfa, node, root)
We reference this value to avoid infinite loop. */
dfa->eclosures[node].nelem = -1;
- constraint = ((dfa->nodes[node].type == ANCHOR)
- ? dfa->nodes[node].opr.ctx_type : 0);
- /* If the current node has constraints, duplicate all nodes.
- Since they must inherit the constraints. */
- if (constraint
+ /* If the current node has constraints, duplicate all nodes
+ since they must inherit the constraints. */
+ if (dfa->nodes[node].constraint
&& dfa->edests[node].nelem
&& !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
{
- int org_node, cur_node;
- org_node = cur_node = node;
- err = duplicate_node_closure (dfa, node, node, node, constraint);
+ err = duplicate_node_closure (dfa, node, node, node,
+ dfa->nodes[node].constraint);
if (BE (err != REG_NOERROR, 0))
return err;
}
@@ -1741,7 +1725,9 @@ calc_eclosure_iter (new_set, dfa, node, root)
else
eclosure_elem = dfa->eclosures[edest];
/* Merge the epsilon closure of `edest'. */
- re_node_set_merge (&eclosure, &eclosure_elem);
+ err = re_node_set_merge (&eclosure, &eclosure_elem);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
/* If the epsilon closure of `edest' is incomplete,
the epsilon closure of this node is also incomplete. */
if (dfa->eclosures[edest].nelem == 0)
@@ -1751,8 +1737,10 @@ calc_eclosure_iter (new_set, dfa, node, root)
}
}
- /* Epsilon closures include itself. */
- re_node_set_insert (&eclosure, node);
+ /* An epsilon closure includes itself. */
+ ret = re_node_set_insert (&eclosure, node);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
if (incomplete && !root)
dfa->eclosures[node].nelem = 0;
else
@@ -1767,10 +1755,8 @@ calc_eclosure_iter (new_set, dfa, node, root)
We must not use this function inside bracket expressions. */
static void
-fetch_token (result, input, syntax)
- re_token_t *result;
- re_string_t *input;
- reg_syntax_t syntax;
+internal_function
+fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
{
re_string_skip_bytes (input, peek_token (result, input, syntax));
}
@@ -1779,10 +1765,8 @@ fetch_token (result, input, syntax)
We must not use this function inside bracket expressions. */
static int
-peek_token (token, input, syntax)
- re_token_t *token;
- re_string_t *input;
- reg_syntax_t syntax;
+internal_function
+peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
{
unsigned char c;
@@ -2020,10 +2004,8 @@ peek_token (token, input, syntax)
We must not use this function out of bracket expressions. */
static int
-peek_token_bracket (token, input, syntax)
- re_token_t *token;
- re_string_t *input;
- reg_syntax_t syntax;
+internal_function
+peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
{
unsigned char c;
if (re_string_eoi (input))
@@ -2119,11 +2101,8 @@ peek_token_bracket (token, input, syntax)
EOR means end of regular expression. */
static bin_tree_t *
-parse (regexp, preg, syntax, err)
- re_string_t *regexp;
- regex_t *preg;
- reg_syntax_t syntax;
- reg_errcode_t *err;
+parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
+ reg_errcode_t *err)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
bin_tree_t *tree, *eor, *root;
@@ -2156,13 +2135,8 @@ parse (regexp, preg, syntax, err)
ALT means alternative, which represents the operator `|'. */
static bin_tree_t *
-parse_reg_exp (regexp, preg, token, syntax, nest, err)
- re_string_t *regexp;
- regex_t *preg;
- re_token_t *token;
- reg_syntax_t syntax;
- int nest;
- reg_errcode_t *err;
+parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, int nest, reg_errcode_t *err)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
bin_tree_t *tree, *branch = NULL;
@@ -2202,13 +2176,8 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
CAT means concatenation. */
static bin_tree_t *
-parse_branch (regexp, preg, token, syntax, nest, err)
- re_string_t *regexp;
- regex_t *preg;
- re_token_t *token;
- reg_syntax_t syntax;
- int nest;
- reg_errcode_t *err;
+parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, int nest, reg_errcode_t *err)
{
bin_tree_t *tree, *exp;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
@@ -2222,16 +2191,21 @@ parse_branch (regexp, preg, token, syntax, nest, err)
exp = parse_expression (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && exp == NULL, 0))
{
+ if (tree != NULL)
+ postorder (tree, free_tree, NULL);
return NULL;
}
if (tree != NULL && exp != NULL)
{
- tree = create_tree (dfa, tree, exp, CONCAT);
- if (tree == NULL)
+ bin_tree_t *newtree = create_tree (dfa, tree, exp, CONCAT);
+ if (newtree == NULL)
{
+ postorder (exp, free_tree, NULL);
+ postorder (tree, free_tree, NULL);
*err = REG_ESPACE;
return NULL;
}
+ tree = newtree;
}
else if (tree == NULL)
tree = exp;
@@ -2247,13 +2221,8 @@ parse_branch (regexp, preg, token, syntax, nest, err)
*/
static bin_tree_t *
-parse_expression (regexp, preg, token, syntax, nest, err)
- re_string_t *regexp;
- regex_t *preg;
- re_token_t *token;
- reg_syntax_t syntax;
- int nest;
- reg_errcode_t *err;
+parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, int nest, reg_errcode_t *err)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
bin_tree_t *tree;
@@ -2360,7 +2329,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
&& dfa->word_ops_used == 0)
init_word_char (dfa);
if (token->opr.ctx_type == WORD_DELIM
- || token->opr.ctx_type == NOT_WORD_DELIM)
+ || token->opr.ctx_type == NOT_WORD_DELIM)
{
bin_tree_t *tree_first, *tree_last;
if (token->opr.ctx_type == WORD_DELIM)
@@ -2368,13 +2337,13 @@ parse_expression (regexp, preg, token, syntax, nest, err)
token->opr.ctx_type = WORD_FIRST;
tree_first = create_token_tree (dfa, NULL, NULL, token);
token->opr.ctx_type = WORD_LAST;
- }
- else
- {
+ }
+ else
+ {
token->opr.ctx_type = INSIDE_WORD;
tree_first = create_token_tree (dfa, NULL, NULL, token);
token->opr.ctx_type = INSIDE_NOTWORD;
- }
+ }
tree_last = create_token_tree (dfa, NULL, NULL, token);
tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
@@ -2468,13 +2437,8 @@ parse_expression (regexp, preg, token, syntax, nest, err)
*/
static bin_tree_t *
-parse_sub_exp (regexp, preg, token, syntax, nest, err)
- re_string_t *regexp;
- regex_t *preg;
- re_token_t *token;
- reg_syntax_t syntax;
- int nest;
- reg_errcode_t *err;
+parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, int nest, reg_errcode_t *err)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
bin_tree_t *tree;
@@ -2490,11 +2454,17 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
{
tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
- *err = REG_EPAREN;
+ {
+ if (tree != NULL)
+ postorder (tree, free_tree, NULL);
+ *err = REG_EPAREN;
+ }
if (BE (*err != REG_NOERROR, 0))
return NULL;
}
- dfa->completed_bkref_map |= 1 << cur_nsub;
+
+ if (cur_nsub <= '9' - '1')
+ dfa->completed_bkref_map |= 1 << cur_nsub;
tree = create_tree (dfa, tree, NULL, SUBEXP);
if (BE (tree == NULL, 0))
@@ -2509,13 +2479,8 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
/* This function parses repetition operators like "*", "+", "{1,3}" etc. */
static bin_tree_t *
-parse_dup_op (elem, regexp, dfa, token, syntax, err)
- bin_tree_t *elem;
- re_string_t *regexp;
- re_dfa_t *dfa;
- re_token_t *token;
- reg_syntax_t syntax;
- reg_errcode_t *err;
+parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
+ re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
{
bin_tree_t *tree = NULL, *old_tree = NULL;
int i, start, end, start_idx = re_string_cur_idx (regexp);
@@ -2564,7 +2529,7 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
return elem;
}
- if (BE (end != -1 && start > end, 0))
+ if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0))
{
/* First number greater than second. */
*err = REG_BADBR;
@@ -2624,11 +2589,11 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
elem = duplicate_tree (elem, dfa);
tree = create_tree (dfa, tree, elem, CONCAT);
if (BE (elem == NULL || tree == NULL, 0))
- goto parse_dup_op_espace;
+ goto parse_dup_op_espace;
tree = create_tree (dfa, tree, NULL, OP_ALT);
if (BE (tree == NULL, 0))
- goto parse_dup_op_espace;
+ goto parse_dup_op_espace;
}
if (old_tree)
@@ -2654,15 +2619,14 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
update it. */
static reg_errcode_t
+internal_function
# ifdef RE_ENABLE_I18N
-build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
- re_charset_t *mbcset;
- int *range_alloc;
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem)
# else /* not RE_ENABLE_I18N */
-build_range_exp (sbcset, start_elem, end_elem)
+build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
+ bracket_elem_t *end_elem)
# endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- bracket_elem_t *start_elem, *end_elem;
{
unsigned int start_ch, end_ch;
/* Equivalence Classes and Character Classes can't be a range start/end. */
@@ -2681,7 +2645,9 @@ build_range_exp (sbcset, start_elem, end_elem)
# ifdef RE_ENABLE_I18N
{
- wchar_t wc, start_wc, end_wc;
+ wchar_t wc;
+ wint_t start_wc;
+ wint_t end_wc;
wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
@@ -2708,9 +2674,9 @@ build_range_exp (sbcset, start_elem, end_elem)
no MBCSET if dfa->mb_cur_max == 1. */
if (mbcset)
{
- /* Check the space of the arrays. */
- if (BE (*range_alloc == mbcset->nranges, 0))
- {
+ /* Check the space of the arrays. */
+ if (BE (*range_alloc == mbcset->nranges, 0))
+ {
/* There is not enough space, need realloc. */
wchar_t *new_array_start, *new_array_end;
int new_nranges;
@@ -2720,9 +2686,9 @@ build_range_exp (sbcset, start_elem, end_elem)
/* Use realloc since mbcset->range_starts and mbcset->range_ends
are NULL if *range_alloc == 0. */
new_array_start = re_realloc (mbcset->range_starts, wchar_t,
- new_nranges);
+ new_nranges);
new_array_end = re_realloc (mbcset->range_ends, wchar_t,
- new_nranges);
+ new_nranges);
if (BE (new_array_start == NULL || new_array_end == NULL, 0))
return REG_ESPACE;
@@ -2730,10 +2696,10 @@ build_range_exp (sbcset, start_elem, end_elem)
mbcset->range_starts = new_array_start;
mbcset->range_ends = new_array_end;
*range_alloc = new_nranges;
- }
+ }
- mbcset->range_starts[mbcset->nranges] = start_wc;
- mbcset->range_ends[mbcset->nranges++] = end_wc;
+ mbcset->range_starts[mbcset->nranges] = start_wc;
+ mbcset->range_ends[mbcset->nranges++] = end_wc;
}
/* Build the table for single byte characters. */
@@ -2774,15 +2740,13 @@ build_range_exp (sbcset, start_elem, end_elem)
pointer argument since we may update it. */
static reg_errcode_t
+internal_function
# ifdef RE_ENABLE_I18N
-build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
- re_charset_t *mbcset;
- int *coll_sym_alloc;
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+ int *coll_sym_alloc, const unsigned char *name)
# else /* not RE_ENABLE_I18N */
-build_collating_symbol (sbcset, name)
+build_collating_symbol (bitset_t sbcset, const unsigned char *name)
# endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- const unsigned char *name;
{
size_t name_len = strlen ((const char *) name);
if (BE (name_len != 1, 0))
@@ -2799,12 +2763,8 @@ build_collating_symbol (sbcset, name)
"[[.a-a.]]" etc. */
static bin_tree_t *
-parse_bracket_exp (regexp, dfa, token, syntax, err)
- re_string_t *regexp;
- re_dfa_t *dfa;
- re_token_t *token;
- reg_syntax_t syntax;
- reg_errcode_t *err;
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err)
{
#ifdef _LIBC
const unsigned char *collseqmb;
@@ -2826,28 +2786,33 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
{
int32_t hash = elem_hash ((const char *) name, name_len);
int32_t elem = hash % table_size;
- int32_t second = hash % (table_size - 2);
- while (symb_table[2 * elem] != 0)
- {
- /* First compare the hashing value. */
- if (symb_table[2 * elem] == hash
- /* Compare the length of the name. */
- && name_len == extra[symb_table[2 * elem + 1]]
- /* Compare the name. */
- && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
- name_len) == 0)
+ if (symb_table[2 * elem] != 0)
+ {
+ int32_t second = hash % (table_size - 2) + 1;
+
+ do
{
- /* Yep, this is the entry. */
- break;
- }
+ /* First compare the hashing value. */
+ if (symb_table[2 * elem] == hash
+ /* Compare the length of the name. */
+ && name_len == extra[symb_table[2 * elem + 1]]
+ /* Compare the name. */
+ && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+ name_len) == 0)
+ {
+ /* Yep, this is the entry. */
+ break;
+ }
- /* Next entry. */
- elem += second;
+ /* Next entry. */
+ elem += second;
+ }
+ while (symb_table[2 * elem] != 0);
}
return elem;
}
- /* Local function for parse_bracket_exp used in _LIBC environement.
+ /* Local function for parse_bracket_exp used in _LIBC environment.
Look up the collation sequence value of BR_ELEM.
Return the value if succeeded, UINT_MAX otherwise. */
@@ -2871,7 +2836,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
}
else if (br_elem->type == MB_CHAR)
{
- return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ if (nrules != 0)
+ return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
}
else if (br_elem->type == COLL_SYM)
{
@@ -2924,7 +2890,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
re_charset_t *mbcset;
int *range_alloc;
- re_bitset_ptr_t sbcset;
+ bitset_t sbcset;
bracket_elem_t *start_elem, *end_elem;
{
unsigned int ch;
@@ -2952,8 +2918,8 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
build below suffices. */
if (nrules > 0 || dfa->mb_cur_max > 1)
{
- /* Check the space of the arrays. */
- if (BE (*range_alloc == mbcset->nranges, 0))
+ /* Check the space of the arrays. */
+ if (BE (*range_alloc == mbcset->nranges, 0))
{
/* There is not enough space, need realloc. */
uint32_t *new_array_start;
@@ -2965,18 +2931,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
new_array_start = re_realloc (mbcset->range_starts, uint32_t,
new_nranges);
new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
+ new_nranges);
if (BE (new_array_start == NULL || new_array_end == NULL, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
mbcset->range_starts = new_array_start;
mbcset->range_ends = new_array_end;
*range_alloc = new_nranges;
}
- mbcset->range_starts[mbcset->nranges] = start_collseq;
- mbcset->range_ends[mbcset->nranges++] = end_collseq;
+ mbcset->range_starts[mbcset->nranges] = start_collseq;
+ mbcset->range_ends[mbcset->nranges++] = end_collseq;
}
/* Build the table for single byte characters. */
@@ -3007,7 +2973,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
re_charset_t *mbcset;
int *coll_sym_alloc;
- re_bitset_ptr_t sbcset;
+ bitset_t sbcset;
const unsigned char *name;
{
int32_t elem, idx;
@@ -3084,7 +3050,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
/*
if (MB_CUR_MAX > 1)
*/
- collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+ collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_TABLEMB);
@@ -3092,7 +3058,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
_NL_COLLATE_SYMB_EXTRAMB);
}
#endif
- sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+ sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
#ifdef RE_ENABLE_I18N
mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
#endif /* RE_ENABLE_I18N */
@@ -3102,6 +3068,10 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
if (BE (sbcset == NULL, 0))
#endif /* RE_ENABLE_I18N */
{
+ re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+ re_free (mbcset);
+#endif
*err = REG_ESPACE;
return NULL;
}
@@ -3119,7 +3089,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
#endif /* not RE_ENABLE_I18N */
non_match = 1;
if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
- bitset_set (sbcset, '\0');
+ bitset_set (sbcset, '\n');
re_string_skip_bytes (regexp, token_len); /* Skip a token. */
token_len = peek_token_bracket (token, regexp, syntax);
if (BE (token->type == END_OF_RE, 0))
@@ -3302,24 +3272,24 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
if (BE (mbc_tree == NULL, 0))
goto parse_bracket_exp_espace;
- for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
+ for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
if (sbcset[sbc_idx])
break;
/* If there are no bits set in sbcset, there is no point
of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
- if (sbc_idx < BITSET_UINTS)
+ if (sbc_idx < BITSET_WORDS)
{
- /* Build a tree for simple bracket. */
- br_token.type = SIMPLE_BRACKET;
- br_token.opr.sbcset = sbcset;
- work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
- if (BE (work_tree == NULL, 0))
- goto parse_bracket_exp_espace;
+ /* Build a tree for simple bracket. */
+ br_token.type = SIMPLE_BRACKET;
+ br_token.opr.sbcset = sbcset;
+ work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
- /* Then join them by ALT node. */
- work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
- if (BE (work_tree == NULL, 0))
- goto parse_bracket_exp_espace;
+ /* Then join them by ALT node. */
+ work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
}
else
{
@@ -3338,7 +3308,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
br_token.opr.sbcset = sbcset;
work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
if (BE (work_tree == NULL, 0))
- goto parse_bracket_exp_espace;
+ goto parse_bracket_exp_espace;
}
return work_tree;
@@ -3355,15 +3325,9 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
/* Parse an element in the bracket expression. */
static reg_errcode_t
-parse_bracket_element (elem, regexp, token, token_len, dfa, syntax,
- accept_hyphen)
- bracket_elem_t *elem;
- re_string_t *regexp;
- re_token_t *token;
- int token_len;
- re_dfa_t *dfa;
- reg_syntax_t syntax;
- int accept_hyphen;
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+ re_token_t *token, int token_len, re_dfa_t *dfa,
+ reg_syntax_t syntax, int accept_hyphen)
{
#ifdef RE_ENABLE_I18N
int cur_char_size;
@@ -3401,10 +3365,8 @@ parse_bracket_element (elem, regexp, token, token_len, dfa, syntax,
[=<equivalent_class>=]. */
static reg_errcode_t
-parse_bracket_symbol (elem, regexp, token)
- bracket_elem_t *elem;
- re_string_t *regexp;
- re_token_t *token;
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+ re_token_t *token)
{
unsigned char ch, delim = token->opr.c;
int i = 0;
@@ -3451,16 +3413,13 @@ parse_bracket_symbol (elem, regexp, token)
static reg_errcode_t
#ifdef RE_ENABLE_I18N
-build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
- re_charset_t *mbcset;
- int *equiv_class_alloc;
+build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+ int *equiv_class_alloc, const unsigned char *name)
#else /* not RE_ENABLE_I18N */
-build_equiv_class (sbcset, name)
+build_equiv_class (bitset_t sbcset, const unsigned char *name)
#endif /* not RE_ENABLE_I18N */
- re_bitset_ptr_t sbcset;
- const unsigned char *name;
{
-#if defined _LIBC
+#ifdef _LIBC
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0)
{
@@ -3481,30 +3440,33 @@ build_equiv_class (sbcset, name)
_NL_COLLATE_EXTRAMB);
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_INDIRECTMB);
- idx1 = findidx (&cp);
- if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+ idx1 = findidx (&cp, -1);
+ if (BE (idx1 == 0 || *cp != '\0', 0))
/* This isn't a valid character. */
return REG_ECOLLATE;
/* Build single byte matcing table for this equivalence class. */
- char_buf[1] = (unsigned char) '\0';
- len = weights[idx1];
+ len = weights[idx1 & 0xffffff];
for (ch = 0; ch < SBC_MAX; ++ch)
{
char_buf[0] = ch;
cp = char_buf;
- idx2 = findidx (&cp);
+ idx2 = findidx (&cp, 1);
/*
idx2 = table[ch];
*/
if (idx2 == 0)
/* This isn't a valid character. */
continue;
- if (len == weights[idx2])
+ /* Compare only if the length matches and the collation rule
+ index is the same. */
+ if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
{
int cnt = 0;
+
while (cnt <= len &&
- weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+ weights[(idx1 & 0xffffff) + 1 + cnt]
+ == weights[(idx2 & 0xffffff) + 1 + cnt])
++cnt;
if (cnt > len)
@@ -3546,16 +3508,13 @@ build_equiv_class (sbcset, name)
static reg_errcode_t
#ifdef RE_ENABLE_I18N
-build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax)
- re_charset_t *mbcset;
- int *char_class_alloc;
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+ re_charset_t *mbcset, int *char_class_alloc,
+ const unsigned char *class_name, reg_syntax_t syntax)
#else /* not RE_ENABLE_I18N */
-build_charclass (trans, sbcset, class_name, syntax)
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+ const unsigned char *class_name, reg_syntax_t syntax)
#endif /* not RE_ENABLE_I18N */
- unsigned RE_TRANSLATE_TYPE trans;
- re_bitset_ptr_t sbcset;
- const unsigned char *class_name;
- reg_syntax_t syntax;
{
int i;
const char *name = (const char *) class_name;
@@ -3585,39 +3544,45 @@ build_charclass (trans, sbcset, class_name, syntax)
#endif /* RE_ENABLE_I18N */
#define BUILD_CHARCLASS_LOOP(ctype_func) \
- for (i = 0; i < SBC_MAX; ++i) \
+ do { \
+ if (BE (trans != NULL, 0)) \
{ \
- if (ctype_func (i)) \
- { \
- int ch = trans ? trans[i] : i; \
- bitset_set (sbcset, ch); \
- } \
- }
+ for (i = 0; i < SBC_MAX; ++i) \
+ if (ctype_func (i)) \
+ bitset_set (sbcset, trans[i]); \
+ } \
+ else \
+ { \
+ for (i = 0; i < SBC_MAX; ++i) \
+ if (ctype_func (i)) \
+ bitset_set (sbcset, i); \
+ } \
+ } while (0)
if (strcmp (name, "alnum") == 0)
- BUILD_CHARCLASS_LOOP (isalnum)
+ BUILD_CHARCLASS_LOOP (isalnum);
else if (strcmp (name, "cntrl") == 0)
- BUILD_CHARCLASS_LOOP (iscntrl)
+ BUILD_CHARCLASS_LOOP (iscntrl);
else if (strcmp (name, "lower") == 0)
- BUILD_CHARCLASS_LOOP (islower)
+ BUILD_CHARCLASS_LOOP (islower);
else if (strcmp (name, "space") == 0)
- BUILD_CHARCLASS_LOOP (isspace)
+ BUILD_CHARCLASS_LOOP (isspace);
else if (strcmp (name, "alpha") == 0)
- BUILD_CHARCLASS_LOOP (isalpha)
+ BUILD_CHARCLASS_LOOP (isalpha);
else if (strcmp (name, "digit") == 0)
- BUILD_CHARCLASS_LOOP (isdigit)
+ BUILD_CHARCLASS_LOOP (isdigit);
else if (strcmp (name, "print") == 0)
- BUILD_CHARCLASS_LOOP (isprint)
+ BUILD_CHARCLASS_LOOP (isprint);
else if (strcmp (name, "upper") == 0)
- BUILD_CHARCLASS_LOOP (isupper)
+ BUILD_CHARCLASS_LOOP (isupper);
else if (strcmp (name, "blank") == 0)
- BUILD_CHARCLASS_LOOP (isblank)
+ BUILD_CHARCLASS_LOOP (isblank);
else if (strcmp (name, "graph") == 0)
- BUILD_CHARCLASS_LOOP (isgraph)
+ BUILD_CHARCLASS_LOOP (isgraph);
else if (strcmp (name, "punct") == 0)
- BUILD_CHARCLASS_LOOP (ispunct)
+ BUILD_CHARCLASS_LOOP (ispunct);
else if (strcmp (name, "xdigit") == 0)
- BUILD_CHARCLASS_LOOP (isxdigit)
+ BUILD_CHARCLASS_LOOP (isxdigit);
else
return REG_ECTYPE;
@@ -3625,13 +3590,10 @@ build_charclass (trans, sbcset, class_name, syntax)
}
static bin_tree_t *
-build_charclass_op (dfa, trans, class_name, extra, non_match, err)
- re_dfa_t *dfa;
- unsigned RE_TRANSLATE_TYPE trans;
- const unsigned char *class_name;
- const unsigned char *extra;
- int non_match;
- reg_errcode_t *err;
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+ const unsigned char *class_name,
+ const unsigned char *extra, int non_match,
+ reg_errcode_t *err)
{
re_bitset_ptr_t sbcset;
#ifdef RE_ENABLE_I18N
@@ -3642,7 +3604,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
re_token_t br_token;
bin_tree_t *tree;
- sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+ sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
#ifdef RE_ENABLE_I18N
mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
#endif /* RE_ENABLE_I18N */
@@ -3660,10 +3622,6 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
if (non_match)
{
#ifdef RE_ENABLE_I18N
- /*
- if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
- bitset_set(cset->sbcset, '\0');
- */
mbcset->non_match = 1;
#endif /* not RE_ENABLE_I18N */
}
@@ -3745,10 +3703,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
Return -2, If an error is occured. */
static int
-fetch_number (input, token, syntax)
- re_string_t *input;
- re_token_t *token;
- reg_syntax_t syntax;
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
{
int num = -1;
unsigned char c;
@@ -3788,11 +3743,8 @@ free_charset (re_charset_t *cset)
/* Create a tree node. */
static bin_tree_t *
-create_tree (dfa, left, right, type)
- re_dfa_t *dfa;
- bin_tree_t *left;
- bin_tree_t *right;
- re_token_type_t type;
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+ re_token_type_t type)
{
re_token_t t;
t.type = type;
@@ -3800,11 +3752,8 @@ create_tree (dfa, left, right, type)
}
static bin_tree_t *
-create_token_tree (dfa, left, right, token)
- re_dfa_t *dfa;
- bin_tree_t *left;
- bin_tree_t *right;
- const re_token_t *token;
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+ const re_token_t *token)
{
bin_tree_t *tree;
if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
@@ -3840,9 +3789,7 @@ create_token_tree (dfa, left, right, token)
To be called from preorder or postorder. */
static reg_errcode_t
-mark_opt_subexp (extra, node)
- void *extra;
- bin_tree_t *node;
+mark_opt_subexp (void *extra, bin_tree_t *node)
{
int idx = (int) (long) extra;
if (node->token.type == SUBEXP && node->token.opr.idx == idx)
@@ -3882,9 +3829,7 @@ free_tree (void *extra, bin_tree_t *node)
it's easier to duplicate. */
static bin_tree_t *
-duplicate_tree (root, dfa)
- const bin_tree_t *root;
- re_dfa_t *dfa;
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
{
const bin_tree_t *node;
bin_tree_t *dup_root;
@@ -3915,7 +3860,7 @@ duplicate_tree (root, dfa)
node = node->parent;
dup_node = dup_node->parent;
if (!node)
- return dup_root;
+ return dup_root;
}
node = node->right;
p_new = &dup_node->right;
diff --git a/regex.c b/regex.c
index 7a4f304cddc5..3ab9a6adcb7f 100644
--- a/regex.c
+++ b/regex.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ Copyright (C) 2002-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -14,36 +14,16 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
-#ifdef _AIX
-#pragma alloca
-#else
-# ifndef allocax /* predefined by HP cc +Olibcalls */
-# ifdef __GNUC__
-# define alloca(size) __builtin_alloca (size)
-# else
-# if HAVE_ALLOCA_H
-# include <alloca.h>
-# else
-# ifdef __hpux
- void *alloca ();
-# else
-# if !defined __OS2__ && !defined WIN32
- char *alloca ();
-# else
-# include <malloc.h> /* OS/2 defines alloca in here */
-# endif
-# endif
-# endif
-# endif
-# endif
+/* Make sure no one compiles this code with a C++ compiler. */
+#ifdef __cplusplus
+# error "This is C code, use a C compiler"
#endif
#ifdef _LIBC
@@ -71,15 +51,14 @@
# include "../locale/localeinfo.h"
#endif
-/* POSIX says that <sys/types.h> must be included (by the caller) before
- <regex.h>. */
-#include <sys/types.h>
-
/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
GNU regex allows. Include it before <regex.h>, which correctly
#undefs RE_DUP_MAX and sets it to the right value. */
#include <limits.h>
+/* This header defines the MIN and MAX macros. */
+#include <sys/param.h>
+
#include <regex.h>
#include "regex_internal.h"
diff --git a/regex.h b/regex.h
index 81789be8978a..469a22b1d3be 100644
--- a/regex.h
+++ b/regex.h
@@ -1,46 +1,582 @@
+/* Definitions for data structures and routines for the regular
+ expression library.
+ Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008,2011
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
#ifndef _REGEX_H
-#include <posix/regex.h>
+#define _REGEX_H 1
+
+#include <sys/types.h>
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+#ifdef __USE_GNU
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+# define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+# define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+ for ^, because it is difficult to scan the regex backwards to find
+ whether ^ should be special. */
+# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in a BRE or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+ re_compile_pattern. */
+# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+#endif
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+#ifdef __USE_GNU
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CHAR_CLASSES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INVALID_INTERVAL_ORD) \
+ & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \
+ | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS \
+ | RE_INVALID_INTERVAL_ORD)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+ removed and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+# ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+# endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+# define RE_DUP_MAX (0x7fff)
+#endif
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
-/* Document internal interfaces. */
-extern reg_syntax_t __re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
-extern const char *__re_compile_pattern
- _RE_ARGS ((const char *pattern, size_t length,
- struct re_pattern_buffer *buffer));
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
-extern int __re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
-extern int __re_search
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
- int length, int start, int range, struct re_registers *regs));
+/* POSIX `eflags' bits (i.e., information for regexec). */
-extern int __re_search_2
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, int range, struct re_registers *regs, int stop));
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
-extern int __re_match
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
- int length, int start, struct re_registers *regs));
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
-extern int __re_match_2
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, struct re_registers *regs, int stop));
+/* Use PMATCH[0] to delimit the start and end of the search in the
+ buffer. */
+#define REG_STARTEND (1 << 2)
-extern void __re_set_registers
- _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
- unsigned num_regs, regoff_t *starts, regoff_t *ends));
-extern int __regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern,
- int __cflags));
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
+ REG_ENOSYS = -1, /* This will never happen for this implementation. */
+#endif
+
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Invalid collating element. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ and `translate' can be set. After the pattern has been compiled,
+ the fields `re_nsub', `not_bol' and `not_eol' are available. All
+ other fields are private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+# define __RE_TRANSLATE_TYPE unsigned char *
+# ifdef __USE_GNU
+# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
+# endif
+#endif
+
+#ifdef __USE_GNU
+# define __REPB_PREFIX(name) name
+#else
+# define __REPB_PREFIX(name) __##name
+#endif
+
+struct re_pattern_buffer
+{
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are sometimes used as
+ array indexes. */
+ unsigned char *__REPB_PREFIX(buffer);
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long int __REPB_PREFIX(allocated);
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long int __REPB_PREFIX(used);
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t __REPB_PREFIX(syntax);
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
+ fastmap, if there is one, to skip over impossible starting points
+ for matches. */
+ char *__REPB_PREFIX(fastmap);
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation is
+ applied to a pattern when it is compiled and to a string when it
+ is matched. */
+ __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see whether or
+ not we should use the fastmap, so we don't set this absolutely
+ perfectly; see `re_compile_fastmap' (the `duplicate' case). */
+ unsigned __REPB_PREFIX(can_be_null) : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#ifdef __USE_GNU
+# define REGS_UNALLOCATED 0
+# define REGS_REALLOCATE 1
+# define REGS_FIXED 2
+#endif
+ unsigned __REPB_PREFIX(regs_allocated) : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned __REPB_PREFIX(fastmap_accurate) : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned __REPB_PREFIX(no_sub) : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the beginning
+ of the string. */
+ unsigned __REPB_PREFIX(not_bol) : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned __REPB_PREFIX(not_eol) : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned __REPB_PREFIX(newline_anchor) : 1;
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+#ifdef __USE_GNU
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+# ifndef RE_NREGS
+# define RE_NREGS 30
+# endif
+#endif
+
+
+/* POSIX specification for registers. Aside from using different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+#ifdef __USE_GNU
+/* Set the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not.
+
+ To free the allocated storage, you must call `regfree' on BUFFER.
+ Note that the translate table must either have been initialised by
+ `regcomp', with a malloc'ed value, or set to NULL before calling
+ `regfree'. */
+extern const char *re_compile_pattern (const char *__pattern, size_t __length,
+ struct re_pattern_buffer *__buffer);
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
+ int __length, int __start, int __range,
+ struct re_registers *__regs);
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2 (struct re_pattern_buffer *__buffer,
+ const char *__string1, int __length1,
+ const char *__string2, int __length2, int __start,
+ int __range, struct re_registers *__regs, int __stop);
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
+ int __length, int __start, struct re_registers *__regs);
-extern int __regexec _RE_ARGS ((const regex_t *__preg,
- const char *__string, size_t __nmatch,
- regmatch_t __pmatch[], int __eflags));
-extern size_t __regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
- char *__errbuf, size_t __errbuf_size));
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2 (struct re_pattern_buffer *__buffer,
+ const char *__string1, int __length1,
+ const char *__string2, int __length2, int __start,
+ struct re_registers *__regs, int __stop);
-extern void __regfree _RE_ARGS ((regex_t *__preg));
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers (struct re_pattern_buffer *__buffer,
+ struct re_registers *__regs,
+ unsigned int __num_regs,
+ regoff_t *__starts, regoff_t *__ends);
+#endif /* Use GNU */
+
+#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
+# ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp (const char *);
+extern int re_exec (const char *);
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+ "restrict", and "configure" may have defined "restrict". */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+# if defined restrict || 199901L <= __STDC_VERSION__
+# define __restrict restrict
+# else
+# define __restrict
+# endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax. */
+#ifndef __restrict_arr
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
+ && !defined __GNUG__
+# define __restrict_arr __restrict
+# else
+# define __restrict_arr
+# endif
#endif
+
+/* POSIX compatibility. */
+extern int regcomp (regex_t *__restrict __preg,
+ const char *__restrict __pattern,
+ int __cflags);
+
+extern int regexec (const regex_t *__restrict __preg,
+ const char *__restrict __string, size_t __nmatch,
+ regmatch_t __pmatch[__restrict_arr],
+ int __eflags);
+
+extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
+ char *__restrict __errbuf, size_t __errbuf_size);
+
+extern void regfree (regex_t *__preg);
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
diff --git a/regex_internal.c b/regex_internal.c
index b3d44c368dd4..9be8a532e6d3 100644
--- a/regex_internal.c
+++ b/regex_internal.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002-2006, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -14,29 +14,20 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
static void re_string_construct_common (const char *str, int len,
re_string_t *pstr,
RE_TRANSLATE_TYPE trans, int icase,
const re_dfa_t *dfa) internal_function;
-#ifdef RE_ENABLE_I18N
-static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
- wint_t *last_wc) internal_function;
-#endif /* RE_ENABLE_I18N */
-static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
- unsigned int hash) internal_function;
-static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash) internal_function;
-static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int context,
unsigned int hash) internal_function;
-static unsigned int inline calc_state_hash (const re_node_set *nodes,
- unsigned int context) internal_function;
/* Functions for string operation. */
@@ -44,12 +35,9 @@ static unsigned int inline calc_state_hash (const re_node_set *nodes,
re_string_reconstruct before using the object. */
static reg_errcode_t
-re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
- re_string_t *pstr;
- const char *str;
- int len, init_len, icase;
- RE_TRANSLATE_TYPE trans;
- const re_dfa_t *dfa;
+internal_function __attribute_warn_unused_result__
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+ RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
{
reg_errcode_t ret;
int init_buf_len;
@@ -75,12 +63,9 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
/* This function allocate the buffers, and initialize them. */
static reg_errcode_t
-re_string_construct (pstr, str, len, trans, icase, dfa)
- re_string_t *pstr;
- const char *str;
- int len, icase;
- RE_TRANSLATE_TYPE trans;
- const re_dfa_t *dfa;
+internal_function __attribute_warn_unused_result__
+re_string_construct (re_string_t *pstr, const char *str, int len,
+ RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
{
reg_errcode_t ret;
memset (pstr, '\0', sizeof (re_string_t));
@@ -141,33 +126,39 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
/* Helper functions for re_string_allocate, and re_string_construct. */
static reg_errcode_t
-re_string_realloc_buffers (pstr, new_buf_len)
- re_string_t *pstr;
- int new_buf_len;
+internal_function __attribute_warn_unused_result__
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
{
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
- wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
- if (BE (new_array == NULL, 0))
+ wint_t *new_wcs;
+
+ /* Avoid overflow in realloc. */
+ const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
+ if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
return REG_ESPACE;
- pstr->wcs = new_array;
+
+ new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
+ if (BE (new_wcs == NULL, 0))
+ return REG_ESPACE;
+ pstr->wcs = new_wcs;
if (pstr->offsets != NULL)
{
- int *new_array = re_realloc (pstr->offsets, int, new_buf_len);
- if (BE (new_array == NULL, 0))
+ int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
+ if (BE (new_offsets == NULL, 0))
return REG_ESPACE;
- pstr->offsets = new_array;
+ pstr->offsets = new_offsets;
}
}
#endif /* RE_ENABLE_I18N */
if (pstr->mbs_allocated)
{
- unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
- new_buf_len);
- if (BE (new_array == NULL, 0))
+ unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
+ new_buf_len);
+ if (BE (new_mbs == NULL, 0))
return REG_ESPACE;
- pstr->mbs = new_array;
+ pstr->mbs = new_mbs;
}
pstr->bufs_len = new_buf_len;
return REG_NOERROR;
@@ -175,18 +166,15 @@ re_string_realloc_buffers (pstr, new_buf_len)
static void
-re_string_construct_common (str, len, pstr, trans, icase, dfa)
- const char *str;
- int len;
- re_string_t *pstr;
- RE_TRANSLATE_TYPE trans;
- int icase;
- const re_dfa_t *dfa;
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+ RE_TRANSLATE_TYPE trans, int icase,
+ const re_dfa_t *dfa)
{
pstr->raw_mbs = (const unsigned char *) str;
pstr->len = len;
pstr->raw_len = len;
- pstr->trans = (unsigned RE_TRANSLATE_TYPE) trans;
+ pstr->trans = trans;
pstr->icase = icase ? 1 : 0;
pstr->mbs_allocated = (trans != NULL || icase);
pstr->mb_cur_max = dfa->mb_cur_max;
@@ -210,12 +198,12 @@ re_string_construct_common (str, len, pstr, trans, icase, dfa)
built and starts from PSTR->VALID_LEN. */
static void
-build_wcs_buffer (pstr)
- re_string_t *pstr;
+internal_function
+build_wcs_buffer (re_string_t *pstr)
{
#ifdef _LIBC
- unsigned char buf[MB_CUR_MAX];
- assert (MB_CUR_MAX >= pstr->mb_cur_max);
+ unsigned char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
#else
unsigned char buf[64];
#endif
@@ -247,14 +235,9 @@ build_wcs_buffer (pstr)
}
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
- mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
- if (BE (mbclen == (size_t) -2, 0))
- {
- /* The buffer doesn't have enough space, finish to build. */
- pstr->cur_state = prev_st;
- break;
- }
- else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ if (BE (mbclen == (size_t) -1 || mbclen == 0
+ || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
{
/* We treat these cases as a singlebyte character. */
mbclen = 1;
@@ -263,6 +246,12 @@ build_wcs_buffer (pstr)
wc = pstr->trans[wc];
pstr->cur_state = prev_st;
}
+ else if (BE (mbclen == (size_t) -2, 0))
+ {
+ /* The buffer doesn't have enough space; stop building here. */
+ pstr->cur_state = prev_st;
+ break;
+ }
/* Write wide character and padding. */
pstr->wcs[byte_idx++] = wc;
@@ -277,16 +266,16 @@ build_wcs_buffer (pstr)
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
but for REG_ICASE. */
-static int
-build_wcs_upper_buffer (pstr)
- re_string_t *pstr;
+static reg_errcode_t
+internal_function __attribute_warn_unused_result__
+build_wcs_upper_buffer (re_string_t *pstr)
{
mbstate_t prev_st;
int src_idx, byte_idx, end_idx, remain_len;
size_t mbclen;
#ifdef _LIBC
- char buf[MB_CUR_MAX];
- assert (MB_CUR_MAX >= pstr->mb_cur_max);
+ char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
#else
char buf[64];
#endif
@@ -317,9 +306,9 @@ build_wcs_upper_buffer (pstr)
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc,
- ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
- + byte_idx), remain_len, &pstr->cur_state);
+ mbclen = __mbrtowc (&wc,
+ ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen + 2 > 2, 1))
{
wchar_t wcu = wc;
@@ -345,9 +334,11 @@ build_wcs_upper_buffer (pstr)
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
- else if (mbclen == (size_t) -1 || mbclen == 0)
+ else if (mbclen == (size_t) -1 || mbclen == 0
+ || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
{
- /* It is an invalid character or '\0'. Just use the byte. */
+ /* It is an invalid character, an incomplete character
+ at the end of the string, or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
pstr->mbs[byte_idx] = ch;
/* And also cast it to wide char. */
@@ -387,7 +378,7 @@ build_wcs_upper_buffer (pstr)
}
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
- mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen + 2 > 2, 1))
{
wchar_t wcu = wc;
@@ -441,8 +432,8 @@ build_wcs_upper_buffer (pstr)
src_idx += mbclen;
continue;
}
- else
- memcpy (pstr->mbs + byte_idx, p, mbclen);
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
}
else
memcpy (pstr->mbs + byte_idx, p, mbclen);
@@ -460,7 +451,8 @@ build_wcs_upper_buffer (pstr)
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
- else if (mbclen == (size_t) -1 || mbclen == 0)
+ else if (mbclen == (size_t) -1 || mbclen == 0
+ || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
{
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
@@ -494,35 +486,39 @@ build_wcs_upper_buffer (pstr)
Return the index. */
static int
-re_string_skip_chars (pstr, new_raw_idx, last_wc)
- re_string_t *pstr;
- int new_raw_idx;
- wint_t *last_wc;
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
{
mbstate_t prev_st;
int rawbuf_idx;
size_t mbclen;
- wchar_t wc = 0;
+ wint_t wc = WEOF;
/* Skip the characters which are not necessary to check. */
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
rawbuf_idx < new_raw_idx;)
{
- int remain_len;
- remain_len = pstr->len - rawbuf_idx;
+ wchar_t wc2;
+ int remain_len = pstr->raw_len - rawbuf_idx;
prev_st = pstr->cur_state;
- mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
- remain_len, &pstr->cur_state);
- if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+ mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+ remain_len, &pstr->cur_state);
+ if (BE ((ssize_t) mbclen <= 0, 0))
{
- /* We treat these cases as a singlebyte character. */
+ /* We treat these cases as a single byte character. */
+ if (mbclen == 0 || remain_len == 0)
+ wc = L'\0';
+ else
+ wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
mbclen = 1;
pstr->cur_state = prev_st;
}
+ else
+ wc = (wint_t) wc2;
/* Then proceed the next character. */
rawbuf_idx += mbclen;
}
- *last_wc = (wint_t) wc;
+ *last_wc = wc;
return rawbuf_idx;
}
#endif /* RE_ENABLE_I18N */
@@ -531,8 +527,8 @@ re_string_skip_chars (pstr, new_raw_idx, last_wc)
This function is used in case of REG_ICASE. */
static void
-build_upper_buffer (pstr)
- re_string_t *pstr;
+internal_function
+build_upper_buffer (re_string_t *pstr)
{
int char_idx, end_idx;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
@@ -554,8 +550,8 @@ build_upper_buffer (pstr)
/* Apply TRANS to the buffer in PSTR. */
static void
-re_string_translate_buffer (pstr)
- re_string_t *pstr;
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
{
int buf_idx, end_idx;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
@@ -575,9 +571,8 @@ re_string_translate_buffer (pstr)
convert to upper case in case of REG_ICASE, apply translation. */
static reg_errcode_t
-re_string_reconstruct (pstr, idx, eflags)
- re_string_t *pstr;
- int idx, eflags;
+internal_function __attribute_warn_unused_result__
+re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
{
int offset = idx - pstr->raw_mbs_idx;
if (BE (offset < 0, 0))
@@ -602,34 +597,98 @@ re_string_reconstruct (pstr, idx, eflags)
if (BE (offset != 0, 1))
{
- /* Are the characters which are already checked remain? */
- if (BE (offset < pstr->valid_raw_len, 1)
-#ifdef RE_ENABLE_I18N
- /* Handling this would enlarge the code too much.
- Accept a slowdown in that case. */
- && pstr->offsets_needed == 0
-#endif
- )
+ /* Should the already checked characters be kept? */
+ if (BE (offset < pstr->valid_raw_len, 1))
{
/* Yes, move them to the front of the buffer. */
- pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
#ifdef RE_ENABLE_I18N
- if (pstr->mb_cur_max > 1)
- memmove (pstr->wcs, pstr->wcs + offset,
- (pstr->valid_len - offset) * sizeof (wint_t));
+ if (BE (pstr->offsets_needed, 0))
+ {
+ int low = 0, high = pstr->valid_len, mid;
+ do
+ {
+ mid = (high + low) / 2;
+ if (pstr->offsets[mid] > offset)
+ high = mid;
+ else if (pstr->offsets[mid] < offset)
+ low = mid + 1;
+ else
+ break;
+ }
+ while (low < high);
+ if (pstr->offsets[mid] < offset)
+ ++mid;
+ pstr->tip_context = re_string_context_at (pstr, mid - 1,
+ eflags);
+ /* This can be quite complicated, so handle specially
+ only the common and easy case where the character with
+ different length representation of lower and upper
+ case is present at or after offset. */
+ if (pstr->valid_len > offset
+ && mid == offset && pstr->offsets[mid] == offset)
+ {
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
+ memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+ pstr->valid_raw_len -= offset;
+ for (low = 0; low < pstr->valid_len; low++)
+ pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+ }
+ else
+ {
+ /* Otherwise, just find out how long the partial multibyte
+ character at offset is and fill it with WEOF/255. */
+ pstr->len = pstr->raw_len - idx + offset;
+ pstr->stop = pstr->raw_stop - idx + offset;
+ pstr->offsets_needed = 0;
+ while (mid > 0 && pstr->offsets[mid - 1] == offset)
+ --mid;
+ while (mid < pstr->valid_len)
+ if (pstr->wcs[mid] != WEOF)
+ break;
+ else
+ ++mid;
+ if (mid == pstr->valid_len)
+ pstr->valid_len = 0;
+ else
+ {
+ pstr->valid_len = pstr->offsets[mid] - offset;
+ if (pstr->valid_len)
+ {
+ for (low = 0; low < pstr->valid_len; ++low)
+ pstr->wcs[low] = WEOF;
+ memset (pstr->mbs, 255, pstr->valid_len);
+ }
+ }
+ pstr->valid_raw_len = pstr->valid_len;
+ }
+ }
+ else
+#endif
+ {
+ pstr->tip_context = re_string_context_at (pstr, offset - 1,
+ eflags);
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
#endif /* RE_ENABLE_I18N */
- if (BE (pstr->mbs_allocated, 0))
- memmove (pstr->mbs, pstr->mbs + offset,
- pstr->valid_len - offset);
- pstr->valid_len -= offset;
- pstr->valid_raw_len -= offset;
+ if (BE (pstr->mbs_allocated, 0))
+ memmove (pstr->mbs, pstr->mbs + offset,
+ pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+ pstr->valid_raw_len -= offset;
#if DEBUG
- assert (pstr->valid_len > 0);
+ assert (pstr->valid_len > 0);
#endif
+ }
}
else
{
/* No, skip all characters until IDX. */
+ int prev_valid_len = pstr->valid_len;
+
#ifdef RE_ENABLE_I18N
if (BE (pstr->offsets_needed, 0))
{
@@ -639,7 +698,6 @@ re_string_reconstruct (pstr, idx, eflags)
}
#endif
pstr->valid_len = 0;
- pstr->valid_raw_len = 0;
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
@@ -648,47 +706,72 @@ re_string_reconstruct (pstr, idx, eflags)
if (pstr->is_utf8)
{
- const unsigned char *raw, *p, *q, *end;
+ const unsigned char *raw, *p, *end;
/* Special case UTF-8. Multi-byte chars start with any
byte other than 0x80 - 0xbf. */
raw = pstr->raw_mbs + pstr->raw_mbs_idx;
end = raw + (offset - pstr->mb_cur_max);
- for (p = raw + offset - 1; p >= end; --p)
- if ((*p & 0xc0) != 0x80)
- {
- mbstate_t cur_state;
- wchar_t wc2;
- int mlen = raw + pstr->len - p;
- unsigned char buf[6];
-
- q = p;
- if (BE (pstr->trans != NULL, 0))
- {
- int i = mlen < 6 ? mlen : 6;
- while (--i >= 0)
- buf[i] = pstr->trans[p[i]];
- q = buf;
- }
- /* XXX Don't use mbrtowc, we know which conversion
- to use (UTF-8 -> UCS4). */
- memset (&cur_state, 0, sizeof (cur_state));
- mlen = (mbrtowc (&wc2, (const char *) p, mlen,
- &cur_state)
- - (raw + offset - p));
- if (mlen >= 0)
- {
- memset (&pstr->cur_state, '\0',
- sizeof (mbstate_t));
- pstr->valid_len = mlen;
- wc = wc2;
- }
- break;
- }
+ if (end < pstr->raw_mbs)
+ end = pstr->raw_mbs;
+ p = raw + offset - 1;
+#ifdef _LIBC
+ /* We know the wchar_t encoding is UCS4, so for the simple
+ case, ASCII characters, skip the conversion step. */
+ if (isascii (*p) && BE (pstr->trans == NULL, 1))
+ {
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+ /* pstr->valid_len = 0; */
+ wc = (wchar_t) *p;
+ }
+ else
+#endif
+ for (; p >= end; --p)
+ if ((*p & 0xc0) != 0x80)
+ {
+ mbstate_t cur_state;
+ wchar_t wc2;
+ int mlen = raw + pstr->len - p;
+ unsigned char buf[6];
+ size_t mbclen;
+
+ const unsigned char *pp = p;
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i = mlen < 6 ? mlen : 6;
+ while (--i >= 0)
+ buf[i] = pstr->trans[p[i]];
+ pp = buf;
+ }
+ /* XXX Don't use mbrtowc, we know which conversion
+ to use (UTF-8 -> UCS4). */
+ memset (&cur_state, 0, sizeof (cur_state));
+ mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
+ &cur_state);
+ if (raw + offset - p <= mbclen
+ && mbclen < (size_t) -2)
+ {
+ memset (&pstr->cur_state, '\0',
+ sizeof (mbstate_t));
+ pstr->valid_len = mbclen - (raw + offset - p);
+ wc = wc2;
+ }
+ break;
+ }
}
if (wc == WEOF)
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+ if (wc == WEOF)
+ pstr->tip_context
+ = re_string_context_at (pstr, prev_valid_len - 1, eflags);
+ else
+ pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+ && IS_WIDE_WORD_CHAR (wc))
+ ? CONTEXT_WORD
+ : ((IS_WIDE_NEWLINE (wc)
+ && pstr->newline_anchor)
+ ? CONTEXT_NEWLINE : 0));
if (BE (pstr->valid_len, 0))
{
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
@@ -697,17 +780,12 @@ re_string_reconstruct (pstr, idx, eflags)
memset (pstr->mbs, 255, pstr->valid_len);
}
pstr->valid_raw_len = pstr->valid_len;
- pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
- && IS_WIDE_WORD_CHAR (wc))
- ? CONTEXT_WORD
- : ((IS_WIDE_NEWLINE (wc)
- && pstr->newline_anchor)
- ? CONTEXT_NEWLINE : 0));
}
else
#endif /* RE_ENABLE_I18N */
{
int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+ pstr->valid_raw_len = 0;
if (pstr->trans)
c = pstr->trans[c];
pstr->tip_context = (bitset_contain (pstr->word_char, c)
@@ -729,7 +807,7 @@ re_string_reconstruct (pstr, idx, eflags)
{
if (pstr->icase)
{
- int ret = build_wcs_upper_buffer (pstr);
+ reg_errcode_t ret = build_wcs_upper_buffer (pstr);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
@@ -738,24 +816,23 @@ re_string_reconstruct (pstr, idx, eflags)
}
else
#endif /* RE_ENABLE_I18N */
- if (BE (pstr->mbs_allocated, 0))
- {
- if (pstr->icase)
- build_upper_buffer (pstr);
- else if (pstr->trans != NULL)
- re_string_translate_buffer (pstr);
- }
- else
- pstr->valid_len = pstr->len;
+ if (BE (pstr->mbs_allocated, 0))
+ {
+ if (pstr->icase)
+ build_upper_buffer (pstr);
+ else if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ }
+ else
+ pstr->valid_len = pstr->len;
pstr->cur_idx = 0;
return REG_NOERROR;
}
static unsigned char
-re_string_peek_byte_case (pstr, idx)
- const re_string_t *pstr;
- int idx;
+internal_function __attribute ((pure))
+re_string_peek_byte_case (const re_string_t *pstr, int idx)
{
int ch, off;
@@ -790,8 +867,8 @@ re_string_peek_byte_case (pstr, idx)
}
static unsigned char
-re_string_fetch_byte_case (pstr)
- re_string_t *pstr;
+internal_function
+re_string_fetch_byte_case (re_string_t *pstr)
{
if (BE (!pstr->mbs_allocated, 1))
return re_string_fetch_byte (pstr);
@@ -827,8 +904,8 @@ re_string_fetch_byte_case (pstr)
}
static void
-re_string_destruct (pstr)
- re_string_t *pstr;
+internal_function
+re_string_destruct (re_string_t *pstr)
{
#ifdef RE_ENABLE_I18N
re_free (pstr->wcs);
@@ -841,9 +918,8 @@ re_string_destruct (pstr)
/* Return the context at IDX in INPUT. */
static unsigned int
-re_string_context_at (input, idx, eflags)
- const re_string_t *input;
- int idx, eflags;
+internal_function
+re_string_context_at (const re_string_t *input, int idx, int eflags)
{
int c;
if (BE (idx < 0, 0))
@@ -887,9 +963,8 @@ re_string_context_at (input, idx, eflags)
/* Functions for set operation. */
static reg_errcode_t
-re_node_set_alloc (set, size)
- re_node_set *set;
- int size;
+internal_function __attribute_warn_unused_result__
+re_node_set_alloc (re_node_set *set, int size)
{
set->alloc = size;
set->nelem = 0;
@@ -900,9 +975,8 @@ re_node_set_alloc (set, size)
}
static reg_errcode_t
-re_node_set_init_1 (set, elem)
- re_node_set *set;
- int elem;
+internal_function __attribute_warn_unused_result__
+re_node_set_init_1 (re_node_set *set, int elem)
{
set->alloc = 1;
set->nelem = 1;
@@ -917,9 +991,8 @@ re_node_set_init_1 (set, elem)
}
static reg_errcode_t
-re_node_set_init_2 (set, elem1, elem2)
- re_node_set *set;
- int elem1, elem2;
+internal_function __attribute_warn_unused_result__
+re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
{
set->alloc = 2;
set->elems = re_malloc (int, 2);
@@ -948,9 +1021,8 @@ re_node_set_init_2 (set, elem1, elem2)
}
static reg_errcode_t
-re_node_set_init_copy (dest, src)
- re_node_set *dest;
- const re_node_set *src;
+internal_function __attribute_warn_unused_result__
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
{
dest->nelem = src->nelem;
if (src->nelem > 0)
@@ -974,9 +1046,9 @@ re_node_set_init_copy (dest, src)
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
static reg_errcode_t
-re_node_set_add_intersect (dest, src1, src2)
- re_node_set *dest;
- const re_node_set *src1, *src2;
+internal_function __attribute_warn_unused_result__
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+ const re_node_set *src2)
{
int i1, i2, is, id, delta, sbase;
if (src1->nelem == 0 || src2->nelem == 0)
@@ -989,7 +1061,7 @@ re_node_set_add_intersect (dest, src1, src2)
int new_alloc = src1->nelem + src2->nelem + dest->alloc;
int *new_elems = re_realloc (dest->elems, int, new_alloc);
if (BE (new_elems == NULL, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
dest->elems = new_elems;
dest->alloc = new_alloc;
}
@@ -1008,8 +1080,8 @@ re_node_set_add_intersect (dest, src1, src2)
while (id >= 0 && dest->elems[id] > src1->elems[i1])
--id;
- if (id < 0 || dest->elems[id] != src1->elems[i1])
- dest->elems[--sbase] = src1->elems[i1];
+ if (id < 0 || dest->elems[id] != src1->elems[i1])
+ dest->elems[--sbase] = src1->elems[i1];
if (--i1 < 0 || --i2 < 0)
break;
@@ -1039,20 +1111,20 @@ re_node_set_add_intersect (dest, src1, src2)
if (delta > 0 && id >= 0)
for (;;)
{
- if (dest->elems[is] > dest->elems[id])
- {
- /* Copy from the top. */
- dest->elems[id + delta--] = dest->elems[is--];
- if (delta == 0)
- break;
- }
- else
- {
- /* Slide from the bottom. */
- dest->elems[id + delta] = dest->elems[id];
- if (--id < 0)
- break;
- }
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (--id < 0)
+ break;
+ }
}
/* Copy remaining SRC elements. */
@@ -1065,9 +1137,9 @@ re_node_set_add_intersect (dest, src1, src2)
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
static reg_errcode_t
-re_node_set_init_union (dest, src1, src2)
- re_node_set *dest;
- const re_node_set *src1, *src2;
+internal_function __attribute_warn_unused_result__
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+ const re_node_set *src2)
{
int i1, i2, id;
if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
@@ -1118,9 +1190,8 @@ re_node_set_init_union (dest, src1, src2)
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
static reg_errcode_t
-re_node_set_merge (dest, src)
- re_node_set *dest;
- const re_node_set *src;
+internal_function __attribute_warn_unused_result__
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
{
int is, id, sbase, delta;
if (src == NULL || src->nelem == 0)
@@ -1148,11 +1219,11 @@ re_node_set_merge (dest, src)
is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
{
if (dest->elems[id] == src->elems[is])
- is--, id--;
+ is--, id--;
else if (dest->elems[id] < src->elems[is])
- dest->elems[--sbase] = src->elems[is--];
+ dest->elems[--sbase] = src->elems[is--];
else /* if (dest->elems[id] > src->elems[is]) */
- --id;
+ --id;
}
if (is >= 0)
@@ -1174,21 +1245,21 @@ re_node_set_merge (dest, src)
for (;;)
{
if (dest->elems[is] > dest->elems[id])
- {
+ {
/* Copy from the top. */
- dest->elems[id + delta--] = dest->elems[is--];
+ dest->elems[id + delta--] = dest->elems[is--];
if (delta == 0)
break;
}
else
- {
- /* Slide from the bottom. */
- dest->elems[id + delta] = dest->elems[id];
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
if (--id < 0)
{
/* Copy remaining SRC elements. */
memcpy (dest->elems, dest->elems + sbase,
- delta * sizeof (int));
+ delta * sizeof (int));
break;
}
}
@@ -1202,9 +1273,8 @@ re_node_set_merge (dest, src)
return -1 if an error is occured, return 1 otherwise. */
static int
-re_node_set_insert (set, elem)
- re_node_set *set;
- int elem;
+internal_function __attribute_warn_unused_result__
+re_node_set_insert (re_node_set *set, int elem)
{
int idx;
/* In case the set is empty. */
@@ -1227,12 +1297,12 @@ re_node_set_insert (set, elem)
/* Realloc if we need. */
if (set->alloc == set->nelem)
{
- int *new_array;
+ int *new_elems;
set->alloc = set->alloc * 2;
- new_array = re_realloc (set->elems, int, set->alloc);
- if (BE (new_array == NULL, 0))
+ new_elems = re_realloc (set->elems, int, set->alloc);
+ if (BE (new_elems == NULL, 0))
return -1;
- set->elems = new_array;
+ set->elems = new_elems;
}
/* Move the elements which follows the new element. Test the
@@ -1241,12 +1311,12 @@ re_node_set_insert (set, elem)
{
idx = 0;
for (idx = set->nelem; idx > 0; idx--)
- set->elems[idx] = set->elems[idx - 1];
+ set->elems[idx] = set->elems[idx - 1];
}
else
{
for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
- set->elems[idx] = set->elems[idx - 1];
+ set->elems[idx] = set->elems[idx - 1];
}
/* Insert the new element. */
@@ -1260,19 +1330,18 @@ re_node_set_insert (set, elem)
Return -1 if an error is occured, return 1 otherwise. */
static int
-re_node_set_insert_last (set, elem)
- re_node_set *set;
- int elem;
+internal_function __attribute_warn_unused_result__
+re_node_set_insert_last (re_node_set *set, int elem)
{
/* Realloc if we need. */
if (set->alloc == set->nelem)
{
- int *new_array;
+ int *new_elems;
set->alloc = (set->alloc + 1) * 2;
- new_array = re_realloc (set->elems, int, set->alloc);
- if (BE (new_array == NULL, 0))
+ new_elems = re_realloc (set->elems, int, set->alloc);
+ if (BE (new_elems == NULL, 0))
return -1;
- set->elems = new_array;
+ set->elems = new_elems;
}
/* Insert the new element. */
@@ -1284,8 +1353,8 @@ re_node_set_insert_last (set, elem)
return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
static int
-re_node_set_compare (set1, set2)
- const re_node_set *set1, *set2;
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
{
int i;
if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
@@ -1299,9 +1368,8 @@ re_node_set_compare (set1, set2)
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
static int
-re_node_set_contains (set, elem)
- const re_node_set *set;
- int elem;
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, int elem)
{
unsigned int idx, right, mid;
if (set->nelem <= 0)
@@ -1322,9 +1390,8 @@ re_node_set_contains (set, elem)
}
static void
-re_node_set_remove_at (set, idx)
- re_node_set *set;
- int idx;
+internal_function
+re_node_set_remove_at (re_node_set *set, int idx)
{
if (idx < 0 || idx >= set->nelem)
return;
@@ -1338,22 +1405,28 @@ re_node_set_remove_at (set, idx)
Or return -1, if an error will be occured. */
static int
-re_dfa_add_node (dfa, token)
- re_dfa_t *dfa;
- re_token_t token;
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
{
int type = token.type;
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
{
- int new_nodes_alloc = dfa->nodes_alloc * 2;
+ size_t new_nodes_alloc = dfa->nodes_alloc * 2;
int *new_nexts, *new_indices;
re_node_set *new_edests, *new_eclosures;
+ re_token_t *new_nodes;
- re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
- new_nodes_alloc);
- if (BE (new_array == NULL, 0))
+ /* Avoid overflows in realloc. */
+ const size_t max_object_size = MAX (sizeof (re_token_t),
+ MAX (sizeof (re_node_set),
+ sizeof (int)));
+ if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
+ return -1;
+
+ new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+ if (BE (new_nodes == NULL, 0))
return -1;
- dfa->nodes = new_array;
+ dfa->nodes = new_nodes;
new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
@@ -1379,10 +1452,9 @@ re_dfa_add_node (dfa, token)
return dfa->nodes_len++;
}
-static unsigned int inline
-calc_state_hash (nodes, context)
- const re_node_set *nodes;
- unsigned int context;
+static inline unsigned int
+internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
{
unsigned int hash = nodes->nelem + context;
int i;
@@ -1400,11 +1472,10 @@ calc_state_hash (nodes, context)
- We never return non-NULL value in case of any errors, it is for
optimization. */
-static re_dfastate_t*
-re_acquire_state (err, dfa, nodes)
- reg_errcode_t *err;
- re_dfa_t *dfa;
- const re_node_set *nodes;
+static re_dfastate_t *
+internal_function __attribute_warn_unused_result__
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+ const re_node_set *nodes)
{
unsigned int hash;
re_dfastate_t *new_state;
@@ -1429,13 +1500,10 @@ re_acquire_state (err, dfa, nodes)
/* There are no appropriate state in the dfa, create the new one. */
new_state = create_ci_newstate (dfa, nodes, hash);
- if (BE (new_state != NULL, 1))
- return new_state;
- else
- {
- *err = REG_ESPACE;
- return NULL;
- }
+ if (BE (new_state == NULL, 0))
+ *err = REG_ESPACE;
+
+ return new_state;
}
/* Search for the state whose node_set is equivalent to NODES and
@@ -1448,12 +1516,10 @@ re_acquire_state (err, dfa, nodes)
- We never return non-NULL value in case of any errors, it is for
optimization. */
-static re_dfastate_t*
-re_acquire_state_context (err, dfa, nodes, context)
- reg_errcode_t *err;
- re_dfa_t *dfa;
- const re_node_set *nodes;
- unsigned int context;
+static re_dfastate_t *
+internal_function __attribute_warn_unused_result__
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+ const re_node_set *nodes, unsigned int context)
{
unsigned int hash;
re_dfastate_t *new_state;
@@ -1477,13 +1543,10 @@ re_acquire_state_context (err, dfa, nodes, context)
}
/* There are no appropriate state in `dfa', create the new one. */
new_state = create_cd_newstate (dfa, nodes, context, hash);
- if (BE (new_state != NULL, 1))
- return new_state;
- else
- {
- *err = REG_ESPACE;
- return NULL;
- }
+ if (BE (new_state == NULL, 0))
+ *err = REG_ESPACE;
+
+ return new_state;
}
/* Finish initialization of the new state NEWSTATE, and using its hash value
@@ -1491,10 +1554,9 @@ re_acquire_state_context (err, dfa, nodes, context)
indicates the error code if failed. */
static reg_errcode_t
-register_state (dfa, newstate, hash)
- re_dfa_t *dfa;
- re_dfastate_t *newstate;
- unsigned int hash;
+__attribute_warn_unused_result__
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+ unsigned int hash)
{
struct re_state_table_entry *spot;
reg_errcode_t err;
@@ -1508,7 +1570,8 @@ register_state (dfa, newstate, hash)
{
int elem = newstate->nodes.elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
- re_node_set_insert_last (&newstate->non_eps_nodes, elem);
+ if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0)
+ return REG_ESPACE;
}
spot = dfa->state_table + (hash & dfa->state_hash_mask);
@@ -1526,14 +1589,29 @@ register_state (dfa, newstate, hash)
return REG_NOERROR;
}
+static void
+free_state (re_dfastate_t *state)
+{
+ re_node_set_free (&state->non_eps_nodes);
+ re_node_set_free (&state->inveclosure);
+ if (state->entrance_nodes != &state->nodes)
+ {
+ re_node_set_free (state->entrance_nodes);
+ re_free (state->entrance_nodes);
+ }
+ re_node_set_free (&state->nodes);
+ re_free (state->word_trtable);
+ re_free (state->trtable);
+ re_free (state);
+}
+
/* Create the new state which is independ of contexts.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t *
-create_ci_newstate (dfa, nodes, hash)
- re_dfa_t *dfa;
- const re_node_set *nodes;
- unsigned int hash;
+internal_function __attribute_warn_unused_result__
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+ unsigned int hash)
{
int i;
reg_errcode_t err;
@@ -1581,10 +1659,9 @@ create_ci_newstate (dfa, nodes, hash)
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t *
-create_cd_newstate (dfa, nodes, context, hash)
- re_dfa_t *dfa;
- const re_node_set *nodes;
- unsigned int context, hash;
+internal_function __attribute_warn_unused_result__
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+ unsigned int context, unsigned int hash)
{
int i, nctx_nodes = 0;
reg_errcode_t err;
@@ -1605,11 +1682,9 @@ create_cd_newstate (dfa, nodes, context, hash)
for (i = 0 ; i < nodes->nelem ; i++)
{
- unsigned int constraint = 0;
re_token_t *node = dfa->nodes + nodes->elems[i];
re_token_type_t type = node->type;
- if (node->constraint)
- constraint = node->constraint;
+ unsigned int constraint = node->constraint;
if (type == CHARACTER && !constraint)
continue;
@@ -1622,8 +1697,6 @@ create_cd_newstate (dfa, nodes, context, hash)
newstate->halt = 1;
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
- else if (type == ANCHOR)
- constraint = node->opr.ctx_type;
if (constraint)
{
@@ -1635,7 +1708,9 @@ create_cd_newstate (dfa, nodes, context, hash)
free_state (newstate);
return NULL;
}
- re_node_set_init_copy (newstate->entrance_nodes, nodes);
+ if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
+ != REG_NOERROR)
+ return NULL;
nctx_nodes = 0;
newstate->has_constraint = 1;
}
@@ -1655,20 +1730,3 @@ create_cd_newstate (dfa, nodes, context, hash)
}
return newstate;
}
-
-static void
-free_state (state)
- re_dfastate_t *state;
-{
- re_node_set_free (&state->non_eps_nodes);
- re_node_set_free (&state->inveclosure);
- if (state->entrance_nodes != &state->nodes)
- {
- re_node_set_free (state->entrance_nodes);
- re_free (state->entrance_nodes);
- }
- re_node_set_free (&state->nodes);
- re_free (state->word_trtable);
- re_free (state->trtable);
- re_free (state);
-}
diff --git a/regex_internal.h b/regex_internal.h
index 58fa749e9002..6dfdef663943 100644
--- a/regex_internal.h
+++ b/regex_internal.h
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#ifndef _REGEX_INTERNAL_H
#define _REGEX_INTERNAL_H 1
@@ -39,6 +38,20 @@
#if defined HAVE_WCTYPE_H || defined _LIBC
# include <wctype.h>
#endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined HAVE_STDBOOL_H || defined _LIBC
+# include <stdbool.h>
+#endif /* HAVE_STDBOOL_H || _LIBC */
+#if defined HAVE_STDINT_H || defined _LIBC
+# include <stdint.h>
+#endif /* HAVE_STDINT_H || _LIBC */
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_define(CLASS,NAME)
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
/* In case that the system doesn't have isblank(). */
#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
@@ -60,7 +73,7 @@
# ifdef _LIBC
# undef gettext
# define gettext(msgid) \
- INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
+ __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES)
# endif
#else
# define gettext(msgid) (msgid)
@@ -72,6 +85,11 @@
# define gettext_noop(String) String
#endif
+/* For loser systems without the definition. */
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
# define RE_ENABLE_I18N
#endif
@@ -83,8 +101,6 @@
# define inline
#endif
-/* Number of bits in a byte. */
-#define BYTE_BITS 8
/* Number of single byte character. */
#define SBC_MAX 256
@@ -99,6 +115,7 @@
# define __wctype wctype
# define __iswctype iswctype
# define __btowc btowc
+# define __mbrtowc mbrtowc
# define __mempcpy mempcpy
# define __wcrtomb wcrtomb
# define __regfree regfree
@@ -114,26 +131,28 @@
extern const char __re_error_msgid[] attribute_hidden;
extern const size_t __re_error_msgid_idx[] attribute_hidden;
-/* Number of bits in an unsinged int. */
-#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
-/* Number of unsigned int in an bit_set. */
-#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
-typedef unsigned int bitset[BITSET_UINTS];
-typedef unsigned int *re_bitset_ptr_t;
-typedef const unsigned int *re_const_bitset_ptr_t;
-
-#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS)
-#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS))
-#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS))
-#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
-#define bitset_set_all(set) \
- memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
-#define bitset_copy(dest,src) \
- memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
-static inline void bitset_not (bitset set);
-static inline void bitset_merge (bitset dest, const bitset src);
-static inline void bitset_not_merge (bitset dest, const bitset src);
-static inline void bitset_mask (bitset dest, const bitset src);
+/* An integer used to represent a set of bits. It must be unsigned,
+ and must be at least as wide as unsigned int. */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t. */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t. */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set. */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+#define bitset_set(set,i) \
+ (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS)
+#define bitset_clear(set,i) \
+ (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS))
+#define bitset_contain(set,i) \
+ (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS))
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
#define PREV_WORD_CONSTRAINT 0x0001
#define PREV_NOTWORD_CONSTRAINT 0x0002
@@ -339,7 +358,7 @@ struct re_string_t
the beginning of the input string. */
unsigned int tip_context;
/* The translation passed as a part of an argument of re_compile_pattern. */
- unsigned RE_TRANSLATE_TYPE trans;
+ RE_TRANSLATE_TYPE trans;
/* Copy of re_dfa_t's word_char. */
re_const_bitset_ptr_t word_char;
/* 1 if REG_ICASE. */
@@ -366,44 +385,20 @@ typedef struct re_dfa_t re_dfa_t;
# endif
#endif
-#ifndef RE_NO_INTERNAL_PROTOTYPES
-static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
- int len, int init_len,
- RE_TRANSLATE_TYPE trans, int icase,
- const re_dfa_t *dfa)
- internal_function;
-static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
- int len, RE_TRANSLATE_TYPE trans,
- int icase, const re_dfa_t *dfa)
- internal_function;
-static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
- int eflags) internal_function;
+#ifndef NOT_IN_libc
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
int new_buf_len)
internal_function;
# ifdef RE_ENABLE_I18N
static void build_wcs_buffer (re_string_t *pstr) internal_function;
-static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
+static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
+ internal_function;
# endif /* RE_ENABLE_I18N */
static void build_upper_buffer (re_string_t *pstr) internal_function;
static void re_string_translate_buffer (re_string_t *pstr) internal_function;
-static void re_string_destruct (re_string_t *pstr) internal_function;
-# ifdef RE_ENABLE_I18N
-static int re_string_elem_size_at (const re_string_t *pstr, int idx)
- internal_function __attribute ((pure));
-static inline int re_string_char_size_at (const re_string_t *pstr, int idx)
- internal_function __attribute ((pure));
-static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx)
- internal_function __attribute ((pure));
-# endif /* RE_ENABLE_I18N */
static unsigned int re_string_context_at (const re_string_t *input, int idx,
int eflags)
internal_function __attribute ((pure));
-static unsigned char re_string_peek_byte_case (const re_string_t *pstr,
- int idx)
- internal_function __attribute ((pure));
-static unsigned char re_string_fetch_byte_case (re_string_t *pstr)
- internal_function __attribute ((pure));
#endif
#define re_string_peek_byte(pstr, offset) \
((pstr)->mbs[(pstr)->cur_idx + offset])
@@ -422,6 +417,21 @@ static unsigned char re_string_fetch_byte_case (re_string_t *pstr)
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+#include <alloca.h>
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ allocate anything larger than 4096 bytes. Also care for the possibility
+ of a few compiler-allocated temporary stack slots. */
+# define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* alloca is implemented with malloc, so just use malloc. */
+# define __libc_use_alloca(n) 0
+# endif
+#endif
+
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
#define re_free(p) free (p)
@@ -533,7 +543,6 @@ typedef struct
{
int str_idx;
int node;
- int next_last_offset;
state_array_t *path;
int alasts; /* Allocation size of LASTS. */
int nlasts; /* The number of LASTS. */
@@ -556,9 +565,9 @@ typedef struct
/* The string object corresponding to the input string. */
re_string_t input;
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
- re_dfa_t *const dfa;
+ const re_dfa_t *const dfa;
#else
- re_dfa_t *dfa;
+ const re_dfa_t *dfa;
#endif
/* EFLAGS of the argument of regexec. */
int eflags;
@@ -605,8 +614,8 @@ struct re_fail_stack_t
struct re_dfa_t
{
re_token_t *nodes;
- int nodes_alloc;
- int nodes_len;
+ size_t nodes_alloc;
+ size_t nodes_len;
int *nexts;
int *org_indices;
re_node_set *edests;
@@ -624,13 +633,12 @@ struct re_dfa_t
/* number of subexpressions `re_nsub' is in regex_t. */
unsigned int state_hash_mask;
- int states_alloc;
int init_node;
int nbackref; /* The number of backreference in this dfa. */
/* Bitmap expressing which backreference is used. */
- unsigned int used_bkref_map;
- unsigned int completed_bkref_map;
+ bitset_word_t used_bkref_map;
+ bitset_word_t completed_bkref_map;
unsigned int has_plural_match : 1;
/* If this dfa has "multibyte node", which is a backreference or
@@ -641,52 +649,20 @@ struct re_dfa_t
unsigned int map_notascii : 1;
unsigned int word_ops_used : 1;
int mb_cur_max;
- bitset word_char;
+ bitset_t word_char;
reg_syntax_t syntax;
int *subexp_map;
#ifdef DEBUG
char* re_str;
#endif
+ __libc_lock_define (, lock)
};
-#ifndef RE_NO_INTERNAL_PROTOTYPES
-static reg_errcode_t re_node_set_alloc (re_node_set *set, int size) internal_function;
-static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem) internal_function;
-static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
- int elem2) internal_function;
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
-static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
- const re_node_set *src) internal_function;
-static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
- const re_node_set *src1,
- const re_node_set *src2) internal_function;
-static reg_errcode_t re_node_set_init_union (re_node_set *dest,
- const re_node_set *src1,
- const re_node_set *src2) internal_function;
-static reg_errcode_t re_node_set_merge (re_node_set *dest,
- const re_node_set *src) internal_function;
-static int re_node_set_insert (re_node_set *set, int elem) internal_function;
-static int re_node_set_insert_last (re_node_set *set,
- int elem) internal_function;
-static int re_node_set_compare (const re_node_set *set1,
- const re_node_set *set2)
- internal_function __attribute ((pure));
-static int re_node_set_contains (const re_node_set *set, int elem)
- internal_function __attribute ((pure));
-static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
#define re_node_set_remove(set,id) \
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
-static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function;
-static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
- const re_node_set *nodes) internal_function;
-static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
- re_dfa_t *dfa,
- const re_node_set *nodes,
- unsigned int context) internal_function;
-static void free_state (re_dfastate_t *state) internal_function;
-#endif
typedef enum
@@ -712,41 +688,33 @@ typedef struct
/* Inline functions for bitset operation. */
static inline void
-bitset_not (bitset set)
+bitset_not (bitset_t set)
{
int bitset_i;
- for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
+ for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
set[bitset_i] = ~set[bitset_i];
}
static inline void
-bitset_merge (bitset dest, const bitset src)
+bitset_merge (bitset_t dest, const bitset_t src)
{
int bitset_i;
- for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
+ for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
dest[bitset_i] |= src[bitset_i];
}
static inline void
-bitset_not_merge (bitset dest, const bitset src)
-{
- int i;
- for (i = 0; i < BITSET_UINTS; ++i)
- dest[i] |= ~src[i];
-}
-
-static inline void
-bitset_mask (bitset dest, const bitset src)
+bitset_mask (bitset_t dest, const bitset_t src)
{
int bitset_i;
- for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
+ for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
dest[bitset_i] &= src[bitset_i];
}
-#if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES
+#ifdef RE_ENABLE_I18N
/* Inline functions for re_string. */
static inline int
-internal_function
+internal_function __attribute ((pure))
re_string_char_size_at (const re_string_t *pstr, int idx)
{
int byte_idx;
@@ -759,7 +727,7 @@ re_string_char_size_at (const re_string_t *pstr, int idx)
}
static inline wint_t
-internal_function
+internal_function __attribute ((pure))
re_string_wchar_at (const re_string_t *pstr, int idx)
{
if (pstr->mb_cur_max == 1)
@@ -767,15 +735,15 @@ re_string_wchar_at (const re_string_t *pstr, int idx)
return (wint_t) pstr->wcs[idx];
}
+# ifndef NOT_IN_libc
static int
-internal_function
+internal_function __attribute ((pure))
re_string_elem_size_at (const re_string_t *pstr, int idx)
{
-#ifdef _LIBC
+# ifdef _LIBC
const unsigned char *p, *extra;
const int32_t *table, *indirect;
- int32_t tmp;
-# include <locale/weight.h>
+# include <locale/weight.h>
uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0)
@@ -786,13 +754,14 @@ re_string_elem_size_at (const re_string_t *pstr, int idx)
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_INDIRECTMB);
p = pstr->mbs + idx;
- tmp = findidx (&p);
+ findidx (&p, pstr->len - idx);
return p - pstr->mbs - idx;
}
else
-#endif /* _LIBC */
+# endif /* _LIBC */
return 1;
}
+# endif
#endif /* RE_ENABLE_I18N */
#endif /* _REGEX_INTERNAL_H */
diff --git a/regexec.c b/regexec.c
index 3c226e3c20cd..ec4ae1316ffa 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002-2005,2007,2009,2010,2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
int n) internal_function;
@@ -25,7 +24,7 @@ static void match_ctx_free (re_match_context_t *cache) internal_function;
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
int str_idx, int from, int to)
internal_function;
-static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
+static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
internal_function;
static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
int str_idx) internal_function;
@@ -52,81 +51,76 @@ static int re_search_stub (struct re_pattern_buffer *bufp,
int ret_len) internal_function;
static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
int nregs, int regs_allocated) internal_function;
-static inline re_dfastate_t *acquire_init_state_context
- (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
- __attribute ((always_inline)) internal_function;
static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
internal_function;
static int check_matching (re_match_context_t *mctx, int fl_longest_match,
- int *p_match_first)
- internal_function;
-static int check_halt_node_context (const re_dfa_t *dfa, int node,
- unsigned int context) internal_function;
+ int *p_match_first) internal_function;
static int check_halt_state_context (const re_match_context_t *mctx,
const re_dfastate_t *state, int idx)
internal_function;
-static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
regmatch_t *prev_idx_match, int cur_node,
int cur_idx, int nmatch) internal_function;
-static int proceed_next_node (const re_match_context_t *mctx,
- int nregs, regmatch_t *regs,
- int *pidx, int node, re_node_set *eps_via_nodes,
- struct re_fail_stack_t *fs) internal_function;
static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
int str_idx, int dest_node, int nregs,
regmatch_t *regs,
- re_node_set *eps_via_nodes) internal_function;
-static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
- regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
+ re_node_set *eps_via_nodes)
+ internal_function;
static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx,
size_t nmatch, regmatch_t *pmatch,
int fl_backtrack) internal_function;
-static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
+ internal_function;
#ifdef RE_ENABLE_I18N
static int sift_states_iter_mb (const re_match_context_t *mctx,
re_sift_context_t *sctx,
- int node_idx, int str_idx, int max_str_idx) internal_function;
+ int node_idx, int str_idx, int max_str_idx)
+ internal_function;
#endif /* RE_ENABLE_I18N */
-static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
- re_sift_context_t *sctx) internal_function;
-static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+ re_sift_context_t *sctx)
+ internal_function;
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
re_sift_context_t *sctx, int str_idx,
- re_node_set *cur_dest) internal_function;
-static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
+ re_node_set *cur_dest)
+ internal_function;
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
re_sift_context_t *sctx,
int str_idx,
- re_node_set *dest_nodes) internal_function;
-static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
- re_node_set *dest_nodes,
- const re_node_set *candidates) internal_function;
-static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
+ re_node_set *dest_nodes)
+ internal_function;
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
re_node_set *dest_nodes,
- const re_node_set *and_nodes) internal_function;
-static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
+ const re_node_set *candidates)
+ internal_function;
+static int check_dst_limits (const re_match_context_t *mctx,
+ re_node_set *limits,
int dst_node, int dst_idx, int src_node,
int src_idx) internal_function;
-static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx,
+static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
int boundaries, int subexp_idx,
- int from_node, int bkref_idx) internal_function;
-static int check_dst_limits_calc_pos (re_match_context_t *mctx,
+ int from_node, int bkref_idx)
+ internal_function;
+static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
int limit, int subexp_idx,
int node, int str_idx,
int bkref_idx) internal_function;
-static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
re_node_set *dest_nodes,
const re_node_set *candidates,
re_node_set *limits,
struct re_backref_cache_entry *bkref_ents,
int str_idx) internal_function;
-static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
re_sift_context_t *sctx,
- int str_idx, const re_node_set *candidates) internal_function;
-static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
- int next_state_log_idx) internal_function;
-static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
- re_dfastate_t **src, int num) internal_function;
+ int str_idx, const re_node_set *candidates)
+ internal_function;
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+ re_dfastate_t **dst,
+ re_dfastate_t **src, int num)
+ internal_function;
static re_dfastate_t *find_recover_state (reg_errcode_t *err,
re_match_context_t *mctx) internal_function;
static re_dfastate_t *transit_state (reg_errcode_t *err,
@@ -134,27 +128,33 @@ static re_dfastate_t *transit_state (reg_errcode_t *err,
re_dfastate_t *state) internal_function;
static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
re_match_context_t *mctx,
- re_dfastate_t *next_state) internal_function;
+ re_dfastate_t *next_state)
+ internal_function;
static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
re_node_set *cur_nodes,
int str_idx) internal_function;
#if 0
static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
re_match_context_t *mctx,
- re_dfastate_t *pstate) internal_function;
+ re_dfastate_t *pstate)
+ internal_function;
#endif
#ifdef RE_ENABLE_I18N
static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
- re_dfastate_t *pstate) internal_function;
+ re_dfastate_t *pstate)
+ internal_function;
#endif /* RE_ENABLE_I18N */
static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
- const re_node_set *nodes) internal_function;
+ const re_node_set *nodes)
+ internal_function;
static reg_errcode_t get_subexp (re_match_context_t *mctx,
- int bkref_node, int bkref_str_idx) internal_function;
+ int bkref_node, int bkref_str_idx)
+ internal_function;
static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
const re_sub_match_top_t *sub_top,
re_sub_match_last_t *sub_last,
- int bkref_node, int bkref_str) internal_function;
+ int bkref_node, int bkref_str)
+ internal_function;
static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
int subexp_idx, int type) internal_function;
static reg_errcode_t check_arrival (re_match_context_t *mctx,
@@ -164,34 +164,41 @@ static reg_errcode_t check_arrival (re_match_context_t *mctx,
static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
int str_idx,
re_node_set *cur_nodes,
- re_node_set *next_nodes) internal_function;
-static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
+ re_node_set *next_nodes)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
re_node_set *cur_nodes,
- int ex_subexp, int type) internal_function;
-static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
+ int ex_subexp, int type)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
re_node_set *dst_nodes,
int target, int ex_subexp,
int type) internal_function;
static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
re_node_set *cur_nodes, int cur_str,
- int subexp_num, int type) internal_function;
-static int build_trtable (re_dfa_t *dfa,
+ int subexp_num, int type)
+ internal_function;
+static int build_trtable (const re_dfa_t *dfa,
re_dfastate_t *state) internal_function;
#ifdef RE_ENABLE_I18N
-static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
- const re_string_t *input, int idx) internal_function;
+static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int idx)
+ internal_function;
# ifdef _LIBC
static unsigned int find_collation_sequence_value (const unsigned char *mbs,
- size_t name_len) internal_function;
+ size_t name_len)
+ internal_function;
# endif /* _LIBC */
#endif /* RE_ENABLE_I18N */
-static int group_nodes_into_DFAstates (re_dfa_t *dfa,
+static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
const re_dfastate_t *state,
re_node_set *states_node,
- bitset *states_ch) internal_function;
+ bitset_t *states_ch) internal_function;
static int check_node_accept (const re_match_context_t *mctx,
- const re_token_t *node, int idx) internal_function;
-static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
+ const re_token_t *node, int idx)
+ internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+ internal_function;
/* Entry point for POSIX code. */
@@ -219,6 +226,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
{
reg_errcode_t err;
int start, length;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
return REG_BADPAT;
@@ -233,12 +241,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
start = 0;
length = strlen (string);
}
+
+ __libc_lock_lock (dfa->lock);
if (preg->no_sub)
err = re_search_internal (preg, string, length, start, length - start,
length, 0, NULL, eflags);
else
err = re_search_internal (preg, string, length, start, length - start,
length, nmatch, pmatch, eflags);
+ __libc_lock_unlock (dfa->lock);
return err != REG_NOERROR;
}
@@ -356,33 +367,34 @@ re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
const char *str;
int rval;
int len = length1 + length2;
- int free_str = 0;
+ char *s = NULL;
- if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+ if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
return -2;
/* Concatenate the strings. */
if (length2 > 0)
if (length1 > 0)
{
- char *s = re_malloc (char, len);
+ s = re_malloc (char, len);
if (BE (s == NULL, 0))
return -2;
+#ifdef _LIBC
+ memcpy (__mempcpy (s, string1, length1), string2, length2);
+#else
memcpy (s, string1, length1);
memcpy (s + length1, string2, length2);
+#endif
str = s;
- free_str = 1;
}
else
str = string2;
else
str = string1;
- rval = re_search_stub (bufp, str, len, start, range, stop, regs,
- ret_len);
- if (free_str)
- re_free ((char *) str);
+ rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
+ re_free (s);
return rval;
}
@@ -402,6 +414,7 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
regmatch_t *pmatch;
int nregs, rval;
int eflags = 0;
+ re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
/* Check for out-of-range. */
if (BE (start < 0 || start > length, 0))
@@ -411,6 +424,8 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
else if (BE (start + range < 0, 0))
range = -start;
+ __libc_lock_lock (dfa->lock);
+
eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
@@ -439,7 +454,10 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
nregs = bufp->re_nsub + 1;
pmatch = re_malloc (regmatch_t, nregs);
if (BE (pmatch == NULL, 0))
- return -2;
+ {
+ rval = -2;
+ goto out;
+ }
result = re_search_internal (bufp, string, length, start, range, stop,
nregs, pmatch, eflags);
@@ -469,6 +487,8 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
rval = pmatch[0].rm_so;
}
re_free (pmatch);
+ out:
+ __libc_lock_unlock (dfa->lock);
return rval;
}
@@ -488,9 +508,14 @@ re_copy_regs (regs, pmatch, nregs, regs_allocated)
if (regs_allocated == REGS_UNALLOCATED)
{ /* No. So allocate them with malloc. */
regs->start = re_malloc (regoff_t, need_regs);
- regs->end = re_malloc (regoff_t, need_regs);
- if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
+ if (BE (regs->start == NULL, 0))
return REGS_UNALLOCATED;
+ regs->end = re_malloc (regoff_t, need_regs);
+ if (BE (regs->end == NULL, 0))
+ {
+ re_free (regs->start);
+ return REGS_UNALLOCATED;
+ }
regs->num_regs = need_regs;
}
else if (regs_allocated == REGS_REALLOCATE)
@@ -500,9 +525,15 @@ re_copy_regs (regs, pmatch, nregs, regs_allocated)
if (BE (need_regs > regs->num_regs, 0))
{
regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
- regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
- if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
+ regoff_t *new_end;
+ if (BE (new_start == NULL, 0))
return REGS_UNALLOCATED;
+ new_end = re_realloc (regs->end, regoff_t, need_regs);
+ if (BE (new_end == NULL, 0))
+ {
+ re_free (new_start);
+ return REGS_UNALLOCATED;
+ }
regs->start = new_start;
regs->end = new_end;
regs->num_regs = need_regs;
@@ -593,6 +624,7 @@ re_exec (s)
(START + RANGE >= 0 && START + RANGE <= LENGTH) */
static reg_errcode_t
+__attribute_warn_unused_result__
re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
eflags)
const regex_t *preg;
@@ -602,7 +634,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
regmatch_t pmatch[];
{
reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
int left_lim, right_lim, incr;
int fl_longest_match, match_first, match_kind, match_last = -1;
int extra_nmatch;
@@ -614,7 +646,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
#endif
char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
&& range && !preg->can_be_null) ? preg->fastmap : NULL;
- unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
+ RE_TRANSLATE_TYPE t = preg->translate;
#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
memset (&mctx, '\0', sizeof (re_match_context_t));
@@ -644,7 +676,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
|| !preg->newline_anchor))
{
if (start != 0 && start + range != 0)
- return REG_NOMATCH;
+ return REG_NOMATCH;
start = range = 0;
}
@@ -669,6 +701,13 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
multi character collating element. */
if (nmatch > 1 || dfa->has_mb_node)
{
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+
mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
if (BE (mctx.state_log == NULL, 0))
{
@@ -776,10 +815,10 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
break;
match_first += incr;
if (match_first < left_lim || match_first > right_lim)
- {
- err = REG_NOMATCH;
- goto free_return;
- }
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
}
break;
}
@@ -871,14 +910,14 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
#ifdef RE_ENABLE_I18N
if (BE (mctx.input.offsets_needed != 0, 0))
{
- if (pmatch[reg_idx].rm_so == mctx.input.valid_len)
- pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len;
- else
- pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so];
- if (pmatch[reg_idx].rm_eo == mctx.input.valid_len)
- pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len;
- else
- pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo];
+ pmatch[reg_idx].rm_so =
+ (pmatch[reg_idx].rm_so == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+ pmatch[reg_idx].rm_eo =
+ (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
}
#else
assert (mctx.input.offsets_needed == 0);
@@ -893,14 +932,14 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
}
if (dfa->subexp_map)
- for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
- if (dfa->subexp_map[reg_idx] != reg_idx)
- {
- pmatch[reg_idx + 1].rm_so
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
- pmatch[reg_idx + 1].rm_eo
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
- }
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
}
free_return:
@@ -912,10 +951,11 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
}
static reg_errcode_t
+__attribute_warn_unused_result__
prune_impossible_nodes (mctx)
re_match_context_t *mctx;
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int halt_node, match_last;
reg_errcode_t ret;
re_dfastate_t **sifted_states;
@@ -926,6 +966,11 @@ prune_impossible_nodes (mctx)
#endif
match_last = mctx->match_last;
halt_node = mctx->last_node;
+
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
+ return REG_ESPACE;
+
sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
if (BE (sifted_states == NULL, 0))
{
@@ -980,6 +1025,11 @@ prune_impossible_nodes (mctx)
re_node_set_free (&sctx.limits);
if (BE (ret != REG_NOERROR, 0))
goto free_return;
+ if (sifted_states[0] == NULL)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
}
re_free (mctx->state_log);
mctx->state_log = sifted_states;
@@ -998,12 +1048,11 @@ prune_impossible_nodes (mctx)
since initial states may have constraints like "\<", "^", etc.. */
static inline re_dfastate_t *
-acquire_init_state_context (err, mctx, idx)
- reg_errcode_t *err;
- const re_match_context_t *mctx;
- int idx;
+__attribute ((always_inline)) internal_function
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+ int idx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
if (dfa->init_state->has_constraint)
{
unsigned int context;
@@ -1041,12 +1090,11 @@ acquire_init_state_context (err, mctx, idx)
index of the buffer. */
static int
-check_matching (mctx, fl_longest_match, p_match_first)
- re_match_context_t *mctx;
- int fl_longest_match;
- int *p_match_first;
+internal_function __attribute_warn_unused_result__
+check_matching (re_match_context_t *mctx, int fl_longest_match,
+ int *p_match_first)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int match = 0;
int match_last = -1;
@@ -1081,7 +1129,7 @@ check_matching (mctx, fl_longest_match, p_match_first)
{
err = transit_state_bkref (mctx, &cur_state->nodes);
if (BE (err != REG_NOERROR, 0))
- return err;
+ return err;
}
}
}
@@ -1107,17 +1155,18 @@ check_matching (mctx, fl_longest_match, p_match_first)
re_dfastate_t *old_state = cur_state;
int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
- if (BE (next_char_idx >= mctx->input.bufs_len, 0)
- || (BE (next_char_idx >= mctx->input.valid_len, 0)
- && mctx->input.valid_len < mctx->input.len))
- {
- err = extend_buffers (mctx);
- if (BE (err != REG_NOERROR, 0))
+ if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
+ && mctx->input.bufs_len < mctx->input.len)
+ || (BE (next_char_idx >= mctx->input.valid_len, 0)
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
{
assert (err == REG_ESPACE);
return -2;
}
- }
+ }
cur_state = transit_state (&err, mctx, cur_state);
if (mctx->state_log != NULL)
@@ -1173,10 +1222,9 @@ check_matching (mctx, fl_longest_match, p_match_first)
/* Check NODE match the current context. */
-static int check_halt_node_context (dfa, node, context)
- const re_dfa_t *dfa;
- int node;
- unsigned int context;
+static int
+internal_function
+check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
{
re_token_type_t type = dfa->nodes[node].type;
unsigned int constraint = dfa->nodes[node].constraint;
@@ -1194,10 +1242,9 @@ static int check_halt_node_context (dfa, node, context)
match the context, return the node. */
static int
-check_halt_state_context (mctx, state, idx)
- const re_match_context_t *mctx;
- const re_dfastate_t *state;
- int idx;
+internal_function
+check_halt_state_context (const re_match_context_t *mctx,
+ const re_dfastate_t *state, int idx)
{
int i;
unsigned int context;
@@ -1217,16 +1264,13 @@ check_halt_state_context (mctx, state, idx)
of errors. */
static int
-proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
- const re_match_context_t *mctx;
- regmatch_t *regs;
- int nregs, *pidx, node;
- re_node_set *eps_via_nodes;
- struct re_fail_stack_t *fs;
+internal_function
+proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
+ int *pidx, int node, re_node_set *eps_via_nodes,
+ struct re_fail_stack_t *fs)
{
- re_dfa_t *const dfa = mctx->dfa;
- int i, err, dest_node;
- dest_node = -1;
+ const re_dfa_t *const dfa = mctx->dfa;
+ int i, err;
if (IS_EPSILON_NODE (dfa->nodes[node].type))
{
re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
@@ -1241,20 +1285,20 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
int candidate = edests->elems[i];
if (!re_node_set_contains (cur_nodes, candidate))
continue;
- if (dest_node == -1)
+ if (dest_node == -1)
dest_node = candidate;
- else
+ else
{
/* In order to avoid infinite loop like "(a*)*", return the second
- epsilon-transition if the first was already considered. */
+ epsilon-transition if the first was already considered. */
if (re_node_set_contains (eps_via_nodes, dest_node))
- return candidate;
+ return candidate;
/* Otherwise, push the second epsilon-transition on the fail stack. */
else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
- eps_via_nodes))
+ eps_via_nodes))
return -2;
/* We know we are going to exit. */
@@ -1292,6 +1336,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
if (naccepted == 0)
{
+ int dest_node;
err = re_node_set_insert (eps_via_nodes, node);
if (BE (err < 0, 0))
return -2;
@@ -1305,7 +1350,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
if (naccepted != 0
|| check_node_accept (mctx, dfa->nodes + node, *pidx))
{
- dest_node = dfa->nexts[node];
+ int dest_node = dfa->nexts[node];
*pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
|| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
@@ -1319,11 +1364,9 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
}
static reg_errcode_t
-push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes)
- struct re_fail_stack_t *fs;
- int str_idx, dest_node, nregs;
- regmatch_t *regs;
- re_node_set *eps_via_nodes;
+internal_function __attribute_warn_unused_result__
+push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
+ int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
{
reg_errcode_t err;
int num = fs->num++;
@@ -1348,11 +1391,9 @@ push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes)
}
static int
-pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
- struct re_fail_stack_t *fs;
- int *pidx, nregs;
- regmatch_t *regs;
- re_node_set *eps_via_nodes;
+internal_function
+pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+ regmatch_t *regs, re_node_set *eps_via_nodes)
{
int num = --fs->num;
assert (num >= 0);
@@ -1370,19 +1411,17 @@ pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
static reg_errcode_t
-set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
- const regex_t *preg;
- const re_match_context_t *mctx;
- size_t nmatch;
- regmatch_t *pmatch;
- int fl_backtrack;
+internal_function __attribute_warn_unused_result__
+set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+ regmatch_t *pmatch, int fl_backtrack)
{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
int idx, cur_node;
re_node_set eps_via_nodes;
struct re_fail_stack_t *fs;
struct re_fail_stack_t fs_body = { 0, 2, NULL };
regmatch_t *prev_idx_match;
+ int prev_idx_match_malloced = 0;
#ifdef DEBUG
assert (nmatch > 1);
@@ -1401,7 +1440,18 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
cur_node = dfa->init_node;
re_node_set_init_empty (&eps_via_nodes);
- prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch);
+ if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
+ prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
+ else
+ {
+ prev_idx_match = re_malloc (regmatch_t, nmatch);
+ if (prev_idx_match == NULL)
+ {
+ free_fail_stack_return (fs);
+ return REG_ESPACE;
+ }
+ prev_idx_match_malloced = 1;
+ }
memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
@@ -1419,6 +1469,8 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
if (reg_idx == nmatch)
{
re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
return free_fail_stack_return (fs);
}
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
@@ -1427,6 +1479,8 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
else
{
re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
return REG_NOERROR;
}
}
@@ -1440,6 +1494,8 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
if (BE (cur_node == -2, 0))
{
re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
free_fail_stack_return (fs);
return REG_ESPACE;
}
@@ -1449,17 +1505,21 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
else
{
re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
return REG_NOMATCH;
}
}
}
re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
return free_fail_stack_return (fs);
}
static reg_errcode_t
-free_fail_stack_return (fs)
- struct re_fail_stack_t *fs;
+internal_function
+free_fail_stack_return (struct re_fail_stack_t *fs)
{
if (fs)
{
@@ -1475,10 +1535,9 @@ free_fail_stack_return (fs)
}
static void
-update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
- re_dfa_t *dfa;
- regmatch_t *pmatch, *prev_idx_match;
- int cur_node, cur_idx, nmatch;
+internal_function
+update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+ regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
{
int type = dfa->nodes[cur_node].type;
if (type == OP_OPEN_SUBEXP)
@@ -1548,9 +1607,8 @@ update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
static reg_errcode_t
-sift_states_backward (mctx, sctx)
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
+internal_function
+sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
{
reg_errcode_t err;
int null_cnt = 0;
@@ -1588,7 +1646,7 @@ sift_states_backward (mctx, sctx)
if (mctx->state_log[str_idx])
{
err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
- if (BE (err != REG_NOERROR, 0))
+ if (BE (err != REG_NOERROR, 0))
goto free_return;
}
@@ -1607,14 +1665,12 @@ sift_states_backward (mctx, sctx)
}
static reg_errcode_t
-build_sifted_states (mctx, sctx, str_idx, cur_dest)
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int str_idx;
- re_node_set *cur_dest;
+internal_function __attribute_warn_unused_result__
+build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int str_idx, re_node_set *cur_dest)
{
- re_dfa_t *const dfa = mctx->dfa;
- re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
int i;
/* Then build the next sifted state.
@@ -1671,13 +1727,13 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest)
/* Helper functions. */
static reg_errcode_t
-clean_state_log_if_needed (mctx, next_state_log_idx)
- re_match_context_t *mctx;
- int next_state_log_idx;
+internal_function
+clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
{
int top = mctx->state_log_top;
- if (next_state_log_idx >= mctx->input.bufs_len
+ if ((next_state_log_idx >= mctx->input.bufs_len
+ && mctx->input.bufs_len < mctx->input.len)
|| (next_state_log_idx >= mctx->input.valid_len
&& mctx->input.valid_len < mctx->input.len))
{
@@ -1697,11 +1753,9 @@ clean_state_log_if_needed (mctx, next_state_log_idx)
}
static reg_errcode_t
-merge_state_array (dfa, dst, src, num)
- re_dfa_t *dfa;
- re_dfastate_t **dst;
- re_dfastate_t **src;
- int num;
+internal_function
+merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+ re_dfastate_t **src, int num)
{
int st_idx;
reg_errcode_t err;
@@ -1726,14 +1780,13 @@ merge_state_array (dfa, dst, src, num)
}
static reg_errcode_t
-update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int str_idx;
- re_node_set *dest_nodes;
+internal_function
+update_cur_sifted_state (const re_match_context_t *mctx,
+ re_sift_context_t *sctx, int str_idx,
+ re_node_set *dest_nodes)
{
- re_dfa_t *const dfa = mctx->dfa;
- reg_errcode_t err;
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err = REG_NOERROR;
const re_node_set *candidates;
candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
: &mctx->state_log[str_idx]->nodes);
@@ -1775,10 +1828,9 @@ update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
}
static reg_errcode_t
-add_epsilon_src_nodes (dfa, dest_nodes, candidates)
- re_dfa_t *dfa;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
+internal_function __attribute_warn_unused_result__
+add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+ const re_node_set *candidates)
{
reg_errcode_t err = REG_NOERROR;
int i;
@@ -1791,21 +1843,23 @@ add_epsilon_src_nodes (dfa, dest_nodes, candidates)
{
err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
if (BE (err != REG_NOERROR, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
for (i = 0; i < dest_nodes->nelem; i++)
- re_node_set_merge (&state->inveclosure,
- dfa->inveclosures + dest_nodes->elems[i]);
+ {
+ err = re_node_set_merge (&state->inveclosure,
+ dfa->inveclosures + dest_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ return REG_ESPACE;
+ }
}
return re_node_set_add_intersect (dest_nodes, candidates,
&state->inveclosure);
}
static reg_errcode_t
-sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
- re_dfa_t *dfa;
- int node;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
+internal_function
+sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
+ const re_node_set *candidates)
{
int ecl_idx;
reg_errcode_t err;
@@ -1852,12 +1906,11 @@ sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
}
static int
-check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
- re_match_context_t *mctx;
- re_node_set *limits;
- int dst_node, dst_idx, src_node, src_idx;
+internal_function
+check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
+ int dst_node, int dst_idx, int src_node, int src_idx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int lim_idx, src_pos, dst_pos;
int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
@@ -1889,12 +1942,12 @@ check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
}
static int
-check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
- re_match_context_t *mctx;
- int boundaries, subexp_idx, from_node, bkref_idx;
+internal_function
+check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+ int subexp_idx, int from_node, int bkref_idx)
{
- re_dfa_t *const dfa = mctx->dfa;
- re_node_set *eclosures = dfa->eclosures + from_node;
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *eclosures = dfa->eclosures + from_node;
int node_idx;
/* Else, we are on the boundary: examine the nodes on the epsilon
@@ -1909,14 +1962,15 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
{
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
do
- {
+ {
int dst, cpos;
if (ent->node != node)
continue;
- if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
- && !(ent->eps_reachable_subexps_map & (1 << subexp_idx)))
+ if (subexp_idx < BITSET_WORD_BITS
+ && !(ent->eps_reachable_subexps_map
+ & ((bitset_word_t) 1 << subexp_idx)))
continue;
/* Recurse trying to reach the OP_OPEN_SUBEXP and
@@ -1929,9 +1983,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
if (dst == from_node)
{
if (boundaries & 1)
- return -1;
+ return -1;
else /* if (boundaries & 2) */
- return 0;
+ return 0;
}
cpos =
@@ -1942,8 +1996,10 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
if (cpos == 0 && (boundaries & 2))
return 0;
- ent->eps_reachable_subexps_map &= ~(1 << subexp_idx);
- }
+ if (subexp_idx < BITSET_WORD_BITS)
+ ent->eps_reachable_subexps_map
+ &= ~((bitset_word_t) 1 << subexp_idx);
+ }
while (ent++->more);
}
break;
@@ -1967,9 +2023,10 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
}
static int
-check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx)
- re_match_context_t *mctx;
- int limit, subexp_idx, from_node, str_idx, bkref_idx;
+internal_function
+check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
+ int subexp_idx, int from_node, int str_idx,
+ int bkref_idx)
{
struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
int boundaries;
@@ -1996,13 +2053,10 @@ check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_id
which are against limitations from DEST_NODES. */
static reg_errcode_t
-check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
- re_dfa_t *dfa;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
- re_node_set *limits;
- struct re_backref_cache_entry *bkref_ents;
- int str_idx;
+internal_function
+check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
+ const re_node_set *candidates, re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents, int str_idx)
{
reg_errcode_t err;
int node_idx, lim_idx;
@@ -2087,13 +2141,11 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
}
static reg_errcode_t
-sift_states_bkref (mctx, sctx, str_idx, candidates)
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int str_idx;
- const re_node_set *candidates;
+internal_function __attribute_warn_unused_result__
+sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int str_idx, const re_node_set *candidates)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int node_idx, node;
re_sift_context_t local_sctx;
@@ -2121,7 +2173,10 @@ sift_states_bkref (mctx, sctx, str_idx, candidates)
enabled_idx = first_idx;
do
{
- int subexp_len, to_idx, dst_node;
+ int subexp_len;
+ int to_idx;
+ int dst_node;
+ int ret;
re_dfastate_t *cur_state;
if (entry->node != node)
@@ -2147,8 +2202,8 @@ sift_states_bkref (mctx, sctx, str_idx, candidates)
}
local_sctx.last_node = node;
local_sctx.last_str_idx = str_idx;
- err = re_node_set_insert (&local_sctx.limits, enabled_idx);
- if (BE (err < 0, 0))
+ ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
+ if (BE (ret < 0, 0))
{
err = REG_ESPACE;
goto free_return;
@@ -2169,7 +2224,7 @@ sift_states_bkref (mctx, sctx, str_idx, candidates)
re_node_set_remove (&local_sctx.limits, enabled_idx);
/* mctx->bkref_ents may have changed, reload the pointer. */
- entry = mctx->bkref_ents + enabled_idx;
+ entry = mctx->bkref_ents + enabled_idx;
}
while (enabled_idx++, entry++->more);
}
@@ -2186,12 +2241,11 @@ sift_states_bkref (mctx, sctx, str_idx, candidates)
#ifdef RE_ENABLE_I18N
static int
-sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
- const re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int node_idx, str_idx, max_str_idx;
+internal_function
+sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int node_idx, int str_idx, int max_str_idx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int naccepted;
/* Check the node can accept `multi byte'. */
naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
@@ -2217,10 +2271,9 @@ sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
update the destination of STATE_LOG. */
static re_dfastate_t *
-transit_state (err, mctx, state)
- reg_errcode_t *err;
- re_match_context_t *mctx;
- re_dfastate_t *state;
+internal_function __attribute_warn_unused_result__
+transit_state (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
{
re_dfastate_t **trtable;
unsigned char ch;
@@ -2252,7 +2305,7 @@ transit_state (err, mctx, state)
trtable = state->word_trtable;
if (BE (trtable != NULL, 1))
- {
+ {
unsigned int context;
context
= re_string_context_at (&mctx->input,
@@ -2276,12 +2329,11 @@ transit_state (err, mctx, state)
/* Update the state_log if we need */
re_dfastate_t *
-merge_state_with_log (err, mctx, next_state)
- reg_errcode_t *err;
- re_match_context_t *mctx;
- re_dfastate_t *next_state;
+internal_function
+merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *next_state)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int cur_idx = re_string_cur_idx (&mctx->input);
if (cur_idx > mctx->state_log_top)
@@ -2299,21 +2351,21 @@ merge_state_with_log (err, mctx, next_state)
unsigned int context;
re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
/* If (state_log[cur_idx] != 0), it implies that cur_idx is
- the destination of a multibyte char/collating element/
- back reference. Then the next state is the union set of
- these destinations and the results of the transition table. */
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
pstate = mctx->state_log[cur_idx];
log_nodes = pstate->entrance_nodes;
if (next_state != NULL)
- {
- table_nodes = next_state->entrance_nodes;
- *err = re_node_set_init_union (&next_nodes, table_nodes,
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
log_nodes);
- if (BE (*err != REG_NOERROR, 0))
+ if (BE (*err != REG_NOERROR, 0))
return NULL;
- }
+ }
else
- next_nodes = *log_nodes;
+ next_nodes = *log_nodes;
/* Note: We already add the nodes of the initial state,
then we don't need to add them here. */
@@ -2321,12 +2373,12 @@ merge_state_with_log (err, mctx, next_state)
re_string_cur_idx (&mctx->input) - 1,
mctx->eflags);
next_state = mctx->state_log[cur_idx]
- = re_acquire_state_context (err, dfa, &next_nodes, context);
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
- this function is next_state and ERR is already set. */
+ this function is next_state and ERR is already set. */
if (table_nodes != NULL)
- re_node_set_free (&next_nodes);
+ re_node_set_free (&next_nodes);
}
if (BE (dfa->nbackref, 0) && next_state != NULL)
@@ -2356,11 +2408,10 @@ merge_state_with_log (err, mctx, next_state)
multi-byte match, then look in the log for a state
from which to restart matching. */
re_dfastate_t *
-find_recover_state (err, mctx)
- reg_errcode_t *err;
- re_match_context_t *mctx;
+internal_function
+find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
{
- re_dfastate_t *cur_state = NULL;
+ re_dfastate_t *cur_state;
do
{
int max = mctx->state_log_top;
@@ -2368,15 +2419,15 @@ find_recover_state (err, mctx)
do
{
- if (++cur_str_idx > max)
- return NULL;
- re_string_skip_bytes (&mctx->input, 1);
+ if (++cur_str_idx > max)
+ return NULL;
+ re_string_skip_bytes (&mctx->input, 1);
}
while (mctx->state_log[cur_str_idx] == NULL);
cur_state = merge_state_with_log (err, mctx, NULL);
}
- while (err == REG_NOERROR && cur_state == NULL);
+ while (*err == REG_NOERROR && cur_state == NULL);
return cur_state;
}
@@ -2388,12 +2439,11 @@ find_recover_state (err, mctx)
corresponding back references. */
static reg_errcode_t
-check_subexp_matching_top (mctx, cur_nodes, str_idx)
- re_match_context_t *mctx;
- re_node_set *cur_nodes;
- int str_idx;
+internal_function
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+ int str_idx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int node_idx;
reg_errcode_t err;
@@ -2406,8 +2456,9 @@ check_subexp_matching_top (mctx, cur_nodes, str_idx)
{
int node = cur_nodes->elems[node_idx];
if (dfa->nodes[node].type == OP_OPEN_SUBEXP
- && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
- && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
+ && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
+ && (dfa->used_bkref_map
+ & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
{
err = match_ctx_add_subtop (mctx, node, str_idx);
if (BE (err != REG_NOERROR, 0))
@@ -2422,12 +2473,10 @@ check_subexp_matching_top (mctx, cur_nodes, str_idx)
accepting the current input byte. */
static re_dfastate_t *
-transit_state_sb (err, mctx, state)
- reg_errcode_t *err;
- re_match_context_t *mctx;
- re_dfastate_t *state;
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
re_node_set next_nodes;
re_dfastate_t *next_state;
int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
@@ -2463,11 +2512,10 @@ transit_state_sb (err, mctx, state)
#ifdef RE_ENABLE_I18N
static reg_errcode_t
-transit_state_mb (mctx, pstate)
- re_match_context_t *mctx;
- re_dfastate_t *pstate;
+internal_function
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int i;
@@ -2480,7 +2528,7 @@ transit_state_mb (mctx, pstate)
re_dfastate_t *dest_state;
if (!dfa->nodes[cur_node_idx].accept_mb)
- continue;
+ continue;
if (dfa->nodes[cur_node_idx].constraint)
{
@@ -2520,7 +2568,8 @@ transit_state_mb (mctx, pstate)
if (BE (err != REG_NOERROR, 0))
return err;
}
- context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
+ context = re_string_context_at (&mctx->input, dest_idx - 1,
+ mctx->eflags);
mctx->state_log[dest_idx]
= re_acquire_state_context (&err, dfa, &dest_nodes, context);
if (dest_state != NULL)
@@ -2533,11 +2582,10 @@ transit_state_mb (mctx, pstate)
#endif /* RE_ENABLE_I18N */
static reg_errcode_t
-transit_state_bkref (mctx, nodes)
- re_match_context_t *mctx;
- const re_node_set *nodes;
+internal_function
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int i;
int cur_str_idx = re_string_cur_idx (&mctx->input);
@@ -2648,20 +2696,20 @@ transit_state_bkref (mctx, nodes)
delay these checking for prune_impossible_nodes(). */
static reg_errcode_t
-get_subexp (mctx, bkref_node, bkref_str_idx)
- re_match_context_t *mctx;
- int bkref_node, bkref_str_idx;
+internal_function __attribute_warn_unused_result__
+get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int subexp_num, sub_top_idx;
const char *buf = (const char *) re_string_get_buffer (&mctx->input);
/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
if (cache_idx != -1)
{
- const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
+ const struct re_backref_cache_entry *entry
+ = mctx->bkref_ents + cache_idx;
do
- if (entry->node == bkref_node)
+ if (entry->node == bkref_node)
return REG_NOERROR; /* We already checked it. */
while (entry++->more);
}
@@ -2706,7 +2754,8 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
buf = (const char *) re_string_get_buffer (&mctx->input);
}
if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
- break; /* We don't need to search this sub expression any more. */
+ /* We don't need to search this sub expression any more. */
+ break;
}
bkref_str_off += sl_str_diff;
sl_str += sl_str_diff;
@@ -2757,7 +2806,8 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
continue;
/* Does this state have a ')' of the sub expression? */
nodes = &mctx->state_log[sl_str]->nodes;
- cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
+ cls_node = find_subexp_node (dfa, nodes, subexp_num,
+ OP_CLOSE_SUBEXP);
if (cls_node == -1)
continue; /* No. */
if (sub_top->path == NULL)
@@ -2770,7 +2820,8 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
in the current context? */
err = check_arrival (mctx, sub_top->path, sub_top->node,
- sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
+ sub_top->str_idx, cls_node, sl_str,
+ OP_CLOSE_SUBEXP);
if (err == REG_NOMATCH)
continue;
if (BE (err != REG_NOERROR, 0))
@@ -2794,17 +2845,16 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
and SUB_LAST. */
static reg_errcode_t
-get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
- re_match_context_t *mctx;
- const re_sub_match_top_t *sub_top;
- re_sub_match_last_t *sub_last;
- int bkref_node, bkref_str;
+internal_function
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
{
reg_errcode_t err;
int to_idx;
/* Can the subexpression arrive the back reference? */
err = check_arrival (mctx, &sub_last->path, sub_last->node,
- sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
+ sub_last->str_idx, bkref_node, bkref_str,
+ OP_OPEN_SUBEXP);
if (err != REG_NOERROR)
return err;
err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
@@ -2824,10 +2874,9 @@ get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
E.g. RE: (a){2} */
static int
-find_subexp_node (dfa, nodes, subexp_idx, type)
- const re_dfa_t *dfa;
- const re_node_set *nodes;
- int subexp_idx, type;
+internal_function
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+ int subexp_idx, int type)
{
int cls_idx;
for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
@@ -2847,14 +2896,12 @@ find_subexp_node (dfa, nodes, subexp_idx, type)
Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
static reg_errcode_t
-check_arrival (mctx, path, top_node, top_str, last_node, last_str,
- type)
- re_match_context_t *mctx;
- state_array_t *path;
- int top_node, top_str, last_node, last_str, type;
+internal_function __attribute_warn_unused_result__
+check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+ int top_str, int last_node, int last_str, int type)
{
- re_dfa_t *const dfa = mctx->dfa;
- reg_errcode_t err;
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err = REG_NOERROR;
int subexp_num, backup_cur_idx, str_idx, null_cnt;
re_dfastate_t *cur_state = NULL;
re_node_set *cur_nodes, next_nodes;
@@ -2869,7 +2916,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
int old_alloc = path->alloc;
path->alloc += last_str + mctx->max_mb_elem_len + 1;
new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
- if (new_array == NULL)
+ if (BE (new_array == NULL, 0))
{
path->alloc = old_alloc;
return REG_ESPACE;
@@ -2879,7 +2926,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
}
- str_idx = path->next_idx == 0 ? top_str : path->next_idx;
+ str_idx = path->next_idx ?: top_str;
/* Temporarily modify MCTX. */
backup_state_log = mctx->state_log;
@@ -2907,7 +2954,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
if (cur_state && cur_state->has_backref)
{
err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
- if (BE ( err != REG_NOERROR, 0))
+ if (BE (err != REG_NOERROR, 0))
return err;
}
else
@@ -2919,7 +2966,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
{
err = expand_bkref_cache (mctx, &next_nodes, str_idx,
subexp_num, type);
- if (BE ( err != REG_NOERROR, 0))
+ if (BE (err != REG_NOERROR, 0))
{
re_node_set_free (&next_nodes);
return err;
@@ -2950,7 +2997,8 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
if (cur_state)
{
err = check_arrival_add_next_nodes (mctx, str_idx,
- &cur_state->non_eps_nodes, &next_nodes);
+ &cur_state->non_eps_nodes,
+ &next_nodes);
if (BE (err != REG_NOERROR, 0))
{
re_node_set_free (&next_nodes);
@@ -2968,7 +3016,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
}
err = expand_bkref_cache (mctx, &next_nodes, str_idx,
subexp_num, type);
- if (BE ( err != REG_NOERROR, 0))
+ if (BE (err != REG_NOERROR, 0))
{
re_node_set_free (&next_nodes);
return err;
@@ -3009,15 +3057,14 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
Can't we unify them? */
static reg_errcode_t
-check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
- re_match_context_t *mctx;
- int str_idx;
- re_node_set *cur_nodes, *next_nodes;
+internal_function __attribute_warn_unused_result__
+check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+ re_node_set *cur_nodes, re_node_set *next_nodes)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int result;
int cur_idx;
- reg_errcode_t err;
+ reg_errcode_t err = REG_NOERROR;
re_node_set union_set;
re_node_set_init_empty (&union_set);
for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
@@ -3089,10 +3136,9 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
*/
static reg_errcode_t
-check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
- re_dfa_t *dfa;
- re_node_set *cur_nodes;
- int ex_subexp, type;
+internal_function
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+ int ex_subexp, int type)
{
reg_errcode_t err;
int idx, outside_node;
@@ -3109,7 +3155,7 @@ check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
for (idx = 0; idx < cur_nodes->nelem; ++idx)
{
int cur_node = cur_nodes->elems[idx];
- re_node_set *eclosure = dfa->eclosures + cur_node;
+ const re_node_set *eclosure = dfa->eclosures + cur_node;
outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
if (outside_node == -1)
{
@@ -3143,10 +3189,9 @@ check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
problematic append it to DST_NODES. */
static reg_errcode_t
-check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
- re_dfa_t *dfa;
- int target, ex_subexp, type;
- re_node_set *dst_nodes;
+internal_function __attribute_warn_unused_result__
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+ int target, int ex_subexp, int type)
{
int cur_node;
for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
@@ -3188,13 +3233,11 @@ check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
in MCTX->BKREF_ENTS. */
static reg_errcode_t
-expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
- type)
- re_match_context_t *mctx;
- int cur_str, subexp_num, type;
- re_node_set *cur_nodes;
+internal_function __attribute_warn_unused_result__
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+ int cur_str, int subexp_num, int type)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
struct re_backref_cache_entry *ent;
@@ -3279,39 +3322,42 @@ expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
Return 1 if succeeded, otherwise return 0. */
static int
-build_trtable (dfa, state)
- re_dfa_t *dfa;
- re_dfastate_t *state;
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
{
reg_errcode_t err;
int i, j, ch, need_word_trtable = 0;
- unsigned int elem, mask;
- int dests_node_malloced = 0, dest_states_malloced = 0;
+ bitset_word_t elem, mask;
+ bool dests_node_malloced = false;
+ bool dest_states_malloced = false;
int ndests; /* Number of the destination states from `state'. */
re_dfastate_t **trtable;
re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
re_node_set follows, *dests_node;
- bitset *dests_ch;
- bitset acceptable;
+ bitset_t *dests_ch;
+ bitset_t acceptable;
+
+ struct dests_alloc
+ {
+ re_node_set dests_node[SBC_MAX];
+ bitset_t dests_ch[SBC_MAX];
+ } *dests_alloc;
/* We build DFA states which corresponds to the destination nodes
from `state'. `dests_node[i]' represents the nodes which i-th
destination state contains, and `dests_ch[i]' represents the
characters which i-th destination state accepts. */
-#ifdef _LIBC
- if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
- dests_node = (re_node_set *)
- alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ if (__libc_use_alloca (sizeof (struct dests_alloc)))
+ dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
else
-#endif
{
- dests_node = (re_node_set *)
- malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
- if (BE (dests_node == NULL, 0))
+ dests_alloc = re_malloc (struct dests_alloc, 1);
+ if (BE (dests_alloc == NULL, 0))
return 0;
- dests_node_malloced = 1;
+ dests_node_malloced = true;
}
- dests_ch = (bitset *) (dests_node + SBC_MAX);
+ dests_node = dests_alloc->dests_node;
+ dests_ch = dests_alloc->dests_ch;
/* Initialize transition table. */
state->word_trtable = state->trtable = NULL;
@@ -3322,12 +3368,14 @@ build_trtable (dfa, state)
if (BE (ndests <= 0, 0))
{
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
/* Return 0 in case of an error, 1 otherwise. */
if (ndests == 0)
{
state->trtable = (re_dfastate_t **)
calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (BE (state->trtable == NULL, 0))
+ return 0;
return 1;
}
return 0;
@@ -3337,13 +3385,18 @@ build_trtable (dfa, state)
if (BE (err != REG_NOERROR, 0))
goto out_free;
-#ifdef _LIBC
- if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
+ /* Avoid arithmetic overflow in size calculation. */
+ if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
+ / (3 * sizeof (re_dfastate_t *)))
+ < ndests),
+ 0))
+ goto out_free;
+
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+ ndests * 3 * sizeof (re_dfastate_t *)))
dest_states = (re_dfastate_t **)
alloca (ndests * 3 * sizeof (re_dfastate_t *));
else
-#endif
{
dest_states = (re_dfastate_t **)
malloc (ndests * 3 * sizeof (re_dfastate_t *));
@@ -3356,10 +3409,10 @@ out_free:
for (i = 0; i < ndests; ++i)
re_node_set_free (dests_node + i);
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
return 0;
}
- dest_states_malloced = 1;
+ dest_states_malloced = true;
}
dest_states_word = dest_states + ndests;
dest_states_nl = dest_states_word + ndests;
@@ -3421,8 +3474,8 @@ out_free:
goto out_free;
/* For all characters ch...: */
- for (i = 0; i < BITSET_UINTS; ++i)
- for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
elem;
mask <<= 1, elem >>= 1, ++ch)
if (BE (elem & 1, 0))
@@ -3452,8 +3505,8 @@ out_free:
goto out_free;
/* For all characters ch...: */
- for (i = 0; i < BITSET_UINTS; ++i)
- for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
elem;
mask <<= 1, elem >>= 1, ++ch)
if (BE (elem & 1, 0))
@@ -3494,7 +3547,7 @@ out_free:
re_node_set_free (dests_node + i);
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
return 1;
}
@@ -3505,17 +3558,15 @@ out_free:
to DEST_CH[i]. This function returns the number of destinations. */
static int
-group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
- re_dfa_t *dfa;
- const re_dfastate_t *state;
- re_node_set *dests_node;
- bitset *dests_ch;
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+ re_node_set *dests_node, bitset_t *dests_ch)
{
reg_errcode_t err;
int result;
int i, j, k;
int ndests; /* Number of the destinations from `state'. */
- bitset accepts; /* Characters a node can accept. */
+ bitset_t accepts; /* Characters a node can accept. */
const re_node_set *cur_nodes = &state->nodes;
bitset_empty (accepts);
ndests = 0;
@@ -3549,13 +3600,13 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
}
#ifdef RE_ENABLE_I18N
else if (type == OP_UTF8_PERIOD)
- {
- memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
+ {
+ memset (accepts, '\xff', sizeof (bitset_t) / 2);
if (!(dfa->syntax & RE_DOT_NEWLINE))
bitset_clear (accepts, '\n');
if (dfa->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
- }
+ }
#endif
else
continue;
@@ -3566,7 +3617,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
{
if (constraint & NEXT_NEWLINE_CONSTRAINT)
{
- int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+ bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
bitset_empty (accepts);
if (accepts_newline)
bitset_set (accepts, NEWLINE_CHAR);
@@ -3581,7 +3632,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
if (constraint & NEXT_WORD_CONSTRAINT)
{
- unsigned int any_set = 0;
+ bitset_word_t any_set = 0;
if (type == CHARACTER && !node->word_char)
{
bitset_empty (accepts);
@@ -3589,18 +3640,18 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
}
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
else
#endif
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= dfa->word_char[j]);
if (!any_set)
continue;
}
if (constraint & NEXT_NOTWORD_CONSTRAINT)
{
- unsigned int any_set = 0;
+ bitset_word_t any_set = 0;
if (type == CHARACTER && node->word_char)
{
bitset_empty (accepts);
@@ -3608,11 +3659,11 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
}
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
else
#endif
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= ~dfa->word_char[j]);
if (!any_set)
continue;
@@ -3623,10 +3674,10 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
state. Above, we make sure that accepts is not empty. */
for (j = 0; j < ndests; ++j)
{
- bitset intersec; /* Intersection sets, see below. */
- bitset remains;
+ bitset_t intersec; /* Intersection sets, see below. */
+ bitset_t remains;
/* Flags, see below. */
- int has_intersec, not_subset, not_consumed;
+ bitset_word_t has_intersec, not_subset, not_consumed;
/* Optimization, skip if this state doesn't accept the character. */
if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
@@ -3634,7 +3685,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
/* Enumerate the intersection set of this state and `accepts'. */
has_intersec = 0;
- for (k = 0; k < BITSET_UINTS; ++k)
+ for (k = 0; k < BITSET_WORDS; ++k)
has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
/* And skip if the intersection set is empty. */
if (!has_intersec)
@@ -3642,7 +3693,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
/* Then check if this state is a subset of `accepts'. */
not_subset = not_consumed = 0;
- for (k = 0; k < BITSET_UINTS; ++k)
+ for (k = 0; k < BITSET_WORDS; ++k)
{
not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
@@ -3697,10 +3748,9 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
can only accept one byte. */
static int
-check_node_accept_bytes (dfa, node_idx, input, str_idx)
- re_dfa_t *dfa;
- int node_idx, str_idx;
- const re_string_t *input;
+internal_function
+check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int str_idx)
{
const re_token_t *node = dfa->nodes + node_idx;
int char_len, elem_len;
@@ -3761,7 +3811,7 @@ check_node_accept_bytes (dfa, node_idx, input, str_idx)
if (node->type == OP_PERIOD)
{
if (char_len <= 1)
- return 0;
+ return 0;
/* FIXME: I don't think this if is needed, as both '\n'
and '\0' are char_len == 1. */
/* '.' accepts any one character except the following two cases. */
@@ -3816,7 +3866,6 @@ check_node_accept_bytes (dfa, node_idx, input, str_idx)
const int32_t *table, *indirect;
const unsigned char *weights, *extra;
const char *collseqwc;
- int32_t idx;
/* This #include defines a local function! */
# include <locale/weight.h>
@@ -3874,15 +3923,20 @@ check_node_accept_bytes (dfa, node_idx, input, str_idx)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
- idx = findidx (&cp);
+ int32_t idx = findidx (&cp, elem_len);
if (idx > 0)
for (i = 0; i < cset->nequiv_classes; ++i)
{
int32_t equiv_class_idx = cset->equiv_classes[i];
- size_t weight_len = weights[idx];
- if (weight_len == weights[equiv_class_idx])
+ size_t weight_len = weights[idx & 0xffffff];
+ if (weight_len == weights[equiv_class_idx & 0xffffff]
+ && (idx >> 24) == (equiv_class_idx >> 24))
{
int cnt = 0;
+
+ idx &= 0xffffff;
+ equiv_class_idx &= 0xffffff;
+
while (cnt <= weight_len
&& (weights[equiv_class_idx + 1 + cnt]
== weights[idx + 1 + cnt]))
@@ -3934,9 +3988,8 @@ check_node_accept_bytes (dfa, node_idx, input, str_idx)
# ifdef _LIBC
static unsigned int
-find_collation_sequence_value (mbs, mbs_len)
- const unsigned char *mbs;
- size_t mbs_len;
+internal_function
+find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
{
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules == 0)
@@ -3981,7 +4034,7 @@ find_collation_sequence_value (mbs, mbs_len)
/* Skip the collation sequence value. */
idx += sizeof (uint32_t);
/* Skip the wide char sequence of the collating element. */
- idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+ idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
/* If we found the entry, return the sequence value. */
if (found)
return *(uint32_t *) (extra + idx);
@@ -3998,10 +4051,9 @@ find_collation_sequence_value (mbs, mbs_len)
byte of the INPUT. */
static int
-check_node_accept (mctx, node, idx)
- const re_match_context_t *mctx;
- const re_token_t *node;
- int idx;
+internal_function
+check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
+ int idx)
{
unsigned char ch;
ch = re_string_byte_at (&mctx->input, idx);
@@ -4009,18 +4061,18 @@ check_node_accept (mctx, node, idx)
{
case CHARACTER:
if (node->opr.c != ch)
- return 0;
+ return 0;
break;
case SIMPLE_BRACKET:
if (!bitset_contain (node->opr.sbcset, ch))
- return 0;
+ return 0;
break;
#ifdef RE_ENABLE_I18N
case OP_UTF8_PERIOD:
if (ch >= 0x80)
- return 0;
+ return 0;
/* FALLTHROUGH */
#endif
case OP_PERIOD:
@@ -4049,14 +4101,18 @@ check_node_accept (mctx, node, idx)
/* Extend the buffers, if the buffers have run out. */
static reg_errcode_t
-extend_buffers (mctx)
- re_match_context_t *mctx;
+internal_function __attribute_warn_unused_result__
+extend_buffers (re_match_context_t *mctx)
{
reg_errcode_t ret;
re_string_t *pstr = &mctx->input;
+ /* Avoid overflow. */
+ if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
+ return REG_ESPACE;
+
/* Double the lengths of the buffers. */
- ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+ ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2));
if (BE (ret != REG_NOERROR, 0))
return ret;
@@ -4108,9 +4164,8 @@ extend_buffers (mctx)
/* Initialize MCTX. */
static reg_errcode_t
-match_ctx_init (mctx, eflags, n)
- re_match_context_t *mctx;
- int eflags, n;
+internal_function __attribute_warn_unused_result__
+match_ctx_init (re_match_context_t *mctx, int eflags, int n)
{
mctx->eflags = eflags;
mctx->match_last = -1;
@@ -4137,8 +4192,8 @@ match_ctx_init (mctx, eflags, n)
of the input, or changes the input string. */
static void
-match_ctx_clean (mctx)
- re_match_context_t *mctx;
+internal_function
+match_ctx_clean (re_match_context_t *mctx)
{
int st_idx;
for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
@@ -4167,8 +4222,8 @@ match_ctx_clean (mctx)
/* Free all the memory associated with MCTX. */
static void
-match_ctx_free (mctx)
- re_match_context_t *mctx;
+internal_function
+match_ctx_free (re_match_context_t *mctx)
{
/* First, free all the memory associated with MCTX->SUB_TOPS. */
match_ctx_clean (mctx);
@@ -4182,9 +4237,9 @@ match_ctx_free (mctx)
*/
static reg_errcode_t
-match_ctx_add_entry (mctx, node, str_idx, from, to)
- re_match_context_t *mctx;
- int node, str_idx, from, to;
+internal_function __attribute_warn_unused_result__
+match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
+ int to)
{
if (mctx->nbkref_ents >= mctx->abkref_ents)
{
@@ -4231,9 +4286,8 @@ match_ctx_add_entry (mctx, node, str_idx, from, to)
found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
static int
-search_cur_bkref_entry (mctx, str_idx)
- re_match_context_t *mctx;
- int str_idx;
+internal_function
+search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
{
int left, right, mid, last;
last = right = mctx->nbkref_ents;
@@ -4255,9 +4309,8 @@ search_cur_bkref_entry (mctx, str_idx)
at STR_IDX. */
static reg_errcode_t
-match_ctx_add_subtop (mctx, node, str_idx)
- re_match_context_t *mctx;
- int node, str_idx;
+internal_function __attribute_warn_unused_result__
+match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
{
#ifdef DEBUG
assert (mctx->sub_tops != NULL);
@@ -4286,9 +4339,8 @@ match_ctx_add_subtop (mctx, node, str_idx)
at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
static re_sub_match_last_t *
-match_ctx_add_sublast (subtop, node, str_idx)
- re_sub_match_top_t *subtop;
- int node, str_idx;
+internal_function
+match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
{
re_sub_match_last_t *new_entry;
if (BE (subtop->nlasts == subtop->alasts, 0))
@@ -4314,10 +4366,9 @@ match_ctx_add_sublast (subtop, node, str_idx)
}
static void
-sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx)
- re_sift_context_t *sctx;
- re_dfastate_t **sifted_sts, **limited_sts;
- int last_node, last_str_idx;
+internal_function
+sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, int last_node, int last_str_idx)
{
sctx->sifted_states = sifted_sts;
sctx->limited_states = limited_sts;