summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/.deps/regcomp.Plo1
-rw-r--r--lib/.deps/regerror.Plo1
-rw-r--r--lib/.deps/regexec.Plo1
-rw-r--r--lib/.deps/tre-ast.Plo65
-rw-r--r--lib/.deps/tre-compile.Plo78
-rw-r--r--lib/.deps/tre-match-approx.Plo1
-rw-r--r--lib/.deps/tre-match-backtrack.Plo70
-rw-r--r--lib/.deps/tre-match-parallel.Plo68
-rw-r--r--lib/.deps/tre-mem.Plo65
-rw-r--r--lib/.deps/tre-parse.Plo1
-rw-r--r--lib/.deps/tre-stack.Plo1
-rw-r--r--lib/Makefile16
-rw-r--r--lib/Makefile.am34
-rw-r--r--lib/Makefile.in595
-rw-r--r--lib/README69
-rw-r--r--lib/regcomp.c129
-rw-r--r--lib/regerror.c83
-rw-r--r--lib/regex.h38
-rw-r--r--lib/regexec.c348
-rw-r--r--lib/stamp-h21
-rw-r--r--lib/tre-ast.c226
-rw-r--r--lib/tre-ast.h128
-rw-r--r--lib/tre-compile.c2296
-rw-r--r--lib/tre-compile.h27
-rw-r--r--lib/tre-config.h44
-rw-r--r--lib/tre-config.h.in43
-rw-r--r--lib/tre-internal.h287
-rw-r--r--lib/tre-match-approx.c812
-rw-r--r--lib/tre-match-backtrack.c666
-rw-r--r--lib/tre-match-parallel.c501
-rw-r--r--lib/tre-match-utils.h215
-rw-r--r--lib/tre-mem.c155
-rw-r--r--lib/tre-mem.h66
-rw-r--r--lib/tre-parse.c1739
-rw-r--r--lib/tre-parse.h50
-rw-r--r--lib/tre-stack.c125
-rw-r--r--lib/tre-stack.h76
-rw-r--r--lib/tre.h276
-rw-r--r--lib/xmalloc.c351
-rw-r--r--lib/xmalloc.h77
40 files changed, 9825 insertions, 0 deletions
diff --git a/lib/.deps/regcomp.Plo b/lib/.deps/regcomp.Plo
new file mode 100644
index 0000000000000..9ce06a81ea45b
--- /dev/null
+++ b/lib/.deps/regcomp.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/lib/.deps/regerror.Plo b/lib/.deps/regerror.Plo
new file mode 100644
index 0000000000000..9ce06a81ea45b
--- /dev/null
+++ b/lib/.deps/regerror.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/lib/.deps/regexec.Plo b/lib/.deps/regexec.Plo
new file mode 100644
index 0000000000000..9ce06a81ea45b
--- /dev/null
+++ b/lib/.deps/regexec.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/lib/.deps/tre-ast.Plo b/lib/.deps/tre-ast.Plo
new file mode 100644
index 0000000000000..7f5464701b426
--- /dev/null
+++ b/lib/.deps/tre-ast.Plo
@@ -0,0 +1,65 @@
+tre-ast.lo .libs/tre-ast.o: tre-ast.c ../config.h /usr/include/assert.h \
+ /usr/include/sys/cdefs.h tre-ast.h tre-mem.h /usr/include/stdlib.h \
+ /usr/include/sys/_null.h /usr/include/sys/_types.h \
+ /usr/include/machine/_types.h tre-internal.h /usr/include/wchar.h \
+ /usr/include/machine/_limits.h /usr/include/_ctype.h \
+ /usr/include/runetype.h /usr/include/wctype.h /usr/include/ctype.h \
+ tre.h tre-config.h /usr/include/sys/types.h \
+ /usr/include/machine/endian.h /usr/include/sys/_pthreadtypes.h \
+ /usr/include/sys/select.h /usr/include/sys/_sigset.h \
+ /usr/include/sys/_timeval.h /usr/include/sys/timespec.h \
+ /usr/include/sys/_timespec.h tre-compile.h
+
+../config.h:
+
+/usr/include/assert.h:
+
+/usr/include/sys/cdefs.h:
+
+tre-ast.h:
+
+tre-mem.h:
+
+/usr/include/stdlib.h:
+
+/usr/include/sys/_null.h:
+
+/usr/include/sys/_types.h:
+
+/usr/include/machine/_types.h:
+
+tre-internal.h:
+
+/usr/include/wchar.h:
+
+/usr/include/machine/_limits.h:
+
+/usr/include/_ctype.h:
+
+/usr/include/runetype.h:
+
+/usr/include/wctype.h:
+
+/usr/include/ctype.h:
+
+tre.h:
+
+tre-config.h:
+
+/usr/include/sys/types.h:
+
+/usr/include/machine/endian.h:
+
+/usr/include/sys/_pthreadtypes.h:
+
+/usr/include/sys/select.h:
+
+/usr/include/sys/_sigset.h:
+
+/usr/include/sys/_timeval.h:
+
+/usr/include/sys/timespec.h:
+
+/usr/include/sys/_timespec.h:
+
+tre-compile.h:
diff --git a/lib/.deps/tre-compile.Plo b/lib/.deps/tre-compile.Plo
new file mode 100644
index 0000000000000..977d61b812e0c
--- /dev/null
+++ b/lib/.deps/tre-compile.Plo
@@ -0,0 +1,78 @@
+tre-compile.lo .libs/tre-compile.o: tre-compile.c ../config.h \
+ /usr/include/stdio.h /usr/include/sys/cdefs.h /usr/include/sys/_null.h \
+ /usr/include/sys/_types.h /usr/include/machine/_types.h \
+ /usr/include/assert.h /usr/include/string.h /usr/include/strings.h \
+ tre-internal.h /usr/include/wchar.h /usr/include/machine/_limits.h \
+ /usr/include/_ctype.h /usr/include/runetype.h /usr/include/wctype.h \
+ /usr/include/ctype.h tre.h tre-config.h /usr/include/sys/types.h \
+ /usr/include/machine/endian.h /usr/include/sys/_pthreadtypes.h \
+ /usr/include/sys/select.h /usr/include/sys/_sigset.h \
+ /usr/include/sys/_timeval.h /usr/include/sys/timespec.h \
+ /usr/include/sys/_timespec.h tre-mem.h /usr/include/stdlib.h \
+ tre-stack.h tre-ast.h tre-compile.h tre-parse.h xmalloc.h
+
+../config.h:
+
+/usr/include/stdio.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/sys/_null.h:
+
+/usr/include/sys/_types.h:
+
+/usr/include/machine/_types.h:
+
+/usr/include/assert.h:
+
+/usr/include/string.h:
+
+/usr/include/strings.h:
+
+tre-internal.h:
+
+/usr/include/wchar.h:
+
+/usr/include/machine/_limits.h:
+
+/usr/include/_ctype.h:
+
+/usr/include/runetype.h:
+
+/usr/include/wctype.h:
+
+/usr/include/ctype.h:
+
+tre.h:
+
+tre-config.h:
+
+/usr/include/sys/types.h:
+
+/usr/include/machine/endian.h:
+
+/usr/include/sys/_pthreadtypes.h:
+
+/usr/include/sys/select.h:
+
+/usr/include/sys/_sigset.h:
+
+/usr/include/sys/_timeval.h:
+
+/usr/include/sys/timespec.h:
+
+/usr/include/sys/_timespec.h:
+
+tre-mem.h:
+
+/usr/include/stdlib.h:
+
+tre-stack.h:
+
+tre-ast.h:
+
+tre-compile.h:
+
+tre-parse.h:
+
+xmalloc.h:
diff --git a/lib/.deps/tre-match-approx.Plo b/lib/.deps/tre-match-approx.Plo
new file mode 100644
index 0000000000000..9ce06a81ea45b
--- /dev/null
+++ b/lib/.deps/tre-match-approx.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/lib/.deps/tre-match-backtrack.Plo b/lib/.deps/tre-match-backtrack.Plo
new file mode 100644
index 0000000000000..add77982701c8
--- /dev/null
+++ b/lib/.deps/tre-match-backtrack.Plo
@@ -0,0 +1,70 @@
+tre-match-backtrack.lo .libs/tre-match-backtrack.o: \
+ tre-match-backtrack.c ../config.h /usr/include/assert.h \
+ /usr/include/sys/cdefs.h /usr/include/stdlib.h /usr/include/sys/_null.h \
+ /usr/include/sys/_types.h /usr/include/machine/_types.h \
+ /usr/include/string.h /usr/include/strings.h /usr/include/wchar.h \
+ /usr/include/machine/_limits.h /usr/include/_ctype.h \
+ /usr/include/runetype.h /usr/include/wctype.h tre-internal.h \
+ /usr/include/ctype.h tre.h tre-config.h /usr/include/sys/types.h \
+ /usr/include/machine/endian.h /usr/include/sys/_pthreadtypes.h \
+ /usr/include/sys/select.h /usr/include/sys/_sigset.h \
+ /usr/include/sys/_timeval.h /usr/include/sys/timespec.h \
+ /usr/include/sys/_timespec.h tre-mem.h tre-match-utils.h xmalloc.h
+
+../config.h:
+
+/usr/include/assert.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/stdlib.h:
+
+/usr/include/sys/_null.h:
+
+/usr/include/sys/_types.h:
+
+/usr/include/machine/_types.h:
+
+/usr/include/string.h:
+
+/usr/include/strings.h:
+
+/usr/include/wchar.h:
+
+/usr/include/machine/_limits.h:
+
+/usr/include/_ctype.h:
+
+/usr/include/runetype.h:
+
+/usr/include/wctype.h:
+
+tre-internal.h:
+
+/usr/include/ctype.h:
+
+tre.h:
+
+tre-config.h:
+
+/usr/include/sys/types.h:
+
+/usr/include/machine/endian.h:
+
+/usr/include/sys/_pthreadtypes.h:
+
+/usr/include/sys/select.h:
+
+/usr/include/sys/_sigset.h:
+
+/usr/include/sys/_timeval.h:
+
+/usr/include/sys/timespec.h:
+
+/usr/include/sys/_timespec.h:
+
+tre-mem.h:
+
+tre-match-utils.h:
+
+xmalloc.h:
diff --git a/lib/.deps/tre-match-parallel.Plo b/lib/.deps/tre-match-parallel.Plo
new file mode 100644
index 0000000000000..ed9d584edbcf7
--- /dev/null
+++ b/lib/.deps/tre-match-parallel.Plo
@@ -0,0 +1,68 @@
+tre-match-parallel.lo .libs/tre-match-parallel.o: tre-match-parallel.c \
+ ../config.h /usr/include/assert.h /usr/include/sys/cdefs.h \
+ /usr/include/stdlib.h /usr/include/sys/_null.h \
+ /usr/include/sys/_types.h /usr/include/machine/_types.h \
+ /usr/include/string.h /usr/include/strings.h /usr/include/wchar.h \
+ /usr/include/machine/_limits.h /usr/include/_ctype.h \
+ /usr/include/runetype.h /usr/include/wctype.h tre-internal.h \
+ /usr/include/ctype.h tre.h tre-config.h /usr/include/sys/types.h \
+ /usr/include/machine/endian.h /usr/include/sys/_pthreadtypes.h \
+ /usr/include/sys/select.h /usr/include/sys/_sigset.h \
+ /usr/include/sys/_timeval.h /usr/include/sys/timespec.h \
+ /usr/include/sys/_timespec.h tre-match-utils.h xmalloc.h
+
+../config.h:
+
+/usr/include/assert.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/stdlib.h:
+
+/usr/include/sys/_null.h:
+
+/usr/include/sys/_types.h:
+
+/usr/include/machine/_types.h:
+
+/usr/include/string.h:
+
+/usr/include/strings.h:
+
+/usr/include/wchar.h:
+
+/usr/include/machine/_limits.h:
+
+/usr/include/_ctype.h:
+
+/usr/include/runetype.h:
+
+/usr/include/wctype.h:
+
+tre-internal.h:
+
+/usr/include/ctype.h:
+
+tre.h:
+
+tre-config.h:
+
+/usr/include/sys/types.h:
+
+/usr/include/machine/endian.h:
+
+/usr/include/sys/_pthreadtypes.h:
+
+/usr/include/sys/select.h:
+
+/usr/include/sys/_sigset.h:
+
+/usr/include/sys/_timeval.h:
+
+/usr/include/sys/timespec.h:
+
+/usr/include/sys/_timespec.h:
+
+tre-match-utils.h:
+
+xmalloc.h:
diff --git a/lib/.deps/tre-mem.Plo b/lib/.deps/tre-mem.Plo
new file mode 100644
index 0000000000000..f8616fc6e8b9e
--- /dev/null
+++ b/lib/.deps/tre-mem.Plo
@@ -0,0 +1,65 @@
+tre-mem.lo .libs/tre-mem.o: tre-mem.c ../config.h /usr/include/stdlib.h \
+ /usr/include/sys/cdefs.h /usr/include/sys/_null.h \
+ /usr/include/sys/_types.h /usr/include/machine/_types.h \
+ /usr/include/string.h /usr/include/strings.h tre-internal.h \
+ /usr/include/wchar.h /usr/include/machine/_limits.h \
+ /usr/include/_ctype.h /usr/include/runetype.h /usr/include/wctype.h \
+ /usr/include/ctype.h tre.h tre-config.h /usr/include/sys/types.h \
+ /usr/include/machine/endian.h /usr/include/sys/_pthreadtypes.h \
+ /usr/include/sys/select.h /usr/include/sys/_sigset.h \
+ /usr/include/sys/_timeval.h /usr/include/sys/timespec.h \
+ /usr/include/sys/_timespec.h tre-mem.h xmalloc.h
+
+../config.h:
+
+/usr/include/stdlib.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/sys/_null.h:
+
+/usr/include/sys/_types.h:
+
+/usr/include/machine/_types.h:
+
+/usr/include/string.h:
+
+/usr/include/strings.h:
+
+tre-internal.h:
+
+/usr/include/wchar.h:
+
+/usr/include/machine/_limits.h:
+
+/usr/include/_ctype.h:
+
+/usr/include/runetype.h:
+
+/usr/include/wctype.h:
+
+/usr/include/ctype.h:
+
+tre.h:
+
+tre-config.h:
+
+/usr/include/sys/types.h:
+
+/usr/include/machine/endian.h:
+
+/usr/include/sys/_pthreadtypes.h:
+
+/usr/include/sys/select.h:
+
+/usr/include/sys/_sigset.h:
+
+/usr/include/sys/_timeval.h:
+
+/usr/include/sys/timespec.h:
+
+/usr/include/sys/_timespec.h:
+
+tre-mem.h:
+
+xmalloc.h:
diff --git a/lib/.deps/tre-parse.Plo b/lib/.deps/tre-parse.Plo
new file mode 100644
index 0000000000000..9ce06a81ea45b
--- /dev/null
+++ b/lib/.deps/tre-parse.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/lib/.deps/tre-stack.Plo b/lib/.deps/tre-stack.Plo
new file mode 100644
index 0000000000000..9ce06a81ea45b
--- /dev/null
+++ b/lib/.deps/tre-stack.Plo
@@ -0,0 +1 @@
+# dummy
diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 0000000000000..b5244fee998f7
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,16 @@
+#
+# $FreeBSD$
+#
+
+LIB= regex
+SHLIB_MAJOR= 1
+NO_MAN= yes
+
+CFLAGS+= -DHAVE_MBRTOWC -DHAVE_MBSTATE_T -DHAVE_WCTYPE_H \
+ -DTRE_REGEX_T_FIELD=value
+
+SRCS= regcomp.c regerror.c regexec.c tre-ast.c tre-compile.c \
+ tre-match-approx.c tre-match-backtrack.c tre-match-parallel.c \
+ tre-mem.c tre-parse.c tre-stack.c xmalloc.c
+
+.include <bsd.lib.mk>
diff --git a/lib/Makefile.am b/lib/Makefile.am
new file mode 100644
index 0000000000000..2c5b4dfd06b8a
--- /dev/null
+++ b/lib/Makefile.am
@@ -0,0 +1,34 @@
+## Process this file with automake to produce Makefile.in
+
+lib_LTLIBRARIES = libtre.la
+libtre_la_LDFLAGS = -no-undefined -version-info 5:0:0 $(LDFLAGS)
+libtre_la_LIBADD = $(LTLIBINTL)
+
+noinst_HEADERS = \
+ tre-ast.h \
+ tre-compile.h \
+ tre-internal.h \
+ tre-match-utils.h \
+ tre-mem.h \
+ tre-parse.h \
+ tre-stack.h \
+ xmalloc.h
+
+libtre_la_SOURCES = \
+ tre-ast.c \
+ tre-compile.c \
+ tre-match-backtrack.c \
+ tre-match-parallel.c \
+ tre-mem.c \
+ tre-parse.c \
+ tre-stack.c \
+ regcomp.c \
+ regexec.c \
+ regerror.c
+
+if TRE_APPROX
+libtre_la_SOURCES += tre-match-approx.c
+endif TRE_APPROX
+
+dist_pkginclude_HEADERS = tre.h regex.h
+nodist_pkginclude_HEADERS = tre-config.h
diff --git a/lib/Makefile.in b/lib/Makefile.in
new file mode 100644
index 0000000000000..a4fa5b9520071
--- /dev/null
+++ b/lib/Makefile.in
@@ -0,0 +1,595 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@TRE_APPROX_TRUE@am__append_1 = tre-match-approx.c
+subdir = lib
+DIST_COMMON = README $(dist_pkginclude_HEADERS) $(noinst_HEADERS) \
+ $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+ $(srcdir)/tre-config.h.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ac_libtool_tags.m4 \
+ $(top_srcdir)/m4/ax_check_funcs_comp.m4 \
+ $(top_srcdir)/m4/ax_check_sign.m4 \
+ $(top_srcdir)/m4/ax_decl_wchar_max.m4 \
+ $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/iconv.m4 \
+ $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/lib-ld.m4 \
+ $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \
+ $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
+ $(top_srcdir)/m4/progtest.m4 \
+ $(top_srcdir)/m4/vl_prog_cc_warnings.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(SHELL) $(top_srcdir)/utils/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h tre-config.h
+CONFIG_CLEAN_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
+am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgincludedir)" \
+ "$(DESTDIR)$(pkgincludedir)"
+libLTLIBRARIES_INSTALL = $(INSTALL)
+LTLIBRARIES = $(lib_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+libtre_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
+am__libtre_la_SOURCES_DIST = tre-ast.c tre-compile.c \
+ tre-match-backtrack.c tre-match-parallel.c tre-mem.c \
+ tre-parse.c tre-stack.c regcomp.c regexec.c regerror.c \
+ tre-match-approx.c
+@TRE_APPROX_TRUE@am__objects_1 = tre-match-approx.lo
+am_libtre_la_OBJECTS = tre-ast.lo tre-compile.lo \
+ tre-match-backtrack.lo tre-match-parallel.lo tre-mem.lo \
+ tre-parse.lo tre-stack.lo regcomp.lo regexec.lo regerror.lo \
+ $(am__objects_1)
+libtre_la_OBJECTS = $(am_libtre_la_OBJECTS)
+libtre_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libtre_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/utils/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libtre_la_SOURCES)
+DIST_SOURCES = $(am__libtre_la_SOURCES_DIST)
+dist_pkgincludeHEADERS_INSTALL = $(INSTALL_HEADER)
+nodist_pkgincludeHEADERS_INSTALL = $(INSTALL_HEADER)
+HEADERS = $(dist_pkginclude_HEADERS) $(nodist_pkginclude_HEADERS) \
+ $(noinst_HEADERS)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@
+GMSGFMT = @GMSGFMT@
+GMSGFMT_015 = @GMSGFMT_015@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INTLLIBS = @INTLLIBS@
+INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
+LDFLAGS = @LDFLAGS@
+LIBICONV = @LIBICONV@
+LIBINTL = @LIBINTL@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+LTLIBICONV = @LTLIBICONV@
+LTLIBINTL = @LTLIBINTL@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MSGFMT = @MSGFMT@
+MSGFMT_015 = @MSGFMT_015@
+MSGMERGE = @MSGMERGE@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+POSUB = @POSUB@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TRE_VERSION = @TRE_VERSION@
+USE_NLS = @USE_NLS@
+VERSION = @VERSION@
+XGETTEXT = @XGETTEXT@
+XGETTEXT_015 = @XGETTEXT_015@
+XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+lib_LTLIBRARIES = libtre.la
+libtre_la_LDFLAGS = -no-undefined -version-info 5:0:0 $(LDFLAGS)
+libtre_la_LIBADD = $(LTLIBINTL)
+noinst_HEADERS = \
+ tre-ast.h \
+ tre-compile.h \
+ tre-internal.h \
+ tre-match-utils.h \
+ tre-mem.h \
+ tre-parse.h \
+ tre-stack.h \
+ xmalloc.h
+
+libtre_la_SOURCES = tre-ast.c tre-compile.c tre-match-backtrack.c \
+ tre-match-parallel.c tre-mem.c tre-parse.c tre-stack.c \
+ regcomp.c regexec.c regerror.c $(am__append_1)
+dist_pkginclude_HEADERS = tre.h regex.h
+nodist_pkginclude_HEADERS = tre-config.h
+all: tre-config.h
+ $(MAKE) $(AM_MAKEFLAGS) all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+ && exit 0; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign lib/Makefile'; \
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --foreign lib/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+tre-config.h: stamp-h2
+ @if test ! -f $@; then \
+ rm -f stamp-h2; \
+ $(MAKE) $(AM_MAKEFLAGS) stamp-h2; \
+ else :; fi
+
+stamp-h2: $(srcdir)/tre-config.h.in $(top_builddir)/config.status
+ @rm -f stamp-h2
+ cd $(top_builddir) && $(SHELL) ./config.status lib/tre-config.h
+$(srcdir)/tre-config.h.in: $(am__configure_deps)
+ cd $(top_srcdir) && $(AUTOHEADER)
+ rm -f stamp-h2
+ touch $@
+
+distclean-hdr:
+ -rm -f tre-config.h stamp-h2
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f=$(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(libdir)/$$f"; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ p=$(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$p'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$p"; \
+ done
+
+clean-libLTLIBRARIES:
+ -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libtre.la: $(libtre_la_OBJECTS) $(libtre_la_DEPENDENCIES)
+ $(libtre_la_LINK) -rpath $(libdir) $(libtre_la_OBJECTS) $(libtre_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regcomp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regerror.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regexec.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tre-ast.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tre-compile.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tre-match-approx.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tre-match-backtrack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tre-match-parallel.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tre-mem.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tre-parse.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tre-stack.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-dist_pkgincludeHEADERS: $(dist_pkginclude_HEADERS)
+ @$(NORMAL_INSTALL)
+ test -z "$(pkgincludedir)" || $(MKDIR_P) "$(DESTDIR)$(pkgincludedir)"
+ @list='$(dist_pkginclude_HEADERS)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ f=$(am__strip_dir) \
+ echo " $(dist_pkgincludeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(pkgincludedir)/$$f'"; \
+ $(dist_pkgincludeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(pkgincludedir)/$$f"; \
+ done
+
+uninstall-dist_pkgincludeHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(dist_pkginclude_HEADERS)'; for p in $$list; do \
+ f=$(am__strip_dir) \
+ echo " rm -f '$(DESTDIR)$(pkgincludedir)/$$f'"; \
+ rm -f "$(DESTDIR)$(pkgincludedir)/$$f"; \
+ done
+install-nodist_pkgincludeHEADERS: $(nodist_pkginclude_HEADERS)
+ @$(NORMAL_INSTALL)
+ test -z "$(pkgincludedir)" || $(MKDIR_P) "$(DESTDIR)$(pkgincludedir)"
+ @list='$(nodist_pkginclude_HEADERS)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ f=$(am__strip_dir) \
+ echo " $(nodist_pkgincludeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(pkgincludedir)/$$f'"; \
+ $(nodist_pkgincludeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(pkgincludedir)/$$f"; \
+ done
+
+uninstall-nodist_pkgincludeHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(nodist_pkginclude_HEADERS)'; for p in $$list; do \
+ f=$(am__strip_dir) \
+ echo " rm -f '$(DESTDIR)$(pkgincludedir)/$$f'"; \
+ rm -f "$(DESTDIR)$(pkgincludedir)/$$f"; \
+ done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) tre-config.h.in $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) tre-config.h.in $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) tre-config.h.in $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ list='$(SOURCES) $(HEADERS) tre-config.h.in $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(HEADERS) tre-config.h
+installdirs:
+ for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgincludedir)" "$(DESTDIR)$(pkgincludedir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-hdr distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am: install-dist_pkgincludeHEADERS \
+ install-nodist_pkgincludeHEADERS
+
+install-dvi: install-dvi-am
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-dist_pkgincludeHEADERS \
+ uninstall-libLTLIBRARIES uninstall-nodist_pkgincludeHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libLTLIBRARIES clean-libtool ctags distclean \
+ distclean-compile distclean-generic distclean-hdr \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dist_pkgincludeHEADERS install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-libLTLIBRARIES install-man \
+ install-nodist_pkgincludeHEADERS install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-dist_pkgincludeHEADERS \
+ uninstall-libLTLIBRARIES uninstall-nodist_pkgincludeHEADERS
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/lib/README b/lib/README
new file mode 100644
index 0000000000000..f4d4021035f27
--- /dev/null
+++ b/lib/README
@@ -0,0 +1,69 @@
+This code is structured roughly as follows:
+
+xmalloc.c:
+ - Wrappers for the malloc() functions, for error generation and
+ memory leak checking purposes.
+
+tre-mem.c:
+ - A simple and efficient memory allocator.
+
+tre-stack.c:
+ - Implements a simple stack data structure.
+
+tre-ast.c:
+ - Abstract syntax tree (AST) definitions.
+
+tre-parse.c:
+ - Regexp parser. Parses a POSIX regexp (with TRE extensions) into
+ an abstract syntax tree (AST).
+
+tre-compile.c:
+ - Compiles ASTs to ready-to-use regex objects. Comprised of two parts:
+ * Routine to convert an AST to a tagged AST. A tagged AST has
+ appropriate minimized or maximized tags added to keep track of
+ submatches.
+ * Routine to convert tagged ASTs to tagged nondeterministic state
+ machines (TNFAs) without epsilon transitions (transitions on
+ empty strings).
+
+tre-match-parallel.c:
+ - Parallel TNFA matcher.
+ * The matcher basically takes a string and a TNFA and finds the
+ leftmost longest match and submatches in one pass over the input
+ string. Only the beginning of the input string is scanned until
+ a leftmost match and longest match is found.
+ * The matcher cannot handle back references, but the worst case
+ time consumption is O(l) where l is the length of the input
+ string. The space consumption is constant.
+
+tre-match-backtrack.c:
+ - A traditional backtracking matcher.
+ * Like the parallel matcher, takes a string and a TNFA and finds
+ the leftmost longest match and submatches. Portions of the
+ input string may (and usually are) scanned multiple times.
+ * Can handle back references. The worst case time consumption,
+ however, is O(k^l) where k is some constant and l is the length
+ of the input string. The worst case space consumption is O(l).
+
+tre-match-approx.c:
+ - Approximate parallel TNFA matcher.
+ * Finds the leftmost and longest match and submatches in one pass
+ over the input string. The match may contain errors. Each
+ missing, substituted, or extra character in the match increases
+ the cost of the match. A maximum cost for the returned match
+ can be given. The cost of the found match is returned.
+ * Cannot handle back references. The space and time consumption
+ bounds are the same as for the parallel exact matcher, but
+ in general this matcher is slower than the exact matcher.
+
+regcomp.c:
+ - Implementation of the regcomp() family of functions as simple
+ wrappers for tre_compile().
+
+regexec.c:
+ - Implementation of the regexec() family of functions.
+ * The appropriate matcher is dispatched according to the
+ features used in the compiled regex object.
+
+regerror.c:
+ - Implements the regerror() function.
diff --git a/lib/regcomp.c b/lib/regcomp.c
new file mode 100644
index 0000000000000..281b38e81f3cb
--- /dev/null
+++ b/lib/regcomp.c
@@ -0,0 +1,129 @@
+/*
+ tre_regcomp.c - TRE POSIX compatible regex compilation functions.
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "tre.h"
+#include "tre-internal.h"
+#include "xmalloc.h"
+
+int
+tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
+{
+ int ret;
+#if TRE_WCHAR
+ tre_char_t *wregex;
+ size_t wlen;
+
+ wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
+ if (wregex == NULL)
+ return REG_ESPACE;
+
+ /* If the current locale uses the standard single byte encoding of
+ characters, we don't do a multibyte string conversion. If we did,
+ many applications which use the default locale would break since
+ the default "C" locale uses the 7-bit ASCII character set, and
+ all characters with the eighth bit set would be considered invalid. */
+#if TRE_MULTIBYTE
+ if (TRE_MB_CUR_MAX == 1)
+#endif /* TRE_MULTIBYTE */
+ {
+ unsigned int i;
+ const unsigned char *str = (const unsigned char *)regex;
+ tre_char_t *wstr = wregex;
+
+ for (i = 0; i < n; i++)
+ *(wstr++) = *(str++);
+ wlen = n;
+ }
+#if TRE_MULTIBYTE
+ else
+ {
+ int consumed;
+ tre_char_t *wcptr = wregex;
+#ifdef HAVE_MBSTATE_T
+ mbstate_t state;
+ memset(&state, '\0', sizeof(state));
+#endif /* HAVE_MBSTATE_T */
+ while (n > 0)
+ {
+ consumed = tre_mbrtowc(wcptr, regex, n, &state);
+
+ switch (consumed)
+ {
+ case 0:
+ if (*regex == '\0')
+ consumed = 1;
+ else
+ {
+ xfree(wregex);
+ return REG_BADPAT;
+ }
+ break;
+ case -1:
+ DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
+ xfree(wregex);
+ return REG_BADPAT;
+ case -2:
+ /* The last character wasn't complete. Let's not call it a
+ fatal error. */
+ consumed = n;
+ break;
+ }
+ regex += consumed;
+ n -= consumed;
+ wcptr++;
+ }
+ wlen = wcptr - wregex;
+ }
+#endif /* TRE_MULTIBYTE */
+
+ wregex[wlen] = L'\0';
+ ret = tre_compile(preg, wregex, wlen, cflags);
+ xfree(wregex);
+#else /* !TRE_WCHAR */
+ ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags);
+#endif /* !TRE_WCHAR */
+
+ return ret;
+}
+
+int
+tre_regcomp(regex_t *preg, const char *regex, int cflags)
+{
+ return tre_regncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
+}
+
+
+#ifdef TRE_WCHAR
+int
+tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
+{
+ return tre_compile(preg, regex, n, cflags);
+}
+
+int
+tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
+{
+ return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags);
+}
+#endif /* TRE_WCHAR */
+
+void
+tre_regfree(regex_t *preg)
+{
+ tre_free(preg);
+}
+
+/* EOF */
diff --git a/lib/regerror.c b/lib/regerror.c
new file mode 100644
index 0000000000000..701f701ed67fb
--- /dev/null
+++ b/lib/regerror.c
@@ -0,0 +1,83 @@
+/*
+ tre_regerror.c - POSIX tre_regerror() implementation for TRE.
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <string.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+
+#include "tre-internal.h"
+#include "tre.h"
+
+#ifdef HAVE_GETTEXT
+#include <libintl.h>
+#else
+#define dgettext(p, s) s
+#define gettext(s) s
+#endif
+
+#define _(String) dgettext(PACKAGE, String)
+#define gettext_noop(String) String
+
+/* Error message strings for error codes listed in `tre.h'. This list
+ needs to be in sync with the codes listed there, naturally. */
+static const char *tre_error_messages[] =
+ { gettext_noop("No error"), /* REG_OK */
+ gettext_noop("No match"), /* REG_NOMATCH */
+ gettext_noop("Invalid regexp"), /* REG_BADPAT */
+ gettext_noop("Unknown collating element"), /* REG_ECOLLATE */
+ gettext_noop("Unknown character class name"), /* REG_ECTYPE */
+ gettext_noop("Trailing backslash"), /* REG_EESCAPE */
+ gettext_noop("Invalid back reference"), /* REG_ESUBREG */
+ gettext_noop("Missing ']'"), /* REG_EBRACK */
+ gettext_noop("Missing ')'"), /* REG_EPAREN */
+ gettext_noop("Missing '}'"), /* REG_EBRACE */
+ gettext_noop("Invalid contents of {}"), /* REG_BADBR */
+ gettext_noop("Invalid character range"), /* REG_ERANGE */
+ gettext_noop("Out of memory"), /* REG_ESPACE */
+ gettext_noop("Invalid use of repetition operators") /* REG_BADRPT */
+ };
+
+size_t
+tre_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+ const char *err;
+ size_t err_len;
+
+ /*LINTED*/(void)&preg;
+ if (errcode >= 0
+ && errcode < (int)(sizeof(tre_error_messages)
+ / sizeof(*tre_error_messages)))
+ err = gettext(tre_error_messages[errcode]);
+ else
+ err = gettext("Unknown error");
+
+ err_len = strlen(err) + 1;
+ if (errbuf_size > 0 && errbuf != NULL)
+ {
+ if (err_len > errbuf_size)
+ {
+ strncpy(errbuf, err, errbuf_size - 1);
+ errbuf[errbuf_size - 1] = '\0';
+ }
+ else
+ {
+ strcpy(errbuf, err);
+ }
+ }
+ return err_len;
+}
+
+/* EOF */
diff --git a/lib/regex.h b/lib/regex.h
new file mode 100644
index 0000000000000..78034d1c04e40
--- /dev/null
+++ b/lib/regex.h
@@ -0,0 +1,38 @@
+/*
+ regex.h - TRE legacy API
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+ This header is for source level compatibility with old code using
+ the <tre/regex.h> header which defined the TRE API functions without
+ a prefix. New code should include <tre/tre.h> instead.
+
+*/
+
+#ifndef TRE_REXEX_H
+#define TRE_REGEX_H 1
+
+#include "tre.h"
+
+#ifndef TRE_USE_SYSTEM_REGEX_H
+#define regcomp tre_regcomp
+#define regerror tre_regerror
+#define regexec tre_regexec
+#define regfree tre_regfree
+#endif /* TRE_USE_SYSTEM_REGEX_H */
+
+#define regacomp tre_regacomp
+#define regaexec tre_regaexec
+#define regancomp tre_regancomp
+#define reganexec tre_reganexec
+#define regawncomp tre_regawncomp
+#define regawnexec tre_regawnexec
+#define regncomp tre_regncomp
+#define regnexec tre_regnexec
+#define regwcomp tre_regwcomp
+#define regwexec tre_regwexec
+#define regwncomp tre_regwncomp
+#define regwnexec tre_regwnexec
+
+#endif /* TRE_REGEX_H */
diff --git a/lib/regexec.c b/lib/regexec.c
new file mode 100644
index 0000000000000..54779ef4597df
--- /dev/null
+++ b/lib/regexec.c
@@ -0,0 +1,348 @@
+/*
+ tre_regexec.c - TRE POSIX compatible matching functions (and more).
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef TRE_USE_ALLOCA
+/* AIX requires this to be the first thing in the file. */
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+# endif
+# endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include <ctype.h>
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+#include <limits.h>
+
+#include "tre-internal.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+
+/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
+ endpoint values. */
+void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+ const tre_tnfa_t *tnfa, int *tags, int match_eo)
+{
+ tre_submatch_data_t *submatch_data;
+ unsigned int i, j;
+ int *parents;
+
+ i = 0;
+ if (match_eo >= 0 && !(cflags & REG_NOSUB))
+ {
+ /* Construct submatch offsets from the tags. */
+ DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
+ submatch_data = tnfa->submatch_data;
+ while (i < tnfa->num_submatches && i < nmatch)
+ {
+ if (submatch_data[i].so_tag == tnfa->end_tag)
+ pmatch[i].rm_so = match_eo;
+ else
+ pmatch[i].rm_so = tags[submatch_data[i].so_tag];
+
+ if (submatch_data[i].eo_tag == tnfa->end_tag)
+ pmatch[i].rm_eo = match_eo;
+ else
+ pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
+
+ /* If either of the endpoints were not used, this submatch
+ was not part of the match. */
+ if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
+ pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+
+ DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
+ submatch_data[i].so_tag, pmatch[i].rm_so,
+ submatch_data[i].eo_tag, pmatch[i].rm_eo));
+ i++;
+ }
+ /* Reset all submatches that are not within all of their parent
+ submatches. */
+ i = 0;
+ while (i < tnfa->num_submatches && i < nmatch)
+ {
+ if (pmatch[i].rm_eo == -1)
+ assert(pmatch[i].rm_so == -1);
+ assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
+
+ parents = submatch_data[i].parents;
+ if (parents != NULL)
+ for (j = 0; parents[j] >= 0; j++)
+ {
+ DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
+ if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
+ || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
+ pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+ }
+ i++;
+ }
+ }
+
+ while (i < nmatch)
+ {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ i++;
+ }
+}
+
+
+/*
+ Wrapper functions for POSIX compatible regexp matching.
+*/
+
+int
+tre_have_backrefs(const regex_t *preg)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ return tnfa->have_backrefs;
+}
+
+int
+tre_have_approx(const regex_t *preg)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ return tnfa->have_approx;
+}
+
+static int
+tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
+ tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
+ int eflags)
+{
+ reg_errcode_t status;
+ int *tags = NULL, eo;
+ if (tnfa->num_tags > 0 && nmatch > 0)
+ {
+#ifdef TRE_USE_ALLOCA
+ tags = alloca(sizeof(*tags) * tnfa->num_tags);
+#else /* !TRE_USE_ALLOCA */
+ tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+#endif /* !TRE_USE_ALLOCA */
+ if (tags == NULL)
+ return REG_ESPACE;
+ }
+
+ /* Dispatch to the appropriate matcher. */
+ if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER)
+ {
+ /* The regex has back references, use the backtracking matcher. */
+ if (type == STR_USER)
+ {
+ const tre_str_source *source = string;
+ if (source->rewind == NULL || source->compare == NULL)
+ /* The backtracking matcher requires rewind and compare
+ capabilities from the input stream. */
+ return REG_BADPAT;
+ }
+ status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type,
+ tags, eflags, &eo);
+ }
+#ifdef TRE_APPROX
+ else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER)
+ {
+ /* The regex uses approximate matching, use the approximate matcher. */
+ regamatch_t match;
+ regaparams_t params;
+ tre_regaparams_default(&params);
+ params.max_err = 0;
+ params.max_cost = 0;
+ status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
+ &match, params, eflags, &eo);
+ }
+#endif /* TRE_APPROX */
+ else
+ {
+ /* Exact matching, no back references, use the parallel matcher. */
+ status = tre_tnfa_run_parallel(tnfa, string, (int)len, type,
+ tags, eflags, &eo);
+ }
+
+ if (status == REG_OK)
+ /* A match was found, so fill the submatch registers. */
+ tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
+#ifndef TRE_USE_ALLOCA
+ if (tags)
+ xfree(tags);
+#endif /* !TRE_USE_ALLOCA */
+ return status;
+}
+
+int
+tre_regnexec(const regex_t *preg, const char *str, size_t len,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
+
+ return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags);
+}
+
+int
+tre_regexec(const regex_t *preg, const char *str,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+ return tre_regnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags);
+}
+
+
+#ifdef TRE_WCHAR
+
+int
+tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags);
+}
+
+int
+tre_regwexec(const regex_t *preg, const wchar_t *str,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+ return tre_regwnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags);
+}
+
+#endif /* TRE_WCHAR */
+
+int
+tre_reguexec(const regex_t *preg, const tre_str_source *str,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
+}
+
+
+#ifdef TRE_APPROX
+
+/*
+ Wrapper functions for approximate regexp matching.
+*/
+
+static int
+tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len,
+ tre_str_type_t type, regamatch_t *match, regaparams_t params,
+ int eflags)
+{
+ reg_errcode_t status;
+ int *tags = NULL, eo;
+
+ /* If the regexp does not use approximate matching features, the
+ maximum cost is zero, and the approximate matcher isn't forced,
+ use the exact matcher instead. */
+ if (params.max_cost == 0 && !tnfa->have_approx
+ && !(eflags & REG_APPROX_MATCHER))
+ return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch,
+ eflags);
+
+ /* Back references are not supported by the approximate matcher. */
+ if (tnfa->have_backrefs)
+ return REG_BADPAT;
+
+ if (tnfa->num_tags > 0 && match->nmatch > 0)
+ {
+#if TRE_USE_ALLOCA
+ tags = alloca(sizeof(*tags) * tnfa->num_tags);
+#else /* !TRE_USE_ALLOCA */
+ tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+#endif /* !TRE_USE_ALLOCA */
+ if (tags == NULL)
+ return REG_ESPACE;
+ }
+ status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
+ match, params, eflags, &eo);
+ if (status == REG_OK)
+ tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo);
+#ifndef TRE_USE_ALLOCA
+ if (tags)
+ xfree(tags);
+#endif /* !TRE_USE_ALLOCA */
+ return status;
+}
+
+int
+tre_reganexec(const regex_t *preg, const char *str, size_t len,
+ regamatch_t *match, regaparams_t params, int eflags)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
+
+ return tre_match_approx(tnfa, str, len, type, match, params, eflags);
+}
+
+int
+tre_regaexec(const regex_t *preg, const char *str,
+ regamatch_t *match, regaparams_t params, int eflags)
+{
+ return tre_reganexec(preg, str, (unsigned)-1, match, params, eflags);
+}
+
+#ifdef TRE_WCHAR
+
+int
+tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len,
+ regamatch_t *match, regaparams_t params, int eflags)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ return tre_match_approx(tnfa, str, len, STR_WIDE,
+ match, params, eflags);
+}
+
+int
+tre_regawexec(const regex_t *preg, const wchar_t *str,
+ regamatch_t *match, regaparams_t params, int eflags)
+{
+ return tre_regawnexec(preg, str, (unsigned)-1, match, params, eflags);
+}
+
+#endif /* TRE_WCHAR */
+
+void
+tre_regaparams_default(regaparams_t *params)
+{
+ memset(params, 0, sizeof(*params));
+ params->cost_ins = 1;
+ params->cost_del = 1;
+ params->cost_subst = 1;
+ params->max_cost = INT_MAX;
+ params->max_ins = INT_MAX;
+ params->max_del = INT_MAX;
+ params->max_subst = INT_MAX;
+ params->max_err = INT_MAX;
+}
+
+#endif /* TRE_APPROX */
+
+/* EOF */
diff --git a/lib/stamp-h2 b/lib/stamp-h2
new file mode 100644
index 0000000000000..c328f705cb548
--- /dev/null
+++ b/lib/stamp-h2
@@ -0,0 +1 @@
+timestamp for lib/tre-config.h
diff --git a/lib/tre-ast.c b/lib/tre-ast.c
new file mode 100644
index 0000000000000..acb387acce3ae
--- /dev/null
+++ b/lib/tre-ast.c
@@ -0,0 +1,226 @@
+/*
+ tre-ast.c - Abstract syntax tree (AST) routines
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <assert.h>
+
+#include "tre-ast.h"
+#include "tre-mem.h"
+
+tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
+{
+ tre_ast_node_t *node;
+
+ node = tre_mem_calloc(mem, sizeof(*node));
+ if (!node)
+ return NULL;
+ node->obj = tre_mem_calloc(mem, size);
+ if (!node->obj)
+ return NULL;
+ node->type = type;
+ node->nullable = -1;
+ node->submatch_id = -1;
+
+ return node;
+}
+
+tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
+{
+ tre_ast_node_t *node;
+ tre_literal_t *lit;
+
+ node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
+ if (!node)
+ return NULL;
+ lit = node->obj;
+ lit->code_min = code_min;
+ lit->code_max = code_max;
+ lit->position = position;
+
+ return node;
+}
+
+tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+ int minimal)
+{
+ tre_ast_node_t *node;
+ tre_iteration_t *iter;
+
+ node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
+ if (!node)
+ return NULL;
+ iter = node->obj;
+ iter->arg = arg;
+ iter->min = min;
+ iter->max = max;
+ iter->minimal = minimal;
+ node->num_submatches = arg->num_submatches;
+
+ return node;
+}
+
+tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
+{
+ tre_ast_node_t *node;
+
+ node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
+ if (node == NULL)
+ return NULL;
+ ((tre_union_t *)node->obj)->left = left;
+ ((tre_union_t *)node->obj)->right = right;
+ node->num_submatches = left->num_submatches + right->num_submatches;
+
+ return node;
+}
+
+tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+ tre_ast_node_t *right)
+{
+ tre_ast_node_t *node;
+
+ node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
+ if (node == NULL)
+ return NULL;
+ ((tre_catenation_t *)node->obj)->left = left;
+ ((tre_catenation_t *)node->obj)->right = right;
+ node->num_submatches = left->num_submatches + right->num_submatches;
+
+ return node;
+}
+
+#ifdef TRE_DEBUG
+
+static void
+tre_findent(FILE *stream, int i)
+{
+ while (i-- > 0)
+ fputc(' ', stream);
+}
+
+void
+tre_print_params(int *params)
+{
+ int i;
+ if (params)
+ {
+ DPRINT(("params ["));
+ for (i = 0; i < TRE_PARAM_LAST; i++)
+ {
+ if (params[i] == TRE_PARAM_UNSET)
+ DPRINT(("unset"));
+ else if (params[i] == TRE_PARAM_DEFAULT)
+ DPRINT(("default"));
+ else
+ DPRINT(("%d", params[i]));
+ if (i < TRE_PARAM_LAST - 1)
+ DPRINT((", "));
+ }
+ DPRINT(("]"));
+ }
+}
+
+static void
+tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
+{
+ int code_min, code_max, pos;
+ int num_tags = ast->num_tags;
+ tre_literal_t *lit;
+ tre_iteration_t *iter;
+
+ tre_findent(stream, indent);
+ switch (ast->type)
+ {
+ case LITERAL:
+ lit = ast->obj;
+ code_min = lit->code_min;
+ code_max = lit->code_max;
+ pos = lit->position;
+ if (IS_EMPTY(lit))
+ {
+ fprintf(stream, "literal empty\n");
+ }
+ else if (IS_ASSERTION(lit))
+ {
+ int i;
+ char *assertions[] = { "bol", "eol", "ctype", "!ctype",
+ "bow", "eow", "wb", "!wb" };
+ if (code_max >= ASSERT_LAST << 1)
+ assert(0);
+ fprintf(stream, "assertions: ");
+ for (i = 0; (1 << i) <= ASSERT_LAST; i++)
+ if (code_max & (1 << i))
+ fprintf(stream, "%s ", assertions[i]);
+ fprintf(stream, "\n");
+ }
+ else if (IS_TAG(lit))
+ {
+ fprintf(stream, "tag %d\n", code_max);
+ }
+ else if (IS_BACKREF(lit))
+ {
+ fprintf(stream, "backref %d, pos %d\n", code_max, pos);
+ }
+ else if (IS_PARAMETER(lit))
+ {
+ tre_print_params(lit->u.params);
+ fprintf(stream, "\n");
+ }
+ else
+ {
+ fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
+ "%d tags\n", code_min, code_max, code_min, code_max, pos,
+ ast->submatch_id, num_tags);
+ }
+ break;
+ case ITERATION:
+ iter = ast->obj;
+ fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
+ iter->min, iter->max, ast->submatch_id, num_tags,
+ iter->minimal ? "minimal" : "greedy");
+ tre_do_print(stream, iter->arg, indent + 2);
+ break;
+ case UNION:
+ fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
+ tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
+ tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
+ break;
+ case CATENATION:
+ fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
+ num_tags);
+ tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
+ tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void
+tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
+{
+ tre_do_print(stream, ast, 0);
+}
+
+void
+tre_ast_print(tre_ast_node_t *tree)
+{
+ printf("AST:\n");
+ tre_ast_fprint(stdout, tree);
+}
+
+#endif /* TRE_DEBUG */
+
+/* EOF */
diff --git a/lib/tre-ast.h b/lib/tre-ast.h
new file mode 100644
index 0000000000000..d9af3762f9acc
--- /dev/null
+++ b/lib/tre-ast.h
@@ -0,0 +1,128 @@
+/*
+ tre-ast.h - Abstract syntax tree (AST) definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+
+#ifndef TRE_AST_H
+#define TRE_AST_H 1
+
+#include "tre-mem.h"
+#include "tre-internal.h"
+#include "tre-compile.h"
+
+/* The different AST node types. */
+typedef enum {
+ LITERAL,
+ CATENATION,
+ ITERATION,
+ UNION
+} tre_ast_type_t;
+
+/* Special subtypes of TRE_LITERAL. */
+#define EMPTY -1 /* Empty leaf (denotes empty string). */
+#define ASSERTION -2 /* Assertion leaf. */
+#define TAG -3 /* Tag leaf. */
+#define BACKREF -4 /* Back reference leaf. */
+#define PARAMETER -5 /* Parameter. */
+
+#define IS_SPECIAL(x) ((x)->code_min < 0)
+#define IS_EMPTY(x) ((x)->code_min == EMPTY)
+#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
+#define IS_TAG(x) ((x)->code_min == TAG)
+#define IS_BACKREF(x) ((x)->code_min == BACKREF)
+#define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
+
+
+/* A generic AST node. All AST nodes consist of this node on the top
+ level with `obj' pointing to the actual content. */
+typedef struct {
+ tre_ast_type_t type; /* Type of the node. */
+ void *obj; /* Pointer to actual node. */
+ int nullable;
+ int submatch_id;
+ int num_submatches;
+ int num_tags;
+ tre_pos_and_tags_t *firstpos;
+ tre_pos_and_tags_t *lastpos;
+} tre_ast_node_t;
+
+
+/* A "literal" node. These are created for assertions, back references,
+ tags, matching parameter settings, and all expressions that match one
+ character. */
+typedef struct {
+ long code_min;
+ long code_max;
+ int position;
+ union {
+ tre_ctype_t class;
+ int *params;
+ } u;
+ tre_ctype_t *neg_classes;
+} tre_literal_t;
+
+/* A "catenation" node. These are created when two regexps are concatenated.
+ If there are more than one subexpressions in sequence, the `left' part
+ holds all but the last, and `right' part holds the last subexpression
+ (catenation is left associative). */
+typedef struct {
+ tre_ast_node_t *left;
+ tre_ast_node_t *right;
+} tre_catenation_t;
+
+/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
+ operators. */
+typedef struct {
+ /* Subexpression to match. */
+ tre_ast_node_t *arg;
+ /* Minimum number of consecutive matches. */
+ int min;
+ /* Maximum number of consecutive matches. */
+ int max;
+ /* If 0, match as many characters as possible, if 1 match as few as
+ possible. Note that this does not always mean the same thing as
+ matching as many/few repetitions as possible. */
+ unsigned int minimal:1;
+ /* Approximate matching parameters (or NULL). */
+ int *params;
+} tre_iteration_t;
+
+/* An "union" node. These are created for the "|" operator. */
+typedef struct {
+ tre_ast_node_t *left;
+ tre_ast_node_t *right;
+} tre_union_t;
+
+tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
+
+tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
+
+tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+ int minimal);
+
+tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
+
+tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+ tre_ast_node_t *right);
+
+#ifdef TRE_DEBUG
+void
+tre_ast_print(tre_ast_node_t *tree);
+
+/* XXX - rethink AST printing API */
+void
+tre_print_params(int *params);
+#endif /* TRE_DEBUG */
+
+#endif /* TRE_AST_H */
+
+/* EOF */
diff --git a/lib/tre-compile.c b/lib/tre-compile.c
new file mode 100644
index 0000000000000..948cc469895e6
--- /dev/null
+++ b/lib/tre-compile.c
@@ -0,0 +1,2296 @@
+/*
+ tre-compile.c - TRE regex compiler
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+/*
+ TODO:
+ - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
+ function calls.
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+#include "tre-internal.h"
+#include "tre-mem.h"
+#include "tre-stack.h"
+#include "tre-ast.h"
+#include "tre-parse.h"
+#include "tre-compile.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+/*
+ Algorithms to setup tags so that submatch addressing can be done.
+*/
+
+
+/* Inserts a catenation node to the root of the tree given in `node'.
+ As the left child a new tag with number `tag_id' to `node' is added,
+ and the right child is the old root. */
+static reg_errcode_t
+tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
+{
+ tre_catenation_t *c;
+
+ DPRINT(("add_tag_left: tag %d\n", tag_id));
+
+ c = tre_mem_alloc(mem, sizeof(*c));
+ if (c == NULL)
+ return REG_ESPACE;
+ c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
+ if (c->left == NULL)
+ return REG_ESPACE;
+ c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
+ if (c->right == NULL)
+ return REG_ESPACE;
+
+ c->right->obj = node->obj;
+ c->right->type = node->type;
+ c->right->nullable = -1;
+ c->right->submatch_id = -1;
+ c->right->firstpos = NULL;
+ c->right->lastpos = NULL;
+ c->right->num_tags = 0;
+ node->obj = c;
+ node->type = CATENATION;
+ return REG_OK;
+}
+
+/* Inserts a catenation node to the root of the tree given in `node'.
+ As the right child a new tag with number `tag_id' to `node' is added,
+ and the left child is the old root. */
+static reg_errcode_t
+tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
+{
+ tre_catenation_t *c;
+
+ DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
+
+ c = tre_mem_alloc(mem, sizeof(*c));
+ if (c == NULL)
+ return REG_ESPACE;
+ c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
+ if (c->right == NULL)
+ return REG_ESPACE;
+ c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
+ if (c->left == NULL)
+ return REG_ESPACE;
+
+ c->left->obj = node->obj;
+ c->left->type = node->type;
+ c->left->nullable = -1;
+ c->left->submatch_id = -1;
+ c->left->firstpos = NULL;
+ c->left->lastpos = NULL;
+ c->left->num_tags = 0;
+ node->obj = c;
+ node->type = CATENATION;
+ return REG_OK;
+}
+
+typedef enum {
+ ADDTAGS_RECURSE,
+ ADDTAGS_AFTER_ITERATION,
+ ADDTAGS_AFTER_UNION_LEFT,
+ ADDTAGS_AFTER_UNION_RIGHT,
+ ADDTAGS_AFTER_CAT_LEFT,
+ ADDTAGS_AFTER_CAT_RIGHT,
+ ADDTAGS_SET_SUBMATCH_END
+} tre_addtags_symbol_t;
+
+
+typedef struct {
+ int tag;
+ int next_tag;
+} tre_tag_states_t;
+
+
+/* Go through `regset' and set submatch data for submatches that are
+ using this tag. */
+static void
+tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
+{
+ int i;
+
+ for (i = 0; regset[i] >= 0; i++)
+ {
+ int id = regset[i] / 2;
+ int start = !(regset[i] % 2);
+ DPRINT((" Using tag %d for %s offset of "
+ "submatch %d\n", tag,
+ start ? "start" : "end", id));
+ if (start)
+ tnfa->submatch_data[id].so_tag = tag;
+ else
+ tnfa->submatch_data[id].eo_tag = tag;
+ }
+ regset[0] = -1;
+}
+
+
+/* Adds tags to appropriate locations in the parse tree in `tree', so that
+ subexpressions marked for submatch addressing can be traced. */
+static reg_errcode_t
+tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
+ tre_tnfa_t *tnfa)
+{
+ reg_errcode_t status = REG_OK;
+ tre_addtags_symbol_t symbol;
+ tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
+ int bottom = tre_stack_num_objects(stack);
+ /* True for first pass (counting number of needed tags) */
+ int first_pass = (mem == NULL || tnfa == NULL);
+ int *regset, *orig_regset;
+ int num_tags = 0; /* Total number of tags. */
+ int num_minimals = 0; /* Number of special minimal tags. */
+ int tag = 0; /* The tag that is to be added next. */
+ int next_tag = 1; /* Next tag to use after this one. */
+ int *parents; /* Stack of submatches the current submatch is
+ contained in. */
+ int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
+ tre_tag_states_t *saved_states;
+
+ tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
+ if (!first_pass)
+ {
+ tnfa->end_tag = 0;
+ tnfa->minimal_tags[0] = -1;
+ }
+
+ regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
+ if (regset == NULL)
+ return REG_ESPACE;
+ regset[0] = -1;
+ orig_regset = regset;
+
+ parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
+ if (parents == NULL)
+ {
+ xfree(regset);
+ return REG_ESPACE;
+ }
+ parents[0] = -1;
+
+ saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
+ if (saved_states == NULL)
+ {
+ xfree(regset);
+ xfree(parents);
+ return REG_ESPACE;
+ }
+ else
+ {
+ unsigned int i;
+ for (i = 0; i <= tnfa->num_submatches; i++)
+ saved_states[i].tag = -1;
+ }
+
+ STACK_PUSH(stack, voidptr, node);
+ STACK_PUSH(stack, int, ADDTAGS_RECURSE);
+
+ while (tre_stack_num_objects(stack) > bottom)
+ {
+ if (status != REG_OK)
+ break;
+
+ symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
+ switch (symbol)
+ {
+
+ case ADDTAGS_SET_SUBMATCH_END:
+ {
+ int id = tre_stack_pop_int(stack);
+ int i;
+
+ /* Add end of this submatch to regset. */
+ for (i = 0; regset[i] >= 0; i++);
+ regset[i] = id * 2 + 1;
+ regset[i + 1] = -1;
+
+ /* Pop this submatch from the parents stack. */
+ for (i = 0; parents[i] >= 0; i++);
+ parents[i - 1] = -1;
+ break;
+ }
+
+ case ADDTAGS_RECURSE:
+ node = tre_stack_pop_voidptr(stack);
+
+ if (node->submatch_id >= 0)
+ {
+ int id = node->submatch_id;
+ int i;
+
+
+ /* Add start of this submatch to regset. */
+ for (i = 0; regset[i] >= 0; i++);
+ regset[i] = id * 2;
+ regset[i + 1] = -1;
+
+ if (!first_pass)
+ {
+ for (i = 0; parents[i] >= 0; i++);
+ tnfa->submatch_data[id].parents = NULL;
+ if (i > 0)
+ {
+ int *p = xmalloc(sizeof(*p) * (i + 1));
+ if (p == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ assert(tnfa->submatch_data[id].parents == NULL);
+ tnfa->submatch_data[id].parents = p;
+ for (i = 0; parents[i] >= 0; i++)
+ p[i] = parents[i];
+ p[i] = -1;
+ }
+ }
+
+ /* Add end of this submatch to regset after processing this
+ node. */
+ STACK_PUSHX(stack, int, node->submatch_id);
+ STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
+ }
+
+ switch (node->type)
+ {
+ case LITERAL:
+ {
+ tre_literal_t *lit = node->obj;
+
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+ {
+ int i;
+ DPRINT(("Literal %d-%d\n",
+ (int)lit->code_min, (int)lit->code_max));
+ if (regset[0] >= 0)
+ {
+ /* Regset is not empty, so add a tag before the
+ literal or backref. */
+ if (!first_pass)
+ {
+ status = tre_add_tag_left(mem, node, tag);
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0)
+ {
+ DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ }
+ else
+ {
+ DPRINT((" num_tags = 1\n"));
+ node->num_tags = 1;
+ }
+
+ DPRINT((" num_tags++\n"));
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+ }
+ else
+ {
+ assert(!IS_TAG(lit));
+ }
+ break;
+ }
+ case CATENATION:
+ {
+ tre_catenation_t *cat = node->obj;
+ tre_ast_node_t *left = cat->left;
+ tre_ast_node_t *right = cat->right;
+ int reserved_tag = -1;
+ DPRINT(("Catenation, next_tag = %d\n", next_tag));
+
+
+ /* After processing right child. */
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
+
+ /* Process right child. */
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* After processing left child. */
+ STACK_PUSHX(stack, int, next_tag + left->num_tags);
+ DPRINT((" Pushing %d for after left\n",
+ next_tag + left->num_tags));
+ if (left->num_tags > 0 && right->num_tags > 0)
+ {
+ /* Reserve the next tag to the right child. */
+ DPRINT((" Reserving next_tag %d to right child\n",
+ next_tag));
+ reserved_tag = next_tag;
+ next_tag++;
+ }
+ STACK_PUSHX(stack, int, reserved_tag);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
+
+ /* Process left child. */
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ }
+ break;
+ case ITERATION:
+ {
+ tre_iteration_t *iter = node->obj;
+ DPRINT(("Iteration\n"));
+
+ if (first_pass)
+ {
+ STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
+ }
+ else
+ {
+ STACK_PUSHX(stack, int, tag);
+ STACK_PUSHX(stack, int, iter->minimal);
+ }
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
+
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* Regset is not empty, so add a tag here. */
+ if (regset[0] >= 0 || iter->minimal)
+ {
+ if (!first_pass)
+ {
+ int i;
+ status = tre_add_tag_left(mem, node, tag);
+ if (iter->minimal)
+ tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
+ else
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0)
+ {
+ DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ }
+
+ DPRINT((" num_tags++\n"));
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+ direction = TRE_TAG_MINIMIZE;
+ }
+ break;
+ case UNION:
+ {
+ tre_union_t *uni = node->obj;
+ tre_ast_node_t *left = uni->left;
+ tre_ast_node_t *right = uni->right;
+ int left_tag;
+ int right_tag;
+
+ if (regset[0] >= 0)
+ {
+ left_tag = next_tag;
+ right_tag = next_tag + 1;
+ }
+ else
+ {
+ left_tag = tag;
+ right_tag = next_tag;
+ }
+
+ DPRINT(("Union\n"));
+
+ /* After processing right child. */
+ STACK_PUSHX(stack, int, right_tag);
+ STACK_PUSHX(stack, int, left_tag);
+ STACK_PUSHX(stack, voidptr, regset);
+ STACK_PUSHX(stack, int, regset[0] >= 0);
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
+
+ /* Process right child. */
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* After processing left child. */
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
+
+ /* Process left child. */
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* Regset is not empty, so add a tag here. */
+ if (regset[0] >= 0)
+ {
+ if (!first_pass)
+ {
+ int i;
+ status = tre_add_tag_left(mem, node, tag);
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0)
+ {
+ DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ }
+
+ DPRINT((" num_tags++\n"));
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+
+ if (node->num_submatches > 0)
+ {
+ /* The next two tags are reserved for markers. */
+ next_tag++;
+ tag = next_tag;
+ next_tag++;
+ }
+
+ break;
+ }
+ }
+
+ if (node->submatch_id >= 0)
+ {
+ int i;
+ /* Push this submatch on the parents stack. */
+ for (i = 0; parents[i] >= 0; i++);
+ parents[i] = node->submatch_id;
+ parents[i + 1] = -1;
+ }
+
+ break; /* end case: ADDTAGS_RECURSE */
+
+ case ADDTAGS_AFTER_ITERATION:
+ {
+ int minimal = 0;
+ int enter_tag;
+ node = tre_stack_pop_voidptr(stack);
+ if (first_pass)
+ {
+ node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
+ + tre_stack_pop_int(stack);
+ minimal_tag = -1;
+ }
+ else
+ {
+ minimal = tre_stack_pop_int(stack);
+ enter_tag = tre_stack_pop_int(stack);
+ if (minimal)
+ minimal_tag = enter_tag;
+ }
+
+ DPRINT(("After iteration\n"));
+ if (!first_pass)
+ {
+ DPRINT((" Setting direction to %s\n",
+ minimal ? "minimize" : "maximize"));
+ if (minimal)
+ direction = TRE_TAG_MINIMIZE;
+ else
+ direction = TRE_TAG_MAXIMIZE;
+ }
+ break;
+ }
+
+ case ADDTAGS_AFTER_CAT_LEFT:
+ {
+ int new_tag = tre_stack_pop_int(stack);
+ next_tag = tre_stack_pop_int(stack);
+ DPRINT(("After cat left, tag = %d, next_tag = %d\n",
+ tag, next_tag));
+ if (new_tag >= 0)
+ {
+ DPRINT((" Setting tag to %d\n", new_tag));
+ tag = new_tag;
+ }
+ break;
+ }
+
+ case ADDTAGS_AFTER_CAT_RIGHT:
+ DPRINT(("After cat right\n"));
+ node = tre_stack_pop_voidptr(stack);
+ if (first_pass)
+ node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
+ + ((tre_catenation_t *)node->obj)->right->num_tags;
+ break;
+
+ case ADDTAGS_AFTER_UNION_LEFT:
+ DPRINT(("After union left\n"));
+ /* Lift the bottom of the `regset' array so that when processing
+ the right operand the items currently in the array are
+ invisible. The original bottom was saved at ADDTAGS_UNION and
+ will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
+ while (*regset >= 0)
+ regset++;
+ break;
+
+ case ADDTAGS_AFTER_UNION_RIGHT:
+ {
+ int added_tags, tag_left, tag_right;
+ tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
+ tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
+ DPRINT(("After union right\n"));
+ node = tre_stack_pop_voidptr(stack);
+ added_tags = tre_stack_pop_int(stack);
+ if (first_pass)
+ {
+ node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
+ + ((tre_union_t *)node->obj)->right->num_tags + added_tags
+ + ((node->num_submatches > 0) ? 2 : 0);
+ }
+ regset = tre_stack_pop_voidptr(stack);
+ tag_left = tre_stack_pop_int(stack);
+ tag_right = tre_stack_pop_int(stack);
+
+ /* Add tags after both children, the left child gets a smaller
+ tag than the right child. This guarantees that we prefer
+ the left child over the right child. */
+ /* XXX - This is not always necessary (if the children have
+ tags which must be seen for every match of that child). */
+ /* XXX - Check if this is the only place where tre_add_tag_right
+ is used. If so, use tre_add_tag_left (putting the tag before
+ the child as opposed after the child) and throw away
+ tre_add_tag_right. */
+ if (node->num_submatches > 0)
+ {
+ if (!first_pass)
+ {
+ status = tre_add_tag_right(mem, left, tag_left);
+ tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
+ status = tre_add_tag_right(mem, right, tag_right);
+ tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
+ }
+ DPRINT((" num_tags += 2\n"));
+ num_tags += 2;
+ }
+ direction = TRE_TAG_MAXIMIZE;
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
+
+ } /* end switch(symbol) */
+ } /* end while(tre_stack_num_objects(stack) > bottom) */
+
+ if (!first_pass)
+ tre_purge_regset(regset, tnfa, tag);
+
+ if (!first_pass && minimal_tag >= 0)
+ {
+ int i;
+ DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+
+ DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
+ first_pass? "First pass" : "Second pass", num_tags));
+
+ assert(tree->num_tags == num_tags);
+ tnfa->end_tag = num_tags;
+ tnfa->num_tags = num_tags;
+ tnfa->num_minimals = num_minimals;
+ xfree(orig_regset);
+ xfree(parents);
+ xfree(saved_states);
+ return status;
+}
+
+
+
+/*
+ AST to TNFA compilation routines.
+*/
+
+typedef enum {
+ COPY_RECURSE,
+ COPY_SET_RESULT_PTR
+} tre_copyast_symbol_t;
+
+/* Flags for tre_copy_ast(). */
+#define COPY_REMOVE_TAGS 1
+#define COPY_MAXIMIZE_FIRST_TAG 2
+
+static reg_errcode_t
+tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
+ int flags, int *pos_add, tre_tag_direction_t *tag_directions,
+ tre_ast_node_t **copy, int *max_pos)
+{
+ reg_errcode_t status = REG_OK;
+ int bottom = tre_stack_num_objects(stack);
+ int num_copied = 0;
+ int first_tag = 1;
+ tre_ast_node_t **result = copy;
+ tre_copyast_symbol_t symbol;
+
+ STACK_PUSH(stack, voidptr, ast);
+ STACK_PUSH(stack, int, COPY_RECURSE);
+
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+ {
+ tre_ast_node_t *node;
+ if (status != REG_OK)
+ break;
+
+ symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
+ switch (symbol)
+ {
+ case COPY_SET_RESULT_PTR:
+ result = tre_stack_pop_voidptr(stack);
+ break;
+ case COPY_RECURSE:
+ node = tre_stack_pop_voidptr(stack);
+ switch (node->type)
+ {
+ case LITERAL:
+ {
+ tre_literal_t *lit = node->obj;
+ int pos = lit->position;
+ int min = lit->code_min;
+ int max = lit->code_max;
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+ {
+ /* XXX - e.g. [ab] has only one position but two
+ nodes, so we are creating holes in the state space
+ here. Not fatal, just wastes memory. */
+ pos += *pos_add;
+ num_copied++;
+ }
+ else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
+ {
+ /* Change this tag to empty. */
+ min = EMPTY;
+ max = pos = -1;
+ }
+ else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
+ && first_tag)
+ {
+ /* Maximize the first tag. */
+ tag_directions[max] = TRE_TAG_MAXIMIZE;
+ first_tag = 0;
+ }
+ *result = tre_ast_new_literal(mem, min, max, pos);
+ if (*result == NULL)
+ status = REG_ESPACE;
+
+ if (pos > *max_pos)
+ *max_pos = pos;
+ break;
+ }
+ case UNION:
+ {
+ tre_union_t *uni = node->obj;
+ tre_union_t *tmp;
+ *result = tre_ast_new_union(mem, uni->left, uni->right);
+ if (*result == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ tmp = (*result)->obj;
+ result = &tmp->left;
+ STACK_PUSHX(stack, voidptr, uni->right);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ STACK_PUSHX(stack, voidptr, &tmp->right);
+ STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
+ STACK_PUSHX(stack, voidptr, uni->left);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ break;
+ }
+ case CATENATION:
+ {
+ tre_catenation_t *cat = node->obj;
+ tre_catenation_t *tmp;
+ *result = tre_ast_new_catenation(mem, cat->left, cat->right);
+ if (*result == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ tmp = (*result)->obj;
+ tmp->left = NULL;
+ tmp->right = NULL;
+ result = &tmp->left;
+
+ STACK_PUSHX(stack, voidptr, cat->right);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ STACK_PUSHX(stack, voidptr, &tmp->right);
+ STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ break;
+ }
+ case ITERATION:
+ {
+ tre_iteration_t *iter = node->obj;
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ *result = tre_ast_new_iter(mem, iter->arg, iter->min,
+ iter->max, iter->minimal);
+ if (*result == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ iter = (*result)->obj;
+ result = &iter->arg;
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ }
+ }
+ *pos_add += num_copied;
+ return status;
+}
+
+typedef enum {
+ EXPAND_RECURSE,
+ EXPAND_AFTER_ITER
+} tre_expand_ast_symbol_t;
+
+/* Expands each iteration node that has a finite nonzero minimum or maximum
+ iteration count to a catenated sequence of copies of the node. */
+static reg_errcode_t
+tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
+ int *position, tre_tag_direction_t *tag_directions,
+ int *max_depth)
+{
+ reg_errcode_t status = REG_OK;
+ int bottom = tre_stack_num_objects(stack);
+ int pos_add = 0;
+ int pos_add_total = 0;
+ int max_pos = 0;
+ /* Current approximate matching parameters. */
+ int params[TRE_PARAM_LAST];
+ /* Approximate parameter nesting level. */
+ int params_depth = 0;
+ int iter_depth = 0;
+ int i;
+
+ for (i = 0; i < TRE_PARAM_LAST; i++)
+ params[i] = TRE_PARAM_DEFAULT;
+
+ STACK_PUSHR(stack, voidptr, ast);
+ STACK_PUSHR(stack, int, EXPAND_RECURSE);
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+ {
+ tre_ast_node_t *node;
+ tre_expand_ast_symbol_t symbol;
+
+ if (status != REG_OK)
+ break;
+
+ DPRINT(("pos_add %d\n", pos_add));
+
+ symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
+ node = tre_stack_pop_voidptr(stack);
+ switch (symbol)
+ {
+ case EXPAND_RECURSE:
+ switch (node->type)
+ {
+ case LITERAL:
+ {
+ tre_literal_t *lit= node->obj;
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+ {
+ lit->position += pos_add;
+ if (lit->position > max_pos)
+ max_pos = lit->position;
+ }
+ break;
+ }
+ case UNION:
+ {
+ tre_union_t *uni = node->obj;
+ STACK_PUSHX(stack, voidptr, uni->right);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ STACK_PUSHX(stack, voidptr, uni->left);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ break;
+ }
+ case CATENATION:
+ {
+ tre_catenation_t *cat = node->obj;
+ STACK_PUSHX(stack, voidptr, cat->right);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ break;
+ }
+ case ITERATION:
+ {
+ tre_iteration_t *iter = node->obj;
+ STACK_PUSHX(stack, int, pos_add);
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ /* If we are going to expand this node at EXPAND_AFTER_ITER
+ then don't increase the `pos' fields of the nodes now, it
+ will get done when expanding. */
+ if (iter->min > 1 || iter->max > 1)
+ pos_add = 0;
+ iter_depth++;
+ DPRINT(("iter\n"));
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case EXPAND_AFTER_ITER:
+ {
+ tre_iteration_t *iter = node->obj;
+ int pos_add_last;
+ pos_add = tre_stack_pop_int(stack);
+ pos_add_last = pos_add;
+ if (iter->min > 1 || iter->max > 1)
+ {
+ tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
+ int j;
+ int pos_add_save = pos_add;
+
+ /* Create a catenated sequence of copies of the node. */
+ for (j = 0; j < iter->min; j++)
+ {
+ tre_ast_node_t *copy;
+ /* Remove tags from all but the last copy. */
+ int flags = ((j + 1 < iter->min)
+ ? COPY_REMOVE_TAGS
+ : COPY_MAXIMIZE_FIRST_TAG);
+ DPRINT((" pos_add %d\n", pos_add));
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, flags,
+ &pos_add, tag_directions, &copy,
+ &max_pos);
+ if (status != REG_OK)
+ return status;
+ if (seq1 != NULL)
+ seq1 = tre_ast_new_catenation(mem, seq1, copy);
+ else
+ seq1 = copy;
+ if (seq1 == NULL)
+ return REG_ESPACE;
+ }
+
+ if (iter->max == -1)
+ {
+ /* No upper limit. */
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, 0,
+ &pos_add, NULL, &seq2, &max_pos);
+ if (status != REG_OK)
+ return status;
+ seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ }
+ else
+ {
+ for (j = iter->min; j < iter->max; j++)
+ {
+ tre_ast_node_t *tmp, *copy;
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, 0,
+ &pos_add, NULL, &copy, &max_pos);
+ if (status != REG_OK)
+ return status;
+ if (seq2 != NULL)
+ seq2 = tre_ast_new_catenation(mem, copy, seq2);
+ else
+ seq2 = copy;
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
+ if (tmp == NULL)
+ return REG_ESPACE;
+ seq2 = tre_ast_new_union(mem, tmp, seq2);
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ }
+ }
+
+ pos_add = pos_add_save;
+ if (seq1 == NULL)
+ seq1 = seq2;
+ else if (seq2 != NULL)
+ seq1 = tre_ast_new_catenation(mem, seq1, seq2);
+ if (seq1 == NULL)
+ return REG_ESPACE;
+ node->obj = seq1->obj;
+ node->type = seq1->type;
+ }
+
+ iter_depth--;
+ pos_add_total += pos_add - pos_add_last;
+ if (iter_depth == 0)
+ pos_add = pos_add_total;
+
+ /* If approximate parameters are specified, surround the result
+ with two parameter setting nodes. The one on the left sets
+ the specified parameters, and the one on the right restores
+ the old parameters. */
+ if (iter->params)
+ {
+ tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
+ int *old_params;
+
+ tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
+ if (!tmp_l)
+ return REG_ESPACE;
+ ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
+ iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
+ tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
+ if (!tmp_r)
+ return REG_ESPACE;
+ old_params = tre_mem_alloc(mem, sizeof(*old_params)
+ * TRE_PARAM_LAST);
+ if (!old_params)
+ return REG_ESPACE;
+ for (i = 0; i < TRE_PARAM_LAST; i++)
+ old_params[i] = params[i];
+ ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
+ old_params[TRE_PARAM_DEPTH] = params_depth;
+ /* XXX - this is the only place where ast_new_node is
+ needed -- should be moved inside AST module. */
+ node_copy = tre_ast_new_node(mem, ITERATION,
+ sizeof(tre_iteration_t));
+ if (!node_copy)
+ return REG_ESPACE;
+ node_copy->obj = node->obj;
+ tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
+ if (!tmp_node)
+ return REG_ESPACE;
+ tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
+ if (!tmp_node)
+ return REG_ESPACE;
+ /* Replace the contents of `node' with `tmp_node'. */
+ memcpy(node, tmp_node, sizeof(*node));
+ node->obj = tmp_node->obj;
+ node->type = tmp_node->type;
+ params_depth++;
+ if (params_depth > *max_depth)
+ *max_depth = params_depth;
+ }
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ *position += pos_add_total;
+
+ /* `max_pos' should never be larger than `*position' if the above
+ code works, but just an extra safeguard let's make sure
+ `*position' is set large enough so enough memory will be
+ allocated for the transition table. */
+ if (max_pos > *position)
+ *position = max_pos;
+
+#ifdef TRE_DEBUG
+ DPRINT(("Expanded AST:\n"));
+ tre_ast_print(ast);
+ DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
+#endif
+
+ return status;
+}
+
+static tre_pos_and_tags_t *
+tre_set_empty(tre_mem_t mem)
+{
+ tre_pos_and_tags_t *new_set;
+
+ new_set = tre_mem_calloc(mem, sizeof(*new_set));
+ if (new_set == NULL)
+ return NULL;
+
+ new_set[0].position = -1;
+ new_set[0].code_min = -1;
+ new_set[0].code_max = -1;
+
+ return new_set;
+}
+
+static tre_pos_and_tags_t *
+tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
+ tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
+{
+ tre_pos_and_tags_t *new_set;
+
+ new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
+ if (new_set == NULL)
+ return NULL;
+
+ new_set[0].position = position;
+ new_set[0].code_min = code_min;
+ new_set[0].code_max = code_max;
+ new_set[0].class = class;
+ new_set[0].neg_classes = neg_classes;
+ new_set[0].backref = backref;
+ new_set[1].position = -1;
+ new_set[1].code_min = -1;
+ new_set[1].code_max = -1;
+
+ return new_set;
+}
+
+static tre_pos_and_tags_t *
+tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
+ int *tags, int assertions, int *params)
+{
+ int s1, s2, i, j;
+ tre_pos_and_tags_t *new_set;
+ int *new_tags;
+ int num_tags;
+
+ for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
+ for (s1 = 0; set1[s1].position >= 0; s1++);
+ for (s2 = 0; set2[s2].position >= 0; s2++);
+ new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
+ if (!new_set )
+ return NULL;
+
+ for (s1 = 0; set1[s1].position >= 0; s1++)
+ {
+ new_set[s1].position = set1[s1].position;
+ new_set[s1].code_min = set1[s1].code_min;
+ new_set[s1].code_max = set1[s1].code_max;
+ new_set[s1].assertions = set1[s1].assertions | assertions;
+ new_set[s1].class = set1[s1].class;
+ new_set[s1].neg_classes = set1[s1].neg_classes;
+ new_set[s1].backref = set1[s1].backref;
+ if (set1[s1].tags == NULL && tags == NULL)
+ new_set[s1].tags = NULL;
+ else
+ {
+ for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
+ new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
+ * (i + num_tags + 1)));
+ if (new_tags == NULL)
+ return NULL;
+ for (j = 0; j < i; j++)
+ new_tags[j] = set1[s1].tags[j];
+ for (i = 0; i < num_tags; i++)
+ new_tags[j + i] = tags[i];
+ new_tags[j + i] = -1;
+ new_set[s1].tags = new_tags;
+ }
+ if (set1[s1].params)
+ new_set[s1].params = set1[s1].params;
+ if (params)
+ {
+ if (!new_set[s1].params)
+ new_set[s1].params = params;
+ else
+ {
+ new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
+ TRE_PARAM_LAST);
+ if (!new_set[s1].params)
+ return NULL;
+ for (i = 0; i < TRE_PARAM_LAST; i++)
+ if (params[i] != TRE_PARAM_UNSET)
+ new_set[s1].params[i] = params[i];
+ }
+ }
+ }
+
+ for (s2 = 0; set2[s2].position >= 0; s2++)
+ {
+ new_set[s1 + s2].position = set2[s2].position;
+ new_set[s1 + s2].code_min = set2[s2].code_min;
+ new_set[s1 + s2].code_max = set2[s2].code_max;
+ /* XXX - why not | assertions here as well? */
+ new_set[s1 + s2].assertions = set2[s2].assertions;
+ new_set[s1 + s2].class = set2[s2].class;
+ new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
+ new_set[s1 + s2].backref = set2[s2].backref;
+ if (set2[s2].tags == NULL)
+ new_set[s1 + s2].tags = NULL;
+ else
+ {
+ for (i = 0; set2[s2].tags[i] >= 0; i++);
+ new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
+ if (new_tags == NULL)
+ return NULL;
+ for (j = 0; j < i; j++)
+ new_tags[j] = set2[s2].tags[j];
+ new_tags[j] = -1;
+ new_set[s1 + s2].tags = new_tags;
+ }
+ if (set2[s2].params)
+ new_set[s1 + s2].params = set2[s2].params;
+ if (params)
+ {
+ if (!new_set[s1 + s2].params)
+ new_set[s1 + s2].params = params;
+ else
+ {
+ new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
+ TRE_PARAM_LAST);
+ if (!new_set[s1 + s2].params)
+ return NULL;
+ for (i = 0; i < TRE_PARAM_LAST; i++)
+ if (params[i] != TRE_PARAM_UNSET)
+ new_set[s1 + s2].params[i] = params[i];
+ }
+ }
+ }
+ new_set[s1 + s2].position = -1;
+ return new_set;
+}
+
+/* Finds the empty path through `node' which is the one that should be
+ taken according to POSIX.2 rules, and adds the tags on that path to
+ `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
+ set to the number of tags seen on the path. */
+static reg_errcode_t
+tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
+ int *assertions, int *params, int *num_tags_seen,
+ int *params_seen)
+{
+ tre_literal_t *lit;
+ tre_union_t *uni;
+ tre_catenation_t *cat;
+ tre_iteration_t *iter;
+ int i;
+ int bottom = tre_stack_num_objects(stack);
+ reg_errcode_t status = REG_OK;
+ if (num_tags_seen)
+ *num_tags_seen = 0;
+ if (params_seen)
+ *params_seen = 0;
+
+ status = tre_stack_push_voidptr(stack, node);
+
+ /* Walk through the tree recursively. */
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+ {
+ node = tre_stack_pop_voidptr(stack);
+
+ switch (node->type)
+ {
+ case LITERAL:
+ lit = (tre_literal_t *)node->obj;
+ switch (lit->code_min)
+ {
+ case TAG:
+ if (lit->code_max >= 0)
+ {
+ if (tags != NULL)
+ {
+ /* Add the tag to `tags'. */
+ for (i = 0; tags[i] >= 0; i++)
+ if (tags[i] == lit->code_max)
+ break;
+ if (tags[i] < 0)
+ {
+ tags[i] = lit->code_max;
+ tags[i + 1] = -1;
+ }
+ }
+ if (num_tags_seen)
+ (*num_tags_seen)++;
+ }
+ break;
+ case ASSERTION:
+ assert(lit->code_max >= 1
+ || lit->code_max <= ASSERT_LAST);
+ if (assertions != NULL)
+ *assertions |= lit->code_max;
+ break;
+ case PARAMETER:
+ if (params != NULL)
+ for (i = 0; i < TRE_PARAM_LAST; i++)
+ params[i] = lit->u.params[i];
+ if (params_seen != NULL)
+ *params_seen = 1;
+ break;
+ case EMPTY:
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+
+ case UNION:
+ /* Subexpressions starting earlier take priority over ones
+ starting later, so we prefer the left subexpression over the
+ right subexpression. */
+ uni = (tre_union_t *)node->obj;
+ if (uni->left->nullable)
+ STACK_PUSHX(stack, voidptr, uni->left)
+ else if (uni->right->nullable)
+ STACK_PUSHX(stack, voidptr, uni->right)
+ else
+ assert(0);
+ break;
+
+ case CATENATION:
+ /* The path must go through both children. */
+ cat = (tre_catenation_t *)node->obj;
+ assert(cat->left->nullable);
+ assert(cat->right->nullable);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, voidptr, cat->right);
+ break;
+
+ case ITERATION:
+ /* A match with an empty string is preferred over no match at
+ all, so we go through the argument if possible. */
+ iter = (tre_iteration_t *)node->obj;
+ if (iter->arg->nullable)
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ return status;
+}
+
+
+typedef enum {
+ NFL_RECURSE,
+ NFL_POST_UNION,
+ NFL_POST_CATENATION,
+ NFL_POST_ITERATION
+} tre_nfl_stack_symbol_t;
+
+
+/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
+ the nodes of the AST `tree'. */
+static reg_errcode_t
+tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
+{
+ int bottom = tre_stack_num_objects(stack);
+
+ STACK_PUSHR(stack, voidptr, tree);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+
+ while (tre_stack_num_objects(stack) > bottom)
+ {
+ tre_nfl_stack_symbol_t symbol;
+ tre_ast_node_t *node;
+
+ symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
+ node = tre_stack_pop_voidptr(stack);
+ switch (symbol)
+ {
+ case NFL_RECURSE:
+ switch (node->type)
+ {
+ case LITERAL:
+ {
+ tre_literal_t *lit = (tre_literal_t *)node->obj;
+ if (IS_BACKREF(lit))
+ {
+ /* Back references: nullable = false, firstpos = {i},
+ lastpos = {i}. */
+ node->nullable = 0;
+ node->firstpos = tre_set_one(mem, lit->position, 0,
+ TRE_CHAR_MAX, 0, NULL, -1);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_one(mem, lit->position, 0,
+ TRE_CHAR_MAX, 0, NULL,
+ (int)lit->code_max);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ else if (lit->code_min < 0)
+ {
+ /* Tags, empty strings, params, and zero width assertions:
+ nullable = true, firstpos = {}, and lastpos = {}. */
+ node->nullable = 1;
+ node->firstpos = tre_set_empty(mem);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_empty(mem);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ else
+ {
+ /* Literal at position i: nullable = false, firstpos = {i},
+ lastpos = {i}. */
+ node->nullable = 0;
+ node->firstpos =
+ tre_set_one(mem, lit->position, (int)lit->code_min,
+ (int)lit->code_max, 0, NULL, -1);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_one(mem, lit->position,
+ (int)lit->code_min,
+ (int)lit->code_max,
+ lit->u.class, lit->neg_classes,
+ -1);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ break;
+ }
+
+ case UNION:
+ /* Compute the attributes for the two subtrees, and after that
+ for this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_UNION);
+ STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+
+ case CATENATION:
+ /* Compute the attributes for the two subtrees, and after that
+ for this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_CATENATION);
+ STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+
+ case ITERATION:
+ /* Compute the attributes for the subtree, and after that for
+ this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_ITERATION);
+ STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+ }
+ break; /* end case: NFL_RECURSE */
+
+ case NFL_POST_UNION:
+ {
+ tre_union_t *uni = (tre_union_t *)node->obj;
+ node->nullable = uni->left->nullable || uni->right->nullable;
+ node->firstpos = tre_set_union(mem, uni->left->firstpos,
+ uni->right->firstpos, NULL, 0, NULL);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_union(mem, uni->left->lastpos,
+ uni->right->lastpos, NULL, 0, NULL);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ break;
+ }
+
+ case NFL_POST_ITERATION:
+ {
+ tre_iteration_t *iter = (tre_iteration_t *)node->obj;
+
+ if (iter->min == 0 || iter->arg->nullable)
+ node->nullable = 1;
+ else
+ node->nullable = 0;
+ node->firstpos = iter->arg->firstpos;
+ node->lastpos = iter->arg->lastpos;
+ break;
+ }
+
+ case NFL_POST_CATENATION:
+ {
+ int num_tags, *tags, assertions, params_seen;
+ int *params;
+ reg_errcode_t status;
+ tre_catenation_t *cat = node->obj;
+ node->nullable = cat->left->nullable && cat->right->nullable;
+
+ /* Compute firstpos. */
+ if (cat->left->nullable)
+ {
+ /* The left side matches the empty string. Make a first pass
+ with tre_match_empty() to get the number of tags and
+ parameters. */
+ status = tre_match_empty(stack, cat->left,
+ NULL, NULL, NULL, &num_tags,
+ &params_seen);
+ if (status != REG_OK)
+ return status;
+ /* Allocate arrays for the tags and parameters. */
+ tags = xmalloc(sizeof(*tags) * (num_tags + 1));
+ if (!tags)
+ return REG_ESPACE;
+ tags[0] = -1;
+ assertions = 0;
+ params = NULL;
+ if (params_seen)
+ {
+ params = tre_mem_alloc(mem, sizeof(*params)
+ * TRE_PARAM_LAST);
+ if (!params)
+ {
+ xfree(tags);
+ return REG_ESPACE;
+ }
+ }
+ /* Second pass with tre_mach_empty() to get the list of
+ tags and parameters. */
+ status = tre_match_empty(stack, cat->left, tags,
+ &assertions, params, NULL, NULL);
+ if (status != REG_OK)
+ {
+ xfree(tags);
+ return status;
+ }
+ node->firstpos =
+ tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
+ tags, assertions, params);
+ xfree(tags);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ }
+ else
+ {
+ node->firstpos = cat->left->firstpos;
+ }
+
+ /* Compute lastpos. */
+ if (cat->right->nullable)
+ {
+ /* The right side matches the empty string. Make a first pass
+ with tre_match_empty() to get the number of tags and
+ parameters. */
+ status = tre_match_empty(stack, cat->right,
+ NULL, NULL, NULL, &num_tags,
+ &params_seen);
+ if (status != REG_OK)
+ return status;
+ /* Allocate arrays for the tags and parameters. */
+ tags = xmalloc(sizeof(int) * (num_tags + 1));
+ if (!tags)
+ return REG_ESPACE;
+ tags[0] = -1;
+ assertions = 0;
+ params = NULL;
+ if (params_seen)
+ {
+ params = tre_mem_alloc(mem, sizeof(*params)
+ * TRE_PARAM_LAST);
+ if (!params)
+ {
+ xfree(tags);
+ return REG_ESPACE;
+ }
+ }
+ /* Second pass with tre_mach_empty() to get the list of
+ tags and parameters. */
+ status = tre_match_empty(stack, cat->right, tags,
+ &assertions, params, NULL, NULL);
+ if (status != REG_OK)
+ {
+ xfree(tags);
+ return status;
+ }
+ node->lastpos =
+ tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
+ tags, assertions, params);
+ xfree(tags);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ else
+ {
+ node->lastpos = cat->right->lastpos;
+ }
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ return REG_OK;
+}
+
+
+/* Adds a transition from each position in `p1' to each position in `p2'. */
+static reg_errcode_t
+tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
+ tre_tnfa_transition_t *transitions,
+ int *counts, int *offs)
+{
+ tre_pos_and_tags_t *orig_p2 = p2;
+ tre_tnfa_transition_t *trans;
+ int i, j, k, l, dup, prev_p2_pos;
+
+ if (transitions != NULL)
+ while (p1->position >= 0)
+ {
+ p2 = orig_p2;
+ prev_p2_pos = -1;
+ while (p2->position >= 0)
+ {
+ /* Optimization: if this position was already handled, skip it. */
+ if (p2->position == prev_p2_pos)
+ {
+ p2++;
+ continue;
+ }
+ prev_p2_pos = p2->position;
+ /* Set `trans' to point to the next unused transition from
+ position `p1->position'. */
+ trans = transitions + offs[p1->position];
+ while (trans->state != NULL)
+ {
+#if 0
+ /* If we find a previous transition from `p1->position' to
+ `p2->position', it is overwritten. This can happen only
+ if there are nested loops in the regexp, like in "((a)*)*".
+ In POSIX.2 repetition using the outer loop is always
+ preferred over using the inner loop. Therefore the
+ transition for the inner loop is useless and can be thrown
+ away. */
+ /* XXX - The same position is used for all nodes in a bracket
+ expression, so this optimization cannot be used (it will
+ break bracket expressions) unless I figure out a way to
+ detect it here. */
+ if (trans->state_id == p2->position)
+ {
+ DPRINT(("*"));
+ break;
+ }
+#endif
+ trans++;
+ }
+
+ if (trans->state == NULL)
+ (trans + 1)->state = NULL;
+ /* Use the character ranges, assertions, etc. from `p1' for
+ the transition from `p1' to `p2'. */
+ trans->code_min = p1->code_min;
+ trans->code_max = p1->code_max;
+ trans->state = transitions + offs[p2->position];
+ trans->state_id = p2->position;
+ trans->assertions = p1->assertions | p2->assertions
+ | (p1->class ? ASSERT_CHAR_CLASS : 0)
+ | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
+ if (p1->backref >= 0)
+ {
+ assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
+ assert(p2->backref < 0);
+ trans->u.backref = p1->backref;
+ trans->assertions |= ASSERT_BACKREF;
+ }
+ else
+ trans->u.class = p1->class;
+ if (p1->neg_classes != NULL)
+ {
+ for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
+ trans->neg_classes =
+ xmalloc(sizeof(*trans->neg_classes) * (i + 1));
+ if (trans->neg_classes == NULL)
+ return REG_ESPACE;
+ for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
+ trans->neg_classes[i] = p1->neg_classes[i];
+ trans->neg_classes[i] = (tre_ctype_t)0;
+ }
+ else
+ trans->neg_classes = NULL;
+
+ /* Find out how many tags this transition has. */
+ i = 0;
+ if (p1->tags != NULL)
+ while(p1->tags[i] >= 0)
+ i++;
+ j = 0;
+ if (p2->tags != NULL)
+ while(p2->tags[j] >= 0)
+ j++;
+
+ /* If we are overwriting a transition, free the old tag array. */
+ if (trans->tags != NULL)
+ xfree(trans->tags);
+ trans->tags = NULL;
+
+ /* If there were any tags, allocate an array and fill it. */
+ if (i + j > 0)
+ {
+ trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
+ if (!trans->tags)
+ return REG_ESPACE;
+ i = 0;
+ if (p1->tags != NULL)
+ while(p1->tags[i] >= 0)
+ {
+ trans->tags[i] = p1->tags[i];
+ i++;
+ }
+ l = i;
+ j = 0;
+ if (p2->tags != NULL)
+ while (p2->tags[j] >= 0)
+ {
+ /* Don't add duplicates. */
+ dup = 0;
+ for (k = 0; k < i; k++)
+ if (trans->tags[k] == p2->tags[j])
+ {
+ dup = 1;
+ break;
+ }
+ if (!dup)
+ trans->tags[l++] = p2->tags[j];
+ j++;
+ }
+ trans->tags[l] = -1;
+ }
+
+ /* Set the parameter array. If both `p2' and `p1' have same
+ parameters, the values in `p2' override those in `p1'. */
+ if (p1->params || p2->params)
+ {
+ if (!trans->params)
+ trans->params = xmalloc(sizeof(*trans->params)
+ * TRE_PARAM_LAST);
+ if (!trans->params)
+ return REG_ESPACE;
+ for (i = 0; i < TRE_PARAM_LAST; i++)
+ {
+ trans->params[i] = TRE_PARAM_UNSET;
+ if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
+ trans->params[i] = p1->params[i];
+ if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
+ trans->params[i] = p2->params[i];
+ }
+ }
+ else
+ {
+ if (trans->params)
+ xfree(trans->params);
+ trans->params = NULL;
+ }
+
+
+#ifdef TRE_DEBUG
+ {
+ int *tags;
+
+ DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
+ p1->code_min));
+ if (p1->code_max != p1->code_min)
+ DPRINT(("-%3d", p1->code_max));
+ tags = trans->tags;
+ if (tags)
+ {
+ DPRINT((", tags ["));
+ while (*tags >= 0)
+ {
+ DPRINT(("%d", *tags));
+ tags++;
+ if (*tags >= 0)
+ DPRINT((","));
+ }
+ DPRINT(("]"));
+ }
+ if (trans->assertions)
+ DPRINT((", assert %d", trans->assertions));
+ if (trans->assertions & ASSERT_BACKREF)
+ DPRINT((", backref %d", trans->u.backref));
+ else if (trans->u.class)
+ DPRINT((", class %ld", (long)trans->u.class));
+ if (trans->neg_classes)
+ DPRINT((", neg_classes %p", trans->neg_classes));
+ if (trans->params)
+ {
+ DPRINT((", "));
+ tre_print_params(trans->params);
+ }
+ DPRINT(("\n"));
+ }
+#endif /* TRE_DEBUG */
+ p2++;
+ }
+ p1++;
+ }
+ else
+ /* Compute a maximum limit for the number of transitions leaving
+ from each state. */
+ while (p1->position >= 0)
+ {
+ p2 = orig_p2;
+ while (p2->position >= 0)
+ {
+ counts[p1->position]++;
+ p2++;
+ }
+ p1++;
+ }
+ return REG_OK;
+}
+
+/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
+ labelled with one character range (there are no transitions on empty
+ strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
+ the regexp. */
+static reg_errcode_t
+tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
+ int *counts, int *offs)
+{
+ tre_union_t *uni;
+ tre_catenation_t *cat;
+ tre_iteration_t *iter;
+ reg_errcode_t errcode = REG_OK;
+
+ /* XXX - recurse using a stack!. */
+ switch (node->type)
+ {
+ case LITERAL:
+ break;
+ case UNION:
+ uni = (tre_union_t *)node->obj;
+ errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
+ break;
+
+ case CATENATION:
+ cat = (tre_catenation_t *)node->obj;
+ /* Add a transition from each position in cat->left->lastpos
+ to each position in cat->right->firstpos. */
+ errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
+ transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
+ break;
+
+ case ITERATION:
+ iter = (tre_iteration_t *)node->obj;
+ assert(iter->max == -1 || iter->max == 1);
+
+ if (iter->max == -1)
+ {
+ assert(iter->min == 0 || iter->min == 1);
+ /* Add a transition from each last position in the iterated
+ expression to each first position. */
+ errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
+ transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ }
+ errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
+ break;
+ }
+ return errcode;
+}
+
+
+#define ERROR_EXIT(err) \
+ do \
+ { \
+ errcode = err; \
+ if (/*CONSTCOND*/1) \
+ goto error_exit; \
+ } \
+ while (/*CONSTCOND*/0)
+
+
+int
+tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
+ tre_stack_t *stack;
+ tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
+ tre_pos_and_tags_t *p;
+ int *counts = NULL, *offs = NULL;
+ int i, add = 0;
+ tre_tnfa_transition_t *transitions, *initial;
+ tre_tnfa_t *tnfa = NULL;
+ tre_submatch_data_t *submatch_data;
+ tre_tag_direction_t *tag_directions = NULL;
+ reg_errcode_t errcode;
+ tre_mem_t mem;
+
+ /* Parse context. */
+ tre_parse_ctx_t parse_ctx;
+
+ /* Allocate a stack used throughout the compilation process for various
+ purposes. */
+ stack = tre_stack_new(512, 10240, 128);
+ if (!stack)
+ return REG_ESPACE;
+ /* Allocate a fast memory allocator. */
+ mem = tre_mem_new();
+ if (!mem)
+ {
+ tre_stack_destroy(stack);
+ return REG_ESPACE;
+ }
+
+ /* Parse the regexp. */
+ memset(&parse_ctx, 0, sizeof(parse_ctx));
+ parse_ctx.mem = mem;
+ parse_ctx.stack = stack;
+ parse_ctx.re = regex;
+ parse_ctx.len = n;
+ parse_ctx.cflags = cflags;
+ parse_ctx.max_backref = -1;
+ DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
+ errcode = tre_parse(&parse_ctx);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+ preg->re_nsub = parse_ctx.submatch_id - 1;
+ tree = parse_ctx.result;
+
+ /* Back references and approximate matching cannot currently be used
+ in the same regexp. */
+ if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
+ ERROR_EXIT(REG_BADPAT);
+
+#ifdef TRE_DEBUG
+ tre_ast_print(tree);
+#endif /* TRE_DEBUG */
+
+ /* Referring to nonexistent subexpressions is illegal. */
+ if (parse_ctx.max_backref > (int)preg->re_nsub)
+ ERROR_EXIT(REG_ESUBREG);
+
+ /* Allocate the TNFA struct. */
+ tnfa = xcalloc(1, sizeof(tre_tnfa_t));
+ if (tnfa == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->have_backrefs = parse_ctx.max_backref >= 0;
+ tnfa->have_approx = parse_ctx.have_approx;
+ tnfa->num_submatches = parse_ctx.submatch_id;
+
+ /* Set up tags for submatch addressing. If REG_NOSUB is set and the
+ regexp does not have back references, this can be skipped. */
+ if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
+ {
+ DPRINT(("tre_compile: setting up tags\n"));
+
+ /* Figure out how many tags we will need. */
+ errcode = tre_add_tags(NULL, stack, tree, tnfa);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+#ifdef TRE_DEBUG
+ tre_ast_print(tree);
+#endif /* TRE_DEBUG */
+
+ if (tnfa->num_tags > 0)
+ {
+ tag_directions = xmalloc(sizeof(*tag_directions)
+ * (tnfa->num_tags + 1));
+ if (tag_directions == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->tag_directions = tag_directions;
+ memset(tag_directions, -1,
+ sizeof(*tag_directions) * (tnfa->num_tags + 1));
+ }
+ tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
+ sizeof(tnfa->minimal_tags));
+ if (tnfa->minimal_tags == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
+ sizeof(*submatch_data));
+ if (submatch_data == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->submatch_data = submatch_data;
+
+ errcode = tre_add_tags(mem, stack, tree, tnfa);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+#ifdef TRE_DEBUG
+ for (i = 0; i < parse_ctx.submatch_id; i++)
+ DPRINT(("pmatch[%d] = {t%d, t%d}\n",
+ i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
+ for (i = 0; i < tnfa->num_tags; i++)
+ DPRINT(("t%d is %s\n", i,
+ tag_directions[i] == TRE_TAG_MINIMIZE ?
+ "minimized" : "maximized"));
+#endif /* TRE_DEBUG */
+ }
+
+ /* Expand iteration nodes. */
+ errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
+ tag_directions, &tnfa->params_depth);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ /* Add a dummy node for the final state.
+ XXX - For certain patterns this dummy node can be optimized away,
+ for example "a*" or "ab*". Figure out a simple way to detect
+ this possibility. */
+ tmp_ast_l = tree;
+ tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
+ if (tmp_ast_r == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
+ if (tree == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+#ifdef TRE_DEBUG
+ tre_ast_print(tree);
+ DPRINT(("Number of states: %d\n", parse_ctx.position));
+#endif /* TRE_DEBUG */
+
+ errcode = tre_compute_nfl(mem, stack, tree);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ counts = xmalloc(sizeof(int) * parse_ctx.position);
+ if (counts == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ offs = xmalloc(sizeof(int) * parse_ctx.position);
+ if (offs == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ for (i = 0; i < parse_ctx.position; i++)
+ counts[i] = 0;
+ tre_ast_to_tnfa(tree, NULL, counts, NULL);
+
+ add = 0;
+ for (i = 0; i < parse_ctx.position; i++)
+ {
+ offs[i] = add;
+ add += counts[i] + 1;
+ counts[i] = 0;
+ }
+ transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
+ if (transitions == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->transitions = transitions;
+ tnfa->num_transitions = add;
+
+ DPRINT(("Converting to TNFA:\n"));
+ errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ /* If in eight bit mode, compute a table of characters that can be the
+ first character of a match. */
+ tnfa->first_char = -1;
+ if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
+ {
+ int count = 0;
+ tre_cint_t k;
+ DPRINT(("Characters that can start a match:"));
+ tnfa->firstpos_chars = xcalloc(256, sizeof(char));
+ if (tnfa->firstpos_chars == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ for (p = tree->firstpos; p->position >= 0; p++)
+ {
+ tre_tnfa_transition_t *j = transitions + offs[p->position];
+ while (j->state != NULL)
+ {
+ for (k = j->code_min; k <= j->code_max && k < 256; k++)
+ {
+ DPRINT((" %d", k));
+ tnfa->firstpos_chars[k] = 1;
+ count++;
+ }
+ j++;
+ }
+ }
+ DPRINT(("\n"));
+#define TRE_OPTIMIZE_FIRST_CHAR 1
+#if TRE_OPTIMIZE_FIRST_CHAR
+ if (count == 1)
+ {
+ for (k = 0; k < 256; k++)
+ if (tnfa->firstpos_chars[k])
+ {
+ DPRINT(("first char must be %d\n", k));
+ tnfa->first_char = k;
+ xfree(tnfa->firstpos_chars);
+ tnfa->firstpos_chars = NULL;
+ break;
+ }
+ }
+#endif
+
+ }
+ else
+ tnfa->firstpos_chars = NULL;
+
+
+ p = tree->firstpos;
+ i = 0;
+ while (p->position >= 0)
+ {
+ i++;
+
+#ifdef TRE_DEBUG
+ {
+ int *tags;
+ DPRINT(("initial: %d", p->position));
+ tags = p->tags;
+ if (tags != NULL)
+ {
+ if (*tags >= 0)
+ DPRINT(("/"));
+ while (*tags >= 0)
+ {
+ DPRINT(("%d", *tags));
+ tags++;
+ if (*tags >= 0)
+ DPRINT((","));
+ }
+ }
+ DPRINT((", assert %d", p->assertions));
+ if (p->params)
+ {
+ DPRINT((", "));
+ tre_print_params(p->params);
+ }
+ DPRINT(("\n"));
+ }
+#endif /* TRE_DEBUG */
+
+ p++;
+ }
+
+ initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
+ if (initial == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->initial = initial;
+
+ i = 0;
+ for (p = tree->firstpos; p->position >= 0; p++)
+ {
+ initial[i].state = transitions + offs[p->position];
+ initial[i].state_id = p->position;
+ initial[i].tags = NULL;
+ /* Copy the arrays p->tags, and p->params, they are allocated
+ from a tre_mem object. */
+ if (p->tags)
+ {
+ int j;
+ for (j = 0; p->tags[j] >= 0; j++);
+ initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
+ if (!initial[i].tags)
+ ERROR_EXIT(REG_ESPACE);
+ memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
+ }
+ initial[i].params = NULL;
+ if (p->params)
+ {
+ initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
+ if (!initial[i].params)
+ ERROR_EXIT(REG_ESPACE);
+ memcpy(initial[i].params, p->params,
+ sizeof(*p->params) * TRE_PARAM_LAST);
+ }
+ initial[i].assertions = p->assertions;
+ i++;
+ }
+ initial[i].state = NULL;
+
+ tnfa->num_transitions = add;
+ tnfa->final = transitions + offs[tree->lastpos[0].position];
+ tnfa->num_states = parse_ctx.position;
+ tnfa->cflags = cflags;
+
+ DPRINT(("final state %p\n", (void *)tnfa->final));
+
+ tre_mem_destroy(mem);
+ tre_stack_destroy(stack);
+ xfree(counts);
+ xfree(offs);
+
+ preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+ return REG_OK;
+
+ error_exit:
+ /* Free everything that was allocated and return the error code. */
+ tre_mem_destroy(mem);
+ if (stack != NULL)
+ tre_stack_destroy(stack);
+ if (counts != NULL)
+ xfree(counts);
+ if (offs != NULL)
+ xfree(offs);
+ preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+ tre_free(preg);
+ return errcode;
+}
+
+
+
+
+void
+tre_free(regex_t *preg)
+{
+ tre_tnfa_t *tnfa;
+ unsigned int i;
+ tre_tnfa_transition_t *trans;
+
+ tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ if (!tnfa)
+ return;
+
+ for (i = 0; i < tnfa->num_transitions; i++)
+ if (tnfa->transitions[i].state)
+ {
+ if (tnfa->transitions[i].tags)
+ xfree(tnfa->transitions[i].tags);
+ if (tnfa->transitions[i].neg_classes)
+ xfree(tnfa->transitions[i].neg_classes);
+ if (tnfa->transitions[i].params)
+ xfree(tnfa->transitions[i].params);
+ }
+ if (tnfa->transitions)
+ xfree(tnfa->transitions);
+
+ if (tnfa->initial)
+ {
+ for (trans = tnfa->initial; trans->state; trans++)
+ {
+ if (trans->tags)
+ xfree(trans->tags);
+ if (trans->params)
+ xfree(trans->params);
+ }
+ xfree(tnfa->initial);
+ }
+
+ if (tnfa->submatch_data)
+ {
+ for (i = 0; i < tnfa->num_submatches; i++)
+ if (tnfa->submatch_data[i].parents)
+ xfree(tnfa->submatch_data[i].parents);
+ xfree(tnfa->submatch_data);
+ }
+
+ if (tnfa->tag_directions)
+ xfree(tnfa->tag_directions);
+ if (tnfa->firstpos_chars)
+ xfree(tnfa->firstpos_chars);
+ if (tnfa->minimal_tags)
+ xfree(tnfa->minimal_tags);
+ xfree(tnfa);
+}
+
+char *
+tre_version(void)
+{
+ static char str[256];
+ char *version;
+
+ if (str[0] == 0)
+ {
+ (void) tre_config(TRE_CONFIG_VERSION, &version);
+ (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
+ }
+ return str;
+}
+
+int
+tre_config(int query, void *result)
+{
+ int *int_result = result;
+ const char **string_result = result;
+
+ switch (query)
+ {
+ case TRE_CONFIG_APPROX:
+#ifdef TRE_APPROX
+ *int_result = 1;
+#else /* !TRE_APPROX */
+ *int_result = 0;
+#endif /* !TRE_APPROX */
+ return REG_OK;
+
+ case TRE_CONFIG_WCHAR:
+#ifdef TRE_WCHAR
+ *int_result = 1;
+#else /* !TRE_WCHAR */
+ *int_result = 0;
+#endif /* !TRE_WCHAR */
+ return REG_OK;
+
+ case TRE_CONFIG_MULTIBYTE:
+#ifdef TRE_MULTIBYTE
+ *int_result = 1;
+#else /* !TRE_MULTIBYTE */
+ *int_result = 0;
+#endif /* !TRE_MULTIBYTE */
+ return REG_OK;
+
+ case TRE_CONFIG_SYSTEM_ABI:
+#ifdef TRE_CONFIG_SYSTEM_ABI
+ *int_result = 1;
+#else /* !TRE_CONFIG_SYSTEM_ABI */
+ *int_result = 0;
+#endif /* !TRE_CONFIG_SYSTEM_ABI */
+ return REG_OK;
+
+ case TRE_CONFIG_VERSION:
+ *string_result = TRE_VERSION;
+ return REG_OK;
+ }
+
+ return REG_NOMATCH;
+}
+
+
+/* EOF */
diff --git a/lib/tre-compile.h b/lib/tre-compile.h
new file mode 100644
index 0000000000000..51d5ac94acc61
--- /dev/null
+++ b/lib/tre-compile.h
@@ -0,0 +1,27 @@
+/*
+ tre-compile.h: Regex compilation definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+
+#ifndef TRE_COMPILE_H
+#define TRE_COMPILE_H 1
+
+typedef struct {
+ int position;
+ int code_min;
+ int code_max;
+ int *tags;
+ int assertions;
+ tre_ctype_t class;
+ tre_ctype_t *neg_classes;
+ int backref;
+ int *params;
+} tre_pos_and_tags_t;
+
+#endif /* TRE_COMPILE_H */
+
+/* EOF */
diff --git a/lib/tre-config.h b/lib/tre-config.h
new file mode 100644
index 0000000000000..bf2d596aeb4de
--- /dev/null
+++ b/lib/tre-config.h
@@ -0,0 +1,44 @@
+/* lib/tre-config.h. Generated from tre-config.h.in by configure. */
+/* tre-config.h.in. This file has all definitions that are needed in
+ `tre.h'. Note that this file must contain only the bare minimum
+ of definitions without the TRE_ prefix to avoid conflicts between
+ definitions here and definitions included from somewhere else. */
+
+/* Define to 1 if you have the <libutf8.h> header file. */
+/* #undef HAVE_LIBUTF8_H */
+
+/* Define to 1 if the system has the type `reg_errcode_t'. */
+/* #undef HAVE_REG_ERRCODE_T */
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <wchar.h> header file. */
+#define HAVE_WCHAR_H 1
+
+/* Define if you want to enable approximate matching functionality. */
+#define TRE_APPROX 1
+
+/* Define to enable multibyte character set support. */
+#define TRE_MULTIBYTE 1
+
+/* Define to the absolute path to the system tre.h */
+/* #undef TRE_SYSTEM_REGEX_H_PATH */
+
+/* Define to include the system regex.h from tre.h */
+/* #undef TRE_USE_SYSTEM_REGEX_H */
+
+/* Define to enable wide character (wchar_t) support. */
+#define TRE_WCHAR 1
+
+/* TRE version string. */
+#define TRE_VERSION "0.8.0"
+
+/* TRE version level 1. */
+#define TRE_VERSION_1 0
+
+/* TRE version level 2. */
+#define TRE_VERSION_2 8
+
+/* TRE version level 3. */
+#define TRE_VERSION_3 0
diff --git a/lib/tre-config.h.in b/lib/tre-config.h.in
new file mode 100644
index 0000000000000..f5b5af11a4c66
--- /dev/null
+++ b/lib/tre-config.h.in
@@ -0,0 +1,43 @@
+/* tre-config.h.in. This file has all definitions that are needed in
+ `tre.h'. Note that this file must contain only the bare minimum
+ of definitions without the TRE_ prefix to avoid conflicts between
+ definitions here and definitions included from somewhere else. */
+
+/* Define to 1 if you have the <libutf8.h> header file. */
+#undef HAVE_LIBUTF8_H
+
+/* Define to 1 if the system has the type `reg_errcode_t'. */
+#undef HAVE_REG_ERRCODE_T
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <wchar.h> header file. */
+#undef HAVE_WCHAR_H
+
+/* Define if you want to enable approximate matching functionality. */
+#undef TRE_APPROX
+
+/* Define to enable multibyte character set support. */
+#undef TRE_MULTIBYTE
+
+/* Define to the absolute path to the system tre.h */
+#undef TRE_SYSTEM_REGEX_H_PATH
+
+/* Define to include the system regex.h from tre.h */
+#undef TRE_USE_SYSTEM_REGEX_H
+
+/* Define to enable wide character (wchar_t) support. */
+#undef TRE_WCHAR
+
+/* TRE version string. */
+#undef TRE_VERSION
+
+/* TRE version level 1. */
+#undef TRE_VERSION_1
+
+/* TRE version level 2. */
+#undef TRE_VERSION_2
+
+/* TRE version level 3. */
+#undef TRE_VERSION_3
diff --git a/lib/tre-internal.h b/lib/tre-internal.h
new file mode 100644
index 0000000000000..dacde8c872243
--- /dev/null
+++ b/lib/tre-internal.h
@@ -0,0 +1,287 @@
+/*
+ tre-internal.h - TRE internal definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef TRE_INTERNAL_H
+#define TRE_INTERNAL_H 1
+
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* !HAVE_WCTYPE_H */
+
+#include <ctype.h>
+#include "tre.h"
+
+#ifdef TRE_DEBUG
+#include <stdio.h>
+#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
+#else /* !TRE_DEBUG */
+#define DPRINT(msg) do { } while(/*CONSTCOND*/0)
+#endif /* !TRE_DEBUG */
+
+#define elementsof(x) ( sizeof(x) / sizeof(x[0]) )
+
+#ifdef HAVE_MBRTOWC
+#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
+#else /* !HAVE_MBRTOWC */
+#ifdef HAVE_MBTOWC
+#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
+#endif /* HAVE_MBTOWC */
+#endif /* !HAVE_MBRTOWC */
+
+#ifdef TRE_MULTIBYTE
+#ifdef HAVE_MBSTATE_T
+#define TRE_MBSTATE
+#endif /* TRE_MULTIBYTE */
+#endif /* HAVE_MBSTATE_T */
+
+/* Define the character types and functions. */
+#ifdef TRE_WCHAR
+
+/* Wide characters. */
+typedef wint_t tre_cint_t;
+#define TRE_CHAR_MAX WCHAR_MAX
+
+#ifdef TRE_MULTIBYTE
+#define TRE_MB_CUR_MAX MB_CUR_MAX
+#else /* !TRE_MULTIBYTE */
+#define TRE_MB_CUR_MAX 1
+#endif /* !TRE_MULTIBYTE */
+
+#define tre_isalnum iswalnum
+#define tre_isalpha iswalpha
+#ifdef HAVE_ISWBLANK
+#define tre_isblank iswblank
+#endif /* HAVE_ISWBLANK */
+#define tre_iscntrl iswcntrl
+#define tre_isdigit iswdigit
+#define tre_isgraph iswgraph
+#define tre_islower iswlower
+#define tre_isprint iswprint
+#define tre_ispunct iswpunct
+#define tre_isspace iswspace
+#define tre_isupper iswupper
+#define tre_isxdigit iswxdigit
+
+#define tre_tolower towlower
+#define tre_toupper towupper
+#define tre_strlen wcslen
+
+#else /* !TRE_WCHAR */
+
+/* 8 bit characters. */
+typedef short tre_cint_t;
+#define TRE_CHAR_MAX 255
+#define TRE_MB_CUR_MAX 1
+
+#define tre_isalnum isalnum
+#define tre_isalpha isalpha
+#ifdef HAVE_ISASCII
+#define tre_isascii isascii
+#endif /* HAVE_ISASCII */
+#ifdef HAVE_ISBLANK
+#define tre_isblank isblank
+#endif /* HAVE_ISBLANK */
+#define tre_iscntrl iscntrl
+#define tre_isdigit isdigit
+#define tre_isgraph isgraph
+#define tre_islower islower
+#define tre_isprint isprint
+#define tre_ispunct ispunct
+#define tre_isspace isspace
+#define tre_isupper isupper
+#define tre_isxdigit isxdigit
+
+#define tre_tolower(c) (tre_cint_t)(tolower(c))
+#define tre_toupper(c) (tre_cint_t)(toupper(c))
+#define tre_strlen(s) (strlen((const char*)s))
+
+#endif /* !TRE_WCHAR */
+
+#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
+#define TRE_USE_SYSTEM_WCTYPE 1
+#endif
+
+#ifdef TRE_USE_SYSTEM_WCTYPE
+/* Use system provided iswctype() and wctype(). */
+typedef wctype_t tre_ctype_t;
+#define tre_isctype iswctype
+#define tre_ctype wctype
+#else /* !TRE_USE_SYSTEM_WCTYPE */
+/* Define our own versions of iswctype() and wctype(). */
+typedef int (*tre_ctype_t)(tre_cint_t);
+#define tre_isctype(c, type) ( (type)(c) )
+tre_ctype_t tre_ctype(const char *name);
+#endif /* !TRE_USE_SYSTEM_WCTYPE */
+
+typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
+
+/* Returns number of bytes to add to (char *)ptr to make it
+ properly aligned for the type. */
+#define ALIGN(ptr, type) \
+ ((((long)ptr) % sizeof(type)) \
+ ? (sizeof(type) - (((long)ptr) % sizeof(type))) \
+ : 0)
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
+#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
+
+/* Define STRF to the correct printf formatter for strings. */
+#ifdef TRE_WCHAR
+#define STRF "ls"
+#else /* !TRE_WCHAR */
+#define STRF "s"
+#endif /* !TRE_WCHAR */
+
+/* TNFA transition type. A TNFA state is an array of transitions,
+ the terminator is a transition with NULL `state'. */
+typedef struct tnfa_transition tre_tnfa_transition_t;
+
+struct tnfa_transition {
+ /* Range of accepted characters. */
+ tre_cint_t code_min;
+ tre_cint_t code_max;
+ /* Pointer to the destination state. */
+ tre_tnfa_transition_t *state;
+ /* ID number of the destination state. */
+ int state_id;
+ /* -1 terminated array of tags (or NULL). */
+ int *tags;
+ /* Matching parameters settings (or NULL). */
+ int *params;
+ /* Assertion bitmap. */
+ int assertions;
+ /* Assertion parameters. */
+ union {
+ /* Character class assertion. */
+ tre_ctype_t class;
+ /* Back reference assertion. */
+ int backref;
+ } u;
+ /* Negative character class assertions. */
+ tre_ctype_t *neg_classes;
+};
+
+
+/* Assertions. */
+#define ASSERT_AT_BOL 1 /* Beginning of line. */
+#define ASSERT_AT_EOL 2 /* End of line. */
+#define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */
+#define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */
+#define ASSERT_AT_BOW 16 /* Beginning of word. */
+#define ASSERT_AT_EOW 32 /* End of word. */
+#define ASSERT_AT_WB 64 /* Word boundary. */
+#define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */
+#define ASSERT_BACKREF 256 /* A back reference in `backref'. */
+#define ASSERT_LAST 256
+
+/* Tag directions. */
+typedef enum {
+ TRE_TAG_MINIMIZE = 0,
+ TRE_TAG_MAXIMIZE = 1
+} tre_tag_direction_t;
+
+/* Parameters that can be changed dynamically while matching. */
+typedef enum {
+ TRE_PARAM_COST_INS = 0,
+ TRE_PARAM_COST_DEL = 1,
+ TRE_PARAM_COST_SUBST = 2,
+ TRE_PARAM_COST_MAX = 3,
+ TRE_PARAM_MAX_INS = 4,
+ TRE_PARAM_MAX_DEL = 5,
+ TRE_PARAM_MAX_SUBST = 6,
+ TRE_PARAM_MAX_ERR = 7,
+ TRE_PARAM_DEPTH = 8,
+ TRE_PARAM_LAST = 9
+} tre_param_t;
+
+/* Unset matching parameter */
+#define TRE_PARAM_UNSET -1
+
+/* Signifies the default matching parameter value. */
+#define TRE_PARAM_DEFAULT -2
+
+/* Instructions to compute submatch register values from tag values
+ after a successful match. */
+struct tre_submatch_data {
+ /* Tag that gives the value for rm_so (submatch start offset). */
+ int so_tag;
+ /* Tag that gives the value for rm_eo (submatch end offset). */
+ int eo_tag;
+ /* List of submatches this submatch is contained in. */
+ int *parents;
+};
+
+typedef struct tre_submatch_data tre_submatch_data_t;
+
+
+/* TNFA definition. */
+typedef struct tnfa tre_tnfa_t;
+
+struct tnfa {
+ tre_tnfa_transition_t *transitions;
+ unsigned int num_transitions;
+ tre_tnfa_transition_t *initial;
+ tre_tnfa_transition_t *final;
+ tre_submatch_data_t *submatch_data;
+ char *firstpos_chars;
+ int first_char;
+ unsigned int num_submatches;
+ tre_tag_direction_t *tag_directions;
+ int *minimal_tags;
+ int num_tags;
+ int num_minimals;
+ int end_tag;
+ int num_states;
+ int cflags;
+ int have_backrefs;
+ int have_approx;
+ int params_depth;
+};
+
+int
+tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);
+
+void
+tre_free(regex_t *preg);
+
+void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+ const tre_tnfa_t *tnfa, int *tags, int match_eo);
+
+reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
+ tre_str_type_t type, int *match_tags, int eflags,
+ int *match_end_ofs);
+
+reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
+ tre_str_type_t type, int *match_tags, int eflags,
+ int *match_end_ofs);
+
+reg_errcode_t
+tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
+ int len, tre_str_type_t type, int *match_tags,
+ int eflags, int *match_end_ofs);
+
+#ifdef TRE_APPROX
+reg_errcode_t
+tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
+ tre_str_type_t type, int *match_tags,
+ regamatch_t *match, regaparams_t params,
+ int eflags, int *match_end_ofs);
+#endif /* TRE_APPROX */
+
+#endif /* TRE_INTERNAL_H */
+
+/* EOF */
diff --git a/lib/tre-match-approx.c b/lib/tre-match-approx.c
new file mode 100644
index 0000000000000..74ea330689053
--- /dev/null
+++ b/lib/tre-match-approx.c
@@ -0,0 +1,812 @@
+/*
+ tre-match-approx.c - TRE approximate regex matching engine
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+/* AIX requires this to be the first thing in the file. */
+#ifdef TRE_USE_ALLOCA
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+# endif
+# endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#define __USE_STRING_INLINES
+#undef __NO_INLINE__
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include <ctype.h>
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+
+#include "tre-internal.h"
+#include "tre-match-utils.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+#define TRE_M_COST 0
+#define TRE_M_NUM_INS 1
+#define TRE_M_NUM_DEL 2
+#define TRE_M_NUM_SUBST 3
+#define TRE_M_NUM_ERR 4
+#define TRE_M_LAST 5
+
+#define TRE_M_MAX_DEPTH 3
+
+typedef struct {
+ /* State in the TNFA transition table. */
+ tre_tnfa_transition_t *state;
+ /* Position in input string. */
+ int pos;
+ /* Tag values. */
+ int *tags;
+ /* Matching parameters. */
+ regaparams_t params;
+ /* Nesting depth of parameters. This is used as an index in
+ the `costs' array. */
+ int depth;
+ /* Costs and counter values for different parameter nesting depths. */
+ int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
+} tre_tnfa_approx_reach_t;
+
+
+#ifdef TRE_DEBUG
+/* Prints the `reach' array in a readable fashion with DPRINT. */
+static void
+tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
+ int pos, int num_tags)
+{
+ int id;
+
+ /* Print each state on one line. */
+ DPRINT((" reach:\n"));
+ for (id = 0; id < tnfa->num_states; id++)
+ {
+ int i, j;
+ if (reach[id].pos < pos)
+ continue; /* Not reached. */
+ DPRINT((" %03d, costs ", id));
+ for (i = 0; i <= reach[id].depth; i++)
+ {
+ DPRINT(("["));
+ for (j = 0; j < TRE_M_LAST; j++)
+ {
+ DPRINT(("%2d", reach[id].costs[i][j]));
+ if (j + 1 < TRE_M_LAST)
+ DPRINT((","));
+ }
+ DPRINT(("]"));
+ if (i + 1 <= reach[id].depth)
+ DPRINT((", "));
+ }
+ DPRINT(("\n tags "));
+ for (i = 0; i < num_tags; i++)
+ {
+ DPRINT(("%02d", reach[id].tags[i]));
+ if (i + 1 < num_tags)
+ DPRINT((","));
+ }
+ DPRINT(("\n"));
+ }
+ DPRINT(("\n"));
+}
+#endif /* TRE_DEBUG */
+
+
+/* Sets the matching parameters in `reach' to the ones defined in the `pa'
+ array. If `pa' specifies default values, they are taken from
+ `default_params'. */
+inline static void
+tre_set_params(tre_tnfa_approx_reach_t *reach,
+ int *pa, regaparams_t default_params)
+{
+ int value;
+
+ /* If depth is increased reset costs and counters to zero for the
+ new levels. */
+ value = pa[TRE_PARAM_DEPTH];
+ assert(value <= TRE_M_MAX_DEPTH);
+ if (value > reach->depth)
+ {
+ int i, j;
+ for (i = reach->depth + 1; i <= value; i++)
+ for (j = 0; j < TRE_M_LAST; j++)
+ reach->costs[i][j] = 0;
+ }
+ reach->depth = value;
+
+ /* Set insert cost. */
+ value = pa[TRE_PARAM_COST_INS];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.cost_ins = default_params.cost_ins;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.cost_ins = value;
+
+ /* Set delete cost. */
+ value = pa[TRE_PARAM_COST_DEL];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.cost_del = default_params.cost_del;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.cost_del = value;
+
+ /* Set substitute cost. */
+ value = pa[TRE_PARAM_COST_SUBST];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.cost_subst = default_params.cost_subst;
+ else
+ reach->params.cost_subst = value;
+
+ /* Set maximum cost. */
+ value = pa[TRE_PARAM_COST_MAX];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_cost = default_params.max_cost;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_cost = value;
+
+ /* Set maximum inserts. */
+ value = pa[TRE_PARAM_MAX_INS];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_ins = default_params.max_ins;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_ins = value;
+
+ /* Set maximum deletes. */
+ value = pa[TRE_PARAM_MAX_DEL];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_del = default_params.max_del;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_del = value;
+
+ /* Set maximum substitutes. */
+ value = pa[TRE_PARAM_MAX_SUBST];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_subst = default_params.max_subst;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_subst = value;
+
+ /* Set maximum number of errors. */
+ value = pa[TRE_PARAM_MAX_ERR];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_err = default_params.max_err;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_err = value;
+}
+
+reg_errcode_t
+tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
+ tre_str_type_t type, int *match_tags,
+ regamatch_t *match, regaparams_t default_params,
+ int eflags, int *match_end_ofs)
+{
+ /* State variables required by GET_NEXT_WCHAR. */
+ tre_char_t prev_c = 0, next_c = 0;
+ const char *str_byte = string;
+ int pos = -1;
+ unsigned int pos_add_next = 1;
+#ifdef TRE_WCHAR
+ const wchar_t *str_wide = string;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* !TRE_WCHAR */
+#endif /* TRE_WCHAR */
+ int reg_notbol = eflags & REG_NOTBOL;
+ int reg_noteol = eflags & REG_NOTEOL;
+ int reg_newline = tnfa->cflags & REG_NEWLINE;
+ int str_user_end = 0;
+
+ int prev_pos;
+
+ /* Number of tags. */
+ int num_tags;
+ /* The reach tables. */
+ tre_tnfa_approx_reach_t *reach, *reach_next;
+ /* Tag array for temporary use. */
+ int *tmp_tags;
+
+ /* End offset of best match so far, or -1 if no match found yet. */
+ int match_eo = -1;
+ /* Costs of the match. */
+ int match_costs[TRE_M_LAST];
+
+ /* Space for temporary data required for matching. */
+ unsigned char *buf;
+
+ int i, id;
+
+ if (!match_tags)
+ num_tags = 0;
+ else
+ num_tags = tnfa->num_tags;
+
+#ifdef TRE_MBSTATE
+ memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+ DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
+ "match_tags %p\n",
+ type, len, eflags,
+ match_tags));
+ DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
+ default_params.max_cost,
+ default_params.cost_ins,
+ default_params.cost_del,
+ default_params.cost_subst));
+
+ /* Allocate memory for temporary data required for matching. This needs to
+ be done for every matching operation to be thread safe. This allocates
+ everything in a single large block from the stack frame using alloca()
+ or with malloc() if alloca is unavailable. */
+ {
+ unsigned char *buf_cursor;
+ /* Space needed for one array of tags. */
+ int tag_bytes = sizeof(*tmp_tags) * num_tags;
+ /* Space needed for one reach table. */
+ int reach_bytes = sizeof(*reach_next) * tnfa->num_states;
+ /* Total space needed. */
+ int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
+ /* Add some extra to make sure we can align the pointers. The multiplier
+ used here must be equal to the number of ALIGN calls below. */
+ total_bytes += (sizeof(long) - 1) * 3;
+
+ /* Allocate the memory. */
+#ifdef TRE_USE_ALLOCA
+ buf = alloca(total_bytes);
+#else /* !TRE_USE_ALLOCA */
+ buf = xmalloc((unsigned)total_bytes);
+#endif /* !TRE_USE_ALLOCA */
+ if (!buf)
+ return REG_ESPACE;
+ memset(buf, 0, (size_t)total_bytes);
+
+ /* Allocate `tmp_tags' from `buf'. */
+ tmp_tags = (void *)buf;
+ buf_cursor = buf + tag_bytes;
+ buf_cursor += ALIGN(buf_cursor, long);
+
+ /* Allocate `reach' from `buf'. */
+ reach = (void *)buf_cursor;
+ buf_cursor += reach_bytes;
+ buf_cursor += ALIGN(buf_cursor, long);
+
+ /* Allocate `reach_next' from `buf'. */
+ reach_next = (void *)buf_cursor;
+ buf_cursor += reach_bytes;
+ buf_cursor += ALIGN(buf_cursor, long);
+
+ /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
+ for (i = 0; i < tnfa->num_states; i++)
+ {
+ reach[i].tags = (void *)buf_cursor;
+ buf_cursor += tag_bytes;
+ reach_next[i].tags = (void *)buf_cursor;
+ buf_cursor += tag_bytes;
+ }
+ assert(buf_cursor <= buf + total_bytes);
+ }
+
+ for (i = 0; i < TRE_M_LAST; i++)
+ match_costs[i] = INT_MAX;
+
+ /* Mark the reach arrays empty. */
+ for (i = 0; i < tnfa->num_states; i++)
+ reach[i].pos = reach_next[i].pos = -2;
+
+ prev_pos = pos;
+ GET_NEXT_WCHAR();
+ pos = 0;
+
+ while (/*CONSTCOND*/1)
+ {
+ DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
+
+ /* Add initial states to `reach_next' if an exact match has not yet
+ been found. */
+ if (match_costs[TRE_M_COST] > 0)
+ {
+ tre_tnfa_transition_t *trans;
+ DPRINT((" init"));
+ for (trans = tnfa->initial; trans->state; trans++)
+ {
+ int stateid = trans->state_id;
+
+ /* If this state is not currently in `reach_next', add it
+ there. */
+ if (reach_next[stateid].pos < pos)
+ {
+ if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
+ {
+ /* Assertions failed, don't add this state. */
+ DPRINT((" !%d (assert)", stateid));
+ continue;
+ }
+ DPRINT((" %d", stateid));
+ reach_next[stateid].state = trans->state;
+ reach_next[stateid].pos = pos;
+
+ /* Compute tag values after this transition. */
+ for (i = 0; i < num_tags; i++)
+ reach_next[stateid].tags[i] = -1;
+
+ if (trans->tags)
+ for (i = 0; trans->tags[i] >= 0; i++)
+ if (trans->tags[i] < num_tags)
+ reach_next[stateid].tags[trans->tags[i]] = pos;
+
+ /* Set the parameters, depth, and costs. */
+ reach_next[stateid].params = default_params;
+ reach_next[stateid].depth = 0;
+ for (i = 0; i < TRE_M_LAST; i++)
+ reach_next[stateid].costs[0][i] = 0;
+ if (trans->params)
+ tre_set_params(&reach_next[stateid], trans->params,
+ default_params);
+
+ /* If this is the final state, mark the exact match. */
+ if (trans->state == tnfa->final)
+ {
+ match_eo = pos;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = reach_next[stateid].tags[i];
+ for (i = 0; i < TRE_M_LAST; i++)
+ match_costs[i] = 0;
+ }
+ }
+ }
+ DPRINT(("\n"));
+ }
+
+
+ /* Handle inserts. This is done by pretending there's an epsilon
+ transition from each state in `reach' back to the same state.
+ We don't need to worry about the final state here; this will never
+ give a better match than what we already have. */
+ for (id = 0; id < tnfa->num_states; id++)
+ {
+ int depth;
+ int cost, cost0;
+
+ if (reach[id].pos != prev_pos)
+ {
+ DPRINT((" insert: %d not reached\n", id));
+ continue; /* Not reached. */
+ }
+
+ depth = reach[id].depth;
+
+ /* Compute and check cost at current depth. */
+ cost = reach[id].costs[depth][TRE_M_COST];
+ if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
+ cost += reach[id].params.cost_ins;
+ if (cost > reach[id].params.max_cost)
+ continue; /* Cost too large. */
+
+ /* Check number of inserts at current depth. */
+ if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
+ > reach[id].params.max_ins)
+ continue; /* Too many inserts. */
+
+ /* Check total number of errors at current depth. */
+ if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
+ > reach[id].params.max_err)
+ continue; /* Too many errors. */
+
+ /* Compute overall cost. */
+ cost0 = cost;
+ if (depth > 0)
+ {
+ cost0 = reach[id].costs[0][TRE_M_COST];
+ if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
+ cost0 += reach[id].params.cost_ins;
+ else
+ cost0 += default_params.cost_ins;
+ }
+
+ DPRINT((" insert: from %d to %d, cost %d: ", id, id,
+ reach[id].costs[depth][TRE_M_COST]));
+ if (reach_next[id].pos == pos
+ && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
+ {
+ DPRINT(("lose\n"));
+ continue;
+ }
+ DPRINT(("win\n"));
+
+ /* Copy state, position, tags, parameters, and depth. */
+ reach_next[id].state = reach[id].state;
+ reach_next[id].pos = pos;
+ for (i = 0; i < num_tags; i++)
+ reach_next[id].tags[i] = reach[id].tags[i];
+ reach_next[id].params = reach[id].params;
+ reach_next[id].depth = reach[id].depth;
+
+ /* Set the costs after this transition. */
+ memcpy(reach_next[id].costs, reach[id].costs,
+ sizeof(reach_next[id].costs[0][0])
+ * TRE_M_LAST * (depth + 1));
+ reach_next[id].costs[depth][TRE_M_COST] = cost;
+ reach_next[id].costs[depth][TRE_M_NUM_INS]++;
+ reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
+ if (depth > 0)
+ {
+ reach_next[id].costs[0][TRE_M_COST] = cost0;
+ reach_next[id].costs[0][TRE_M_NUM_INS]++;
+ reach_next[id].costs[0][TRE_M_NUM_ERR]++;
+ }
+
+ }
+
+
+ /* Handle deletes. This is done by traversing through the whole TNFA
+ pretending that all transitions are epsilon transitions, until
+ no more states can be reached with better costs. */
+ {
+ /* XXX - dynamic ringbuffer size */
+ tre_tnfa_approx_reach_t *ringbuffer[512];
+ tre_tnfa_approx_reach_t **deque_start, **deque_end;
+
+ deque_start = deque_end = ringbuffer;
+
+ /* Add all states in `reach_next' to the deque. */
+ for (id = 0; id < tnfa->num_states; id++)
+ {
+ if (reach_next[id].pos != pos)
+ continue;
+ *deque_end = &reach_next[id];
+ deque_end++;
+ assert(deque_end != deque_start);
+ }
+
+ /* Repeat until the deque is empty. */
+ while (deque_end != deque_start)
+ {
+ tre_tnfa_approx_reach_t *reach_p;
+ int depth;
+ int cost, cost0;
+ tre_tnfa_transition_t *trans;
+
+ /* Pop the first item off the deque. */
+ reach_p = *deque_start;
+ id = reach_p - reach_next;
+ depth = reach_p->depth;
+
+ /* Compute cost at current depth. */
+ cost = reach_p->costs[depth][TRE_M_COST];
+ if (reach_p->params.cost_del != TRE_PARAM_UNSET)
+ cost += reach_p->params.cost_del;
+
+ /* Check cost, number of deletes, and total number of errors
+ at current depth. */
+ if (cost > reach_p->params.max_cost
+ || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
+ > reach_p->params.max_del)
+ || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
+ > reach_p->params.max_err))
+ {
+ /* Too many errors or cost too large. */
+ DPRINT((" delete: from %03d: cost too large\n", id));
+ deque_start++;
+ if (deque_start >= (ringbuffer + 512))
+ deque_start = ringbuffer;
+ continue;
+ }
+
+ /* Compute overall cost. */
+ cost0 = cost;
+ if (depth > 0)
+ {
+ cost0 = reach_p->costs[0][TRE_M_COST];
+ if (reach_p->params.cost_del != TRE_PARAM_UNSET)
+ cost0 += reach_p->params.cost_del;
+ else
+ cost0 += default_params.cost_del;
+ }
+
+ for (trans = reach_p->state; trans->state; trans++)
+ {
+ int dest_id = trans->state_id;
+ DPRINT((" delete: from %03d to %03d, cost %d (%d): ",
+ id, dest_id, cost0, reach_p->params.max_cost));
+
+ if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
+ {
+ DPRINT(("assertion failed\n"));
+ continue;
+ }
+
+ /* Compute tag values after this transition. */
+ for (i = 0; i < num_tags; i++)
+ tmp_tags[i] = reach_p->tags[i];
+ if (trans->tags)
+ for (i = 0; trans->tags[i] >= 0; i++)
+ if (trans->tags[i] < num_tags)
+ tmp_tags[trans->tags[i]] = pos;
+
+ /* If another path has also reached this state, choose the one
+ with the smallest cost or best tags if costs are equal. */
+ if (reach_next[dest_id].pos == pos
+ && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
+ || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
+ && (!match_tags
+ || !tre_tag_order(num_tags,
+ tnfa->tag_directions,
+ tmp_tags,
+ reach_next[dest_id].tags)))))
+ {
+ DPRINT(("lose, cost0 %d, have %d\n",
+ cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
+ continue;
+ }
+ DPRINT(("win\n"));
+
+ /* Set state, position, tags, parameters, depth, and costs. */
+ reach_next[dest_id].state = trans->state;
+ reach_next[dest_id].pos = pos;
+ for (i = 0; i < num_tags; i++)
+ reach_next[dest_id].tags[i] = tmp_tags[i];
+
+ reach_next[dest_id].params = reach_p->params;
+ if (trans->params)
+ tre_set_params(&reach_next[dest_id], trans->params,
+ default_params);
+
+ reach_next[dest_id].depth = reach_p->depth;
+ memcpy(&reach_next[dest_id].costs,
+ reach_p->costs,
+ sizeof(reach_p->costs[0][0])
+ * TRE_M_LAST * (depth + 1));
+ reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
+ reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
+ reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
+ if (depth > 0)
+ {
+ reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
+ reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
+ reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
+ }
+
+ if (trans->state == tnfa->final
+ && (match_eo < 0
+ || match_costs[TRE_M_COST] > cost0
+ || (match_costs[TRE_M_COST] == cost0
+ && (num_tags > 0
+ && tmp_tags[0] <= match_tags[0]))))
+ {
+ DPRINT((" setting new match at %d, cost %d\n",
+ pos, cost0));
+ match_eo = pos;
+ memcpy(match_costs, reach_next[dest_id].costs[0],
+ sizeof(match_costs[0]) * TRE_M_LAST);
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = tmp_tags[i];
+ }
+
+ /* Add to the end of the deque. */
+ *deque_end = &reach_next[dest_id];
+ deque_end++;
+ if (deque_end >= (ringbuffer + 512))
+ deque_end = ringbuffer;
+ assert(deque_end != deque_start);
+ }
+ deque_start++;
+ if (deque_start >= (ringbuffer + 512))
+ deque_start = ringbuffer;
+ }
+
+ }
+
+#ifdef TRE_DEBUG
+ tre_print_reach(tnfa, reach_next, pos, num_tags);
+#endif /* TRE_DEBUG */
+
+ /* Check for end of string. */
+ if (len < 0)
+ {
+ if (type == STR_USER)
+ {
+ if (str_user_end)
+ break;
+ }
+ else if (next_c == L'\0')
+ break;
+ }
+ else
+ {
+ if (pos >= len)
+ break;
+ }
+
+ prev_pos = pos;
+ GET_NEXT_WCHAR();
+
+ /* Swap `reach' and `reach_next'. */
+ {
+ tre_tnfa_approx_reach_t *tmp;
+ tmp = reach;
+ reach = reach_next;
+ reach_next = tmp;
+ }
+
+ /* Handle exact matches and substitutions. */
+ for (id = 0; id < tnfa->num_states; id++)
+ {
+ tre_tnfa_transition_t *trans;
+
+ if (reach[id].pos < prev_pos)
+ continue; /* Not reached. */
+ for (trans = reach[id].state; trans->state; trans++)
+ {
+ int dest_id;
+ int depth;
+ int cost, cost0, err;
+
+ if (trans->assertions
+ && (CHECK_ASSERTIONS(trans->assertions)
+ || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
+ {
+ DPRINT((" exact, from %d: assert failed\n", id));
+ continue;
+ }
+
+ depth = reach[id].depth;
+ dest_id = trans->state_id;
+
+ cost = reach[id].costs[depth][TRE_M_COST];
+ cost0 = reach[id].costs[0][TRE_M_COST];
+ err = 0;
+
+ if (trans->code_min > (tre_cint_t)prev_c
+ || trans->code_max < (tre_cint_t)prev_c)
+ {
+ /* Handle substitutes. The required character was not in
+ the string, so match it in place of whatever was supposed
+ to be there and increase costs accordingly. */
+ err = 1;
+
+ /* Compute and check cost at current depth. */
+ cost = reach[id].costs[depth][TRE_M_COST];
+ if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
+ cost += reach[id].params.cost_subst;
+ if (cost > reach[id].params.max_cost)
+ continue; /* Cost too large. */
+
+ /* Check number of substitutes at current depth. */
+ if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
+ > reach[id].params.max_subst)
+ continue; /* Too many substitutes. */
+
+ /* Check total number of errors at current depth. */
+ if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
+ > reach[id].params.max_err)
+ continue; /* Too many errors. */
+
+ /* Compute overall cost. */
+ cost0 = cost;
+ if (depth > 0)
+ {
+ cost0 = reach[id].costs[0][TRE_M_COST];
+ if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
+ cost0 += reach[id].params.cost_subst;
+ else
+ cost0 += default_params.cost_subst;
+ }
+ DPRINT((" subst, from %03d to %03d, cost %d: ",
+ id, dest_id, cost0));
+ }
+ else
+ DPRINT((" exact, from %03d to %03d, cost %d: ",
+ id, dest_id, cost0));
+
+ /* Compute tag values after this transition. */
+ for (i = 0; i < num_tags; i++)
+ tmp_tags[i] = reach[id].tags[i];
+ if (trans->tags)
+ for (i = 0; trans->tags[i] >= 0; i++)
+ if (trans->tags[i] < num_tags)
+ tmp_tags[trans->tags[i]] = pos;
+
+ /* If another path has also reached this state, choose the
+ one with the smallest cost or best tags if costs are equal. */
+ if (reach_next[dest_id].pos == pos
+ && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
+ || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
+ && !tre_tag_order(num_tags, tnfa->tag_directions,
+ tmp_tags,
+ reach_next[dest_id].tags))))
+ {
+ DPRINT(("lose\n"));
+ continue;
+ }
+ DPRINT(("win %d %d\n",
+ reach_next[dest_id].pos,
+ reach_next[dest_id].costs[0][TRE_M_COST]));
+
+ /* Set state, position, tags, and depth. */
+ reach_next[dest_id].state = trans->state;
+ reach_next[dest_id].pos = pos;
+ for (i = 0; i < num_tags; i++)
+ reach_next[dest_id].tags[i] = tmp_tags[i];
+ reach_next[dest_id].depth = reach[id].depth;
+
+ /* Set parameters. */
+ reach_next[dest_id].params = reach[id].params;
+ if (trans->params)
+ tre_set_params(&reach_next[dest_id], trans->params,
+ default_params);
+
+ /* Set the costs after this transition. */
+ memcpy(&reach_next[dest_id].costs,
+ reach[id].costs,
+ sizeof(reach[id].costs[0][0])
+ * TRE_M_LAST * (depth + 1));
+ reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
+ reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
+ reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
+ if (depth > 0)
+ {
+ reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
+ reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
+ reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
+ }
+
+ if (trans->state == tnfa->final
+ && (match_eo < 0
+ || cost0 < match_costs[TRE_M_COST]
+ || (cost0 == match_costs[TRE_M_COST]
+ && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
+ {
+ DPRINT((" setting new match at %d, cost %d\n",
+ pos, cost0));
+ match_eo = pos;
+ for (i = 0; i < TRE_M_LAST; i++)
+ match_costs[i] = reach_next[dest_id].costs[0][i];
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = tmp_tags[i];
+ }
+ }
+ }
+ }
+
+ DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
+ match_costs[TRE_M_COST]));
+
+#ifndef TRE_USE_ALLOCA
+ if (buf)
+ xfree(buf);
+#endif /* !TRE_USE_ALLOCA */
+
+ match->cost = match_costs[TRE_M_COST];
+ match->num_ins = match_costs[TRE_M_NUM_INS];
+ match->num_del = match_costs[TRE_M_NUM_DEL];
+ match->num_subst = match_costs[TRE_M_NUM_SUBST];
+ *match_end_ofs = match_eo;
+
+ return match_eo >= 0 ? REG_OK : REG_NOMATCH;
+}
diff --git a/lib/tre-match-backtrack.c b/lib/tre-match-backtrack.c
new file mode 100644
index 0000000000000..eab13a7bc8ab7
--- /dev/null
+++ b/lib/tre-match-backtrack.c
@@ -0,0 +1,666 @@
+/*
+ tre-match-backtrack.c - TRE backtracking regex matching engine
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+/*
+ This matcher is for regexps that use back referencing. Regexp matching
+ with back referencing is an NP-complete problem on the number of back
+ references. The easiest way to match them is to use a backtracking
+ routine which basically goes through all possible paths in the TNFA
+ and chooses the one which results in the best (leftmost and longest)
+ match. This can be spectacularly expensive and may run out of stack
+ space, but there really is no better known generic algorithm. Quoting
+ Henry Spencer from comp.compilers:
+ <URL: http://compilers.iecc.com/comparch/article/93-03-102>
+
+ POSIX.2 REs require longest match, which is really exciting to
+ implement since the obsolete ("basic") variant also includes
+ \<digit>. I haven't found a better way of tackling this than doing
+ a preliminary match using a DFA (or simulation) on a modified RE
+ that just replicates subREs for \<digit>, and then doing a
+ backtracking match to determine whether the subRE matches were
+ right. This can be rather slow, but I console myself with the
+ thought that people who use \<digit> deserve very slow execution.
+ (Pun unintentional but very appropriate.)
+
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef TRE_USE_ALLOCA
+/* AIX requires this to be the first thing in the file. */
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+# endif
+# endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include <ctype.h>
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+
+#include "tre-internal.h"
+#include "tre-mem.h"
+#include "tre-match-utils.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+typedef struct {
+ int pos;
+ const char *str_byte;
+#ifdef TRE_WCHAR
+ const wchar_t *str_wide;
+#endif /* TRE_WCHAR */
+ tre_tnfa_transition_t *state;
+ int state_id;
+ int next_c;
+ int *tags;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+} tre_backtrack_item_t;
+
+typedef struct tre_backtrack_struct {
+ tre_backtrack_item_t item;
+ struct tre_backtrack_struct *prev;
+ struct tre_backtrack_struct *next;
+} *tre_backtrack_t;
+
+#ifdef TRE_WCHAR
+#define BT_STACK_WIDE_IN(_str_wide) stack->item.str_wide = (_str_wide)
+#define BT_STACK_WIDE_OUT (str_wide) = stack->item.str_wide
+#else /* !TRE_WCHAR */
+#define BT_STACK_WIDE_IN(_str_wide)
+#define BT_STACK_WIDE_OUT
+#endif /* !TRE_WCHAR */
+
+#ifdef TRE_MBSTATE
+#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
+#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
+#else /* !TRE_MBSTATE */
+#define BT_STACK_MBSTATE_IN
+#define BT_STACK_MBSTATE_OUT
+#endif /* !TRE_MBSTATE */
+
+
+#ifdef TRE_USE_ALLOCA
+#define tre_bt_mem_new tre_mem_newa
+#define tre_bt_mem_alloc tre_mem_alloca
+#define tre_bt_mem_destroy(obj) do { } while (0)
+#else /* !TRE_USE_ALLOCA */
+#define tre_bt_mem_new tre_mem_new
+#define tre_bt_mem_alloc tre_mem_alloc
+#define tre_bt_mem_destroy tre_mem_destroy
+#endif /* !TRE_USE_ALLOCA */
+
+
+#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
+ do \
+ { \
+ int i; \
+ if (!stack->next) \
+ { \
+ tre_backtrack_t s; \
+ s = tre_bt_mem_alloc(mem, sizeof(*s)); \
+ if (!s) \
+ { \
+ tre_bt_mem_destroy(mem); \
+ if (tags) \
+ xfree(tags); \
+ if (pmatch) \
+ xfree(pmatch); \
+ if (states_seen) \
+ xfree(states_seen); \
+ return REG_ESPACE; \
+ } \
+ s->prev = stack; \
+ s->next = NULL; \
+ s->item.tags = tre_bt_mem_alloc(mem, \
+ sizeof(*tags) * tnfa->num_tags); \
+ if (!s->item.tags) \
+ { \
+ tre_bt_mem_destroy(mem); \
+ if (tags) \
+ xfree(tags); \
+ if (pmatch) \
+ xfree(pmatch); \
+ if (states_seen) \
+ xfree(states_seen); \
+ return REG_ESPACE; \
+ } \
+ stack->next = s; \
+ stack = s; \
+ } \
+ else \
+ stack = stack->next; \
+ stack->item.pos = (_pos); \
+ stack->item.str_byte = (_str_byte); \
+ BT_STACK_WIDE_IN(_str_wide); \
+ stack->item.state = (_state); \
+ stack->item.state_id = (_state_id); \
+ stack->item.next_c = (_next_c); \
+ for (i = 0; i < tnfa->num_tags; i++) \
+ stack->item.tags[i] = (_tags)[i]; \
+ BT_STACK_MBSTATE_IN; \
+ } \
+ while (/*CONSTCOND*/0)
+
+#define BT_STACK_POP() \
+ do \
+ { \
+ int i; \
+ assert(stack->prev); \
+ pos = stack->item.pos; \
+ if (type == STR_USER) \
+ str_source->rewind(pos + pos_add_next, str_source->context); \
+ str_byte = stack->item.str_byte; \
+ BT_STACK_WIDE_OUT; \
+ state = stack->item.state; \
+ next_c = stack->item.next_c; \
+ for (i = 0; i < tnfa->num_tags; i++) \
+ tags[i] = stack->item.tags[i]; \
+ BT_STACK_MBSTATE_OUT; \
+ stack = stack->prev; \
+ } \
+ while (/*CONSTCOND*/0)
+
+#undef MIN
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+reg_errcode_t
+tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
+ int len, tre_str_type_t type, int *match_tags,
+ int eflags, int *match_end_ofs)
+{
+ /* State variables required by GET_NEXT_WCHAR. */
+ tre_char_t prev_c = 0, next_c = 0;
+ const char *str_byte = string;
+ int pos = 0;
+ unsigned int pos_add_next = 1;
+#ifdef TRE_WCHAR
+ const wchar_t *str_wide = string;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+#endif /* TRE_WCHAR */
+ int reg_notbol = eflags & REG_NOTBOL;
+ int reg_noteol = eflags & REG_NOTEOL;
+ int reg_newline = tnfa->cflags & REG_NEWLINE;
+ int str_user_end = 0;
+
+ /* These are used to remember the necessary values of the above
+ variables to return to the position where the current search
+ started from. */
+ int next_c_start;
+ const char *str_byte_start;
+ int pos_start = -1;
+#ifdef TRE_WCHAR
+ const wchar_t *str_wide_start;
+#endif /* TRE_WCHAR */
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate_start;
+#endif /* TRE_MBSTATE */
+
+ /* End offset of best match so far, or -1 if no match found yet. */
+ int match_eo = -1;
+ /* Tag arrays. */
+ int *next_tags, *tags = NULL;
+ /* Current TNFA state. */
+ tre_tnfa_transition_t *state;
+ int *states_seen = NULL;
+
+ /* Memory allocator to for allocating the backtracking stack. */
+ tre_mem_t mem = tre_bt_mem_new();
+
+ /* The backtracking stack. */
+ tre_backtrack_t stack;
+
+ tre_tnfa_transition_t *trans_i;
+ regmatch_t *pmatch = NULL;
+ int ret;
+
+#ifdef TRE_MBSTATE
+ memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+ if (!mem)
+ return REG_ESPACE;
+ stack = tre_bt_mem_alloc(mem, sizeof(*stack));
+ if (!stack)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ stack->prev = NULL;
+ stack->next = NULL;
+
+ DPRINT(("tnfa_execute_backtrack, input type %d\n", type));
+ DPRINT(("len = %d\n", len));
+
+#ifdef TRE_USE_ALLOCA
+ tags = alloca(sizeof(*tags) * tnfa->num_tags);
+ pmatch = alloca(sizeof(*pmatch) * tnfa->num_submatches);
+ states_seen = alloca(sizeof(*states_seen) * tnfa->num_states);
+#else /* !TRE_USE_ALLOCA */
+ if (tnfa->num_tags)
+ {
+ tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+ if (!tags)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+ if (tnfa->num_submatches)
+ {
+ pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
+ if (!pmatch)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+ if (tnfa->num_states)
+ {
+ states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
+ if (!states_seen)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+#endif /* !TRE_USE_ALLOCA */
+
+ retry:
+ {
+ int i;
+ for (i = 0; i < tnfa->num_tags; i++)
+ {
+ tags[i] = -1;
+ if (match_tags)
+ match_tags[i] = -1;
+ }
+ for (i = 0; i < tnfa->num_states; i++)
+ states_seen[i] = 0;
+ }
+
+ state = NULL;
+ pos = pos_start;
+ if (type == STR_USER)
+ str_source->rewind(pos + pos_add_next, str_source->context);
+ GET_NEXT_WCHAR();
+ pos_start = pos;
+ next_c_start = next_c;
+ str_byte_start = str_byte;
+#ifdef TRE_WCHAR
+ str_wide_start = str_wide;
+#endif /* TRE_WCHAR */
+#ifdef TRE_MBSTATE
+ mbstate_start = mbstate;
+#endif /* TRE_MBSTATE */
+
+ /* Handle initial states. */
+ next_tags = NULL;
+ for (trans_i = tnfa->initial; trans_i->state; trans_i++)
+ {
+ DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c));
+ if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
+ {
+ DPRINT(("assert failed\n"));
+ continue;
+ }
+ if (state == NULL)
+ {
+ /* Start from this state. */
+ state = trans_i->state;
+ next_tags = trans_i->tags;
+ }
+ else
+ {
+ /* Backtrack to this state. */
+ DPRINT(("saving state %d for backtracking\n", trans_i->state_id));
+ BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state,
+ trans_i->state_id, next_c, tags, mbstate);
+ {
+ int *tmp = trans_i->tags;
+ if (tmp)
+ while (*tmp >= 0)
+ stack->item.tags[*tmp++] = pos;
+ }
+ }
+ }
+
+ if (next_tags)
+ for (; *next_tags >= 0; next_tags++)
+ tags[*next_tags] = pos;
+
+
+ DPRINT(("entering match loop, pos %d, str_byte %p\n", pos, str_byte));
+ DPRINT(("pos:chr/code | state and tags\n"));
+ DPRINT(("-------------+------------------------------------------------\n"));
+
+ if (state == NULL)
+ goto backtrack;
+
+ while (/*CONSTCOND*/1)
+ {
+ tre_tnfa_transition_t *next_state;
+ int empty_br_match;
+
+ DPRINT(("start loop\n"));
+ if (state == tnfa->final)
+ {
+ DPRINT((" match found, %d %d\n", match_eo, pos));
+ if (match_eo < pos
+ || (match_eo == pos
+ && match_tags
+ && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
+ tags, match_tags)))
+ {
+ int i;
+ /* This match wins the previous match. */
+ DPRINT((" win previous\n"));
+ match_eo = pos;
+ if (match_tags)
+ for (i = 0; i < tnfa->num_tags; i++)
+ match_tags[i] = tags[i];
+ }
+ /* Our TNFAs never have transitions leaving from the final state,
+ so we jump right to backtracking. */
+ goto backtrack;
+ }
+
+#ifdef TRE_DEBUG
+ DPRINT(("%3d:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c,
+ state));
+ {
+ int i;
+ for (i = 0; i < tnfa->num_tags; i++)
+ DPRINT(("%d%s", tags[i], i < tnfa->num_tags - 1 ? ", " : ""));
+ DPRINT(("\n"));
+ }
+#endif /* TRE_DEBUG */
+
+ /* Go to the next character in the input string. */
+ empty_br_match = 0;
+ trans_i = state;
+ if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
+ {
+ /* This is a back reference state. All transitions leaving from
+ this state have the same back reference "assertion". Instead
+ of reading the next character, we match the back reference. */
+ int so, eo, bt = trans_i->u.backref;
+ int bt_len;
+ int result;
+
+ DPRINT((" should match back reference %d\n", bt));
+ /* Get the substring we need to match against. Remember to
+ turn off REG_NOSUB temporarily. */
+ tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & /*LINTED*/!REG_NOSUB,
+ tnfa, tags, pos);
+ so = pmatch[bt].rm_so;
+ eo = pmatch[bt].rm_eo;
+ bt_len = eo - so;
+
+#ifdef TRE_DEBUG
+ {
+ int slen;
+ if (len < 0)
+ slen = bt_len;
+ else
+ slen = MIN(bt_len, len - pos);
+
+ if (type == STR_BYTE)
+ {
+ DPRINT((" substring (len %d) is [%d, %d[: '%.*s'\n",
+ bt_len, so, eo, bt_len, (char*)string + so));
+ DPRINT((" current string is '%.*s'\n", slen, str_byte - 1));
+ }
+#ifdef TRE_WCHAR
+ else if (type == STR_WIDE)
+ {
+ DPRINT((" substring (len %d) is [%d, %d[: '%.*" STRF "'\n",
+ bt_len, so, eo, bt_len, (wchar_t*)string + so));
+ DPRINT((" current string is '%.*" STRF "'\n",
+ slen, str_wide - 1));
+ }
+#endif /* TRE_WCHAR */
+ }
+#endif
+
+ if (len < 0)
+ {
+ if (type == STR_USER)
+ result = str_source->compare((unsigned)so, (unsigned)pos,
+ (unsigned)bt_len,
+ str_source->context);
+#ifdef TRE_WCHAR
+ else if (type == STR_WIDE)
+ result = wcsncmp((const wchar_t*)string + so, str_wide - 1,
+ (size_t)bt_len);
+#endif /* TRE_WCHAR */
+ else
+ result = strncmp((const char*)string + so, str_byte - 1,
+ (size_t)bt_len);
+ }
+ else if (len - pos < bt_len)
+ result = 1;
+#ifdef TRE_WCHAR
+ else if (type == STR_WIDE)
+ result = wmemcmp((const wchar_t*)string + so, str_wide - 1,
+ (size_t)bt_len);
+#endif /* TRE_WCHAR */
+ else
+ result = memcmp((const char*)string + so, str_byte - 1,
+ (size_t)bt_len);
+
+ if (result == 0)
+ {
+ /* Back reference matched. Check for infinite loop. */
+ if (bt_len == 0)
+ empty_br_match = 1;
+ if (empty_br_match && states_seen[trans_i->state_id])
+ {
+ DPRINT((" avoid loop\n"));
+ goto backtrack;
+ }
+
+ states_seen[trans_i->state_id] = empty_br_match;
+
+ /* Advance in input string and resync `prev_c', `next_c'
+ and pos. */
+ DPRINT((" back reference matched\n"));
+ str_byte += bt_len - 1;
+#ifdef TRE_WCHAR
+ str_wide += bt_len - 1;
+#endif /* TRE_WCHAR */
+ pos += bt_len - 1;
+ GET_NEXT_WCHAR();
+ DPRINT((" pos now %d\n", pos));
+ }
+ else
+ {
+ DPRINT((" back reference did not match\n"));
+ goto backtrack;
+ }
+ }
+ else
+ {
+ /* Check for end of string. */
+ if (len < 0)
+ {
+ if (type == STR_USER)
+ {
+ if (str_user_end)
+ goto backtrack;
+ }
+ else if (next_c == L'\0')
+ goto backtrack;
+ }
+ else
+ {
+ if (pos >= len)
+ goto backtrack;
+ }
+
+ /* Read the next character. */
+ GET_NEXT_WCHAR();
+ }
+
+ next_state = NULL;
+ for (trans_i = state; trans_i->state; trans_i++)
+ {
+ DPRINT((" transition %d-%d (%c-%c) %d to %d\n",
+ trans_i->code_min, trans_i->code_max,
+ trans_i->code_min, trans_i->code_max,
+ trans_i->assertions, trans_i->state_id));
+ if (trans_i->code_min <= (tre_cint_t)prev_c
+ && trans_i->code_max >= (tre_cint_t)prev_c)
+ {
+ if (trans_i->assertions
+ && (CHECK_ASSERTIONS(trans_i->assertions)
+ || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
+ {
+ DPRINT((" assertion failed\n"));
+ continue;
+ }
+
+ if (next_state == NULL)
+ {
+ /* First matching transition. */
+ DPRINT((" Next state is %d\n", trans_i->state_id));
+ next_state = trans_i->state;
+ next_tags = trans_i->tags;
+ }
+ else
+ {
+ /* Second matching transition. We may need to backtrack here
+ to take this transition instead of the first one, so we
+ push this transition in the backtracking stack so we can
+ jump back here if needed. */
+ DPRINT((" saving state %d for backtracking\n",
+ trans_i->state_id));
+ BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state,
+ trans_i->state_id, next_c, tags, mbstate);
+ {
+ int *tmp;
+ for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
+ stack->item.tags[*tmp] = pos;
+ }
+#if 0 /* XXX - it's important not to look at all transitions here to keep
+ the stack small! */
+ break;
+#endif
+ }
+ }
+ }
+
+ if (next_state != NULL)
+ {
+ /* Matching transitions were found. Take the first one. */
+ state = next_state;
+
+ /* Update the tag values. */
+ if (next_tags)
+ while (*next_tags >= 0)
+ tags[*next_tags++] = pos;
+ }
+ else
+ {
+ backtrack:
+ /* A matching transition was not found. Try to backtrack. */
+ if (stack->prev)
+ {
+ DPRINT((" backtracking\n"));
+ if (stack->item.state->assertions && ASSERT_BACKREF)
+ {
+ DPRINT((" states_seen[%d] = 0\n",
+ stack->item.state_id));
+ states_seen[stack->item.state_id] = 0;
+ }
+
+ BT_STACK_POP();
+ }
+ else if (match_eo < 0)
+ {
+ /* Try starting from a later position in the input string. */
+ /* Check for end of string. */
+ if (len < 0)
+ {
+ if (next_c == L'\0')
+ {
+ DPRINT(("end of string.\n"));
+ break;
+ }
+ }
+ else
+ {
+ if (pos >= len)
+ {
+ DPRINT(("end of string.\n"));
+ break;
+ }
+ }
+ DPRINT(("restarting from next start position\n"));
+ next_c = next_c_start;
+#ifdef TRE_MBSTATE
+ mbstate = mbstate_start;
+#endif /* TRE_MBSTATE */
+ str_byte = str_byte_start;
+#ifdef TRE_WCHAR
+ str_wide = str_wide_start;
+#endif /* TRE_WCHAR */
+ goto retry;
+ }
+ else
+ {
+ DPRINT(("finished\n"));
+ break;
+ }
+ }
+ }
+
+ ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
+ *match_end_ofs = match_eo;
+
+ error_exit:
+ tre_bt_mem_destroy(mem);
+#ifndef TRE_USE_ALLOCA
+ if (tags)
+ xfree(tags);
+ if (pmatch)
+ xfree(pmatch);
+ if (states_seen)
+ xfree(states_seen);
+#endif /* !TRE_USE_ALLOCA */
+
+ return ret;
+}
diff --git a/lib/tre-match-parallel.c b/lib/tre-match-parallel.c
new file mode 100644
index 0000000000000..0d6ec173c5332
--- /dev/null
+++ b/lib/tre-match-parallel.c
@@ -0,0 +1,501 @@
+/*
+ tre-match-parallel.c - TRE parallel regex matching engine
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+/*
+ This algorithm searches for matches basically by reading characters
+ in the searched string one by one, starting at the beginning. All
+ matching paths in the TNFA are traversed in parallel. When two or
+ more paths reach the same state, exactly one is chosen according to
+ tag ordering rules; if returning submatches is not required it does
+ not matter which path is chosen.
+
+ The worst case time required for finding the leftmost and longest
+ match, or determining that there is no match, is always linearly
+ dependent on the length of the text being searched.
+
+ This algorithm cannot handle TNFAs with back referencing nodes.
+ See `tre-match-backtrack.c'.
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef TRE_USE_ALLOCA
+/* AIX requires this to be the first thing in the file. */
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+# endif
+# endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include <ctype.h>
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+
+#include "tre-internal.h"
+#include "tre-match-utils.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+
+
+typedef struct {
+ tre_tnfa_transition_t *state;
+ int *tags;
+} tre_tnfa_reach_t;
+
+typedef struct {
+ int pos;
+ int **tags;
+} tre_reach_pos_t;
+
+
+#ifdef TRE_DEBUG
+static void
+tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags)
+{
+ int i;
+
+ while (reach->state != NULL)
+ {
+ DPRINT((" %p", (void *)reach->state));
+ if (num_tags > 0)
+ {
+ DPRINT(("/"));
+ for (i = 0; i < num_tags; i++)
+ {
+ DPRINT(("%d:%d", i, reach->tags[i]));
+ if (i < (num_tags-1))
+ DPRINT((","));
+ }
+ }
+ reach++;
+ }
+ DPRINT(("\n"));
+
+}
+#endif /* TRE_DEBUG */
+
+reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
+ tre_str_type_t type, int *match_tags, int eflags,
+ int *match_end_ofs)
+{
+ /* State variables required by GET_NEXT_WCHAR. */
+ tre_char_t prev_c = 0, next_c = 0;
+ const char *str_byte = string;
+ int pos = -1;
+ unsigned int pos_add_next = 1;
+#ifdef TRE_WCHAR
+ const wchar_t *str_wide = string;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+#endif /* TRE_WCHAR */
+ int reg_notbol = eflags & REG_NOTBOL;
+ int reg_noteol = eflags & REG_NOTEOL;
+ int reg_newline = tnfa->cflags & REG_NEWLINE;
+ int str_user_end = 0;
+
+ char *buf;
+ tre_tnfa_transition_t *trans_i;
+ tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
+ tre_reach_pos_t *reach_pos;
+ int *tag_i;
+ int num_tags, i;
+
+ int match_eo = -1; /* end offset of match (-1 if no match found yet) */
+ int new_match = 0;
+ int *tmp_tags = NULL;
+ int *tmp_iptr;
+
+#ifdef TRE_MBSTATE
+ memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+ DPRINT(("tre_tnfa_run_parallel, input type %d\n", type));
+
+ if (!match_tags)
+ num_tags = 0;
+ else
+ num_tags = tnfa->num_tags;
+
+ /* Allocate memory for temporary data required for matching. This needs to
+ be done for every matching operation to be thread safe. This allocates
+ everything in a single large block from the stack frame using alloca()
+ or with malloc() if alloca is unavailable. */
+ {
+ int tbytes, rbytes, pbytes, xbytes, total_bytes;
+ char *tmp_buf;
+ /* Compute the length of the block we need. */
+ tbytes = sizeof(*tmp_tags) * num_tags;
+ rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
+ pbytes = sizeof(*reach_pos) * tnfa->num_states;
+ xbytes = sizeof(int) * num_tags;
+ total_bytes =
+ (sizeof(long) - 1) * 4 /* for alignment paddings */
+ + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
+
+ /* Allocate the memory. */
+#ifdef TRE_USE_ALLOCA
+ buf = alloca(total_bytes);
+#else /* !TRE_USE_ALLOCA */
+ buf = xmalloc((unsigned)total_bytes);
+#endif /* !TRE_USE_ALLOCA */
+ if (buf == NULL)
+ return REG_ESPACE;
+ memset(buf, 0, (size_t)total_bytes);
+
+ /* Get the various pointers within tmp_buf (properly aligned). */
+ tmp_tags = (void *)buf;
+ tmp_buf = buf + tbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach_next = (void *)tmp_buf;
+ tmp_buf += rbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach = (void *)tmp_buf;
+ tmp_buf += rbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach_pos = (void *)tmp_buf;
+ tmp_buf += pbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ for (i = 0; i < tnfa->num_states; i++)
+ {
+ reach[i].tags = (void *)tmp_buf;
+ tmp_buf += xbytes;
+ reach_next[i].tags = (void *)tmp_buf;
+ tmp_buf += xbytes;
+ }
+ }
+
+ for (i = 0; i < tnfa->num_states; i++)
+ reach_pos[i].pos = -1;
+
+ /* If only one character can start a match, find it first. */
+ if (tnfa->first_char >= 0 && type == STR_BYTE && str_byte)
+ {
+ const char *orig_str = str_byte;
+ int first = tnfa->first_char;
+
+ if (len >= 0)
+ str_byte = memchr(orig_str, first, (size_t)len);
+ else
+ str_byte = strchr(orig_str, first);
+ if (str_byte == NULL)
+ {
+#ifndef TRE_USE_ALLOCA
+ if (buf)
+ xfree(buf);
+#endif /* !TRE_USE_ALLOCA */
+ return REG_NOMATCH;
+ }
+ DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str)));
+ if (str_byte >= orig_str + 1)
+ prev_c = (unsigned char)*(str_byte - 1);
+ next_c = (unsigned char)*str_byte;
+ pos = str_byte - orig_str;
+ if (len < 0 || pos < len)
+ str_byte++;
+ }
+ else
+ {
+ GET_NEXT_WCHAR();
+ pos = 0;
+ }
+
+#if 0
+ /* Skip over characters that cannot possibly be the first character
+ of a match. */
+ if (tnfa->firstpos_chars != NULL)
+ {
+ char *chars = tnfa->firstpos_chars;
+
+ if (len < 0)
+ {
+ const char *orig_str = str_byte;
+ /* XXX - use strpbrk() and wcspbrk() because they might be
+ optimized for the target architecture. Try also strcspn()
+ and wcscspn() and compare the speeds. */
+ while (next_c != L'\0' && !chars[next_c])
+ {
+ next_c = *str_byte++;
+ }
+ prev_c = *(str_byte - 2);
+ pos += str_byte - orig_str;
+ DPRINT(("skipped %d chars\n", str_byte - orig_str));
+ }
+ else
+ {
+ while (pos <= len && !chars[next_c])
+ {
+ prev_c = next_c;
+ next_c = (unsigned char)(*str_byte++);
+ pos++;
+ }
+ }
+ }
+#endif
+
+ DPRINT(("length: %d\n", len));
+ DPRINT(("pos:chr/code | states and tags\n"));
+ DPRINT(("-------------+------------------------------------------------\n"));
+
+ reach_next_i = reach_next;
+ while (/*CONSTCOND*/1)
+ {
+ /* If no match found yet, add the initial states to `reach_next'. */
+ if (match_eo < 0)
+ {
+ DPRINT((" init >"));
+ trans_i = tnfa->initial;
+ while (trans_i->state != NULL)
+ {
+ if (reach_pos[trans_i->state_id].pos < pos)
+ {
+ if (trans_i->assertions
+ && CHECK_ASSERTIONS(trans_i->assertions))
+ {
+ DPRINT(("assertion failed\n"));
+ trans_i++;
+ continue;
+ }
+
+ DPRINT((" %p", (void *)trans_i->state));
+ reach_next_i->state = trans_i->state;
+ for (i = 0; i < num_tags; i++)
+ reach_next_i->tags[i] = -1;
+ tag_i = trans_i->tags;
+ if (tag_i)
+ while (*tag_i >= 0)
+ {
+ if (*tag_i < num_tags)
+ reach_next_i->tags[*tag_i] = pos;
+ tag_i++;
+ }
+ if (reach_next_i->state == tnfa->final)
+ {
+ DPRINT((" found empty match\n"));
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = reach_next_i->tags[i];
+ }
+ reach_pos[trans_i->state_id].pos = pos;
+ reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
+ reach_next_i++;
+ }
+ trans_i++;
+ }
+ DPRINT(("\n"));
+ reach_next_i->state = NULL;
+ }
+ else
+ {
+ if (num_tags == 0 || reach_next_i == reach_next)
+ /* We have found a match. */
+ break;
+ }
+
+ /* Check for end of string. */
+ if (len < 0)
+ {
+ if (type == STR_USER)
+ {
+ if (str_user_end)
+ break;
+ }
+ else if (next_c == L'\0')
+ break;
+ }
+ else
+ {
+ if (pos >= len)
+ break;
+ }
+
+ GET_NEXT_WCHAR();
+
+#ifdef TRE_DEBUG
+ DPRINT(("%3d:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c));
+ tre_print_reach(tnfa, reach_next, num_tags);
+ DPRINT(("%3d:%2lc/%05d |", pos, (tre_cint_t)next_c, (int)next_c));
+ tre_print_reach(tnfa, reach_next, num_tags);
+#endif /* TRE_DEBUG */
+
+ /* Swap `reach' and `reach_next'. */
+ reach_i = reach;
+ reach = reach_next;
+ reach_next = reach_i;
+
+ /* For each state in `reach', weed out states that don't fulfill the
+ minimal matching conditions. */
+ if (tnfa->num_minimals && new_match)
+ {
+ new_match = 0;
+ reach_next_i = reach_next;
+ for (reach_i = reach; reach_i->state; reach_i++)
+ {
+ int skip = 0;
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
+ {
+ int end = tnfa->minimal_tags[i];
+ int start = tnfa->minimal_tags[i + 1];
+ DPRINT((" Minimal start %d, end %d\n", start, end));
+ if (end >= num_tags)
+ {
+ DPRINT((" Throwing %p out.\n", reach_i->state));
+ skip = 1;
+ break;
+ }
+ else if (reach_i->tags[start] == match_tags[start]
+ && reach_i->tags[end] < match_tags[end])
+ {
+ DPRINT((" Throwing %p out because t%d < %d\n",
+ reach_i->state, end, match_tags[end]));
+ skip = 1;
+ break;
+ }
+ }
+ if (!skip)
+ {
+ reach_next_i->state = reach_i->state;
+ tmp_iptr = reach_next_i->tags;
+ reach_next_i->tags = reach_i->tags;
+ reach_i->tags = tmp_iptr;
+ reach_next_i++;
+ }
+ }
+ reach_next_i->state = NULL;
+
+ /* Swap `reach' and `reach_next'. */
+ reach_i = reach;
+ reach = reach_next;
+ reach_next = reach_i;
+ }
+
+ /* For each state in `reach' see if there is a transition leaving with
+ the current input symbol to a state not yet in `reach_next', and
+ add the destination states to `reach_next'. */
+ reach_next_i = reach_next;
+ for (reach_i = reach; reach_i->state; reach_i++)
+ {
+ for (trans_i = reach_i->state; trans_i->state; trans_i++)
+ {
+ /* Does this transition match the input symbol? */
+ if (trans_i->code_min <= (tre_cint_t)prev_c &&
+ trans_i->code_max >= (tre_cint_t)prev_c)
+ {
+ if (trans_i->assertions
+ && (CHECK_ASSERTIONS(trans_i->assertions)
+ || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
+ {
+ DPRINT(("assertion failed\n"));
+ continue;
+ }
+
+ /* Compute the tags after this transition. */
+ for (i = 0; i < num_tags; i++)
+ tmp_tags[i] = reach_i->tags[i];
+ tag_i = trans_i->tags;
+ if (tag_i != NULL)
+ while (*tag_i >= 0)
+ {
+ if (*tag_i < num_tags)
+ tmp_tags[*tag_i] = pos;
+ tag_i++;
+ }
+
+ if (reach_pos[trans_i->state_id].pos < pos)
+ {
+ /* Found an unvisited node. */
+ reach_next_i->state = trans_i->state;
+ tmp_iptr = reach_next_i->tags;
+ reach_next_i->tags = tmp_tags;
+ tmp_tags = tmp_iptr;
+ reach_pos[trans_i->state_id].pos = pos;
+ reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
+
+ if (reach_next_i->state == tnfa->final
+ && (match_eo == -1
+ || (num_tags > 0
+ && reach_next_i->tags[0] <= match_tags[0])))
+ {
+ DPRINT((" found match %p\n", trans_i->state));
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = reach_next_i->tags[i];
+ }
+ reach_next_i++;
+
+ }
+ else
+ {
+ assert(reach_pos[trans_i->state_id].pos == pos);
+ /* Another path has also reached this state. We choose
+ the winner by examining the tag values for both
+ paths. */
+ if (tre_tag_order(num_tags, tnfa->tag_directions,
+ tmp_tags,
+ *reach_pos[trans_i->state_id].tags))
+ {
+ /* The new path wins. */
+ tmp_iptr = *reach_pos[trans_i->state_id].tags;
+ *reach_pos[trans_i->state_id].tags = tmp_tags;
+ if (trans_i->state == tnfa->final)
+ {
+ DPRINT((" found better match\n"));
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = tmp_tags[i];
+ }
+ tmp_tags = tmp_iptr;
+ }
+ }
+ }
+ }
+ }
+ reach_next_i->state = NULL;
+ }
+
+ DPRINT(("match end offset = %d\n", match_eo));
+
+#ifndef TRE_USE_ALLOCA
+ if (buf)
+ xfree(buf);
+#endif /* !TRE_USE_ALLOCA */
+
+ *match_end_ofs = match_eo;
+ return match_eo >= 0 ? REG_OK : REG_NOMATCH;
+}
+
+/* EOF */
diff --git a/lib/tre-match-utils.h b/lib/tre-match-utils.h
new file mode 100644
index 0000000000000..70db745e1050c
--- /dev/null
+++ b/lib/tre-match-utils.h
@@ -0,0 +1,215 @@
+/*
+ tre-match-utils.h - TRE matcher helper definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#define str_source ((const tre_str_source*)string)
+
+#ifdef TRE_WCHAR
+
+#ifdef TRE_MULTIBYTE
+
+/* Wide character and multibyte support. */
+
+#define GET_NEXT_WCHAR() \
+ do { \
+ prev_c = next_c; \
+ if (type == STR_BYTE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = '\0'; \
+ else \
+ next_c = (unsigned char)(*str_byte++); \
+ } \
+ else if (type == STR_WIDE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = L'\0'; \
+ else \
+ next_c = *str_wide++; \
+ } \
+ else if (type == STR_MBS) \
+ { \
+ pos += pos_add_next; \
+ if (str_byte == NULL) \
+ next_c = L'\0'; \
+ else \
+ { \
+ size_t w; \
+ int max; \
+ if (len >= 0) \
+ max = len - pos; \
+ else \
+ max = 32; \
+ if (max <= 0) \
+ { \
+ next_c = L'\0'; \
+ pos_add_next = 1; \
+ } \
+ else \
+ { \
+ w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \
+ if (w == (size_t)-1 || w == (size_t)-2) \
+ return REG_NOMATCH; \
+ if (w == 0 && len >= 0) \
+ { \
+ pos_add_next = 1; \
+ next_c = 0; \
+ str_byte++; \
+ } \
+ else \
+ { \
+ pos_add_next = w; \
+ str_byte += w; \
+ } \
+ } \
+ } \
+ } \
+ else if (type == STR_USER) \
+ { \
+ pos += pos_add_next; \
+ str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
+ str_source->context); \
+ } \
+ } while(/*CONSTCOND*/0)
+
+#else /* !TRE_MULTIBYTE */
+
+/* Wide character support, no multibyte support. */
+
+#define GET_NEXT_WCHAR() \
+ do { \
+ prev_c = next_c; \
+ if (type == STR_BYTE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = '\0'; \
+ else \
+ next_c = (unsigned char)(*str_byte++); \
+ } \
+ else if (type == STR_WIDE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = L'\0'; \
+ else \
+ next_c = *str_wide++; \
+ } \
+ else if (type == STR_USER) \
+ { \
+ pos += pos_add_next; \
+ str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
+ str_source->context); \
+ } \
+ } while(/*CONSTCOND*/0)
+
+#endif /* !TRE_MULTIBYTE */
+
+#else /* !TRE_WCHAR */
+
+/* No wide character or multibyte support. */
+
+#define GET_NEXT_WCHAR() \
+ do { \
+ prev_c = next_c; \
+ if (type == STR_BYTE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = '\0'; \
+ else \
+ next_c = (unsigned char)(*str_byte++); \
+ } \
+ else if (type == STR_USER) \
+ { \
+ pos += pos_add_next; \
+ str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
+ str_source->context); \
+ } \
+ } while(/*CONSTCOND*/0)
+
+#endif /* !TRE_WCHAR */
+
+
+
+#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))
+
+#define CHECK_ASSERTIONS(assertions) \
+ (((assertions & ASSERT_AT_BOL) \
+ && (pos > 0 || reg_notbol) \
+ && (prev_c != L'\n' || !reg_newline)) \
+ || ((assertions & ASSERT_AT_EOL) \
+ && (next_c != L'\0' || reg_noteol) \
+ && (next_c != L'\n' || !reg_newline)) \
+ || ((assertions & ASSERT_AT_BOW) \
+ && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_EOW) \
+ && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_WB) \
+ && (pos != 0 && next_c != L'\0' \
+ && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_WB_NEG) \
+ && (pos == 0 || next_c == L'\0' \
+ || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
+
+#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
+ (((trans_i->assertions & ASSERT_CHAR_CLASS) \
+ && !(tnfa->cflags & REG_ICASE) \
+ && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \
+ || ((trans_i->assertions & ASSERT_CHAR_CLASS) \
+ && (tnfa->cflags & REG_ICASE) \
+ && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \
+ && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \
+ || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \
+ && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
+ tnfa->cflags & REG_ICASE)))
+
+
+
+
+/* Returns 1 if `t1' wins `t2', 0 otherwise. */
+inline static int
+tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
+ int *t1, int *t2)
+{
+ int i;
+ for (i = 0; i < num_tags; i++)
+ {
+ if (tag_directions[i] == TRE_TAG_MINIMIZE)
+ {
+ if (t1[i] < t2[i])
+ return 1;
+ if (t1[i] > t2[i])
+ return 0;
+ }
+ else
+ {
+ if (t1[i] > t2[i])
+ return 1;
+ if (t1[i] < t2[i])
+ return 0;
+ }
+ }
+ /* assert(0);*/
+ return 0;
+}
+
+inline static int
+tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
+{
+ DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
+ while (*classes != (tre_ctype_t)0)
+ if ((!icase && tre_isctype(wc, *classes))
+ || (icase && (tre_isctype(tre_toupper(wc), *classes)
+ || tre_isctype(tre_tolower(wc), *classes))))
+ return 1; /* Match. */
+ else
+ classes++;
+ return 0; /* No match. */
+}
diff --git a/lib/tre-mem.c b/lib/tre-mem.c
new file mode 100644
index 0000000000000..b5fb2768d292a
--- /dev/null
+++ b/lib/tre-mem.c
@@ -0,0 +1,155 @@
+/*
+ tre-mem.c - TRE memory allocator
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+/*
+ This memory allocator is for allocating small memory blocks efficiently
+ in terms of memory overhead and execution speed. The allocated blocks
+ cannot be freed individually, only all at once. There can be multiple
+ allocators, though.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <stdlib.h>
+#include <string.h>
+
+#include "tre-internal.h"
+#include "tre-mem.h"
+#include "xmalloc.h"
+
+
+/* Returns a new memory allocator or NULL if out of memory. */
+tre_mem_t
+tre_mem_new_impl(int provided, void *provided_block)
+{
+ tre_mem_t mem;
+ if (provided)
+ {
+ mem = provided_block;
+ memset(mem, 0, sizeof(*mem));
+ }
+ else
+ mem = xcalloc(1, sizeof(*mem));
+ if (mem == NULL)
+ return NULL;
+ return mem;
+}
+
+
+/* Frees the memory allocator and all memory allocated with it. */
+void
+tre_mem_destroy(tre_mem_t mem)
+{
+ tre_list_t *tmp, *l = mem->blocks;
+
+ while (l != NULL)
+ {
+ xfree(l->data);
+ tmp = l->next;
+ xfree(l);
+ l = tmp;
+ }
+ xfree(mem);
+}
+
+
+/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
+ allocated block or NULL if an underlying malloc() failed. */
+void *
+tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
+ int zero, size_t size)
+{
+ void *ptr;
+
+ if (mem->failed)
+ {
+ DPRINT(("tre_mem_alloc: oops, called after failure?!\n"));
+ return NULL;
+ }
+
+#ifdef MALLOC_DEBUGGING
+ if (!provided)
+ {
+ ptr = xmalloc(1);
+ if (ptr == NULL)
+ {
+ DPRINT(("tre_mem_alloc: xmalloc forced failure\n"));
+ mem->failed = 1;
+ return NULL;
+ }
+ xfree(ptr);
+ }
+#endif /* MALLOC_DEBUGGING */
+
+ if (mem->n < size)
+ {
+ /* We need more memory than is available in the current block.
+ Allocate a new block. */
+ tre_list_t *l;
+ if (provided)
+ {
+ DPRINT(("tre_mem_alloc: using provided block\n"));
+ if (provided_block == NULL)
+ {
+ DPRINT(("tre_mem_alloc: provided block was NULL\n"));
+ mem->failed = 1;
+ return NULL;
+ }
+ mem->ptr = provided_block;
+ mem->n = TRE_MEM_BLOCK_SIZE;
+ }
+ else
+ {
+ int block_size;
+ if (size * 8 > TRE_MEM_BLOCK_SIZE)
+ block_size = size * 8;
+ else
+ block_size = TRE_MEM_BLOCK_SIZE;
+ DPRINT(("tre_mem_alloc: allocating new %d byte block\n",
+ block_size));
+ l = xmalloc(sizeof(*l));
+ if (l == NULL)
+ {
+ mem->failed = 1;
+ return NULL;
+ }
+ l->data = xmalloc(block_size);
+ if (l->data == NULL)
+ {
+ xfree(l);
+ mem->failed = 1;
+ return NULL;
+ }
+ l->next = NULL;
+ if (mem->current != NULL)
+ mem->current->next = l;
+ if (mem->blocks == NULL)
+ mem->blocks = l;
+ mem->current = l;
+ mem->ptr = l->data;
+ mem->n = block_size;
+ }
+ }
+
+ /* Make sure the next pointer will be aligned. */
+ size += ALIGN(mem->ptr + size, long);
+
+ /* Allocate from current block. */
+ ptr = mem->ptr;
+ mem->ptr += size;
+ mem->n -= size;
+
+ /* Set to zero if needed. */
+ if (zero)
+ memset(ptr, 0, size);
+
+ return ptr;
+}
+
+/* EOF */
diff --git a/lib/tre-mem.h b/lib/tre-mem.h
new file mode 100644
index 0000000000000..285940457f389
--- /dev/null
+++ b/lib/tre-mem.h
@@ -0,0 +1,66 @@
+/*
+ tre-mem.h - TRE memory allocator interface
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef TRE_MEM_H
+#define TRE_MEM_H 1
+
+#include <stdlib.h>
+
+#define TRE_MEM_BLOCK_SIZE 1024
+
+typedef struct tre_list {
+ void *data;
+ struct tre_list *next;
+} tre_list_t;
+
+typedef struct tre_mem_struct {
+ tre_list_t *blocks;
+ tre_list_t *current;
+ char *ptr;
+ size_t n;
+ int failed;
+ void **provided;
+} *tre_mem_t;
+
+
+tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
+void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
+ int zero, size_t size);
+
+/* Returns a new memory allocator or NULL if out of memory. */
+#define tre_mem_new() tre_mem_new_impl(0, NULL)
+
+/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
+ allocated block or NULL if an underlying malloc() failed. */
+#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)
+
+/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
+ allocated block or NULL if an underlying malloc() failed. The memory
+ is set to zero. */
+#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)
+
+#ifdef TRE_USE_ALLOCA
+/* alloca() versions. Like above, but memory is allocated with alloca()
+ instead of malloc(). */
+
+#define tre_mem_newa() \
+ tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))
+
+#define tre_mem_alloca(mem, size) \
+ ((mem)->n >= (size) \
+ ? tre_mem_alloc_impl((mem), 1, NULL, 0, (size)) \
+ : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
+#endif /* TRE_USE_ALLOCA */
+
+
+/* Frees the memory allocator and all memory allocated with it. */
+void tre_mem_destroy(tre_mem_t mem);
+
+#endif /* TRE_MEM_H */
+
+/* EOF */
diff --git a/lib/tre-parse.c b/lib/tre-parse.c
new file mode 100644
index 0000000000000..9282725d06667
--- /dev/null
+++ b/lib/tre-parse.c
@@ -0,0 +1,1739 @@
+/*
+ tre-parse.c - Regexp parser
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+/*
+ This parser is just a simple recursive descent parser for POSIX.2
+ regexps. The parser supports both the obsolete default syntax and
+ the "extended" syntax, and some nonstandard extensions.
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "xmalloc.h"
+#include "tre-mem.h"
+#include "tre-ast.h"
+#include "tre-stack.h"
+#include "tre-parse.h"
+
+
+/* Characters with special meanings in regexp syntax. */
+#define CHAR_PIPE L'|'
+#define CHAR_LPAREN L'('
+#define CHAR_RPAREN L')'
+#define CHAR_LBRACE L'{'
+#define CHAR_RBRACE L'}'
+#define CHAR_LBRACKET L'['
+#define CHAR_RBRACKET L']'
+#define CHAR_MINUS L'-'
+#define CHAR_STAR L'*'
+#define CHAR_QUESTIONMARK L'?'
+#define CHAR_PLUS L'+'
+#define CHAR_PERIOD L'.'
+#define CHAR_COLON L':'
+#define CHAR_EQUAL L'='
+#define CHAR_COMMA L','
+#define CHAR_CARET L'^'
+#define CHAR_DOLLAR L'$'
+#define CHAR_BACKSLASH L'\\'
+#define CHAR_HASH L'#'
+#define CHAR_TILDE L'~'
+
+
+/* Some macros for expanding \w, \s, etc. */
+static const struct tre_macro_struct {
+ const char c;
+ const char *expansion;
+} tre_macros[] =
+ { {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
+ {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
+ {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
+ {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
+ { 0, NULL }
+ };
+
+
+/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
+ must have at least `len' items. Sets buf[0] to zero if the there
+ is no match in `tre_macros'. */
+static void
+tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
+ tre_char_t *buf, size_t buf_len)
+{
+ int i;
+
+ buf[0] = 0;
+ if (regex >= regex_end)
+ return;
+
+ for (i = 0; tre_macros[i].expansion; i++)
+ {
+ if (tre_macros[i].c == *regex)
+ {
+ unsigned int j;
+ DPRINT(("Expanding macro '%c' => '%s'\n",
+ tre_macros[i].c, tre_macros[i].expansion));
+ for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
+ buf[j] = tre_macros[i].expansion[j];
+ buf[j] = 0;
+ break;
+ }
+ }
+}
+
+static reg_errcode_t
+tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
+ tre_ast_node_t ***items)
+{
+ reg_errcode_t status;
+ tre_ast_node_t **array = *items;
+ /* Allocate more space if necessary. */
+ if (*i >= *max_i)
+ {
+ tre_ast_node_t **new_items;
+ DPRINT(("out of array space, i = %d\n", *i));
+ /* If the array is already 1024 items large, give up -- there's
+ probably an error in the regexp (e.g. not a '\0' terminated
+ string and missing ']') */
+ if (*max_i > 1024)
+ return REG_ESPACE;
+ *max_i *= 2;
+ new_items = xrealloc(array, sizeof(*items) * *max_i);
+ if (new_items == NULL)
+ return REG_ESPACE;
+ *items = array = new_items;
+ }
+ array[*i] = tre_ast_new_literal(mem, min, max, -1);
+ status = array[*i] == NULL ? REG_ESPACE : REG_OK;
+ (*i)++;
+ return status;
+}
+
+
+/* Expands a character class to character ranges. */
+static reg_errcode_t
+tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
+ int *i, int *max_i, int cflags)
+{
+ reg_errcode_t status = REG_OK;
+ tre_cint_t c;
+ int j, min = -1, max = 0;
+ assert(TRE_MB_CUR_MAX == 1);
+
+ DPRINT((" expanding class to character ranges\n"));
+ for (j = 0; (j < 256) && (status == REG_OK); j++)
+ {
+ c = j;
+ if (tre_isctype(c, class)
+ || ((cflags & REG_ICASE)
+ && (tre_isctype(tre_tolower(c), class)
+ || tre_isctype(tre_toupper(c), class))))
+{
+ if (min < 0)
+ min = c;
+ max = c;
+ }
+ else if (min >= 0)
+ {
+ DPRINT((" range %c (%d) to %c (%d)\n", min, min, max, max));
+ status = tre_new_item(mem, min, max, i, max_i, items);
+ min = -1;
+ }
+ }
+ if (min >= 0 && status == REG_OK)
+ status = tre_new_item(mem, min, max, i, max_i, items);
+ return status;
+}
+
+
+static int
+tre_compare_items(const void *a, const void *b)
+{
+ const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
+ const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
+ tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
+ int a_min = l_a->code_min, b_min = l_b->code_min;
+
+ if (a_min < b_min)
+ return -1;
+ else if (a_min > b_min)
+ return 1;
+ else
+ return 0;
+}
+
+#ifndef TRE_USE_SYSTEM_WCTYPE
+
+/* isalnum() and the rest may be macros, so wrap them to functions. */
+int tre_isalnum_func(tre_cint_t c) { return tre_isalnum(c); }
+int tre_isalpha_func(tre_cint_t c) { return tre_isalpha(c); }
+
+#ifdef tre_isascii
+int tre_isascii_func(tre_cint_t c) { return tre_isascii(c); }
+#else /* !tre_isascii */
+int tre_isascii_func(tre_cint_t c) { return !(c >> 7); }
+#endif /* !tre_isascii */
+
+#ifdef tre_isblank
+int tre_isblank_func(tre_cint_t c) { return tre_isblank(c); }
+#else /* !tre_isblank */
+int tre_isblank_func(tre_cint_t c) { return ((c == ' ') || (c == '\t')); }
+#endif /* !tre_isblank */
+
+int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); }
+int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); }
+int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); }
+int tre_islower_func(tre_cint_t c) { return tre_islower(c); }
+int tre_isprint_func(tre_cint_t c) { return tre_isprint(c); }
+int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); }
+int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); }
+int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); }
+int tre_isxdigit_func(tre_cint_t c) { return tre_isxdigit(c); }
+
+struct {
+ char *name;
+ int (*func)(tre_cint_t);
+} tre_ctype_map[] = {
+ { "alnum", &tre_isalnum_func },
+ { "alpha", &tre_isalpha_func },
+#ifdef tre_isascii
+ { "ascii", &tre_isascii_func },
+#endif /* tre_isascii */
+#ifdef tre_isblank
+ { "blank", &tre_isblank_func },
+#endif /* tre_isblank */
+ { "cntrl", &tre_iscntrl_func },
+ { "digit", &tre_isdigit_func },
+ { "graph", &tre_isgraph_func },
+ { "lower", &tre_islower_func },
+ { "print", &tre_isprint_func },
+ { "punct", &tre_ispunct_func },
+ { "space", &tre_isspace_func },
+ { "upper", &tre_isupper_func },
+ { "xdigit", &tre_isxdigit_func },
+ { NULL, NULL}
+};
+
+tre_ctype_t tre_ctype(const char *name)
+{
+ int i;
+ for (i = 0; tre_ctype_map[i].name != NULL; i++)
+ {
+ if (strcmp(name, tre_ctype_map[i].name) == 0)
+ return tre_ctype_map[i].func;
+ }
+ return (tre_ctype_t)0;
+}
+#endif /* !TRE_USE_SYSTEM_WCTYPE */
+
+/* Maximum number of character classes that can occur in a negated bracket
+ expression. */
+#define MAX_NEG_CLASSES 64
+
+/* Maximum length of character class names. */
+#define MAX_CLASS_NAME
+
+#define REST(re) (int)(ctx->re_end - (re)), (re)
+
+static reg_errcode_t
+tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
+ tre_ctype_t neg_classes[], int *num_neg_classes,
+ tre_ast_node_t ***items, int *num_items,
+ int *items_size)
+{
+ const tre_char_t *re = ctx->re;
+ reg_errcode_t status = REG_OK;
+ tre_ctype_t class = (tre_ctype_t)0;
+ int i = *num_items;
+ int max_i = *items_size;
+ int skip;
+
+ /* Build an array of the items in the bracket expression. */
+ while (status == REG_OK)
+ {
+ skip = 0;
+ if (re == ctx->re_end)
+ {
+ status = REG_EBRACK;
+ }
+ else if (*re == CHAR_RBRACKET && re > ctx->re)
+ {
+ DPRINT(("tre_parse_bracket: done: '%.*" STRF "'\n", REST(re)));
+ re++;
+ break;
+ }
+ else
+ {
+ tre_cint_t min = 0, max = 0;
+
+ class = (tre_ctype_t)0;
+ if (re + 2 < ctx->re_end
+ && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET)
+ {
+ DPRINT(("tre_parse_bracket: range: '%.*" STRF "'\n", REST(re)));
+ min = *re;
+ max = *(re + 2);
+ re += 3;
+ /* XXX - Should use collation order instead of encoding values
+ in character ranges. */
+ if (min > max)
+ status = REG_ERANGE;
+ }
+ else if (re + 1 < ctx->re_end
+ && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
+ status = REG_ECOLLATE;
+ else if (re + 1 < ctx->re_end
+ && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
+ status = REG_ECOLLATE;
+ else if (re + 1 < ctx->re_end
+ && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
+ {
+ char tmp_str[64];
+ const tre_char_t *endptr = re + 2;
+ int len;
+ DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re)));
+ while (endptr < ctx->re_end && *endptr != CHAR_COLON)
+ endptr++;
+ if (endptr != ctx->re_end)
+ {
+ len = MIN(endptr - re - 2, 63);
+#ifdef TRE_WCHAR
+ {
+ tre_char_t tmp_wcs[64];
+ wcsncpy(tmp_wcs, re + 2, (size_t)len);
+ tmp_wcs[len] = L'\0';
+#if defined HAVE_WCSRTOMBS
+ {
+ mbstate_t state;
+ const tre_char_t *src = tmp_wcs;
+ memset(&state, '\0', sizeof(state));
+ len = wcsrtombs(tmp_str, &src, sizeof(tmp_str), &state);
+ }
+#elif defined HAVE_WCSTOMBS
+ len = wcstombs(tmp_str, tmp_wcs, 63);
+#endif /* defined HAVE_WCSTOMBS */
+ }
+#else /* !TRE_WCHAR */
+ strncpy(tmp_str, (const char*)re + 2, len);
+#endif /* !TRE_WCHAR */
+ tmp_str[len] = '\0';
+ DPRINT((" class name: %s\n", tmp_str));
+ class = tre_ctype(tmp_str);
+ if (!class)
+ status = REG_ECTYPE;
+ /* Optimize character classes for 8 bit character sets. */
+ if (status == REG_OK && TRE_MB_CUR_MAX == 1)
+ {
+ status = tre_expand_ctype(ctx->mem, class, items,
+ &i, &max_i, ctx->cflags);
+ class = (tre_ctype_t)0;
+ skip = 1;
+ }
+ re = endptr + 2;
+ }
+ else
+ status = REG_ECTYPE;
+ min = 0;
+ max = TRE_CHAR_MAX;
+ }
+ else
+ {
+ DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
+ if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
+ && ctx->re != re)
+ /* Two ranges are not allowed to share and endpoint. */
+ status = REG_ERANGE;
+ min = max = *re++;
+ }
+
+ if (status != REG_OK)
+ break;
+
+ if (class && negate)
+ if (*num_neg_classes >= MAX_NEG_CLASSES)
+ status = REG_ESPACE;
+ else
+ neg_classes[(*num_neg_classes)++] = class;
+ else if (!skip)
+ {
+ status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
+ if (status != REG_OK)
+ break;
+ ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
+ }
+
+ /* Add opposite-case counterpoints if REG_ICASE is present.
+ This is broken if there are more than two "same" characters. */
+ if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
+ {
+ tre_cint_t cmin, ccurr;
+
+ DPRINT(("adding opposite-case counterpoints\n"));
+ while (min <= max)
+ {
+ if (tre_islower(min))
+ {
+ cmin = ccurr = tre_toupper(min++);
+ while (tre_islower(min) && tre_toupper(min) == ccurr + 1
+ && min <= max)
+ ccurr = tre_toupper(min++);
+ status = tre_new_item(ctx->mem, cmin, ccurr,
+ &i, &max_i, items);
+ }
+ else if (tre_isupper(min))
+ {
+ cmin = ccurr = tre_tolower(min++);
+ while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
+ && min <= max)
+ ccurr = tre_tolower(min++);
+ status = tre_new_item(ctx->mem, cmin, ccurr,
+ &i, &max_i, items);
+ }
+ else min++;
+ if (status != REG_OK)
+ break;
+ }
+ if (status != REG_OK)
+ break;
+ }
+ }
+ }
+ *num_items = i;
+ *items_size = max_i;
+ ctx->re = re;
+ return status;
+}
+
+static reg_errcode_t
+tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+ tre_ast_node_t *node = NULL;
+ int negate = 0;
+ reg_errcode_t status = REG_OK;
+ tre_ast_node_t **items, *u, *n;
+ int i = 0, j, max_i = 32, curr_max, curr_min;
+ tre_ctype_t neg_classes[MAX_NEG_CLASSES];
+ int num_neg_classes = 0;
+
+ /* Start off with an array of `max_i' elements. */
+ items = xmalloc(sizeof(*items) * max_i);
+ if (items == NULL)
+ return REG_ESPACE;
+
+ if (*ctx->re == CHAR_CARET)
+ {
+ DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
+ negate = 1;
+ ctx->re++;
+ }
+
+ status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes,
+ &items, &i, &max_i);
+
+ if (status != REG_OK)
+ goto parse_bracket_done;
+
+ /* Sort the array if we need to negate it. */
+ if (negate)
+ qsort(items, (unsigned)i, sizeof(*items), tre_compare_items);
+
+ curr_max = curr_min = 0;
+ /* Build a union of the items in the array, negated if necessary. */
+ for (j = 0; j < i && status == REG_OK; j++)
+ {
+ int min, max;
+ tre_literal_t *l = items[j]->obj;
+ min = l->code_min;
+ max = l->code_max;
+
+ DPRINT(("item: %d - %d, class %ld, curr_max = %d\n",
+ (int)l->code_min, (int)l->code_max, (long)l->u.class, curr_max));
+
+ if (negate)
+ {
+ if (min < curr_max)
+ {
+ /* Overlap. */
+ curr_max = MAX(max + 1, curr_max);
+ DPRINT(("overlap, curr_max = %d\n", curr_max));
+ l = NULL;
+ }
+ else
+ {
+ /* No overlap. */
+ curr_max = min - 1;
+ if (curr_max >= curr_min)
+ {
+ DPRINT(("no overlap\n"));
+ l->code_min = curr_min;
+ l->code_max = curr_max;
+ }
+ else
+ {
+ DPRINT(("no overlap, zero room\n"));
+ l = NULL;
+ }
+ curr_min = curr_max = max + 1;
+ }
+ }
+
+ if (l != NULL)
+ {
+ int k;
+ DPRINT(("creating %d - %d\n", (int)l->code_min, (int)l->code_max));
+ l->position = ctx->position;
+ if (num_neg_classes > 0)
+ {
+ l->neg_classes = tre_mem_alloc(ctx->mem,
+ (sizeof(l->neg_classes)
+ * (num_neg_classes + 1)));
+ if (l->neg_classes == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ for (k = 0; k < num_neg_classes; k++)
+ l->neg_classes[k] = neg_classes[k];
+ l->neg_classes[k] = (tre_ctype_t)0;
+ }
+ else
+ l->neg_classes = NULL;
+ if (node == NULL)
+ node = items[j];
+ else
+ {
+ u = tre_ast_new_union(ctx->mem, node, items[j]);
+ if (u == NULL)
+ status = REG_ESPACE;
+ node = u;
+ }
+ }
+ }
+
+ if (status != REG_OK)
+ goto parse_bracket_done;
+
+ if (negate)
+ {
+ int k;
+ DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX));
+ n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
+ if (n == NULL)
+ status = REG_ESPACE;
+ else
+ {
+ tre_literal_t *l = n->obj;
+ if (num_neg_classes > 0)
+ {
+ l->neg_classes = tre_mem_alloc(ctx->mem,
+ (sizeof(l->neg_classes)
+ * (num_neg_classes + 1)));
+ if (l->neg_classes == NULL)
+ {
+ status = REG_ESPACE;
+ goto parse_bracket_done;
+ }
+ for (k = 0; k < num_neg_classes; k++)
+ l->neg_classes[k] = neg_classes[k];
+ l->neg_classes[k] = (tre_ctype_t)0;
+ }
+ else
+ l->neg_classes = NULL;
+ if (node == NULL)
+ node = n;
+ else
+ {
+ u = tre_ast_new_union(ctx->mem, node, n);
+ if (u == NULL)
+ status = REG_ESPACE;
+ node = u;
+ }
+ }
+ }
+
+ if (status != REG_OK)
+ goto parse_bracket_done;
+
+#ifdef TRE_DEBUG
+ tre_ast_print(node);
+#endif /* TRE_DEBUG */
+
+ parse_bracket_done:
+ xfree(items);
+ ctx->position++;
+ *result = node;
+ return status;
+}
+
+
+/* Parses a positive decimal integer. Returns -1 if the string does not
+ contain a valid number. */
+static int
+tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
+{
+ int num = -1;
+ const tre_char_t *r = *regex;
+ while (r < regex_end && *r >= L'0' && *r <= L'9')
+ {
+ if (num < 0)
+ num = 0;
+ num = num * 10 + *r - L'0';
+ r++;
+ }
+ *regex = r;
+ return num;
+}
+
+
+static reg_errcode_t
+tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+ int min, max, i;
+ int cost_ins, cost_del, cost_subst, cost_max;
+ int limit_ins, limit_del, limit_subst, limit_err;
+ const tre_char_t *r = ctx->re;
+ const tre_char_t *start;
+ int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+ int approx = 0;
+ int costs_set = 0;
+ int counts_set = 0;
+
+ cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
+ limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
+
+ /* Parse number (minimum repetition count). */
+ min = -1;
+ if (r < ctx->re_end && *r >= L'0' && *r <= L'9') {
+ DPRINT(("tre_parse: min count: '%.*" STRF "'\n", REST(r)));
+ min = tre_parse_int(&r, ctx->re_end);
+ }
+
+ /* Parse comma and second number (maximum repetition count). */
+ max = min;
+ if (r < ctx->re_end && *r == CHAR_COMMA)
+ {
+ r++;
+ DPRINT(("tre_parse: max count: '%.*" STRF "'\n", REST(r)));
+ max = tre_parse_int(&r, ctx->re_end);
+ }
+
+ /* Check that the repeat counts are sane. */
+ if ((max >= 0 && min > max) || max > RE_DUP_MAX)
+ return REG_BADBR;
+
+
+ /*
+ '{'
+ optionally followed immediately by a number == minimum repcount
+ optionally followed by , then a number == maximum repcount
+ + then a number == maximum insertion count
+ - then a number == maximum deletion count
+ # then a number == maximum substitution count
+ ~ then a number == maximum number of errors
+ Any of +, -, # or ~ without followed by a number means that
+ the maximum count/number of errors is infinite.
+
+ An equation of the form
+ Xi + Yd + Zs < C
+ can be specified to set costs and the cost limit to a value
+ different from the default value:
+ - X is the cost of an insertion
+ - Y is the cost of a deletion
+ - Z is the cost of a substitution
+ - C is the maximum cost
+
+ If no count limit or cost is set for an operation, the operation
+ is not allowed at all.
+ */
+
+
+ do {
+ int done;
+ start = r;
+
+ /* Parse count limit settings */
+ done = 0;
+ if (!counts_set)
+ while (r + 1 < ctx->re_end && !done)
+ {
+ switch (*r)
+ {
+ case CHAR_PLUS: /* Insert limit */
+ DPRINT(("tre_parse: ins limit: '%.*" STRF "'\n", REST(r)));
+ r++;
+ limit_ins = tre_parse_int(&r, ctx->re_end);
+ if (limit_ins < 0)
+ limit_ins = INT_MAX;
+ counts_set = 1;
+ break;
+ case CHAR_MINUS: /* Delete limit */
+ DPRINT(("tre_parse: del limit: '%.*" STRF "'\n", REST(r)));
+ r++;
+ limit_del = tre_parse_int(&r, ctx->re_end);
+ if (limit_del < 0)
+ limit_del = INT_MAX;
+ counts_set = 1;
+ break;
+ case CHAR_HASH: /* Substitute limit */
+ DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
+ r++;
+ limit_subst = tre_parse_int(&r, ctx->re_end);
+ if (limit_subst < 0)
+ limit_subst = INT_MAX;
+ counts_set = 1;
+ break;
+ case CHAR_TILDE: /* Maximum number of changes */
+ DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
+ r++;
+ limit_err = tre_parse_int(&r, ctx->re_end);
+ if (limit_err < 0)
+ limit_err = INT_MAX;
+ approx = 1;
+ break;
+ case CHAR_COMMA:
+ r++;
+ break;
+ case L' ':
+ r++;
+ break;
+ case L'}':
+ done = 1;
+ break;
+ default:
+ done = 1;
+ break;
+ }
+ }
+
+ /* Parse cost restriction equation. */
+ done = 0;
+ if (!costs_set)
+ while (r + 1 < ctx->re_end && !done)
+ {
+ switch (*r)
+ {
+ case CHAR_PLUS:
+ case L' ':
+ r++;
+ break;
+ case L'<':
+ DPRINT(("tre_parse: max cost: '%.*" STRF "'\n", REST(r)));
+ r++;
+ while (*r == L' ')
+ r++;
+ cost_max = tre_parse_int(&r, ctx->re_end);
+ if (cost_max < 0)
+ cost_max = INT_MAX;
+ else
+ cost_max--;
+ approx = 1;
+ break;
+ case CHAR_COMMA:
+ r++;
+ done = 1;
+ break;
+ default:
+ if (*r >= L'0' && *r <= L'9')
+ {
+#ifdef TRE_DEBUG
+ const tre_char_t *sr = r;
+#endif /* TRE_DEBUG */
+ int cost = tre_parse_int(&r, ctx->re_end);
+ /* XXX - make sure r is not past end. */
+ switch (*r)
+ {
+ case L'i': /* Insert cost */
+ DPRINT(("tre_parse: ins cost: '%.*" STRF "'\n",
+ REST(sr)));
+ r++;
+ cost_ins = cost;
+ costs_set = 1;
+ break;
+ case L'd': /* Delete cost */
+ DPRINT(("tre_parse: del cost: '%.*" STRF "'\n",
+ REST(sr)));
+ r++;
+ cost_del = cost;
+ costs_set = 1;
+ break;
+ case L's': /* Substitute cost */
+ DPRINT(("tre_parse: subst cost: '%.*" STRF "'\n",
+ REST(sr)));
+ r++;
+ cost_subst = cost;
+ costs_set = 1;
+ break;
+ default:
+ return REG_BADBR;
+ }
+ }
+ else
+ {
+ done = 1;
+ break;
+ }
+ }
+ }
+ } while (start != r);
+
+ /* Missing }. */
+ if (r >= ctx->re_end)
+ return REG_EBRACE;
+
+ /* Empty contents of {}. */
+ if (r == ctx->re)
+ return REG_BADBR;
+
+ /* Parse the ending '}' or '\}'.*/
+ if (ctx->cflags & REG_EXTENDED)
+ {
+ if (r >= ctx->re_end || *r != CHAR_RBRACE)
+ return REG_BADBR;
+ r++;
+ }
+ else
+ {
+ if (r + 1 >= ctx->re_end
+ || *r != CHAR_BACKSLASH
+ || *(r + 1) != CHAR_RBRACE)
+ return REG_BADBR;
+ r += 2;
+ }
+
+
+ /* Parse trailing '?' marking minimal repetition. */
+ if (r < ctx->re_end)
+ {
+ if (*r == CHAR_QUESTIONMARK)
+ {
+ minimal = !(ctx->cflags & REG_UNGREEDY);
+ r++;
+ }
+ else if (*r == CHAR_STAR || *r == CHAR_PLUS)
+ {
+ /* These are reserved for future extensions. */
+ return REG_BADRPT;
+ }
+ }
+
+ /* Create the AST node(s). */
+ if (min == 0 && max == 0)
+ {
+ *result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (*result == NULL)
+ return REG_ESPACE;
+ }
+ else
+ {
+ if (min < 0 && max < 0)
+ /* Only approximate parameters set, no repetitions. */
+ min = max = 1;
+
+ *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
+ if (!*result)
+ return REG_ESPACE;
+
+ /* If approximate matching parameters are set, add them to the
+ iteration node. */
+ if (approx || costs_set || counts_set)
+ {
+ int *params;
+ tre_iteration_t *iter = (*result)->obj;
+
+ if (costs_set || counts_set)
+ {
+ if (limit_ins == TRE_PARAM_UNSET)
+ {
+ if (cost_ins == TRE_PARAM_UNSET)
+ limit_ins = 0;
+ else
+ limit_ins = INT_MAX;
+ }
+
+ if (limit_del == TRE_PARAM_UNSET)
+ {
+ if (cost_del == TRE_PARAM_UNSET)
+ limit_del = 0;
+ else
+ limit_del = INT_MAX;
+ }
+
+ if (limit_subst == TRE_PARAM_UNSET)
+ {
+ if (cost_subst == TRE_PARAM_UNSET)
+ limit_subst = 0;
+ else
+ limit_subst = INT_MAX;
+ }
+ }
+
+ if (cost_max == TRE_PARAM_UNSET)
+ cost_max = INT_MAX;
+ if (limit_err == TRE_PARAM_UNSET)
+ limit_err = INT_MAX;
+
+ ctx->have_approx = 1;
+ params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
+ if (!params)
+ return REG_ESPACE;
+ for (i = 0; i < TRE_PARAM_LAST; i++)
+ params[i] = TRE_PARAM_UNSET;
+ params[TRE_PARAM_COST_INS] = cost_ins;
+ params[TRE_PARAM_COST_DEL] = cost_del;
+ params[TRE_PARAM_COST_SUBST] = cost_subst;
+ params[TRE_PARAM_COST_MAX] = cost_max;
+ params[TRE_PARAM_MAX_INS] = limit_ins;
+ params[TRE_PARAM_MAX_DEL] = limit_del;
+ params[TRE_PARAM_MAX_SUBST] = limit_subst;
+ params[TRE_PARAM_MAX_ERR] = limit_err;
+ iter->params = params;
+ }
+ }
+
+ DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
+ "limits [%d,%d,%d, total %d]\n",
+ min, max, cost_ins, cost_del, cost_subst, cost_max,
+ limit_ins, limit_del, limit_subst, limit_err));
+
+
+ ctx->re = r;
+ return REG_OK;
+}
+
+typedef enum {
+ PARSE_RE = 0,
+ PARSE_ATOM,
+ PARSE_MARK_FOR_SUBMATCH,
+ PARSE_BRANCH,
+ PARSE_PIECE,
+ PARSE_CATENATION,
+ PARSE_POST_CATENATION,
+ PARSE_UNION,
+ PARSE_POST_UNION,
+ PARSE_POSTFIX,
+ PARSE_RESTORE_CFLAGS
+} tre_parse_re_stack_symbol_t;
+
+
+reg_errcode_t
+tre_parse(tre_parse_ctx_t *ctx)
+{
+ tre_ast_node_t *result = NULL;
+ tre_parse_re_stack_symbol_t symbol;
+ reg_errcode_t status = REG_OK;
+ tre_stack_t *stack = ctx->stack;
+ int bottom = tre_stack_num_objects(stack);
+ int depth = 0;
+ int temporary_cflags = 0;
+
+ DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d\n",
+ ctx->len, ctx->re, ctx->len));
+
+ if (!ctx->nofirstsub)
+ {
+ STACK_PUSH(stack, int, ctx->submatch_id);
+ STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
+ ctx->submatch_id++;
+ }
+ STACK_PUSH(stack, int, PARSE_RE);
+ ctx->re_start = ctx->re;
+ ctx->re_end = ctx->re + ctx->len;
+
+
+ /* The following is basically just a recursive descent parser. I use
+ an explicit stack instead of recursive functions mostly because of
+ two reasons: compatibility with systems which have an overflowable
+ call stack, and efficiency (both in lines of code and speed). */
+ while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
+ {
+ if (status != REG_OK)
+ break;
+ symbol = tre_stack_pop_int(stack);
+ switch (symbol)
+ {
+ case PARSE_RE:
+ /* Parse a full regexp. A regexp is one or more branches,
+ separated by the union operator `|'. */
+#ifdef REG_LITERAL
+ if (!(ctx->cflags & REG_LITERAL)
+ && ctx->cflags & REG_EXTENDED)
+#endif /* REG_LITERAL */
+ STACK_PUSHX(stack, int, PARSE_UNION);
+ STACK_PUSHX(stack, int, PARSE_BRANCH);
+ break;
+
+ case PARSE_BRANCH:
+ /* Parse a branch. A branch is one or more pieces, concatenated.
+ A piece is an atom possibly followed by a postfix operator. */
+ STACK_PUSHX(stack, int, PARSE_CATENATION);
+ STACK_PUSHX(stack, int, PARSE_PIECE);
+ break;
+
+ case PARSE_PIECE:
+ /* Parse a piece. A piece is an atom possibly followed by one
+ or more postfix operators. */
+#ifdef REG_LITERAL
+ if (!(ctx->cflags & REG_LITERAL))
+#endif /* REG_LITERAL */
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
+ STACK_PUSHX(stack, int, PARSE_ATOM);
+ break;
+
+ case PARSE_CATENATION:
+ /* If the expression has not ended, parse another piece. */
+ {
+ tre_char_t c;
+ if (ctx->re >= ctx->re_end)
+ break;
+ c = *ctx->re;
+#ifdef REG_LITERAL
+ if (!(ctx->cflags & REG_LITERAL))
+ {
+#endif /* REG_LITERAL */
+ if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE)
+ break;
+ if ((ctx->cflags & REG_EXTENDED
+ && c == CHAR_RPAREN && depth > 0)
+ || (!(ctx->cflags & REG_EXTENDED)
+ && (c == CHAR_BACKSLASH
+ && *(ctx->re + 1) == CHAR_RPAREN)))
+ {
+ if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
+ status = REG_EPAREN;
+ DPRINT(("tre_parse: group end: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ depth--;
+ if (!(ctx->cflags & REG_EXTENDED))
+ ctx->re += 2;
+ break;
+ }
+#ifdef REG_LITERAL
+ }
+#endif /* REG_LITERAL */
+
+#ifdef REG_RIGHT_ASSOC
+ if (ctx->cflags & REG_RIGHT_ASSOC)
+ {
+ /* Right associative concatenation. */
+ STACK_PUSHX(stack, voidptr, result);
+ STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+ STACK_PUSHX(stack, int, PARSE_CATENATION);
+ STACK_PUSHX(stack, int, PARSE_PIECE);
+ }
+ else
+#endif /* REG_RIGHT_ASSOC */
+ {
+ /* Default case, left associative concatenation. */
+ STACK_PUSHX(stack, int, PARSE_CATENATION);
+ STACK_PUSHX(stack, voidptr, result);
+ STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+ STACK_PUSHX(stack, int, PARSE_PIECE);
+ }
+ break;
+ }
+
+ case PARSE_POST_CATENATION:
+ {
+ tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+ tre_ast_node_t *tmp_node;
+ tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
+ if (!tmp_node)
+ return REG_ESPACE;
+ result = tmp_node;
+ break;
+ }
+
+ case PARSE_UNION:
+ if (ctx->re >= ctx->re_end)
+ break;
+#ifdef REG_LITERAL
+ if (ctx->cflags & REG_LITERAL)
+ break;
+#endif /* REG_LITERAL */
+ switch (*ctx->re)
+ {
+ case CHAR_PIPE:
+ DPRINT(("tre_parse: union: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ STACK_PUSHX(stack, int, PARSE_UNION);
+ STACK_PUSHX(stack, voidptr, result);
+ STACK_PUSHX(stack, int, PARSE_POST_UNION);
+ STACK_PUSHX(stack, int, PARSE_BRANCH);
+ ctx->re++;
+ break;
+
+ case CHAR_RPAREN:
+ ctx->re++;
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case PARSE_POST_UNION:
+ {
+ tre_ast_node_t *tmp_node;
+ tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+ tmp_node = tre_ast_new_union(ctx->mem, tree, result);
+ if (!tmp_node)
+ return REG_ESPACE;
+ result = tmp_node;
+ break;
+ }
+
+ case PARSE_POSTFIX:
+ /* Parse postfix operators. */
+ if (ctx->re >= ctx->re_end)
+ break;
+#ifdef REG_LITERAL
+ if (ctx->cflags & REG_LITERAL)
+ break;
+#endif /* REG_LITERAL */
+ switch (*ctx->re)
+ {
+ case CHAR_PLUS:
+ case CHAR_QUESTIONMARK:
+ if (!(ctx->cflags & REG_EXTENDED))
+ break;
+ /*FALLTHROUGH*/
+ case CHAR_STAR:
+ {
+ tre_ast_node_t *tmp_node;
+ int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+ int rep_min = 0;
+ int rep_max = -1;
+#ifdef TRE_DEBUG
+ const tre_char_t *tmp_re;
+#endif
+
+ if (*ctx->re == CHAR_PLUS)
+ rep_min = 1;
+ if (*ctx->re == CHAR_QUESTIONMARK)
+ rep_max = 1;
+#ifdef TRE_DEBUG
+ tmp_re = ctx->re;
+#endif
+
+ if (ctx->re + 1 < ctx->re_end)
+ {
+ if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
+ {
+ minimal = !(ctx->cflags & REG_UNGREEDY);
+ ctx->re++;
+ }
+ else if (*(ctx->re + 1) == CHAR_STAR
+ || *(ctx->re + 1) == CHAR_PLUS)
+ {
+ /* These are reserved for future extensions. */
+ return REG_BADRPT;
+ }
+ }
+
+ DPRINT(("tre_parse: %s star: '%.*" STRF "'\n",
+ minimal ? " minimal" : "greedy", REST(tmp_re)));
+ ctx->re++;
+ tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
+ minimal);
+ if (tmp_node == NULL)
+ return REG_ESPACE;
+ result = tmp_node;
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
+ }
+ break;
+
+ case CHAR_BACKSLASH:
+ /* "\{" is special without REG_EXTENDED */
+ if (!(ctx->cflags & REG_EXTENDED)
+ && ctx->re + 1 < ctx->re_end
+ && *(ctx->re + 1) == CHAR_LBRACE)
+ {
+ ctx->re++;
+ goto parse_brace;
+ }
+ else
+ break;
+
+ case CHAR_LBRACE:
+ /* "{" is literal without REG_EXTENDED */
+ if (!(ctx->cflags & REG_EXTENDED))
+ break;
+
+ parse_brace:
+ DPRINT(("tre_parse: bound: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ ctx->re++;
+
+ status = tre_parse_bound(ctx, &result);
+ if (status != REG_OK)
+ return status;
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
+ break;
+ }
+ break;
+
+ case PARSE_ATOM:
+ /* Parse an atom. An atom is a regular expression enclosed in `()',
+ an empty set of `()', a bracket expression, `.', `^', `$',
+ a `\' followed by a character, or a single character. */
+
+ /* End of regexp? (empty string). */
+ if (ctx->re >= ctx->re_end)
+ goto parse_literal;
+
+#ifdef REG_LITERAL
+ if (ctx->cflags & REG_LITERAL)
+ goto parse_literal;
+#endif /* REG_LITERAL */
+
+ switch (*ctx->re)
+ {
+ case CHAR_LPAREN: /* parenthesized subexpression */
+
+ /* Handle "(?...)" extensions. They work in a way similar
+ to Perls corresponding extensions. */
+ if (ctx->cflags & REG_EXTENDED
+ && *(ctx->re + 1) == CHAR_QUESTIONMARK)
+ {
+ int new_cflags = ctx->cflags;
+ int bit = 1;
+ DPRINT(("tre_parse: extension: '%.*" STRF "\n",
+ REST(ctx->re)));
+ ctx->re += 2;
+ while (/*CONSTCOND*/1)
+ {
+ if (*ctx->re == L'i')
+ {
+ DPRINT(("tre_parse: icase: '%.*" STRF "\n",
+ REST(ctx->re)));
+ if (bit)
+ new_cflags |= REG_ICASE;
+ else
+ new_cflags &= ~REG_ICASE;
+ ctx->re++;
+ }
+ else if (*ctx->re == L'n')
+ {
+ DPRINT(("tre_parse: newline: '%.*" STRF "\n",
+ REST(ctx->re)));
+ if (bit)
+ new_cflags |= REG_NEWLINE;
+ else
+ new_cflags &= ~REG_NEWLINE;
+ ctx->re++;
+ }
+#ifdef REG_RIGHT_ASSOC
+ else if (*ctx->re == L'r')
+ {
+ DPRINT(("tre_parse: right assoc: '%.*" STRF "\n",
+ REST(ctx->re)));
+ if (bit)
+ new_cflags |= REG_RIGHT_ASSOC;
+ else
+ new_cflags &= ~REG_RIGHT_ASSOC;
+ ctx->re++;
+ }
+#endif /* REG_RIGHT_ASSOC */
+#ifdef REG_UNGREEDY
+ else if (*ctx->re == L'U')
+ {
+ DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n",
+ REST(ctx->re)));
+ if (bit)
+ new_cflags |= REG_UNGREEDY;
+ else
+ new_cflags &= ~REG_UNGREEDY;
+ ctx->re++;
+ }
+#endif /* REG_UNGREEDY */
+ else if (*ctx->re == CHAR_MINUS)
+ {
+ DPRINT(("tre_parse: turn off: '%.*" STRF "\n",
+ REST(ctx->re)));
+ ctx->re++;
+ bit = 0;
+ }
+ else if (*ctx->re == CHAR_COLON)
+ {
+ DPRINT(("tre_parse: no group: '%.*" STRF "\n",
+ REST(ctx->re)));
+ ctx->re++;
+ depth++;
+ break;
+ }
+ else if (*ctx->re == CHAR_HASH)
+ {
+ DPRINT(("tre_parse: comment: '%.*" STRF "\n",
+ REST(ctx->re)));
+ /* A comment can contain any character except a
+ right parenthesis */
+ while (*ctx->re != CHAR_RPAREN
+ && ctx->re < ctx->re_end)
+ ctx->re++;
+ if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
+ {
+ ctx->re++;
+ break;
+ }
+ else
+ return REG_BADPAT;
+ }
+ else if (*ctx->re == CHAR_RPAREN)
+ {
+ ctx->re++;
+ break;
+ }
+ else
+ return REG_BADPAT;
+ }
+
+ /* Turn on the cflags changes for the rest of the
+ enclosing group. */
+ STACK_PUSHX(stack, int, ctx->cflags);
+ STACK_PUSHX(stack, int, PARSE_RESTORE_CFLAGS);
+ STACK_PUSHX(stack, int, PARSE_RE);
+ ctx->cflags = new_cflags;
+ break;
+ }
+
+ if (ctx->cflags & REG_EXTENDED
+ || (ctx->re > ctx->re_start
+ && *(ctx->re - 1) == CHAR_BACKSLASH))
+ {
+ depth++;
+ if (ctx->re + 2 < ctx->re_end
+ && *(ctx->re + 1) == CHAR_QUESTIONMARK
+ && *(ctx->re + 2) == CHAR_COLON)
+ {
+ DPRINT(("tre_parse: group begin: '%.*" STRF
+ "', no submatch\n", REST(ctx->re)));
+ /* Don't mark for submatching. */
+ ctx->re += 3;
+ STACK_PUSHX(stack, int, PARSE_RE);
+ }
+ else
+ {
+ DPRINT(("tre_parse: group begin: '%.*" STRF
+ "', submatch %d\n", REST(ctx->re),
+ ctx->submatch_id));
+ ctx->re++;
+ /* First parse a whole RE, then mark the resulting tree
+ for submatching. */
+ STACK_PUSHX(stack, int, ctx->submatch_id);
+ STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
+ STACK_PUSHX(stack, int, PARSE_RE);
+ ctx->submatch_id++;
+ }
+ }
+ else
+ goto parse_literal;
+ break;
+
+ case CHAR_RPAREN: /* end of current subexpression */
+ if ((ctx->cflags & REG_EXTENDED && depth > 0)
+ || (ctx->re > ctx->re_start
+ && *(ctx->re - 1) == CHAR_BACKSLASH))
+ {
+ DPRINT(("tre_parse: empty: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ /* We were expecting an atom, but instead the current
+ subexpression was closed. POSIX leaves the meaning of
+ this to be implementation-defined. We interpret this as
+ an empty expression (which matches an empty string). */
+ result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (result == NULL)
+ return REG_ESPACE;
+ if (!(ctx->cflags & REG_EXTENDED))
+ ctx->re--;
+ }
+ else
+ goto parse_literal;
+ break;
+
+ case CHAR_LBRACKET: /* bracket expression */
+ DPRINT(("tre_parse: bracket: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ ctx->re++;
+ status = tre_parse_bracket(ctx, &result);
+ if (status != REG_OK)
+ return status;
+ break;
+
+ case CHAR_BACKSLASH:
+ /* If this is "\(" or "\)" chew off the backslash and
+ try again. */
+ if (!(ctx->cflags & REG_EXTENDED)
+ && ctx->re + 1 < ctx->re_end
+ && (*(ctx->re + 1) == CHAR_LPAREN
+ || *(ctx->re + 1) == CHAR_RPAREN))
+ {
+ ctx->re++;
+ STACK_PUSHX(stack, int, PARSE_ATOM);
+ break;
+ }
+
+ /* If a macro is used, parse the expanded macro recursively. */
+ {
+ tre_char_t buf[64];
+ tre_expand_macro(ctx->re + 1, ctx->re_end,
+ buf, elementsof(buf));
+ if (buf[0] != 0)
+ {
+ tre_parse_ctx_t subctx;
+ memcpy(&subctx, ctx, sizeof(subctx));
+ subctx.re = buf;
+ subctx.len = tre_strlen(buf);
+ subctx.nofirstsub = 1;
+ status = tre_parse(&subctx);
+ if (status != REG_OK)
+ return status;
+ ctx->re += 2;
+ ctx->position = subctx.position;
+ result = subctx.result;
+ break;
+ }
+ }
+
+ if (ctx->re + 1 >= ctx->re_end)
+ /* Trailing backslash. */
+ return REG_EESCAPE;
+
+#ifdef REG_LITERAL
+ if (*(ctx->re + 1) == L'Q')
+ {
+ DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ ctx->cflags |= REG_LITERAL;
+ temporary_cflags |= REG_LITERAL;
+ ctx->re += 2;
+ STACK_PUSHX(stack, int, PARSE_ATOM);
+ break;
+ }
+#endif /* REG_LITERAL */
+
+ DPRINT(("tre_parse: bleep: '%.*" STRF "'\n", REST(ctx->re)));
+ ctx->re++;
+ switch (*ctx->re)
+ {
+ case L'b':
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
+ ASSERT_AT_WB, -1);
+ ctx->re++;
+ break;
+ case L'B':
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
+ ASSERT_AT_WB_NEG, -1);
+ ctx->re++;
+ break;
+ case L'<':
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
+ ASSERT_AT_BOW, -1);
+ ctx->re++;
+ break;
+ case L'>':
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
+ ASSERT_AT_EOW, -1);
+ ctx->re++;
+ break;
+ case L'x':
+ ctx->re++;
+ if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
+ {
+ /* 8 bit hex char. */
+ char tmp[3] = {0, 0, 0};
+ long val;
+ DPRINT(("tre_parse: 8 bit hex: '%.*" STRF "'\n",
+ REST(ctx->re - 2)));
+
+ if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
+ {
+ tmp[0] = (char)ctx->re[0];
+ ctx->re++;
+ }
+ if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
+ {
+ tmp[1] = (char)ctx->re[0];
+ ctx->re++;
+ }
+ val = strtol(tmp, NULL, 16);
+ result = tre_ast_new_literal(ctx->mem, (int)val,
+ (int)val, ctx->position);
+ ctx->position++;
+ break;
+ }
+ else if (ctx->re < ctx->re_end)
+ {
+ /* Wide char. */
+ char tmp[32];
+ long val;
+ int i = 0;
+ ctx->re++;
+ while (ctx->re_end - ctx->re >= 0)
+ {
+ if (ctx->re[0] == CHAR_RBRACE)
+ break;
+ if (tre_isxdigit(ctx->re[0]))
+ {
+ tmp[i] = (char)ctx->re[0];
+ i++;
+ ctx->re++;
+ continue;
+ }
+ return REG_EBRACE;
+ }
+ ctx->re++;
+ tmp[i] = 0;
+ val = strtol(tmp, NULL, 16);
+ result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
+ ctx->position);
+ ctx->position++;
+ break;
+ }
+ /*FALLTHROUGH*/
+
+ default:
+ if (tre_isdigit(*ctx->re))
+ {
+ /* Back reference. */
+ int val = *ctx->re - L'0';
+ DPRINT(("tre_parse: backref: '%.*" STRF "'\n",
+ REST(ctx->re - 1)));
+ result = tre_ast_new_literal(ctx->mem, BACKREF, val,
+ ctx->position);
+ if (result == NULL)
+ return REG_ESPACE;
+ ctx->position++;
+ ctx->max_backref = MAX(val, ctx->max_backref);
+ ctx->re++;
+ }
+ else
+ {
+ /* Escaped character. */
+ DPRINT(("tre_parse: escaped: '%.*" STRF "'\n",
+ REST(ctx->re - 1)));
+ result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
+ ctx->position);
+ ctx->position++;
+ ctx->re++;
+ }
+ break;
+ }
+ if (result == NULL)
+ return REG_ESPACE;
+ break;
+
+ case CHAR_PERIOD: /* the any-symbol */
+ DPRINT(("tre_parse: any: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ if (ctx->cflags & REG_NEWLINE)
+ {
+ tre_ast_node_t *tmp1;
+ tre_ast_node_t *tmp2;
+ tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1,
+ ctx->position);
+ if (!tmp1)
+ return REG_ESPACE;
+ tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX,
+ ctx->position + 1);
+ if (!tmp2)
+ return REG_ESPACE;
+ result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+ if (!result)
+ return REG_ESPACE;
+ ctx->position += 2;
+ }
+ else
+ {
+ result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
+ ctx->position);
+ if (!result)
+ return REG_ESPACE;
+ ctx->position++;
+ }
+ ctx->re++;
+ break;
+
+ case CHAR_CARET: /* beginning of line assertion */
+ /* '^' has a special meaning everywhere in EREs, and in the
+ beginning of the RE and after \( is BREs. */
+ if (ctx->cflags & REG_EXTENDED
+ || (ctx->re - 2 >= ctx->re_start
+ && *(ctx->re - 2) == CHAR_BACKSLASH
+ && *(ctx->re - 1) == CHAR_LPAREN)
+ || ctx->re == ctx->re_start)
+ {
+ DPRINT(("tre_parse: BOL: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
+ ASSERT_AT_BOL, -1);
+ if (result == NULL)
+ return REG_ESPACE;
+ ctx->re++;
+ }
+ else
+ goto parse_literal;
+ break;
+
+ case CHAR_DOLLAR: /* end of line assertion. */
+ /* '$' is special everywhere in EREs, and in the end of the
+ string and before \) is BREs. */
+ if (ctx->cflags & REG_EXTENDED
+ || (ctx->re + 2 < ctx->re_end
+ && *(ctx->re + 1) == CHAR_BACKSLASH
+ && *(ctx->re + 2) == CHAR_RPAREN)
+ || ctx->re + 1 == ctx->re_end)
+ {
+ DPRINT(("tre_parse: EOL: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ result = tre_ast_new_literal(ctx->mem, ASSERTION,
+ ASSERT_AT_EOL, -1);
+ if (result == NULL)
+ return REG_ESPACE;
+ ctx->re++;
+ }
+ else
+ goto parse_literal;
+ break;
+
+ default:
+ parse_literal:
+
+ if (temporary_cflags && ctx->re + 1 < ctx->re_end
+ && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E')
+ {
+ DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ ctx->cflags &= ~temporary_cflags;
+ temporary_cflags = 0;
+ ctx->re += 2;
+ STACK_PUSHX(stack, int, PARSE_PIECE);
+ break;
+ }
+
+
+ /* We are expecting an atom. If the subexpression (or the whole
+ regexp ends here, we interpret it as an empty expression
+ (which matches an empty string). */
+ if (
+#ifdef REG_LITERAL
+ !(ctx->cflags & REG_LITERAL) &&
+#endif /* REG_LITERAL */
+ (ctx->re >= ctx->re_end
+ || *ctx->re == CHAR_STAR
+ || (ctx->cflags & REG_EXTENDED
+ && (*ctx->re == CHAR_PIPE
+ || *ctx->re == CHAR_LBRACE
+ || *ctx->re == CHAR_PLUS
+ || *ctx->re == CHAR_QUESTIONMARK))
+ /* Test for "\)" in BRE mode. */
+ || (!(ctx->cflags & REG_EXTENDED)
+ && ctx->re + 1 < ctx->re_end
+ && *ctx->re == CHAR_BACKSLASH
+ && *(ctx->re + 1) == CHAR_LBRACE)))
+ {
+ DPRINT(("tre_parse: empty: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (!result)
+ return REG_ESPACE;
+ break;
+ }
+
+ DPRINT(("tre_parse: literal: '%.*" STRF "'\n",
+ REST(ctx->re)));
+ /* Note that we can't use an tre_isalpha() test here, since there
+ may be characters which are alphabetic but neither upper or
+ lower case. */
+ if (ctx->cflags & REG_ICASE
+ && (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
+ {
+ tre_ast_node_t *tmp1;
+ tre_ast_node_t *tmp2;
+
+ /* XXX - Can there be more than one opposite-case
+ counterpoints for some character in some locale? Or
+ more than two characters which all should be regarded
+ the same character if case is ignored? If yes, there
+ does not seem to be a portable way to detect it. I guess
+ that at least for multi-character collating elements there
+ could be several opposite-case counterpoints, but they
+ cannot be supported portably anyway. */
+ tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re),
+ tre_toupper(*ctx->re),
+ ctx->position);
+ if (!tmp1)
+ return REG_ESPACE;
+ tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re),
+ tre_tolower(*ctx->re),
+ ctx->position);
+ if (!tmp2)
+ return REG_ESPACE;
+ result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+ if (!result)
+ return REG_ESPACE;
+ }
+ else
+ {
+ result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
+ ctx->position);
+ if (!result)
+ return REG_ESPACE;
+ }
+ ctx->position++;
+ ctx->re++;
+ break;
+ }
+ break;
+
+ case PARSE_MARK_FOR_SUBMATCH:
+ {
+ int submatch_id = tre_stack_pop_int(stack);
+
+ if (result->submatch_id >= 0)
+ {
+ tre_ast_node_t *n, *tmp_node;
+ n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (n == NULL)
+ return REG_ESPACE;
+ tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
+ if (tmp_node == NULL)
+ return REG_ESPACE;
+ tmp_node->num_submatches = result->num_submatches;
+ result = tmp_node;
+ }
+ result->submatch_id = submatch_id;
+ result->num_submatches++;
+ break;
+ }
+
+ case PARSE_RESTORE_CFLAGS:
+ ctx->cflags = tre_stack_pop_int(stack);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ /* Check for missing closing parentheses. */
+ if (depth > 0)
+ return REG_EPAREN;
+
+ if (status == REG_OK)
+ ctx->result = result;
+
+ return status;
+}
+
+/* EOF */
diff --git a/lib/tre-parse.h b/lib/tre-parse.h
new file mode 100644
index 0000000000000..8cfedf15f7bc7
--- /dev/null
+++ b/lib/tre-parse.h
@@ -0,0 +1,50 @@
+/*
+ tre-parse.c - Regexp parser definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef TRE_PARSE_H
+#define TRE_PARSE_H 1
+
+/* Parse context. */
+typedef struct {
+ /* Memory allocator. The AST is allocated using this. */
+ tre_mem_t mem;
+ /* Stack used for keeping track of regexp syntax. */
+ tre_stack_t *stack;
+ /* The parse result. */
+ tre_ast_node_t *result;
+ /* The regexp to parse and its length. */
+ const tre_char_t *re;
+ /* The first character of the entire regexp. */
+ const tre_char_t *re_start;
+ /* The first character after the end of the regexp. */
+ const tre_char_t *re_end;
+ int len;
+ /* Current submatch ID. */
+ int submatch_id;
+ /* Current position (number of literal). */
+ int position;
+ /* The highest back reference or -1 if none seen so far. */
+ int max_backref;
+ /* This flag is set if the regexp uses approximate matching. */
+ int have_approx;
+ /* Compilation flags. */
+ int cflags;
+ /* If this flag is set the top-level submatch is not captured. */
+ int nofirstsub;
+ /* The currently set approximate matching parameters. */
+ int params[TRE_PARAM_LAST];
+} tre_parse_ctx_t;
+
+/* Parses a wide character regexp pattern into a syntax tree. This parser
+ handles both syntaxes (BRE and ERE), including the TRE extensions. */
+reg_errcode_t
+tre_parse(tre_parse_ctx_t *ctx);
+
+#endif /* TRE_PARSE_H */
+
+/* EOF */
diff --git a/lib/tre-stack.c b/lib/tre-stack.c
new file mode 100644
index 0000000000000..8ad89c8ce65ed
--- /dev/null
+++ b/lib/tre-stack.c
@@ -0,0 +1,125 @@
+/*
+ tre-stack.c - Simple stack implementation
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <stdlib.h>
+#include <assert.h>
+
+#include "tre-internal.h"
+#include "tre-stack.h"
+#include "xmalloc.h"
+
+union tre_stack_item {
+ void *voidptr_value;
+ int int_value;
+};
+
+struct tre_stack_rec {
+ int size;
+ int max_size;
+ int increment;
+ int ptr;
+ union tre_stack_item *stack;
+};
+
+
+tre_stack_t *
+tre_stack_new(int size, int max_size, int increment)
+{
+ tre_stack_t *s;
+
+ s = xmalloc(sizeof(*s));
+ if (s != NULL)
+ {
+ s->stack = xmalloc(sizeof(*s->stack) * size);
+ if (s->stack == NULL)
+ {
+ xfree(s);
+ return NULL;
+ }
+ s->size = size;
+ s->max_size = max_size;
+ s->increment = increment;
+ s->ptr = 0;
+ }
+ return s;
+}
+
+void
+tre_stack_destroy(tre_stack_t *s)
+{
+ xfree(s->stack);
+ xfree(s);
+}
+
+int
+tre_stack_num_objects(tre_stack_t *s)
+{
+ return s->ptr;
+}
+
+static reg_errcode_t
+tre_stack_push(tre_stack_t *s, union tre_stack_item value)
+{
+ if (s->ptr < s->size)
+ {
+ s->stack[s->ptr] = value;
+ s->ptr++;
+ }
+ else
+ {
+ if (s->size >= s->max_size)
+ {
+ DPRINT(("tre_stack_push: stack full\n"));
+ return REG_ESPACE;
+ }
+ else
+ {
+ union tre_stack_item *new_buffer;
+ int new_size;
+ DPRINT(("tre_stack_push: trying to realloc more space\n"));
+ new_size = s->size + s->increment;
+ if (new_size > s->max_size)
+ new_size = s->max_size;
+ new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
+ if (new_buffer == NULL)
+ {
+ DPRINT(("tre_stack_push: realloc failed.\n"));
+ return REG_ESPACE;
+ }
+ DPRINT(("tre_stack_push: realloc succeeded.\n"));
+ assert(new_size > s->size);
+ s->size = new_size;
+ s->stack = new_buffer;
+ tre_stack_push(s, value);
+ }
+ }
+ return REG_OK;
+}
+
+#define define_pushf(typetag, type) \
+ declare_pushf(typetag, type) { \
+ union tre_stack_item item; \
+ item.typetag ## _value = value; \
+ return tre_stack_push(s, item); \
+}
+
+define_pushf(int, int)
+define_pushf(voidptr, void *)
+
+#define define_popf(typetag, type) \
+ declare_popf(typetag, type) { \
+ return s->stack[--s->ptr].typetag ## _value; \
+ }
+
+define_popf(int, int)
+define_popf(voidptr, void *)
+
+/* EOF */
diff --git a/lib/tre-stack.h b/lib/tre-stack.h
new file mode 100644
index 0000000000000..7d11c5e2d1683
--- /dev/null
+++ b/lib/tre-stack.h
@@ -0,0 +1,76 @@
+/*
+ tre-stack.h: Stack definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+
+#ifndef TRE_STACK_H
+#define TRE_STACK_H 1
+
+#include "tre.h"
+
+typedef struct tre_stack_rec tre_stack_t;
+
+/* Creates a new stack object. `size' is initial size in bytes, `max_size'
+ is maximum size, and `increment' specifies how much more space will be
+ allocated with realloc() if all space gets used up. Returns the stack
+ object or NULL if out of memory. */
+tre_stack_t *
+tre_stack_new(int size, int max_size, int increment);
+
+/* Frees the stack object. */
+void
+tre_stack_destroy(tre_stack_t *s);
+
+/* Returns the current number of objects in the stack. */
+int
+tre_stack_num_objects(tre_stack_t *s);
+
+/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
+ `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
+ This tries to realloc() more space before failing if maximum size
+ has not yet been reached. Returns REG_OK if successful. */
+#define declare_pushf(typetag, type) \
+ reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
+
+declare_pushf(voidptr, void *);
+declare_pushf(int, int);
+
+/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
+ element off of stack `s' and returns it. The stack must not be
+ empty. */
+#define declare_popf(typetag, type) \
+ type tre_stack_pop_ ## typetag(tre_stack_t *s)
+
+declare_popf(voidptr, void *);
+declare_popf(int, int);
+
+/* Just to save some typing. */
+#define STACK_PUSH(s, typetag, value) \
+ do \
+ { \
+ status = tre_stack_push_ ## typetag(s, value); \
+ } \
+ while (/*CONSTCOND*/0)
+
+#define STACK_PUSHX(s, typetag, value) \
+ { \
+ status = tre_stack_push_ ## typetag(s, value); \
+ if (status != REG_OK) \
+ break; \
+ }
+
+#define STACK_PUSHR(s, typetag, value) \
+ { \
+ reg_errcode_t _status; \
+ _status = tre_stack_push_ ## typetag(s, value); \
+ if (_status != REG_OK) \
+ return _status; \
+ }
+
+#endif /* TRE_STACK_H */
+
+/* EOF */
diff --git a/lib/tre.h b/lib/tre.h
new file mode 100644
index 0000000000000..ace34824a627a
--- /dev/null
+++ b/lib/tre.h
@@ -0,0 +1,276 @@
+/*
+ tre.h - TRE public API definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef TRE_H
+#define TRE_H 1
+
+#include "tre-config.h"
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif /* HAVE_SYS_TYPES_H */
+
+#ifdef HAVE_LIBUTF8_H
+#include <libutf8.h>
+#endif /* HAVE_LIBUTF8_H */
+
+#ifdef TRE_USE_SYSTEM_REGEX_H
+/* Include the system regex.h to make TRE ABI compatible with the
+ system regex. */
+#include TRE_SYSTEM_REGEX_H_PATH
+#define tre_regcomp regcomp
+#define tre_regexec regexec
+#define tre_regerror regerror
+#define tre_regfree regfree
+#endif /* TRE_USE_SYSTEM_REGEX_H */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef TRE_USE_SYSTEM_REGEX_H
+
+#ifndef REG_OK
+#define REG_OK 0
+#endif /* !REG_OK */
+
+#ifndef HAVE_REG_ERRCODE_T
+typedef int reg_errcode_t;
+#endif /* !HAVE_REG_ERRCODE_T */
+
+#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
+#define REG_LITERAL 0x1000
+#endif
+
+/* Extra tre_regcomp() flags. */
+#ifndef REG_BASIC
+#define REG_BASIC 0
+#endif /* !REG_BASIC */
+#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
+#define REG_UNGREEDY (REG_RIGHT_ASSOC << 1)
+
+/* Extra tre_regexec() flags. */
+#define REG_APPROX_MATCHER 0x1000
+#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
+
+#else /* !TRE_USE_SYSTEM_REGEX_H */
+
+/* If the we're not using system regex.h, we need to define the
+ structs and enums ourselves. */
+
+typedef int regoff_t;
+typedef struct {
+ size_t re_nsub; /* Number of parenthesized subexpressions. */
+ void *value; /* For internal use only. */
+} regex_t;
+
+typedef struct {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+} regmatch_t;
+
+
+typedef enum {
+ REG_OK = 0, /* No error. */
+ /* POSIX tre_regcomp() return error codes. (In the order listed in the
+ standard.) */
+ REG_NOMATCH, /* No match. */
+ REG_BADPAT, /* Invalid regexp. */
+ REG_ECOLLATE, /* Unknown collating element. */
+ REG_ECTYPE, /* Unknown character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* "[]" imbalance */
+ REG_EPAREN, /* "\(\)" or "()" imbalance */
+ REG_EBRACE, /* "\{\}" or "{}" imbalance */
+ REG_BADBR, /* Invalid content of {} */
+ REG_ERANGE, /* Invalid use of range operator */
+ REG_ESPACE, /* Out of memory. */
+ REG_BADRPT /* Invalid use of repetition operators. */
+} reg_errcode_t;
+
+/* POSIX tre_regcomp() flags. */
+#define REG_EXTENDED 1
+#define REG_ICASE (REG_EXTENDED << 1)
+#define REG_NEWLINE (REG_ICASE << 1)
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+/* Extra tre_regcomp() flags. */
+#define REG_BASIC 0
+#define REG_LITERAL (REG_NOSUB << 1)
+#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
+#define REG_UNGREEDY (REG_RIGHT_ASSOC << 1)
+
+/* POSIX tre_regexec() flags. */
+#define REG_NOTBOL 1
+#define REG_NOTEOL (REG_NOTBOL << 1)
+
+/* Extra tre_regexec() flags. */
+#define REG_APPROX_MATCHER (REG_NOTEOL << 1)
+#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
+
+#endif /* !TRE_USE_SYSTEM_REGEX_H */
+
+/* REG_NOSPEC and REG_LITERAL mean the same thing. */
+#if defined(REG_LITERAL) && !defined(REG_NOSPEC)
+#define REG_NOSPEC REG_LITERAL
+#elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
+#define REG_LITERAL REG_NOSPEC
+#endif /* defined(REG_NOSPEC) */
+
+/* The maximum number of iterations in a bound expression. */
+#undef RE_DUP_MAX
+#define RE_DUP_MAX 255
+
+/* The POSIX.2 regexp functions */
+extern int
+tre_regcomp(regex_t *preg, const char *regex, int cflags);
+
+extern int
+tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags);
+
+extern size_t
+tre_regerror(int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size);
+
+extern void
+tre_regfree(regex_t *preg);
+
+#ifdef TRE_WCHAR
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+
+/* Wide character versions (not in POSIX.2). */
+extern int
+tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);
+
+extern int
+tre_regwexec(const regex_t *preg, const wchar_t *string,
+ size_t nmatch, regmatch_t pmatch[], int eflags);
+#endif /* TRE_WCHAR */
+
+/* Versions with a maximum length argument and therefore the capability to
+ handle null characters in the middle of the strings (not in POSIX.2). */
+extern int
+tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
+
+extern int
+tre_regnexec(const regex_t *preg, const char *string, size_t len,
+ size_t nmatch, regmatch_t pmatch[], int eflags);
+
+#ifdef TRE_WCHAR
+extern int
+tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);
+
+extern int
+tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
+ size_t nmatch, regmatch_t pmatch[], int eflags);
+#endif /* TRE_WCHAR */
+
+#ifdef TRE_APPROX
+
+/* Approximate matching parameter struct. */
+typedef struct {
+ int cost_ins; /* Default cost of an inserted character. */
+ int cost_del; /* Default cost of a deleted character. */
+ int cost_subst; /* Default cost of a substituted character. */
+ int max_cost; /* Maximum allowed cost of a match. */
+
+ int max_ins; /* Maximum allowed number of inserts. */
+ int max_del; /* Maximum allowed number of deletes. */
+ int max_subst; /* Maximum allowed number of substitutes. */
+ int max_err; /* Maximum allowed number of errors total. */
+} regaparams_t;
+
+/* Approximate matching result struct. */
+typedef struct {
+ size_t nmatch; /* Length of pmatch[] array. */
+ regmatch_t *pmatch; /* Submatch data. */
+ int cost; /* Cost of the match. */
+ int num_ins; /* Number of inserts in the match. */
+ int num_del; /* Number of deletes in the match. */
+ int num_subst; /* Number of substitutes in the match. */
+} regamatch_t;
+
+
+/* Approximate matching functions. */
+extern int
+tre_regaexec(const regex_t *preg, const char *string,
+ regamatch_t *match, regaparams_t params, int eflags);
+
+extern int
+tre_reganexec(const regex_t *preg, const char *string, size_t len,
+ regamatch_t *match, regaparams_t params, int eflags);
+#ifdef TRE_WCHAR
+/* Wide character approximate matching. */
+extern int
+tre_regawexec(const regex_t *preg, const wchar_t *string,
+ regamatch_t *match, regaparams_t params, int eflags);
+
+extern int
+tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
+ regamatch_t *match, regaparams_t params, int eflags);
+#endif /* TRE_WCHAR */
+
+/* Sets the parameters to default values. */
+extern void
+tre_regaparams_default(regaparams_t *params);
+#endif /* TRE_APPROX */
+
+#ifdef TRE_WCHAR
+typedef wchar_t tre_char_t;
+#else /* !TRE_WCHAR */
+typedef unsigned char tre_char_t;
+#endif /* !TRE_WCHAR */
+
+typedef struct {
+ int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
+ void (*rewind)(size_t pos, void *context);
+ int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
+ void *context;
+} tre_str_source;
+
+extern int
+tre_reguexec(const regex_t *preg, const tre_str_source *string,
+ size_t nmatch, regmatch_t pmatch[], int eflags);
+
+/* Returns the version string. The returned string is static. */
+extern char *
+tre_version(void);
+
+/* Returns the value for a config parameter. The type to which `result'
+ must point to depends of the value of `query', see documentation for
+ more details. */
+extern int
+tre_config(int query, void *result);
+
+enum {
+ TRE_CONFIG_APPROX,
+ TRE_CONFIG_WCHAR,
+ TRE_CONFIG_MULTIBYTE,
+ TRE_CONFIG_SYSTEM_ABI,
+ TRE_CONFIG_VERSION
+};
+
+/* Returns 1 if the compiled pattern has back references, 0 if not. */
+extern int
+tre_have_backrefs(const regex_t *preg);
+
+/* Returns 1 if the compiled pattern uses approximate matching features,
+ 0 if not. */
+extern int
+tre_have_approx(const regex_t *preg);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* TRE_H */
+
+/* EOF */
diff --git a/lib/xmalloc.c b/lib/xmalloc.c
new file mode 100644
index 0000000000000..3459d2d917ad2
--- /dev/null
+++ b/lib/xmalloc.c
@@ -0,0 +1,351 @@
+/*
+ xmalloc.c - Simple malloc debugging library implementation
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+/*
+ TODO:
+ - red zones
+ - group dumps by source location
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#define XMALLOC_INTERNAL 1
+#include "xmalloc.h"
+
+
+/*
+ Internal stuff.
+*/
+
+typedef struct hashTableItemRec {
+ void *ptr;
+ int bytes;
+ const char *file;
+ int line;
+ const char *func;
+ struct hashTableItemRec *next;
+} hashTableItem;
+
+typedef struct {
+ hashTableItem **table;
+} hashTable;
+
+static int xmalloc_peak;
+int xmalloc_current;
+static int xmalloc_peak_blocks;
+int xmalloc_current_blocks;
+static int xmalloc_fail_after;
+
+#define TABLE_BITS 8
+#define TABLE_MASK ((1 << TABLE_BITS) - 1)
+#define TABLE_SIZE (1 << TABLE_BITS)
+
+static hashTable *
+hash_table_new(void)
+{
+ hashTable *tbl;
+
+ tbl = malloc(sizeof(*tbl));
+
+ if (tbl != NULL)
+ {
+ tbl->table = calloc(TABLE_SIZE, sizeof(*tbl->table));
+
+ if (tbl->table == NULL)
+ {
+ free(tbl);
+ return NULL;
+ }
+ }
+
+ return tbl;
+}
+
+static int
+hash_void_ptr(void *ptr)
+{
+ int hash;
+ int i;
+
+ /* I took this hash function just off the top of my head, I have
+ no idea whether it is bad or very bad. */
+ hash = 0;
+ for (i = 0; i < (int)sizeof(ptr)*8 / TABLE_BITS; i++)
+ {
+ hash ^= (unsigned long)ptr >> i*8;
+ hash += i * 17;
+ hash &= TABLE_MASK;
+ }
+ return hash;
+}
+
+static void
+hash_table_add(hashTable *tbl, void *ptr, int bytes,
+ const char *file, int line, const char *func)
+{
+ int i;
+ hashTableItem *item, *new;
+
+ i = hash_void_ptr(ptr);
+
+ item = tbl->table[i];
+ if (item != NULL)
+ while (item->next != NULL)
+ item = item->next;
+
+ new = malloc(sizeof(*new));
+ assert(new != NULL);
+ new->ptr = ptr;
+ new->bytes = bytes;
+ new->file = file;
+ new->line = line;
+ new->func = func;
+ new->next = NULL;
+ if (item != NULL)
+ item->next = new;
+ else
+ tbl->table[i] = new;
+
+ xmalloc_current += bytes;
+ if (xmalloc_current > xmalloc_peak)
+ xmalloc_peak = xmalloc_current;
+ xmalloc_current_blocks++;
+ if (xmalloc_current_blocks > xmalloc_peak_blocks)
+ xmalloc_peak_blocks = xmalloc_current_blocks;
+}
+
+static void
+hash_table_del(hashTable *tbl, void *ptr)
+{
+ int i;
+ hashTableItem *item, *prev;
+
+ i = hash_void_ptr(ptr);
+
+ item = tbl->table[i];
+ if (item == NULL)
+ {
+ printf("xfree: invalid ptr %p\n", ptr);
+ abort();
+ }
+ prev = NULL;
+ while (item->ptr != ptr)
+ {
+ prev = item;
+ item = item->next;
+ }
+ if (item->ptr != ptr)
+ {
+ printf("xfree: invalid ptr %p\n", ptr);
+ abort();
+ }
+
+ xmalloc_current -= item->bytes;
+ xmalloc_current_blocks--;
+
+ if (prev != NULL)
+ {
+ prev->next = item->next;
+ free(item);
+ }
+ else
+ {
+ tbl->table[i] = item->next;
+ free(item);
+ }
+}
+
+static hashTable *xmalloc_table = NULL;
+
+static void
+xmalloc_init(void)
+{
+ if (xmalloc_table == NULL)
+ {
+ xmalloc_table = hash_table_new();
+ xmalloc_peak = 0;
+ xmalloc_peak_blocks = 0;
+ xmalloc_current = 0;
+ xmalloc_current_blocks = 0;
+ xmalloc_fail_after = -1;
+ }
+ assert(xmalloc_table != NULL);
+ assert(xmalloc_table->table != NULL);
+}
+
+
+
+/*
+ Public API.
+*/
+
+void
+xmalloc_configure(int fail_after)
+{
+ xmalloc_init();
+ xmalloc_fail_after = fail_after;
+}
+
+int
+xmalloc_dump_leaks(void)
+{
+ int i;
+ int num_leaks = 0;
+ int leaked_bytes = 0;
+ hashTableItem *item;
+
+ xmalloc_init();
+
+ for (i = 0; i < TABLE_SIZE; i++)
+ {
+ item = xmalloc_table->table[i];
+ while (item != NULL)
+ {
+ printf("%s:%d: %s: %d bytes at %p not freed\n",
+ item->file, item->line, item->func, item->bytes, item->ptr);
+ num_leaks++;
+ leaked_bytes += item->bytes;
+ item = item->next;
+ }
+ }
+ if (num_leaks == 0)
+ printf("No memory leaks.\n");
+ else
+ printf("%d unfreed memory chuncks, total %d unfreed bytes.\n",
+ num_leaks, leaked_bytes);
+ printf("Peak memory consumption %d bytes (%.1f kB, %.1f MB) in %d blocks ",
+ xmalloc_peak, (double)xmalloc_peak / 1024,
+ (double)xmalloc_peak / (1024*1024), xmalloc_peak_blocks);
+ printf("(average ");
+ if (xmalloc_peak_blocks)
+ printf("%d", ((xmalloc_peak + xmalloc_peak_blocks / 2)
+ / xmalloc_peak_blocks));
+ else
+ printf("N/A");
+ printf(" bytes per block).\n");
+
+ return num_leaks;
+}
+
+void *
+xmalloc_impl(size_t size, const char *file, int line, const char *func)
+{
+ void *ptr;
+
+ xmalloc_init();
+ assert(size > 0);
+
+ if (xmalloc_fail_after == 0)
+ {
+ xmalloc_fail_after = -2;
+#if 0
+ printf("xmalloc: forced failure %s:%d: %s\n", file, line, func);
+#endif
+ return NULL;
+ }
+ else if (xmalloc_fail_after == -2)
+ {
+ printf("xmalloc: called after failure from %s:%d: %s\n",
+ file, line, func);
+ assert(0);
+ }
+ else if (xmalloc_fail_after > 0)
+ xmalloc_fail_after--;
+
+ ptr = malloc(size);
+ if (ptr != NULL)
+ hash_table_add(xmalloc_table, ptr, (int)size, file, line, func);
+ return ptr;
+}
+
+void *
+xcalloc_impl(size_t nmemb, size_t size, const char *file, int line,
+ const char *func)
+{
+ void *ptr;
+
+ xmalloc_init();
+ assert(size > 0);
+
+ if (xmalloc_fail_after == 0)
+ {
+ xmalloc_fail_after = -2;
+#if 0
+ printf("xcalloc: forced failure %s:%d: %s\n", file, line, func);
+#endif
+ return NULL;
+ }
+ else if (xmalloc_fail_after == -2)
+ {
+ printf("xcalloc: called after failure from %s:%d: %s\n",
+ file, line, func);
+ assert(0);
+ }
+ else if (xmalloc_fail_after > 0)
+ xmalloc_fail_after--;
+
+ ptr = calloc(nmemb, size);
+ if (ptr != NULL)
+ hash_table_add(xmalloc_table, ptr, (int)(nmemb * size), file, line, func);
+ return ptr;
+}
+
+void
+xfree_impl(void *ptr, const char *file, int line, const char *func)
+{
+ /*LINTED*/(void)&file;
+ /*LINTED*/(void)&line;
+ /*LINTED*/(void)&func;
+ xmalloc_init();
+
+ if (ptr != NULL)
+ hash_table_del(xmalloc_table, ptr);
+ free(ptr);
+}
+
+void *
+xrealloc_impl(void *ptr, size_t new_size, const char *file, int line,
+ const char *func)
+{
+ void *new_ptr;
+
+ xmalloc_init();
+ assert(ptr != NULL);
+ assert(new_size > 0);
+
+ if (xmalloc_fail_after == 0)
+ {
+ xmalloc_fail_after = -2;
+ return NULL;
+ }
+ else if (xmalloc_fail_after == -2)
+ {
+ printf("xrealloc: called after failure from %s:%d: %s\n",
+ file, line, func);
+ assert(0);
+ }
+ else if (xmalloc_fail_after > 0)
+ xmalloc_fail_after--;
+
+ new_ptr = realloc(ptr, new_size);
+ if (new_ptr != NULL)
+ {
+ hash_table_del(xmalloc_table, ptr);
+ hash_table_add(xmalloc_table, new_ptr, (int)new_size, file, line, func);
+ }
+ return new_ptr;
+}
+
+
+
+/* EOF */
diff --git a/lib/xmalloc.h b/lib/xmalloc.h
new file mode 100644
index 0000000000000..ce310af5204e6
--- /dev/null
+++ b/lib/xmalloc.h
@@ -0,0 +1,77 @@
+/*
+ xmalloc.h - Simple malloc debugging library API
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef _XMALLOC_H
+#define _XMALLOC_H 1
+
+void *xmalloc_impl(size_t size, const char *file, int line, const char *func);
+void *xcalloc_impl(size_t nmemb, size_t size, const char *file, int line,
+ const char *func);
+void xfree_impl(void *ptr, const char *file, int line, const char *func);
+void *xrealloc_impl(void *ptr, size_t new_size, const char *file, int line,
+ const char *func);
+int xmalloc_dump_leaks(void);
+void xmalloc_configure(int fail_after);
+
+
+#ifndef XMALLOC_INTERNAL
+#ifdef MALLOC_DEBUGGING
+
+/* Version 2.4 and later of GCC define a magical variable `__PRETTY_FUNCTION__'
+ which contains the name of the function currently being defined.
+# define __XMALLOC_FUNCTION __PRETTY_FUNCTION__
+ This is broken in G++ before version 2.6.
+ C9x has a similar variable called __func__, but prefer the GCC one since
+ it demangles C++ function names. */
+# ifdef __GNUC__
+# if __GNUC__ > 2 || (__GNUC__ == 2 \
+ && __GNUC_MINOR__ >= (defined __cplusplus ? 6 : 4))
+# define __XMALLOC_FUNCTION __PRETTY_FUNCTION__
+# else
+# define __XMALLOC_FUNCTION ((const char *) 0)
+# endif
+# else
+# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
+# define __XMALLOC_FUNCTION __func__
+# else
+# define __XMALLOC_FUNCTION ((const char *) 0)
+# endif
+# endif
+
+#define xmalloc(size) xmalloc_impl(size, __FILE__, __LINE__, \
+ __XMALLOC_FUNCTION)
+#define xcalloc(nmemb, size) xcalloc_impl(nmemb, size, __FILE__, __LINE__, \
+ __XMALLOC_FUNCTION)
+#define xfree(ptr) xfree_impl(ptr, __FILE__, __LINE__, __XMALLOC_FUNCTION)
+#define xrealloc(ptr, new_size) xrealloc_impl(ptr, new_size, __FILE__, \
+ __LINE__, __XMALLOC_FUNCTION)
+#undef malloc
+#undef calloc
+#undef free
+#undef realloc
+
+#define malloc USE_XMALLOC_INSTEAD_OF_MALLOC
+#define calloc USE_XCALLOC_INSTEAD_OF_CALLOC
+#define free USE_XFREE_INSTEAD_OF_FREE
+#define realloc USE_XREALLOC_INSTEAD_OF_REALLOC
+
+#else /* !MALLOC_DEBUGGING */
+
+#include <stdlib.h>
+
+#define xmalloc(size) malloc(size)
+#define xcalloc(nmemb, size) calloc(nmemb, size)
+#define xfree(ptr) free(ptr)
+#define xrealloc(ptr, new_size) realloc(ptr, new_size)
+
+#endif /* !MALLOC_DEBUGGING */
+#endif /* !XMALLOC_INTERNAL */
+
+#endif /* _XMALLOC_H */
+
+/* EOF */