GNU Regex 0.12 - src - FreeBSD source tree

diff options


context:
space:
mode:

author	J.T. Conklin <jtc@FreeBSD.org>	1993-07-30 20:16:53 +0000
committer	J.T. Conklin <jtc@FreeBSD.org>	1993-07-30 20:16:53 +0000
commit	6eefa612a977ece8f5e118392b27413260cb6996 (patch)
tree	e01cf2a5cc6062467dbb628a7beef06eaa39845d /gnu/lib/libregex/doc
parent	ff5c17099b72eefef1c4274236e4c3e1f344437f (diff)

Notes

Diffstat (limited to 'gnu/lib/libregex/doc')

-rw-r--r--

gnu/lib/libregex/doc/Makefile

-rw-r--r--

gnu/lib/libregex/doc/Makefile.in

-rw-r--r--

gnu/lib/libregex/doc/include.awk

-rw-r--r--

gnu/lib/libregex/doc/regex.aux

136

-rw-r--r--

gnu/lib/libregex/doc/regex.cps

152

-rw-r--r--

gnu/lib/libregex/doc/regex.info

2836

-rw-r--r--

gnu/lib/libregex/doc/regex.texi

3138

-rw-r--r--

gnu/lib/libregex/doc/xregex.texi

3021

8 files changed, 9487 insertions, 0 deletions

diff --git a/gnu/lib/libregex/doc/Makefile b/gnu/lib/libregex/doc/Makefile
new file mode 100644
index 000000000000..13753ae8ff1d
--- /dev/null
+++ b/gnu/lib/libregex/doc/Makefile

@@ -0,0 +1,93 @@

+# Generated automatically from Makefile.in by configure.

+# Makefile for regex documentation.

+# This program is free software; you can redistribute it and/or modify

+# it under the terms of the GNU General Public License as published by

+# the Free Software Foundation; either version 2, or (at your option)

+# any later version.

+# This program is distributed in the hope that it will be useful,

+# but WITHOUT ANY WARRANTY; without even the implied warranty of

+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+# GNU General Public License for more details.

+# You should have received a copy of the GNU General Public License

+# along with this program; if not, write to the Free Software

+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

+# Installation directories.

+prefix = /usr

+infodir = $(prefix)/info

+srcdir = .

+VPATH = .:../.

+INSTALL = /usr/bin/install -c

+INSTALL_DATA = $(INSTALL) -m 644

+MAKEINFO = makeinfo --no-split

+SHELL = /bin/sh

+TEX = tex

+TEXINDEX = texindex

+default all: regex.info regex.dvi

+.PHONY: default all

+# We need to include some code from regex.h.

+regex.texi: xregex.texi

+ rm -f $@

+ gawk -f include.awk -vsource=../$(srcdir)/regex.h \

+ <../$(srcdir)/doc/xregex.texi \

+ | expand >$@

+ chmod a-w $@

+regex.dvi: regex.cps

+ $(TEX) regex.texi

+regex.cps: regex.cp

+ $(TEXINDEX) regex.??

+regex.cp: regex.texi

+ $(TEX) ../$(srcdir)/doc/regex.texi

+regex.info: regex.texi

+ $(MAKEINFO) ../$(srcdir)/doc/regex.texi

+# I know of no way to make a good TAGS file from Texinfo source.

+TAGS:

+check:

+.PHONY: check

+install: regex.info

+ -mkdir $(prefix) $(infodir)

+ for i in *.info*; do $(INSTALL_DATA) $$i $(infodir)/$$i; done

+.PHONY: install

+clean mostlyclean:

+ rm -f regex.?? *.dvi *.log *.toc

+distclean: clean

+ rm -f Makefile

+ for f in regex.??s; do if test -z "`cat $$f`"; then rm -f $$f; fi; done

+realclean: distclean

+ rm -f *.info* regex.??? regex.texi TAGS

+extraclean: distclean

+ rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out

+.PHONY: mostlyclean clean distclean realclean extraclean

+Makefile: Makefile.in ../config.status

+ (cd ..; sh config.status)

+# Prevent GNU make 3 from overflowing arg limit on system V.

+.NOEXPORT:

+# Assumes $(distdir) is the place to put our files.

+distfiles = Makefile.in *.texi texinfo.tex include.awk \

+ regex.info* regex.aux regex.cps

+dist: Makefile regex.info regex.cps

+ mkdir $(distdir)

+ ln $(distfiles) $(distdir)

+.PHONY: dist

diff --git a/gnu/lib/libregex/doc/Makefile.in b/gnu/lib/libregex/doc/Makefile.in
new file mode 100644
index 000000000000..2f5d382c06e5
--- /dev/null
+++ b/gnu/lib/libregex/doc/Makefile.in

@@ -0,0 +1,92 @@

+# Makefile for regex documentation.

+# This program is free software; you can redistribute it and/or modify

+# it under the terms of the GNU General Public License as published by

+# the Free Software Foundation; either version 2, or (at your option)

+# any later version.

+# This program is distributed in the hope that it will be useful,

+# but WITHOUT ANY WARRANTY; without even the implied warranty of

+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+# GNU General Public License for more details.

+# You should have received a copy of the GNU General Public License

+# along with this program; if not, write to the Free Software

+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

+# Installation directories.

+prefix = /usr/local

+infodir = $(prefix)/info

+srcdir = @srcdir@

+VPATH = @srcdir@:../@srcdir@

+INSTALL = @INSTALL@

+INSTALL_DATA = @INSTALL_DATA@

+MAKEINFO = makeinfo --no-split

+SHELL = /bin/sh

+TEX = tex

+TEXINDEX = texindex

+default all: regex.info regex.dvi

+.PHONY: default all

+# We need to include some code from regex.h.

+regex.texi: xregex.texi

+ rm -f $@

+ gawk -f include.awk -vsource=../$(srcdir)/regex.h \

+ <../$(srcdir)/doc/xregex.texi \

+ | expand >$@

+ chmod a-w $@

+regex.dvi: regex.cps

+ $(TEX) regex.texi

+regex.cps: regex.cp

+ $(TEXINDEX) regex.??

+regex.cp: regex.texi

+ $(TEX) ../$(srcdir)/doc/regex.texi

+regex.info: regex.texi

+ $(MAKEINFO) ../$(srcdir)/doc/regex.texi

+# I know of no way to make a good TAGS file from Texinfo source.

+TAGS:

+check:

+.PHONY: check

+install: regex.info

+ -mkdir $(prefix) $(infodir)

+ for i in *.info*; do $(INSTALL_DATA) $$i $(infodir)/$$i; done

+.PHONY: install

+clean mostlyclean:

+ rm -f regex.?? *.dvi *.log *.toc

+distclean: clean

+ rm -f Makefile

+ for f in regex.??s; do if test -z "`cat $$f`"; then rm -f $$f; fi; done

+realclean: distclean

+ rm -f *.info* regex.??? regex.texi TAGS

+extraclean: distclean

+ rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out

+.PHONY: mostlyclean clean distclean realclean extraclean

+Makefile: Makefile.in ../config.status

+ (cd ..; sh config.status)

+# Prevent GNU make 3 from overflowing arg limit on system V.

+.NOEXPORT:

+# Assumes $(distdir) is the place to put our files.

+distfiles = Makefile.in *.texi texinfo.tex include.awk \

+ regex.info* regex.aux regex.cps

+dist: Makefile regex.info regex.cps

+ mkdir $(distdir)

+ ln $(distfiles) $(distdir)

+.PHONY: dist

diff --git a/gnu/lib/libregex/doc/include.awk b/gnu/lib/libregex/doc/include.awk
new file mode 100644
index 000000000000..a1df3f84634f
--- /dev/null
+++ b/gnu/lib/libregex/doc/include.awk

@@ -0,0 +1,19 @@

+# Assume `source' is set with -vsource=filename on the command line.

+/^\[\[\[/ { inclusion = $2; # name of the thing to include.

+ printing = 0;

+ while ((getline line < source) > 0)

+ {

+ if (match (line, "\\[\\[\\[end " inclusion "\\]\\]\\]"))

+ printing = 0;

+ if (printing)

+ print line;

+ if (match (line,"\\[\\[\\[begin " inclusion "\\]\\]\\]"))

+ printing = 1;

+ }

+ close (source);

+ next;

+ }

+ { print }

diff --git a/gnu/lib/libregex/doc/regex.aux b/gnu/lib/libregex/doc/regex.aux
new file mode 100644
index 000000000000..fd6a245eb111
--- /dev/null
+++ b/gnu/lib/libregex/doc/regex.aux

@@ -0,0 +1,136 @@

+'xrdef {Overview-pg}{1}

+'xrdef {Overview-snt}{Chapter'tie1}

+'xrdef {Regular Expression Syntax-pg}{2}

+'xrdef {Regular Expression Syntax-snt}{Chapter'tie2}

+'xrdef {Syntax Bits-pg}{2}

+'xrdef {Syntax Bits-snt}{Section'tie2.1}

+'xrdef {Predefined Syntaxes-pg}{5}

+'xrdef {Predefined Syntaxes-snt}{Section'tie2.2}

+'xrdef {Collating Elements vs. Characters-pg}{6}

+'xrdef {Collating Elements vs. Characters-snt}{Section'tie2.3}

+'xrdef {The Backslash Character-pg}{7}

+'xrdef {The Backslash Character-snt}{Section'tie2.4}

+'xrdef {Common Operators-pg}{9}

+'xrdef {Common Operators-snt}{Chapter'tie3}

+'xrdef {Match-self Operator-pg}{9}

+'xrdef {Match-self Operator-snt}{Section'tie3.1}

+'xrdef {Match-any-character Operator-pg}{9}

+'xrdef {Match-any-character Operator-snt}{Section'tie3.2}

+'xrdef {Concatenation Operator-pg}{10}

+'xrdef {Concatenation Operator-snt}{Section'tie3.3}

+'xrdef {Repetition Operators-pg}{10}

+'xrdef {Repetition Operators-snt}{Section'tie3.4}

+'xrdef {Match-zero-or-more Operator-pg}{10}

+'xrdef {Match-zero-or-more Operator-snt}{Section'tie3.4.1}

+'xrdef {Match-one-or-more Operator-pg}{11}

+'xrdef {Match-one-or-more Operator-snt}{Section'tie3.4.2}

+'xrdef {Match-zero-or-one Operator-pg}{11}

+'xrdef {Match-zero-or-one Operator-snt}{Section'tie3.4.3}

+'xrdef {Interval Operators-pg}{12}

+'xrdef {Interval Operators-snt}{Section'tie3.4.4}

+'xrdef {Alternation Operator-pg}{13}

+'xrdef {Alternation Operator-snt}{Section'tie3.5}

+'xrdef {List Operators-pg}{13}

+'xrdef {List Operators-snt}{Section'tie3.6}

+'xrdef {Character Class Operators-pg}{14}

+'xrdef {Character Class Operators-snt}{Section'tie3.6.1}

+'xrdef {Range Operator-pg}{15}

+'xrdef {Range Operator-snt}{Section'tie3.6.2}

+'xrdef {Grouping Operators-pg}{16}

+'xrdef {Grouping Operators-snt}{Section'tie3.7}

+'xrdef {Back-reference Operator-pg}{17}

+'xrdef {Back-reference Operator-snt}{Section'tie3.8}

+'xrdef {Anchoring Operators-pg}{18}

+'xrdef {Anchoring Operators-snt}{Section'tie3.9}

+'xrdef {Match-beginning-of-line Operator-pg}{18}

+'xrdef {Match-beginning-of-line Operator-snt}{Section'tie3.9.1}

+'xrdef {Match-end-of-line Operator-pg}{18}

+'xrdef {Match-end-of-line Operator-snt}{Section'tie3.9.2}

+'xrdef {GNU Operators-pg}{20}

+'xrdef {GNU Operators-snt}{Chapter'tie4}

+'xrdef {Word Operators-pg}{20}

+'xrdef {Word Operators-snt}{Section'tie4.1}

+'xrdef {Non-Emacs Syntax Tables-pg}{20}

+'xrdef {Non-Emacs Syntax Tables-snt}{Section'tie4.1.1}

+'xrdef {Match-word-boundary Operator-pg}{20}

+'xrdef {Match-word-boundary Operator-snt}{Section'tie4.1.2}

+'xrdef {Match-within-word Operator-pg}{20}

+'xrdef {Match-within-word Operator-snt}{Section'tie4.1.3}

+'xrdef {Match-beginning-of-word Operator-pg}{21}

+'xrdef {Match-beginning-of-word Operator-snt}{Section'tie4.1.4}

+'xrdef {Match-end-of-word Operator-pg}{21}

+'xrdef {Match-end-of-word Operator-snt}{Section'tie4.1.5}

+'xrdef {Match-word-constituent Operator-pg}{21}

+'xrdef {Match-word-constituent Operator-snt}{Section'tie4.1.6}

+'xrdef {Match-non-word-constituent Operator-pg}{21}

+'xrdef {Match-non-word-constituent Operator-snt}{Section'tie4.1.7}

+'xrdef {Buffer Operators-pg}{21}

+'xrdef {Buffer Operators-snt}{Section'tie4.2}

+'xrdef {Match-beginning-of-buffer Operator-pg}{21}

+'xrdef {Match-beginning-of-buffer Operator-snt}{Section'tie4.2.1}

+'xrdef {Match-end-of-buffer Operator-pg}{21}

+'xrdef {Match-end-of-buffer Operator-snt}{Section'tie4.2.2}

+'xrdef {GNU Emacs Operators-pg}{22}

+'xrdef {GNU Emacs Operators-snt}{Chapter'tie5}

+'xrdef {Syntactic Class Operators-pg}{22}

+'xrdef {Syntactic Class Operators-snt}{Section'tie5.1}

+'xrdef {Emacs Syntax Tables-pg}{22}

+'xrdef {Emacs Syntax Tables-snt}{Section'tie5.1.1}

+'xrdef {Match-syntactic-class Operator-pg}{22}

+'xrdef {Match-syntactic-class Operator-snt}{Section'tie5.1.2}

+'xrdef {Match-not-syntactic-class Operator-pg}{22}

+'xrdef {Match-not-syntactic-class Operator-snt}{Section'tie5.1.3}

+'xrdef {What Gets Matched?-pg}{23}

+'xrdef {What Gets Matched?-snt}{Chapter'tie6}

+'xrdef {Programming with Regex-pg}{24}

+'xrdef {Programming with Regex-snt}{Chapter'tie7}

+'xrdef {GNU Regex Functions-pg}{24}

+'xrdef {GNU Regex Functions-snt}{Section'tie7.1}

+'xrdef {GNU Pattern Buffers-pg}{24}

+'xrdef {GNU Pattern Buffers-snt}{Section'tie7.1.1}

+'xrdef {GNU Regular Expression Compiling-pg}{26}

+'xrdef {GNU Regular Expression Compiling-snt}{Section'tie7.1.2}

+'xrdef {GNU Matching-pg}{27}

+'xrdef {GNU Matching-snt}{Section'tie7.1.3}

+'xrdef {GNU Searching-pg}{28}

+'xrdef {GNU Searching-snt}{Section'tie7.1.4}

+'xrdef {Matching/Searching with Split Data-pg}{29}

+'xrdef {Matching/Searching with Split Data-snt}{Section'tie7.1.5}

+'xrdef {Searching with Fastmaps-pg}{30}

+'xrdef {Searching with Fastmaps-snt}{Section'tie7.1.6}

+'xrdef {GNU Translate Tables-pg}{31}

+'xrdef {GNU Translate Tables-snt}{Section'tie7.1.7}

+'xrdef {Using Registers-pg}{32}

+'xrdef {Using Registers-snt}{Section'tie7.1.8}

+'xrdef {Freeing GNU Pattern Buffers-pg}{34}

+'xrdef {Freeing GNU Pattern Buffers-snt}{Section'tie7.1.9}

+'xrdef {POSIX Regex Functions-pg}{35}

+'xrdef {POSIX Regex Functions-snt}{Section'tie7.2}

+'xrdef {POSIX Pattern Buffers-pg}{35}

+'xrdef {POSIX Pattern Buffers-snt}{Section'tie7.2.1}

+'xrdef {POSIX Regular Expression Compiling-pg}{35}

+'xrdef {POSIX Regular Expression Compiling-snt}{Section'tie7.2.2}

+'xrdef {POSIX Matching-pg}{37}

+'xrdef {POSIX Matching-snt}{Section'tie7.2.3}

+'xrdef {Reporting Errors-pg}{38}

+'xrdef {Reporting Errors-snt}{Section'tie7.2.4}

+'xrdef {Using Byte Offsets-pg}{39}

+'xrdef {Using Byte Offsets-snt}{Section'tie7.2.5}

+'xrdef {Freeing POSIX Pattern Buffers-pg}{39}

+'xrdef {Freeing POSIX Pattern Buffers-snt}{Section'tie7.2.6}

+'xrdef {BSD Regex Functions-pg}{40}

+'xrdef {BSD Regex Functions-snt}{Section'tie7.3}

+'xrdef {BSD Regular Expression Compiling-pg}{40}

+'xrdef {BSD Regular Expression Compiling-snt}{Section'tie7.3.1}

+'xrdef {BSD Searching-pg}{40}

+'xrdef {BSD Searching-snt}{Section'tie7.3.2}

+'xrdef {Copying-pg}{42}

+'xrdef {Copying-snt}{Appendix'tie'char65{}}

+'xrdef {Copying-pg}{42}

+'xrdef {Copying-snt}{}

+'xrdef {Copying-pg}{43}

+'xrdef {Copying-snt}{}

+'xrdef {Copying-pg}{48}

+'xrdef {Copying-snt}{}

+'xrdef {Index-pg}{50}

+'xrdef {Index-snt}{}

diff --git a/gnu/lib/libregex/doc/regex.cps b/gnu/lib/libregex/doc/regex.cps
new file mode 100644
index 000000000000..8b2e57c64e47
--- /dev/null
+++ b/gnu/lib/libregex/doc/regex.cps

@@ -0,0 +1,152 @@

+\initial {$}

+\entry {\code {$}}{18}

+\initial {(}

+\entry {\code {(}}{16}

+\initial {)}

+\entry {\code {)}}{16}

+\initial {*}

+\entry {\samp {*}}{10}

+\initial {-}

+\entry {\samp {-}}{13}

+\initial {.}

+\entry {\samp {.}}{9}

+\initial {:}

+\entry {\samp {:]} in regex}{14}

+\initial {?}

+\entry {\samp {?}}{11}

+\initial {[}

+\entry {\samp {[}}{13}

+\entry {\samp {[:} in regex}{14}

+\entry {\samp {[{\tt\hat}}}{13}

+\initial {]}

+\entry {\samp {]}}{13}

+\initial {{\tt\char'173}}

+\entry {\samp {{\tt\char'173}}}{12}

+\initial {{\tt\char'174}}

+\entry {\code {{\tt\char'174}}}{13}

+\initial {{\tt\char'175}}

+\entry {\samp {{\tt\char'175}}}{12}

+\initial {{\tt\char43}}

+\entry {\samp {{\tt\char43}}}{11}

+\initial {{\tt\hat}}

+\entry {\samp {{\tt\hat}}}{13}

+\entry {\code {{\tt\hat}}}{18}

+\initial {{\tt\indexbackslash }}

+\entry {{\tt\indexbackslash }}{7}

+\entry {\samp {{\tt\indexbackslash }}}{13}

+\entry {\samp {{\tt\indexbackslash }'}}{21}

+\entry {\code {{\tt\indexbackslash }(}}{16}

+\entry {\code {{\tt\indexbackslash })}}{16}

+\entry {\samp {{\tt\indexbackslash }`}}{21}

+\entry {\samp {{\tt\indexbackslash }{\tt\char'173}}}{12}

+\entry {\code {{\tt\indexbackslash }{\tt\char'174}}}{13}

+\entry {\samp {{\tt\indexbackslash }{\tt\char'175}}}{12}

+\entry {\samp {{\tt\indexbackslash }{\tt\gtr}}}{21}

+\entry {\samp {{\tt\indexbackslash }{\tt\less}}}{21}

+\entry {\samp {{\tt\indexbackslash }b}}{20}

+\entry {\samp {{\tt\indexbackslash }B}}{20}

+\entry {\samp {{\tt\indexbackslash }s}}{22}

+\entry {\samp {{\tt\indexbackslash }S}}{22}

+\entry {\samp {{\tt\indexbackslash }w}}{21}

+\entry {\samp {{\tt\indexbackslash }W}}{21}

+\initial {A}

+\entry {\code {allocated \r {initialization}}}{26}

+\entry {alternation operator}{13}

+\entry {alternation operator and \samp {{\tt\hat}}}{18}

+\entry {anchoring}{18}

+\entry {anchors}{18}

+\entry {Awk}{5}

+\initial {B}

+\entry {back references}{17}

+\entry {backtracking}{10, 13}

+\entry {beginning-of-line operator}{18}

+\entry {bracket expression}{13}

+\entry {\code {buffer \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}

+\entry {\code {buffer \r {initialization}}}{26}

+\initial {C}

+\entry {character classes}{14}

+\initial {E}

+\entry {Egrep}{5}

+\entry {Emacs}{5}

+\entry {end-of-line operator}{18}

+\entry {\code {end\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}

+\initial {F}

+\entry {\code {fastmap \r {initialization}}}{26}

+\entry {\code {fastmap{\_}accurate \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}

+\entry {fastmaps}{30}

+\initial {G}

+\entry {Grep}{5}

+\entry {grouping}{16}

+\initial {I}

+\entry {ignoring case}{35}

+\entry {interval expression}{12}

+\initial {M}

+\entry {matching list}{13}

+\entry {matching newline}{13}

+\entry {matching with GNU functions}{27}

+\initial {N}

+\entry {\code {newline{\_}anchor \r {field in pattern buffer}}}{18}

+\entry {nonmatching list}{13}

+\entry {\code {not{\_}bol \r {field in pattern buffer}}}{18}

+\entry {\code {num_regs\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}

+\initial {O}

+\entry {open-group operator and \samp {{\tt\hat}}}{18}

+\entry {or operator}{13}

+\initial {P}

+\entry {parenthesizing}{16}

+\entry {pattern buffer initialization}{26}

+\entry {pattern buffer, definition of}{24}

+\entry {POSIX Awk}{5}

+\initial {R}

+\entry {\code {range \r {argument to \code {re{\_}search}}}}{28}

+\entry {\code {re_registers}}{32}

+\entry {\code {RE{\_}BACKSLASH{\_}ESCAPE{\_}IN{\_}LIST}}{3}

+\entry {\code {RE{\_}BK{\_}PLUS{\_}QM}}{3}

+\entry {\code {RE{\_}CHAR{\_}CLASSES}}{3}

+\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS}}{3}

+\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS \r {(and \samp {{\tt\hat}})}}}{18}

+\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}OPS}}{3}

+\entry {\code {RE{\_}CONTEXT{\_}INVALID{\_}OPS}}{3}

+\entry {\code {RE{\_}DOT{\_}NEWLINE}}{3}

+\entry {\code {RE{\_}DOT{\_}NOT{\_}NULL}}{4}

+\entry {\code {RE{\_}INTERVALS}}{4}

+\entry {\code {RE{\_}LIMITED{\_}OPS}}{4}

+\entry {\code {RE{\_}NEWLINE{\_}ALT}}{4}

+\entry {\code {RE{\_}NO{\_}BK{\_}BRACES}}{4}

+\entry {\code {RE{\_}NO{\_}BK{\_}PARENS}}{4}

+\entry {\code {RE{\_}NO{\_}BK{\_}REFS}}{4}

+\entry {\code {RE{\_}NO{\_}BK{\_}VBAR}}{4}

+\entry {\code {RE{\_}NO{\_}EMPTY{\_}RANGES}}{4}

+\entry {\code {re{\_}nsub \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}

+\entry {\code {re{\_}pattern{\_}buffer \r {definition}}}{24}

+\entry {\code {re{\_}syntax{\_}options \r {initialization}}}{26}

+\entry {\code {RE{\_}UNMATCHED{\_}RIGHT{\_}PAREN{\_}ORD}}{4}

+\entry {\code {REG{\_}EXTENDED}}{35}

+\entry {\code {REG{\_}ICASE}}{35}

+\entry {\code {REG{\_}NEWLINE}}{36}

+\entry {\code {REG{\_}NOSUB}}{35}

+\entry {\code {regex.c}}{1}

+\entry {\code {regex.h}}{1}

+\entry {regexp anchoring}{18}

+\entry {\code {regmatch{\_}t}}{39}

+\entry {\code {regs{\_}allocated}}{32}

+\entry {\code {REGS{\_}FIXED}}{33}

+\entry {\code {REGS{\_}REALLOCATE}}{32}

+\entry {\code {REGS{\_}UNALLOCATED}}{32}

+\entry {regular expressions, syntax of}{2}

+\initial {S}

+\entry {searching with GNU functions}{28}

+\entry {\code {start \r {argument to \code {re{\_}search}}}}{28}

+\entry {\code {start\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}

+\entry {\code {struct re{\_}pattern{\_}buffer \r {definition}}}{24}

+\entry {subexpressions}{16}

+\entry {syntax bits}{2}

+\entry {\code {syntax \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}

+\entry {syntax initialization}{26}

+\entry {syntax of regular expressions}{2}

+\initial {T}

+\entry {\code {translate \r {initialization}}}{26}

+\initial {U}

+\entry {\code {used \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}

+\initial {W}

+\entry {word boundaries, matching}{20}

diff --git a/gnu/lib/libregex/doc/regex.info b/gnu/lib/libregex/doc/regex.info
new file mode 100644
index 000000000000..90deedeaf44f
--- /dev/null
+++ b/gnu/lib/libregex/doc/regex.info

@@ -0,0 +1,2836 @@

+This is Info file regex.info, produced by Makeinfo-1.52 from the input

+file .././doc/regex.texi.

+ This file documents the GNU regular expression library.

+ Permission is granted to make and distribute verbatim copies of this

+manual provided the copyright notice and this permission notice are

+preserved on all copies.

+ Permission is granted to copy and distribute modified versions of this

+manual under the conditions for verbatim copying, provided also that the

+section entitled "GNU General Public License" is included exactly as in

+the original, and provided that the entire resulting derived work is

+distributed under the terms of a permission notice identical to this

+one.

+ Permission is granted to copy and distribute translations of this

+manual into another language, under the above conditions for modified

+versions, except that the section entitled "GNU General Public License"

+may be included in a translation approved by the Free Software

+Foundation instead of in the original English.

+File: regex.info, Node: Top, Next: Overview, Prev: (dir), Up: (dir)

+Regular Expression Library

+**************************

+ This manual documents how to program with the GNU regular expression

+library. This is edition 0.12a of the manual, 19 September 1992.

+ The first part of this master menu lists the major nodes in this Info

+document, including the index. The rest of the menu lists all the

+lower level nodes in the document.

+* Menu:

+* Overview::

+* Regular Expression Syntax::

+* Common Operators::

+* GNU Operators::

+* GNU Emacs Operators::

+* What Gets Matched?::

+* Programming with Regex::

+* Copying:: Copying and sharing Regex.

+* Index:: General index.

+ -- The Detailed Node Listing --

+Regular Expression Syntax

+* Syntax Bits::

+* Predefined Syntaxes::

+* Collating Elements vs. Characters::

+* The Backslash Character::

+Common Operators

+* Match-self Operator:: Ordinary characters.

+* Match-any-character Operator:: .

+* Concatenation Operator:: Juxtaposition.

+* Repetition Operators:: * + ? {}

+* Alternation Operator:: |

+* List Operators:: [...] [^...]

+* Grouping Operators:: (...)

+* Back-reference Operator:: \digit

+* Anchoring Operators:: ^ $

+Repetition Operators

+* Match-zero-or-more Operator:: *

+* Match-one-or-more Operator:: +

+* Match-zero-or-one Operator:: ?

+* Interval Operators:: {}

+List Operators (`[' ... `]' and `[^' ... `]')

+* Character Class Operators:: [:class:]

+* Range Operator:: start-end

+Anchoring Operators

+* Match-beginning-of-line Operator:: ^

+* Match-end-of-line Operator:: $

+GNU Operators

+* Word Operators::

+* Buffer Operators::

+Word Operators

+* Non-Emacs Syntax Tables::

+* Match-word-boundary Operator:: \b

+* Match-within-word Operator:: \B

+* Match-beginning-of-word Operator:: \<

+* Match-end-of-word Operator:: \>

+* Match-word-constituent Operator:: \w

+* Match-non-word-constituent Operator:: \W

+Buffer Operators

+* Match-beginning-of-buffer Operator:: \`

+* Match-end-of-buffer Operator:: \'

+GNU Emacs Operators

+* Syntactic Class Operators::

+Syntactic Class Operators

+* Emacs Syntax Tables::

+* Match-syntactic-class Operator:: \sCLASS

+* Match-not-syntactic-class Operator:: \SCLASS

+Programming with Regex

+* GNU Regex Functions::

+* POSIX Regex Functions::

+* BSD Regex Functions::

+GNU Regex Functions

+* GNU Pattern Buffers:: The re_pattern_buffer type.

+* GNU Regular Expression Compiling:: re_compile_pattern ()

+* GNU Matching:: re_match ()

+* GNU Searching:: re_search ()

+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()

+* Searching with Fastmaps:: re_compile_fastmap ()

+* GNU Translate Tables:: The `translate' field.

+* Using Registers:: The re_registers type and related fns.

+* Freeing GNU Pattern Buffers:: regfree ()

+POSIX Regex Functions

+* POSIX Pattern Buffers:: The regex_t type.

+* POSIX Regular Expression Compiling:: regcomp ()

+* POSIX Matching:: regexec ()

+* Reporting Errors:: regerror ()

+* Using Byte Offsets:: The regmatch_t type.

+* Freeing POSIX Pattern Buffers:: regfree ()

+BSD Regex Functions

+* BSD Regular Expression Compiling:: re_comp ()

+* BSD Searching:: re_exec ()

+File: regex.info, Node: Overview, Next: Regular Expression Syntax, Prev: Top, Up: Top

+Overview

+********

+ A "regular expression" (or "regexp", or "pattern") is a text string

+that describes some (mathematical) set of strings. A regexp R

+"matches" a string S if S is in the set of strings described by R.

+ Using the Regex library, you can:

+ * see if a string matches a specified pattern as a whole, and

+ * search within a string for a substring matching a specified

+ pattern.

+ Some regular expressions match only one string, i.e., the set they

+describe has only one member. For example, the regular expression

+`foo' matches the string `foo' and no others. Other regular

+expressions match more than one string, i.e., the set they describe has

+more than one member. For example, the regular expression `f*' matches

+the set of strings made up of any number (including zero) of `f's. As

+you can see, some characters in regular expressions match themselves

+(such as `f') and some don't (such as `*'); the ones that don't match

+themselves instead let you specify patterns that describe many

+different strings.

+ To either match or search for a regular expression with the Regex

+library functions, you must first compile it with a Regex pattern

+compiling function. A "compiled pattern" is a regular expression

+converted to the internal format used by the library functions. Once

+you've compiled a pattern, you can use it for matching or searching any

+number of times.

+ The Regex library consists of two source files: `regex.h' and

+`regex.c'. Regex provides three groups of functions with which you can

+operate on regular expressions. One group--the GNU group--is more

+powerful but not completely compatible with the other two, namely the

+POSIX and Berkeley UNIX groups; its interface was designed specifically

+for GNU. The other groups have the same interfaces as do the regular

+expression functions in POSIX and Berkeley UNIX.

+ We wrote this chapter with programmers in mind, not users of

+programs--such as Emacs--that use Regex. We describe the Regex library

+in its entirety, not how to write regular expressions that a particular

+program understands.

+File: regex.info, Node: Regular Expression Syntax, Next: Common Operators, Prev: Overview, Up: Top

+Regular Expression Syntax

+*************************

+ "Characters" are things you can type. "Operators" are things in a

+regular expression that match one or more characters. You compose

+regular expressions from operators, which in turn you specify using one

+or more characters.

+ Most characters represent what we call the match-self operator, i.e.,

+they match themselves; we call these characters "ordinary". Other

+characters represent either all or parts of fancier operators; e.g.,

+`.' represents what we call the match-any-character operator (which, no

+surprise, matches (almost) any character); we call these characters

+"special". Two different things determine what characters represent

+what operators:

+ 1. the regular expression syntax your program has told the Regex

+ library to recognize, and

+ 2. the context of the character in the regular expression.

+ In the following sections, we describe these things in more detail.

+* Menu:

+* Syntax Bits::

+* Predefined Syntaxes::

+* Collating Elements vs. Characters::

+* The Backslash Character::

+File: regex.info, Node: Syntax Bits, Next: Predefined Syntaxes, Up: Regular Expression Syntax

+Syntax Bits

+===========

+ In any particular syntax for regular expressions, some characters are

+always special, others are sometimes special, and others are never

+special. The particular syntax that Regex recognizes for a given

+regular expression depends on the value in the `syntax' field of the

+pattern buffer of that regular expression.

+ You get a pattern buffer by compiling a regular expression. *Note

+GNU Pattern Buffers::, and *Note POSIX Pattern Buffers::, for more

+information on pattern buffers. *Note GNU Regular Expression

+Compiling::, *Note POSIX Regular Expression Compiling::, and *Note BSD

+Regular Expression Compiling::, for more information on compiling.

+ Regex considers the value of the `syntax' field to be a collection of

+bits; we refer to these bits as "syntax bits". In most cases, they

+affect what characters represent what operators. We describe the

+meanings of the operators to which we refer in *Note Common Operators::,

+*Note GNU Operators::, and *Note GNU Emacs Operators::.

+ For reference, here is the complete list of syntax bits, in

+alphabetical order:

+`RE_BACKSLASH_ESCAPE_IN_LISTS'

+ If this bit is set, then `\' inside a list (*note List Operators::.)

+ quotes (makes ordinary, if it's special) the following character;

+ if this bit isn't set, then `\' is an ordinary character inside

+ lists. (*Note The Backslash Character::, for what `\' does

+ outside of lists.)

+`RE_BK_PLUS_QM'

+ If this bit is set, then `\+' represents the match-one-or-more

+ operator and `\?' represents the match-zero-or-one operator; if

+ this bit isn't set, then `+' represents the match-one-or-more

+ operator and `?' represents the match-zero-or-one operator. This

+ bit is irrelevant if `RE_LIMITED_OPS' is set.

+`RE_CHAR_CLASSES'

+ If this bit is set, then you can use character classes in lists;

+ if this bit isn't set, then you can't.

+`RE_CONTEXT_INDEP_ANCHORS'

+ If this bit is set, then `^' and `$' are special anywhere outside

+ a list; if this bit isn't set, then these characters are special

+ only in certain contexts. *Note Match-beginning-of-line

+ Operator::, and *Note Match-end-of-line Operator::.

+`RE_CONTEXT_INDEP_OPS'

+ If this bit is set, then certain characters are special anywhere

+ outside a list; if this bit isn't set, then those characters are

+ special only in some contexts and are ordinary elsewhere.

+ Specifically, if this bit isn't set then `*', and (if the syntax

+ bit `RE_LIMITED_OPS' isn't set) `+' and `?' (or `\+' and `\?',

+ depending on the syntax bit `RE_BK_PLUS_QM') represent repetition

+ operators only if they're not first in a regular expression or

+ just after an open-group or alternation operator. The same holds

+ for `{' (or `\{', depending on the syntax bit `RE_NO_BK_BRACES') if

+ it is the beginning of a valid interval and the syntax bit

+ `RE_INTERVALS' is set.

+`RE_CONTEXT_INVALID_OPS'

+ If this bit is set, then repetition and alternation operators

+ can't be in certain positions within a regular expression.

+ Specifically, the regular expression is invalid if it has:

+ * a repetition operator first in the regular expression or just

+ after a match-beginning-of-line, open-group, or alternation

+ operator; or

+ * an alternation operator first or last in the regular

+ expression, just before a match-end-of-line operator, or just

+ after an alternation or open-group operator.

+ If this bit isn't set, then you can put the characters

+ representing the repetition and alternation characters anywhere in

+ a regular expression. Whether or not they will in fact be

+ operators in certain positions depends on other syntax bits.

+`RE_DOT_NEWLINE'

+ If this bit is set, then the match-any-character operator matches

+ a newline; if this bit isn't set, then it doesn't.

+`RE_DOT_NOT_NULL'

+ If this bit is set, then the match-any-character operator doesn't

+ match a null character; if this bit isn't set, then it does.

+`RE_INTERVALS'

+ If this bit is set, then Regex recognizes interval operators; if

+ this bit isn't set, then it doesn't.

+`RE_LIMITED_OPS'

+ If this bit is set, then Regex doesn't recognize the

+ match-one-or-more, match-zero-or-one or alternation operators; if

+ this bit isn't set, then it does.

+`RE_NEWLINE_ALT'

+ If this bit is set, then newline represents the alternation

+ operator; if this bit isn't set, then newline is ordinary.

+`RE_NO_BK_BRACES'

+ If this bit is set, then `{' represents the open-interval operator

+ and `}' represents the close-interval operator; if this bit isn't

+ set, then `\{' represents the open-interval operator and `\}'

+ represents the close-interval operator. This bit is relevant only

+ if `RE_INTERVALS' is set.

+`RE_NO_BK_PARENS'

+ If this bit is set, then `(' represents the open-group operator and

+ `)' represents the close-group operator; if this bit isn't set,

+ then `\(' represents the open-group operator and `\)' represents

+ the close-group operator.

+`RE_NO_BK_REFS'

+ If this bit is set, then Regex doesn't recognize `\'DIGIT as the

+ back reference operator; if this bit isn't set, then it does.

+`RE_NO_BK_VBAR'

+ If this bit is set, then `|' represents the alternation operator;

+ if this bit isn't set, then `\|' represents the alternation

+ operator. This bit is irrelevant if `RE_LIMITED_OPS' is set.

+`RE_NO_EMPTY_RANGES'

+ If this bit is set, then a regular expression with a range whose

+ ending point collates lower than its starting point is invalid; if

+ this bit isn't set, then Regex considers such a range to be empty.

+`RE_UNMATCHED_RIGHT_PAREN_ORD'

+ If this bit is set and the regular expression has no matching

+ open-group operator, then Regex considers what would otherwise be

+ a close-group operator (based on how `RE_NO_BK_PARENS' is set) to

+ match `)'.

+File: regex.info, Node: Predefined Syntaxes, Next: Collating Elements vs. Characters, Prev: Syntax Bits, Up: Regular Expression Syntax

+Predefined Syntaxes

+===================

+ If you're programming with Regex, you can set a pattern buffer's

+(*note GNU Pattern Buffers::., and *Note POSIX Pattern Buffers::)

+`syntax' field either to an arbitrary combination of syntax bits (*note

+Syntax Bits::.) or else to the configurations defined by Regex. These

+configurations define the syntaxes used by certain programs--GNU Emacs,

+POSIX Awk, traditional Awk, Grep, Egrep--in addition to syntaxes for

+POSIX basic and extended regular expressions.

+ The predefined syntaxes--taken directly from `regex.h'--are:

+ #define RE_SYNTAX_EMACS 0

+ #define RE_SYNTAX_AWK \

+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \

+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \

+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \

+ | RE_UNMATCHED_RIGHT_PAREN_ORD)

+ #define RE_SYNTAX_POSIX_AWK \

+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)

+ #define RE_SYNTAX_GREP \

+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \

+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \

+ | RE_NEWLINE_ALT)

+ #define RE_SYNTAX_EGREP \

+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \

+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \

+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \

+ | RE_NO_BK_VBAR)

+ #define RE_SYNTAX_POSIX_EGREP \

+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)

+ /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */

+ #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

+ #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

+ /* Syntax bits common to both basic and extended POSIX regex syntax. */

+ #define _RE_SYNTAX_POSIX_COMMON \

+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \

+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)

+ #define RE_SYNTAX_POSIX_BASIC \

+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

+ /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes

+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this

+ isn't minimal, since other operators, such as \`, aren't disabled. */

+ #define RE_SYNTAX_POSIX_MINIMAL_BASIC \

+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

+ #define RE_SYNTAX_POSIX_EXTENDED \

+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \

+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \

+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \

+ | RE_UNMATCHED_RIGHT_PAREN_ORD)

+ /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS

+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */

+ #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \

+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \

+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \

+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \

+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)

+File: regex.info, Node: Collating Elements vs. Characters, Next: The Backslash Character, Prev: Predefined Syntaxes, Up: Regular Expression Syntax

+Collating Elements vs. Characters

+=================================

+ POSIX generalizes the notion of a character to that of a collating

+element. It defines a "collating element" to be "a sequence of one or

+more bytes defined in the current collating sequence as a unit of

+collation."

+ This generalizes the notion of a character in two ways. First, a

+single character can map into two or more collating elements. For

+example, the German "es-zet" collates as the collating element `s'

+followed by another collating element `s'. Second, two or more

+characters can map into one collating element. For example, the

+Spanish `ll' collates after `l' and before `m'.

+ Since POSIX's "collating element" preserves the essential idea of a

+"character," we use the latter, more familiar, term in this document.

+File: regex.info, Node: The Backslash Character, Prev: Collating Elements vs. Characters, Up: Regular Expression Syntax

+The Backslash Character

+=======================

+ The `\' character has one of four different meanings, depending on

+the context in which you use it and what syntax bits are set (*note

+Syntax Bits::.). It can: 1) stand for itself, 2) quote the next

+character, 3) introduce an operator, or 4) do nothing.

+ 1. It stands for itself inside a list (*note List Operators::.) if

+ the syntax bit `RE_BACKSLASH_ESCAPE_IN_LISTS' is not set. For

+ example, `[\]' would match `\'.

+ 2. It quotes (makes ordinary, if it's special) the next character

+ when you use it either:

+ * outside a list,(1) or

+ * inside a list and the syntax bit

+ `RE_BACKSLASH_ESCAPE_IN_LISTS' is set.

+ 3. It introduces an operator when followed by certain ordinary

+ characters--sometimes only when certain syntax bits are set. See

+ the cases `RE_BK_PLUS_QM', `RE_NO_BK_BRACES', `RE_NO_BK_VBAR',

+ `RE_NO_BK_PARENS', `RE_NO_BK_REFS' in *Note Syntax Bits::. Also:

+ * `\b' represents the match-word-boundary operator (*note

+ Match-word-boundary Operator::.).

+ * `\B' represents the match-within-word operator (*note

+ Match-within-word Operator::.).

+ * `\<' represents the match-beginning-of-word operator

+ (*note Match-beginning-of-word Operator::.).

+ * `\>' represents the match-end-of-word operator (*note

+ Match-end-of-word Operator::.).

+ * `\w' represents the match-word-constituent operator (*note

+ Match-word-constituent Operator::.).

+ * `\W' represents the match-non-word-constituent operator

+ (*note Match-non-word-constituent Operator::.).

+ * `\`' represents the match-beginning-of-buffer operator and

+ `\'' represents the match-end-of-buffer operator (*note

+ Buffer Operators::.).

+ * If Regex was compiled with the C preprocessor symbol `emacs'

+ defined, then `\sCLASS' represents the match-syntactic-class

+ operator and `\SCLASS' represents the

+ match-not-syntactic-class operator (*note Syntactic Class

+ Operators::.).

+ 4. In all other cases, Regex ignores `\'. For example, `\n' matches

+ `n'.

+ ---------- Footnotes ----------

+ (1) Sometimes you don't have to explicitly quote special characters

+to make them ordinary. For instance, most characters lose any special

+meaning inside a list (*note List Operators::.). In addition, if the

+syntax bits `RE_CONTEXT_INVALID_OPS' and `RE_CONTEXT_INDEP_OPS' aren't

+set, then (for historical reasons) the matcher considers special

+characters ordinary if they are in contexts where the operations they

+represent make no sense; for example, then the match-zero-or-more

+operator (represented by `*') matches itself in the regular expression

+`*foo' because there is no preceding expression on which it can

+operate. It is poor practice, however, to depend on this behavior; if

+you want a special character to be ordinary outside a list, it's better

+to always quote it, regardless.

+File: regex.info, Node: Common Operators, Next: GNU Operators, Prev: Regular Expression Syntax, Up: Top

+Common Operators

+****************

+ You compose regular expressions from operators. In the following

+sections, we describe the regular expression operators specified by

+POSIX; GNU also uses these. Most operators have more than one

+representation as characters. *Note Regular Expression Syntax::, for

+what characters represent what operators under what circumstances.

+ For most operators that can be represented in two ways, one

+representation is a single character and the other is that character

+preceded by `\'. For example, either `(' or `\(' represents the

+open-group operator. Which one does depends on the setting of a syntax

+bit, in this case `RE_NO_BK_PARENS'. Why is this so? Historical

+reasons dictate some of the varying representations, while POSIX

+dictates others.

+ Finally, almost all characters lose any special meaning inside a list

+(*note List Operators::.).

+* Menu:

+* Match-self Operator:: Ordinary characters.

+* Match-any-character Operator:: .

+* Concatenation Operator:: Juxtaposition.

+* Repetition Operators:: * + ? {}

+* Alternation Operator:: |

+* List Operators:: [...] [^...]

+* Grouping Operators:: (...)

+* Back-reference Operator:: \digit

+* Anchoring Operators:: ^ $

+File: regex.info, Node: Match-self Operator, Next: Match-any-character Operator, Up: Common Operators

+The Match-self Operator (ORDINARY CHARACTER)

+============================================

+ This operator matches the character itself. All ordinary characters

+(*note Regular Expression Syntax::.) represent this operator. For

+example, `f' is always an ordinary character, so the regular expression

+`f' matches only the string `f'. In particular, it does *not* match

+the string `ff'.

+File: regex.info, Node: Match-any-character Operator, Next: Concatenation Operator, Prev: Match-self Operator, Up: Common Operators

+The Match-any-character Operator (`.')

+======================================

+ This operator matches any single printing or nonprinting character

+except it won't match a:

+newline

+ if the syntax bit `RE_DOT_NEWLINE' isn't set.

+null

+ if the syntax bit `RE_DOT_NOT_NULL' is set.

+ The `.' (period) character represents this operator. For example,

+`a.b' matches any three-character string beginning with `a' and ending

+with `b'.

+File: regex.info, Node: Concatenation Operator, Next: Repetition Operators, Prev: Match-any-character Operator, Up: Common Operators

+The Concatenation Operator

+==========================

+ This operator concatenates two regular expressions A and B. No

+character represents this operator; you simply put B after A. The

+result is a regular expression that will match a string if A matches

+its first part and B matches the rest. For example, `xy' (two

+match-self operators) matches `xy'.

+File: regex.info, Node: Repetition Operators, Next: Alternation Operator, Prev: Concatenation Operator, Up: Common Operators

+Repetition Operators

+====================

+ Repetition operators repeat the preceding regular expression a

+specified number of times.

+* Menu:

+* Match-zero-or-more Operator:: *

+* Match-one-or-more Operator:: +

+* Match-zero-or-one Operator:: ?

+* Interval Operators:: {}

+File: regex.info, Node: Match-zero-or-more Operator, Next: Match-one-or-more Operator, Up: Repetition Operators

+The Match-zero-or-more Operator (`*')

+-------------------------------------

+ This operator repeats the smallest possible preceding regular

+expression as many times as necessary (including zero) to match the

+pattern. `*' represents this operator. For example, `o*' matches any

+string made up of zero or more `o's. Since this operator operates on

+the smallest preceding regular expression, `fo*' has a repeating `o',

+not a repeating `fo'. So, `fo*' matches `f', `fo', `foo', and so on.

+ Since the match-zero-or-more operator is a suffix operator, it may be

+useless as such when no regular expression precedes it. This is the

+case when it:

+ * is first in a regular expression, or

+ * follows a match-beginning-of-line, open-group, or alternation

+ operator.

+Three different things can happen in these cases:

+ 1. If the syntax bit `RE_CONTEXT_INVALID_OPS' is set, then the

+ regular expression is invalid.

+ 2. If `RE_CONTEXT_INVALID_OPS' isn't set, but `RE_CONTEXT_INDEP_OPS'

+ is, then `*' represents the match-zero-or-more operator (which

+ then operates on the empty string).

+ 3. Otherwise, `*' is ordinary.

+ The matcher processes a match-zero-or-more operator by first matching

+as many repetitions of the smallest preceding regular expression as it

+can. Then it continues to match the rest of the pattern.

+ If it can't match the rest of the pattern, it backtracks (as many

+times as necessary), each time discarding one of the matches until it

+can either match the entire pattern or be certain that it cannot get a

+match. For example, when matching `ca*ar' against `caaar', the matcher

+first matches all three `a's of the string with the `a*' of the regular

+expression. However, it cannot then match the final `ar' of the

+regular expression against the final `r' of the string. So it

+backtracks, discarding the match of the last `a' in the string. It can

+then match the remaining `ar'.

+File: regex.info, Node: Match-one-or-more Operator, Next: Match-zero-or-one Operator, Prev: Match-zero-or-more Operator, Up: Repetition Operators

+The Match-one-or-more Operator (`+' or `\+')

+--------------------------------------------

+ If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't

+recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM'

+isn't set, then `+' represents this operator; if it is, then `\+' does.

+ This operator is similar to the match-zero-or-more operator except

+that it repeats the preceding regular expression at least once; *note

+Match-zero-or-more Operator::., for what it operates on, how some

+syntax bits affect it, and how Regex backtracks to match it.

+ For example, supposing that `+' represents the match-one-or-more

+operator; then `ca+r' matches, e.g., `car' and `caaaar', but not `cr'.

+File: regex.info, Node: Match-zero-or-one Operator, Next: Interval Operators, Prev: Match-one-or-more Operator, Up: Repetition Operators

+The Match-zero-or-one Operator (`?' or `\?')

+--------------------------------------------

+ If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't

+recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM'

+isn't set, then `?' represents this operator; if it is, then `\?' does.

+ This operator is similar to the match-zero-or-more operator except

+that it repeats the preceding regular expression once or not at all;

+*note Match-zero-or-more Operator::., to see what it operates on, how

+some syntax bits affect it, and how Regex backtracks to match it.

+ For example, supposing that `?' represents the match-zero-or-one

+operator; then `ca?r' matches both `car' and `cr', but nothing else.

+File: regex.info, Node: Interval Operators, Prev: Match-zero-or-one Operator, Up: Repetition Operators

+Interval Operators (`{' ... `}' or `\{' ... `\}')

+-------------------------------------------------

+ If the syntax bit `RE_INTERVALS' is set, then Regex recognizes

+"interval expressions". They repeat the smallest possible preceding

+regular expression a specified number of times.

+ If the syntax bit `RE_NO_BK_BRACES' is set, `{' represents the

+"open-interval operator" and `}' represents the "close-interval

+operator"; otherwise, `\{' and `\}' do.

+ Specifically, supposing that `{' and `}' represent the open-interval

+and close-interval operators; then:

+`{COUNT}'

+ matches exactly COUNT occurrences of the preceding regular

+ expression.

+`{MIN,}'

+ matches MIN or more occurrences of the preceding regular

+ expression.

+`{MIN, MAX}'

+ matches at least MIN but no more than MAX occurrences of the

+ preceding regular expression.

+ The interval expression (but not necessarily the regular expression

+that contains it) is invalid if:

+ * MIN is greater than MAX, or

+ * any of COUNT, MIN, or MAX are outside the range zero to

+ `RE_DUP_MAX' (which symbol `regex.h' defines).

+ If the interval expression is invalid and the syntax bit

+`RE_NO_BK_BRACES' is set, then Regex considers all the characters in

+the would-be interval to be ordinary. If that bit isn't set, then the

+regular expression is invalid.

+ If the interval expression is valid but there is no preceding regular

+expression on which to operate, then if the syntax bit

+`RE_CONTEXT_INVALID_OPS' is set, the regular expression is invalid. If

+that bit isn't set, then Regex considers all the characters--other than

+backslashes, which it ignores--in the would-be interval to be ordinary.

+File: regex.info, Node: Alternation Operator, Next: List Operators, Prev: Repetition Operators, Up: Common Operators

+The Alternation Operator (`|' or `\|')

+======================================

+ If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't

+recognize this operator. Otherwise, if the syntax bit `RE_NO_BK_VBAR'

+is set, then `|' represents this operator; otherwise, `\|' does.

+ Alternatives match one of a choice of regular expressions: if you put

+the character(s) representing the alternation operator between any two

+regular expressions A and B, the result matches the union of the

+strings that A and B match. For example, supposing that `|' is the

+alternation operator, then `foo|bar|quux' would match any of `foo',

+`bar' or `quux'.

+ The alternation operator operates on the *largest* possible

+surrounding regular expressions. (Put another way, it has the lowest

+precedence of any regular expression operator.) Thus, the only way you

+can delimit its arguments is to use grouping. For example, if `(' and

+`)' are the open and close-group operators, then `fo(o|b)ar' would

+match either `fooar' or `fobar'. (`foo|bar' would match `foo' or

+`bar'.)

+ The matcher usually tries all combinations of alternatives so as to

+match the longest possible string. For example, when matching

+`(fooq|foo)*(qbarquux|bar)' against `fooqbarquux', it cannot take, say,

+the first ("depth-first") combination it could match, since then it

+would be content to match just `fooqbar'.

+File: regex.info, Node: List Operators, Next: Grouping Operators, Prev: Alternation Operator, Up: Common Operators

+List Operators (`[' ... `]' and `[^' ... `]')

+=============================================

+ "Lists", also called "bracket expressions", are a set of one or more

+items. An "item" is a character, a character class expression, or a

+range expression. The syntax bits affect which kinds of items you can

+put in a list. We explain the last two items in subsections below.

+Empty lists are invalid.

+ A "matching list" matches a single character represented by one of

+the list items. You form a matching list by enclosing one or more items

+within an "open-matching-list operator" (represented by `[') and a

+"close-list operator" (represented by `]').

+ For example, `[ab]' matches either `a' or `b'. `[ad]*' matches the

+empty string and any string composed of just `a's and `d's in any

+order. Regex considers invalid a regular expression with a `[' but no

+matching `]'.

+ "Nonmatching lists" are similar to matching lists except that they

+match a single character *not* represented by one of the list items.

+You use an "open-nonmatching-list operator" (represented by `[^'(1))

+instead of an open-matching-list operator to start a nonmatching list.

+ For example, `[^ab]' matches any character except `a' or `b'.

+ If the `posix_newline' field in the pattern buffer (*note GNU Pattern

+Buffers::.) is set, then nonmatching lists do not match a newline.

+ Most characters lose any special meaning inside a list. The special

+characters inside a list follow.

+`]'

+ ends the list if it's not the first list item. So, if you want to

+ make the `]' character a list item, you must put it first.

+`\'

+ quotes the next character if the syntax bit

+ `RE_BACKSLASH_ESCAPE_IN_LISTS' is set.

+`[:'

+ represents the open-character-class operator (*note Character

+ Class Operators::.) if the syntax bit `RE_CHAR_CLASSES' is set and

+ what follows is a valid character class expression.

+`:]'

+ represents the close-character-class operator if the syntax bit

+ `RE_CHAR_CLASSES' is set and what precedes it is an

+ open-character-class operator followed by a valid character class

+ name.

+`-'

+ represents the range operator (*note Range Operator::.) if it's

+ not first or last in a list or the ending point of a range.

+All other characters are ordinary. For example, `[.*]' matches `.' and

+`*'.

+* Menu:

+* Character Class Operators:: [:class:]

+* Range Operator:: start-end

+ ---------- Footnotes ----------

+ (1) Regex therefore doesn't consider the `^' to be the first

+character in the list. If you put a `^' character first in (what you

+think is) a matching list, you'll turn it into a nonmatching list.

+File: regex.info, Node: Character Class Operators, Next: Range Operator, Up: List Operators

+Character Class Operators (`[:' ... `:]')

+-----------------------------------------

+ If the syntax bit `RE_CHAR_CLASSES' is set, then Regex

+recognizes character class expressions inside lists. A "character

+class expression" matches one character from a given class. You form a

+character class expression by putting a character class name between an

+"open-character-class operator" (represented by `[:') and a

+"close-character-class operator" (represented by `:]'). The character

+class names and their meanings are:

+`alnum'

+ letters and digits

+`alpha'

+ letters

+`blank'

+ system-dependent; for GNU, a space or tab

+`cntrl'

+ control characters (in the ASCII encoding, code 0177 and codes

+ less than 040)

+`digit'

+ digits

+`graph'

+ same as `print' except omits space

+`lower'

+ lowercase letters

+`print'

+ printable characters (in the ASCII encoding, space through tilde--codes

+ 040 through 0176)

+`punct'

+ neither control nor alphanumeric characters

+`space'

+ space, tab, carriage return, newline, vertical tab, and form feed

+`upper'

+ uppercase letters

+`xdigit'

+ hexadecimal digits: `0'-`9', `a'-`f', `A'-`F'

+These correspond to the definitions in the C library's `<ctype.h>'

+facility. For example, `[:alpha:]' corresponds to the standard

+facility `isalpha'. Regex recognizes character class expressions only

+inside of lists; so `[[:alpha:]]' matches any letter, but `[:alpha:]'

+outside of a bracket expression and not followed by a repetition

+operator matches just itself.

+File: regex.info, Node: Range Operator, Prev: Character Class Operators, Up: List Operators

+The Range Operator (`-')

+------------------------

+ Regex recognizes "range expressions" inside a list. They represent

+those characters that fall between two elements in the current

+collating sequence. You form a range expression by putting a "range

+operator" between two characters.(1) `-' represents the range operator.

+For example, `a-f' within a list represents all the characters from `a'

+through `f' inclusively.

+ If the syntax bit `RE_NO_EMPTY_RANGES' is set, then if the range's

+ending point collates less than its starting point, the range (and the

+regular expression containing it) is invalid. For example, the regular

+expression `[z-a]' would be invalid. If this bit isn't set, then Regex

+considers such a range to be empty.

+ Since `-' represents the range operator, if you want to make a `-'

+character itself a list item, you must do one of the following:

+ * Put the `-' either first or last in the list.

+ * Include a range whose starting point collates strictly lower than

+ `-' and whose ending point collates equal or higher. Unless a

+ range is the first item in a list, a `-' can't be its starting

+ point, but *can* be its ending point. That is because Regex

+ considers `-' to be the range operator unless it is preceded by

+ another `-'. For example, in the ASCII encoding, `)', `*', `+',

+ `,', `-', `.', and `/' are contiguous characters in the collating

+ sequence. You might think that `[)-+--/]' has two ranges: `)-+'

+ and `--/'. Rather, it has the ranges `)-+' and `+--', plus the

+ character `/', so it matches, e.g., `,', not `.'.

+ * Put a range whose starting point is `-' first in the list.

+ For example, `[-a-z]' matches a lowercase letter or a hyphen (in

+English, in ASCII).

+ ---------- Footnotes ----------

+ (1) You can't use a character class for the starting or ending point

+of a range, since a character class is not a single character.

+File: regex.info, Node: Grouping Operators, Next: Back-reference Operator, Prev: List Operators, Up: Common Operators

+Grouping Operators (`(' ... `)' or `\(' ... `\)')

+=================================================

+ A "group", also known as a "subexpression", consists of an

+"open-group operator", any number of other operators, and a

+"close-group operator". Regex treats this sequence as a unit, just as

+mathematics and programming languages treat a parenthesized expression

+as a unit.

+ Therefore, using "groups", you can:

+ * delimit the argument(s) to an alternation operator (*note

+ Alternation Operator::.) or a repetition operator (*note

+ Repetition Operators::.).

+ * keep track of the indices of the substring that matched a given

+ group. *Note Using Registers::, for a precise explanation. This

+ lets you:

+ * use the back-reference operator (*note Back-reference

+ Operator::.).

+ * use registers (*note Using Registers::.).

+ If the syntax bit `RE_NO_BK_PARENS' is set, then `(' represents the

+open-group operator and `)' represents the close-group operator;

+otherwise, `\(' and `\)' do.

+ If the syntax bit `RE_UNMATCHED_RIGHT_PAREN_ORD' is set and a

+close-group operator has no matching open-group operator, then Regex

+considers it to match `)'.

+File: regex.info, Node: Back-reference Operator, Next: Anchoring Operators, Prev: Grouping Operators, Up: Common Operators

+The Back-reference Operator ("\"DIGIT)

+======================================

+ If the syntax bit `RE_NO_BK_REFS' isn't set, then Regex recognizes

+back references. A back reference matches a specified preceding group.

+The back reference operator is represented by `\DIGIT' anywhere after

+the end of a regular expression's DIGIT-th group (*note Grouping

+Operators::.).

+ DIGIT must be between `1' and `9'. The matcher assigns numbers 1

+through 9 to the first nine groups it encounters. By using one of `\1'

+through `\9' after the corresponding group's close-group operator, you

+can match a substring identical to the one that the group does.

+ Back references match according to the following (in all examples

+below, `(' represents the open-group, `)' the close-group, `{' the

+open-interval and `}' the close-interval operator):

+ * If the group matches a substring, the back reference matches an

+ identical substring. For example, `(a)\1' matches `aa' and

+ `(bana)na\1bo\1' matches `bananabanabobana'. Likewise, `(.*)\1'

+ matches any (newline-free if the syntax bit `RE_DOT_NEWLINE' isn't

+ set) string that is composed of two identical halves; the `(.*)'

+ matches the first half and the `\1' matches the second half.

+ * If the group matches more than once (as it might if followed by,

+ e.g., a repetition operator), then the back reference matches the

+ substring the group *last* matched. For example, `((a*)b)*\1\2'

+ matches `aabababa'; first group 1 (the outer one) matches `aab'

+ and group 2 (the inner one) matches `aa'. Then group 1 matches

+ `ab' and group 2 matches `a'. So, `\1' matches `ab' and `\2'

+ matches `a'.

+ * If the group doesn't participate in a match, i.e., it is part of an

+ alternative not taken or a repetition operator allows zero

+ repetitions of it, then the back reference makes the whole match

+ fail. For example, `(one()|two())-and-(three\2|four\3)' matches

+ `one-and-three' and `two-and-four', but not `one-and-four' or

+ `two-and-three'. For example, if the pattern matches `one-and-',

+ then its group 2 matches the empty string and its group 3 doesn't

+ participate in the match. So, if it then matches `four', then

+ when it tries to back reference group 3--which it will attempt to

+ do because `\3' follows the `four'--the match will fail because

+ group 3 didn't participate in the match.

+ You can use a back reference as an argument to a repetition operator.

+For example, `(a(b))\2*' matches `a' followed by one or more `b's.

+Similarly, `(a(b))\2{3}' matches `abbbb'.

+ If there is no preceding DIGIT-th subexpression, the regular

+expression is invalid.

+File: regex.info, Node: Anchoring Operators, Prev: Back-reference Operator, Up: Common Operators

+Anchoring Operators

+===================

+ These operators can constrain a pattern to match only at the

+beginning or end of the entire string or at the beginning or end of a

+line.

+* Menu:

+* Match-beginning-of-line Operator:: ^

+* Match-end-of-line Operator:: $

+File: regex.info, Node: Match-beginning-of-line Operator, Next: Match-end-of-line Operator, Up: Anchoring Operators

+The Match-beginning-of-line Operator (`^')

+------------------------------------------

+ This operator can match the empty string either at the beginning of

+the string or after a newline character. Thus, it is said to "anchor"

+the pattern to the beginning of a line.

+ In the cases following, `^' represents this operator. (Otherwise,

+`^' is ordinary.)

+ * It (the `^') is first in the pattern, as in `^foo'.

+ * The syntax bit `RE_CONTEXT_INDEP_ANCHORS' is set, and it is outside

+ a bracket expression.

+ * It follows an open-group or alternation operator, as in `a\(^b\)'

+ and `a\|^b'. *Note Grouping Operators::, and *Note Alternation

+ Operator::.

+ These rules imply that some valid patterns containing `^' cannot be

+matched; for example, `foo^bar' if `RE_CONTEXT_INDEP_ANCHORS' is set.

+ If the `not_bol' field is set in the pattern buffer (*note GNU

+Pattern Buffers::.), then `^' fails to match at the beginning of the

+string. *Note POSIX Matching::, for when you might find this useful.

+ If the `newline_anchor' field isn't set in the pattern buffer, then `^'

+fails to match after a newline. This is useful when you do not regard

+the string to be matched as broken into lines.

+File: regex.info, Node: Match-end-of-line Operator, Prev: Match-beginning-of-line Operator, Up: Anchoring Operators

+The Match-end-of-line Operator (`$')

+------------------------------------

+ This operator can match the empty string either at the end of the

+string or before a newline character in the string. Thus, it is said

+to "anchor" the pattern to the end of a line.

+ It is always represented by `$'. For example, `foo$' usually

+matches, e.g., `foo' and, e.g., the first three characters of

+`foo\nbar'.

+ Its interaction with the syntax bits and pattern buffer fields is

+exactly the dual of `^''s; see the previous section. (That is,

+"beginning" becomes "end", "next" becomes "previous", and "after"

+becomes "before".)

+File: regex.info, Node: GNU Operators, Next: GNU Emacs Operators, Prev: Common Operators, Up: Top

+GNU Operators

+*************

+ Following are operators that GNU defines (and POSIX doesn't).

+* Menu:

+* Word Operators::

+* Buffer Operators::

+File: regex.info, Node: Word Operators, Next: Buffer Operators, Up: GNU Operators

+Word Operators

+==============

+ The operators in this section require Regex to recognize parts of

+words. Regex uses a syntax table to determine whether or not a

+character is part of a word, i.e., whether or not it is

+"word-constituent".

+* Menu:

+* Non-Emacs Syntax Tables::

+* Match-word-boundary Operator:: \b

+* Match-within-word Operator:: \B

+* Match-beginning-of-word Operator:: \<

+* Match-end-of-word Operator:: \>

+* Match-word-constituent Operator:: \w

+* Match-non-word-constituent Operator:: \W

+File: regex.info, Node: Non-Emacs Syntax Tables, Next: Match-word-boundary Operator, Up: Word Operators

+Non-Emacs Syntax Tables

+-----------------------

+ A "syntax table" is an array indexed by the characters in your

+character set. In the ASCII encoding, therefore, a syntax table has

+256 elements. Regex always uses a `char *' variable `re_syntax_table'

+as its syntax table. In some cases, it initializes this variable and

+in others it expects you to initialize it.

+ * If Regex is compiled with the preprocessor symbols `emacs' and

+ `SYNTAX_TABLE' both undefined, then Regex allocates

+ `re_syntax_table' and initializes an element I either to `Sword'

+ (which it defines) if I is a letter, number, or `_', or to zero if

+ it's not.

+ * If Regex is compiled with `emacs' undefined but `SYNTAX_TABLE'

+ defined, then Regex expects you to define a `char *' variable

+ `re_syntax_table' to be a valid syntax table.

+ * *Note Emacs Syntax Tables::, for what happens when Regex is

+ compiled with the preprocessor symbol `emacs' defined.

+File: regex.info, Node: Match-word-boundary Operator, Next: Match-within-word Operator, Prev: Non-Emacs Syntax Tables, Up: Word Operators

+The Match-word-boundary Operator (`\b')

+---------------------------------------

+ This operator (represented by `\b') matches the empty string at

+either the beginning or the end of a word. For example, `\brat\b'

+matches the separate word `rat'.

+File: regex.info, Node: Match-within-word Operator, Next: Match-beginning-of-word Operator, Prev: Match-word-boundary Operator, Up: Word Operators

+The Match-within-word Operator (`\B')

+-------------------------------------

+ This operator (represented by `\B') matches the empty string within a

+word. For example, `c\Brat\Be' matches `crate', but `dirty \Brat'

+doesn't match `dirty rat'.

+File: regex.info, Node: Match-beginning-of-word Operator, Next: Match-end-of-word Operator, Prev: Match-within-word Operator, Up: Word Operators

+The Match-beginning-of-word Operator (`\<')

+-------------------------------------------

+ This operator (represented by `\<') matches the empty string at the

+beginning of a word.

+File: regex.info, Node: Match-end-of-word Operator, Next: Match-word-constituent Operator, Prev: Match-beginning-of-word Operator, Up: Word Operators

+The Match-end-of-word Operator (`\>')

+-------------------------------------

+ This operator (represented by `\>') matches the empty string at the

+end of a word.

+File: regex.info, Node: Match-word-constituent Operator, Next: Match-non-word-constituent Operator, Prev: Match-end-of-word Operator, Up: Word Operators

+The Match-word-constituent Operator (`\w')

+------------------------------------------

+ This operator (represented by `\w') matches any word-constituent

+character.

+File: regex.info, Node: Match-non-word-constituent Operator, Prev: Match-word-constituent Operator, Up: Word Operators

+The Match-non-word-constituent Operator (`\W')

+----------------------------------------------

+ This operator (represented by `\W') matches any character that is not

+word-constituent.

+File: regex.info, Node: Buffer Operators, Prev: Word Operators, Up: GNU Operators

+Buffer Operators

+================

+ Following are operators which work on buffers. In Emacs, a "buffer"

+is, naturally, an Emacs buffer. For other programs, Regex considers the

+entire string to be matched as the buffer.

+* Menu:

+* Match-beginning-of-buffer Operator:: \`

+* Match-end-of-buffer Operator:: \'

+File: regex.info, Node: Match-beginning-of-buffer Operator, Next: Match-end-of-buffer Operator, Up: Buffer Operators

+The Match-beginning-of-buffer Operator (`\`')

+---------------------------------------------

+ This operator (represented by `\`') matches the empty string at the

+beginning of the buffer.

+File: regex.info, Node: Match-end-of-buffer Operator, Prev: Match-beginning-of-buffer Operator, Up: Buffer Operators

+The Match-end-of-buffer Operator (`\'')

+---------------------------------------

+ This operator (represented by `\'') matches the empty string at the

+end of the buffer.

+File: regex.info, Node: GNU Emacs Operators, Next: What Gets Matched?, Prev: GNU Operators, Up: Top

+GNU Emacs Operators

+*******************

+ Following are operators that GNU defines (and POSIX doesn't) that you

+can use only when Regex is compiled with the preprocessor symbol

+`emacs' defined.

+* Menu:

+* Syntactic Class Operators::

+File: regex.info, Node: Syntactic Class Operators, Up: GNU Emacs Operators

+Syntactic Class Operators

+=========================

+ The operators in this section require Regex to recognize the syntactic

+classes of characters. Regex uses a syntax table to determine this.

+* Menu:

+* Emacs Syntax Tables::

+* Match-syntactic-class Operator:: \sCLASS

+* Match-not-syntactic-class Operator:: \SCLASS

+File: regex.info, Node: Emacs Syntax Tables, Next: Match-syntactic-class Operator, Up: Syntactic Class Operators

+Emacs Syntax Tables

+-------------------

+ A "syntax table" is an array indexed by the characters in your

+character set. In the ASCII encoding, therefore, a syntax table has

+256 elements.

+ If Regex is compiled with the preprocessor symbol `emacs' defined,

+then Regex expects you to define and initialize the variable

+`re_syntax_table' to be an Emacs syntax table. Emacs' syntax tables

+are more complicated than Regex's own (*note Non-Emacs Syntax

+Tables::.). *Note Syntax: (emacs)Syntax, for a description of Emacs'

+syntax tables.

+File: regex.info, Node: Match-syntactic-class Operator, Next: Match-not-syntactic-class Operator, Prev: Emacs Syntax Tables, Up: Syntactic Class Operators

+The Match-syntactic-class Operator (`\s'CLASS)

+----------------------------------------------

+ This operator matches any character whose syntactic class is

+represented by a specified character. `\sCLASS' represents this

+operator where CLASS is the character representing the syntactic class

+you want. For example, `w' represents the syntactic class of

+word-constituent characters, so `\sw' matches any word-constituent

+character.

+File: regex.info, Node: Match-not-syntactic-class Operator, Prev: Match-syntactic-class Operator, Up: Syntactic Class Operators

+The Match-not-syntactic-class Operator (`\S'CLASS)

+--------------------------------------------------

+ This operator is similar to the match-syntactic-class operator except

+that it matches any character whose syntactic class is *not*

+represented by the specified character. `\SCLASS' represents this

+operator. For example, `w' represents the syntactic class of

+word-constituent characters, so `\Sw' matches any character that is not

+word-constituent.

+File: regex.info, Node: What Gets Matched?, Next: Programming with Regex, Prev: GNU Emacs Operators, Up: Top

+What Gets Matched?

+******************

+ Regex usually matches strings according to the "leftmost longest"

+rule; that is, it chooses the longest of the leftmost matches. This

+does not mean that for a regular expression containing subexpressions

+it simply chooses the longest match for each subexpression, left to

+right; the overall match must also be the longest possible one.

+ For example, `(ac*)(c*d[ac]*)\1' matches `acdacaaa', not `acdac', as

+it would if it were to choose the longest match for the first

+subexpression.

+File: regex.info, Node: Programming with Regex, Next: Copying, Prev: What Gets Matched?, Up: Top

+Programming with Regex

+**********************

+ Here we describe how you use the Regex data structures and functions

+in C programs. Regex has three interfaces: one designed for GNU, one

+compatible with POSIX and one compatible with Berkeley UNIX.

+* Menu:

+* GNU Regex Functions::

+* POSIX Regex Functions::

+* BSD Regex Functions::

+File: regex.info, Node: GNU Regex Functions, Next: POSIX Regex Functions, Up: Programming with Regex

+GNU Regex Functions

+===================

+ If you're writing code that doesn't need to be compatible with either

+POSIX or Berkeley UNIX, you can use these functions. They provide more

+options than the other interfaces.

+* Menu:

+* GNU Pattern Buffers:: The re_pattern_buffer type.

+* GNU Regular Expression Compiling:: re_compile_pattern ()

+* GNU Matching:: re_match ()

+* GNU Searching:: re_search ()

+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()

+* Searching with Fastmaps:: re_compile_fastmap ()

+* GNU Translate Tables:: The `translate' field.

+* Using Registers:: The re_registers type and related fns.

+* Freeing GNU Pattern Buffers:: regfree ()

+File: regex.info, Node: GNU Pattern Buffers, Next: GNU Regular Expression Compiling, Up: GNU Regex Functions

+GNU Pattern Buffers

+-------------------

+ To compile, match, or search for a given regular expression, you must

+supply a pattern buffer. A "pattern buffer" holds one compiled regular

+expression.(1)

+ You can have several different pattern buffers simultaneously, each

+holding a compiled pattern for a different regular expression.

+ `regex.h' defines the pattern buffer `struct' as follows:

+ /* Space that holds the compiled pattern. It is declared as

+ `unsigned char *' because its elements are

+ sometimes used as array indexes. */

+ unsigned char *buffer;

+ /* Number of bytes to which `buffer' points. */

+ unsigned long allocated;

+ /* Number of bytes actually used in `buffer'. */

+ unsigned long used;

+ /* Syntax setting with which the pattern was compiled. */

+ reg_syntax_t syntax;

+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses

+ the fastmap, if there is one, to skip over impossible

+ starting points for matches. */

+ char *fastmap;

+ /* Either a translate table to apply to all characters before

+ comparing them, or zero for no translation. The translation

+ is applied to a pattern when it is compiled and to a string

+ when it is matched. */

+ char *translate;

+ /* Number of subexpressions found by the compiler. */

+ size_t re_nsub;

+ /* Zero if this pattern cannot match the empty string, one else.

+ Well, in truth it's used only in `re_search_2', to see

+ whether or not we should use the fastmap, so we don't set

+ this absolutely perfectly; see `re_compile_fastmap' (the

+ `duplicate' case). */

+ unsigned can_be_null : 1;

+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure

+ for `max (RE_NREGS, re_nsub + 1)' groups.

+ If REGS_REALLOCATE, reallocate space if necessary.

+ If REGS_FIXED, use what's there. */

+ #define REGS_UNALLOCATED 0

+ #define REGS_REALLOCATE 1

+ #define REGS_FIXED 2

+ unsigned regs_allocated : 2;

+ /* Set to zero when `regex_compile' compiles a pattern; set to one

+ by `re_compile_fastmap' if it updates the fastmap. */

+ unsigned fastmap_accurate : 1;

+ /* If set, `re_match_2' does not return information about

+ subexpressions. */

+ unsigned no_sub : 1;

+ /* If set, a beginning-of-line anchor doesn't match at the

+ beginning of the string. */

+ unsigned not_bol : 1;

+ /* Similarly for an end-of-line anchor. */

+ unsigned not_eol : 1;

+ /* If true, an anchor at a newline matches. */

+ unsigned newline_anchor : 1;

+ ---------- Footnotes ----------

+ (1) Regular expressions are also referred to as "patterns," hence

+the name "pattern buffer."

+File: regex.info, Node: GNU Regular Expression Compiling, Next: GNU Matching, Prev: GNU Pattern Buffers, Up: GNU Regex Functions

+GNU Regular Expression Compiling

+--------------------------------

+ In GNU, you can both match and search for a given regular expression.

+To do either, you must first compile it in a pattern buffer (*note GNU

+Pattern Buffers::.).

+ Regular expressions match according to the syntax with which they were

+compiled; with GNU, you indicate what syntax you want by setting the

+variable `re_syntax_options' (declared in `regex.h' and defined in

+`regex.c') before calling the compiling function, `re_compile_pattern'

+(see below). *Note Syntax Bits::, and *Note Predefined Syntaxes::.

+ You can change the value of `re_syntax_options' at any time.

+Usually, however, you set its value once and then never change it.

+ `re_compile_pattern' takes a pattern buffer as an argument. You must

+initialize the following fields:

+`translate initialization'

+`translate'

+ Initialize this to point to a translate table if you want one, or

+ to zero if you don't. We explain translate tables in *Note GNU

+ Translate Tables::.

+`fastmap'

+ Initialize this to nonzero if you want a fastmap, or to zero if you

+ don't.

+`buffer'

+`allocated'

+ If you want `re_compile_pattern' to allocate memory for the

+ compiled pattern, set both of these to zero. If you have an

+ existing block of memory (allocated with `malloc') you want Regex

+ to use, set `buffer' to its address and `allocated' to its size (in

+ bytes).

+ `re_compile_pattern' uses `realloc' to extend the space for the

+ compiled pattern as necessary.

+ To compile a pattern buffer, use:

+ char *

+ re_compile_pattern (const char *REGEX, const int REGEX_SIZE,

+ struct re_pattern_buffer *PATTERN_BUFFER)

+REGEX is the regular expression's address, REGEX_SIZE is its length,

+and PATTERN_BUFFER is the pattern buffer's address.

+ If `re_compile_pattern' successfully compiles the regular expression,

+it returns zero and sets `*PATTERN_BUFFER' to the compiled pattern. It

+sets the pattern buffer's fields as follows:

+`buffer'

+ to the compiled pattern.

+`used'

+ to the number of bytes the compiled pattern in `buffer' occupies.

+`syntax'

+ to the current value of `re_syntax_options'.

+`re_nsub'

+ to the number of subexpressions in REGEX.

+`fastmap_accurate'

+ to zero on the theory that the pattern you're compiling is

+ different than the one previously compiled into `buffer'; in that

+ case (since you can't make a fastmap without a compiled pattern),

+ `fastmap' would either contain an incompatible fastmap, or nothing

+ at all.

+ If `re_compile_pattern' can't compile REGEX, it returns an error

+string corresponding to one of the errors listed in *Note POSIX Regular

+Expression Compiling::.

+File: regex.info, Node: GNU Matching, Next: GNU Searching, Prev: GNU Regular Expression Compiling, Up: GNU Regex Functions

+GNU Matching

+------------

+ Matching the GNU way means trying to match as much of a string as

+possible starting at a position within it you specify. Once you've

+compiled a pattern into a pattern buffer (*note GNU Regular Expression

+Compiling::.), you can ask the matcher to match that pattern against a

+string using:

+ int

+ re_match (struct re_pattern_buffer *PATTERN_BUFFER,

+ const char *STRING, const int SIZE,

+ const int START, struct re_registers *REGS)

+PATTERN_BUFFER is the address of a pattern buffer containing a compiled

+pattern. STRING is the string you want to match; it can contain

+newline and null characters. SIZE is the length of that string. START

+is the string index at which you want to begin matching; the first

+character of STRING is at index zero. *Note Using Registers::, for an

+explanation of REGS; you can safely pass zero.

+ `re_match' matches the regular expression in PATTERN_BUFFER against

+the string STRING according to the syntax in PATTERN_BUFFER's `syntax'

+field. (*Note GNU Regular Expression Compiling::, for how to set it.)

+The function returns -1 if the compiled pattern does not match any part

+of STRING and -2 if an internal error happens; otherwise, it returns

+how many (possibly zero) characters of STRING the pattern matched.

+ An example: suppose PATTERN_BUFFER points to a pattern buffer

+containing the compiled pattern for `a*', and STRING points to `aaaaab'

+(whereupon SIZE should be 6). Then if START is 2, `re_match' returns 3,

+i.e., `a*' would have matched the last three `a's in STRING. If START

+is 0, `re_match' returns 5, i.e., `a*' would have matched all the `a's

+in STRING. If START is either 5 or 6, it returns zero.

+ If START is not between zero and SIZE, then `re_match' returns -1.

+File: regex.info, Node: GNU Searching, Next: Matching/Searching with Split Data, Prev: GNU Matching, Up: GNU Regex Functions

+GNU Searching

+-------------

+ "Searching" means trying to match starting at successive positions

+within a string. The function `re_search' does this.

+ Before calling `re_search', you must compile your regular expression.

+*Note GNU Regular Expression Compiling::.

+ Here is the function declaration:

+ int

+ re_search (struct re_pattern_buffer *PATTERN_BUFFER,

+ const char *STRING, const int SIZE,

+ const int START, const int RANGE,

+ struct re_registers *REGS)

+whose arguments are the same as those to `re_match' (*note GNU

+Matching::.) except that the two arguments START and RANGE replace

+`re_match''s argument START.

+ If RANGE is positive, then `re_search' attempts a match starting

+first at index START, then at START + 1 if that fails, and so on, up to

+START + RANGE; if RANGE is negative, then it attempts a match starting

+first at index START, then at START - 1 if that fails, and so on.

+ If START is not between zero and SIZE, then `re_search' returns -1.

+When RANGE is positive, `re_search' adjusts RANGE so that START + RANGE

+- 1 is between zero and SIZE, if necessary; that way it won't search

+outside of STRING. Similarly, when RANGE is negative, `re_search'

+adjusts RANGE so that START + RANGE + 1 is between zero and SIZE, if

+necessary.

+ If the `fastmap' field of PATTERN_BUFFER is zero, `re_search' matches

+starting at consecutive positions; otherwise, it uses `fastmap' to make

+the search more efficient. *Note Searching with Fastmaps::.

+ If no match is found, `re_search' returns -1. If a match is found,

+it returns the index where the match began. If an internal error

+happens, it returns -2.

+File: regex.info, Node: Matching/Searching with Split Data, Next: Searching with Fastmaps, Prev: GNU Searching, Up: GNU Regex Functions

+Matching and Searching with Split Data

+--------------------------------------

+ Using the functions `re_match_2' and `re_search_2', you can match or

+search in data that is divided into two strings.

+ The function:

+ int

+ re_match_2 (struct re_pattern_buffer *BUFFER,

+ const char *STRING1, const int SIZE1,

+ const char *STRING2, const int SIZE2,

+ const int START,

+ struct re_registers *REGS,

+ const int STOP)

+is similar to `re_match' (*note GNU Matching::.) except that you pass

+*two* data strings and sizes, and an index STOP beyond which you don't

+want the matcher to try matching. As with `re_match', if it succeeds,

+`re_match_2' returns how many characters of STRING it matched. Regard

+STRING1 and STRING2 as concatenated when you set the arguments START and

+STOP and use the contents of REGS; `re_match_2' never returns a value

+larger than SIZE1 + SIZE2.

+ The function:

+ int

+ re_search_2 (struct re_pattern_buffer *BUFFER,

+ const char *STRING1, const int SIZE1,

+ const char *STRING2, const int SIZE2,

+ const int START, const int RANGE,

+ struct re_registers *REGS,

+ const int STOP)

+is similarly related to `re_search'.

+File: regex.info, Node: Searching with Fastmaps, Next: GNU Translate Tables, Prev: Matching/Searching with Split Data, Up: GNU Regex Functions

+Searching with Fastmaps

+-----------------------

+ If you're searching through a long string, you should use a fastmap.

+Without one, the searcher tries to match at consecutive positions in the

+string. Generally, most of the characters in the string could not start

+a match. It takes much longer to try matching at a given position in

+the string than it does to check in a table whether or not the

+character at that position could start a match. A "fastmap" is such a

+table.

+ More specifically, a fastmap is an array indexed by the characters in

+your character set. Under the ASCII encoding, therefore, a fastmap has

+256 elements. If you want the searcher to use a fastmap with a given

+pattern buffer, you must allocate the array and assign the array's

+address to the pattern buffer's `fastmap' field. You either can

+compile the fastmap yourself or have `re_search' do it for you; when

+`fastmap' is nonzero, it automatically compiles a fastmap the first

+time you search using a particular compiled pattern.

+ To compile a fastmap yourself, use:

+ int

+ re_compile_fastmap (struct re_pattern_buffer *PATTERN_BUFFER)

+PATTERN_BUFFER is the address of a pattern buffer. If the character C

+could start a match for the pattern, `re_compile_fastmap' makes

+`PATTERN_BUFFER->fastmap[C]' nonzero. It returns 0 if it can compile a

+fastmap and -2 if there is an internal error. For example, if `|' is

+the alternation operator and PATTERN_BUFFER holds the compiled pattern

+for `a|b', then `re_compile_fastmap' sets `fastmap['a']' and

+`fastmap['b']' (and no others).

+ `re_search' uses a fastmap as it moves along in the string: it checks

+the string's characters until it finds one that's in the fastmap. Then

+it tries matching at that character. If the match fails, it repeats

+the process. So, by using a fastmap, `re_search' doesn't waste time

+trying to match at positions in the string that couldn't start a match.

+ If you don't want `re_search' to use a fastmap, store zero in the

+`fastmap' field of the pattern buffer before calling `re_search'.

+ Once you've initialized a pattern buffer's `fastmap' field, you need

+never do so again--even if you compile a new pattern in it--provided

+the way the field is set still reflects whether or not you want a

+fastmap. `re_search' will still either do nothing if `fastmap' is null

+or, if it isn't, compile a new fastmap for the new pattern.

+File: regex.info, Node: GNU Translate Tables, Next: Using Registers, Prev: Searching with Fastmaps, Up: GNU Regex Functions

+GNU Translate Tables

+--------------------

+ If you set the `translate' field of a pattern buffer to a translate

+table, then the GNU Regex functions to which you've passed that pattern

+buffer use it to apply a simple transformation to all the regular

+expression and string characters at which they look.

+ A "translate table" is an array indexed by the characters in your

+character set. Under the ASCII encoding, therefore, a translate table

+has 256 elements. The array's elements are also characters in your

+character set. When the Regex functions see a character C, they use

+`translate[C]' in its place, with one exception: the character after a

+`\' is not translated. (This ensures that the operators, e.g., `\B'

+and `\b', are always distinguishable.)

+ For example, a table that maps all lowercase letters to the

+corresponding uppercase ones would cause the matcher to ignore

+differences in case.(1) Such a table would map all characters except

+lowercase letters to themselves, and lowercase letters to the

+corresponding uppercase ones. Under the ASCII encoding, here's how you

+could initialize such a table (we'll call it `case_fold'):

+ for (i = 0; i < 256; i++)

+ case_fold[i] = i;

+ for (i = 'a'; i <= 'z'; i++)

+ case_fold[i] = i - ('a' - 'A');

+ You tell Regex to use a translate table on a given pattern buffer by

+assigning that table's address to the `translate' field of that buffer.

+If you don't want Regex to do any translation, put zero into this

+field. You'll get weird results if you change the table's contents

+anytime between compiling the pattern buffer, compiling its fastmap, and

+matching or searching with the pattern buffer.

+ ---------- Footnotes ----------

+ (1) A table that maps all uppercase letters to the corresponding

+lowercase ones would work just as well for this purpose.

+File: regex.info, Node: Using Registers, Next: Freeing GNU Pattern Buffers, Prev: GNU Translate Tables, Up: GNU Regex Functions

+Using Registers

+---------------

+ A group in a regular expression can match a (possibly empty)

+substring of the string that regular expression as a whole matched.

+The matcher remembers the beginning and end of the substring matched by

+each group.

+ To find out what they matched, pass a nonzero REGS argument to a GNU

+matching or searching function (*note GNU Matching::. and *Note GNU

+Searching::), i.e., the address of a structure of this type, as defined

+in `regex.h':

+ struct re_registers

+ {

+ unsigned num_regs;

+ regoff_t *start;

+ regoff_t *end;

+ };

+ Except for (possibly) the NUM_REGS'th element (see below), the Ith

+element of the `start' and `end' arrays records information about the

+Ith group in the pattern. (They're declared as C pointers, but this is

+only because not all C compilers accept zero-length arrays;

+conceptually, it is simplest to think of them as arrays.)

+ The `start' and `end' arrays are allocated in various ways, depending

+on the value of the `regs_allocated' field in the pattern buffer passed

+to the matcher.

+ The simplest and perhaps most useful is to let the matcher

+(re)allocate enough space to record information for all the groups in

+the regular expression. If `regs_allocated' is `REGS_UNALLOCATED', the

+matcher allocates 1 + RE_NSUB (another field in the pattern buffer;

+*note GNU Pattern Buffers::.). The extra element is set to -1, and

+the matcher sets `regs_allocated' to `REGS_REALLOCATE'. Then on subsequent calls

+with the same pattern buffer and REGS arguments, the matcher

+reallocates more space if necessary.

+ It would perhaps be more logical to make the `regs_allocated' field

+part of the `re_registers' structure, instead of part of the pattern

+buffer. But in that case the caller would be forced to initialize the

+structure before passing it. Much existing code doesn't do this

+initialization, and it's arguably better to avoid it anyway.

+ `re_compile_pattern' sets `regs_allocated' to `REGS_UNALLOCATED', so

+if you use the GNU regular expression functions, you get this behavior

+by default.

+ xx document re_set_registers

+ POSIX, on the other hand, requires a different interface: the caller

+is supposed to pass in a fixed-length array which the matcher fills.

+Therefore, if `regs_allocated' is `REGS_FIXED' the matcher simply fills

+that array.

+ The following examples illustrate the information recorded in the

+`re_registers' structure. (In all of them, `(' represents the

+open-group and `)' the close-group operator. The first character in

+the string STRING is at index 0.)

+ * If the regular expression has an I-th group not contained within

+ another group that matches a substring of STRING, then the

+ function sets `REGS->start[I]' to the index in STRING where the

+ substring matched by the I-th group begins, and `REGS->end[I]' to

+ the index just beyond that substring's end. The function sets

+ `REGS->start[0]' and `REGS->end[0]' to analogous information about

+ the entire pattern.

+ For example, when you match `((a)(b))' against `ab', you get:

+ * 0 in `REGS->start[0]' and 2 in `REGS->end[0]'

+ * 0 in `REGS->start[1]' and 2 in `REGS->end[1]'

+ * 0 in `REGS->start[2]' and 1 in `REGS->end[2]'

+ * 1 in `REGS->start[3]' and 2 in `REGS->end[3]'

+ * If a group matches more than once (as it might if followed by,

+ e.g., a repetition operator), then the function reports the

+ information about what the group *last* matched.

+ For example, when you match the pattern `(a)*' against the string

+ `aa', you get:

+ * 0 in `REGS->start[0]' and 2 in `REGS->end[0]'

+ * 1 in `REGS->start[1]' and 2 in `REGS->end[1]'

+ * If the I-th group does not participate in a successful match,

+ e.g., it is an alternative not taken or a repetition operator

+ allows zero repetitions of it, then the function sets

+ `REGS->start[I]' and `REGS->end[I]' to -1.

+ For example, when you match the pattern `(a)*b' against the string

+ `b', you get:

+ * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'

+ * -1 in `REGS->start[1]' and -1 in `REGS->end[1]'

+ * If the I-th group matches a zero-length string, then the function

+ sets `REGS->start[I]' and `REGS->end[I]' to the index just beyond

+ that zero-length string.

+ For example, when you match the pattern `(a*)b' against the string

+ `b', you get:

+ * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'

+ * 0 in `REGS->start[1]' and 0 in `REGS->end[1]'

+ * If an I-th group contains a J-th group in turn not contained

+ within any other group within group I and the function reports a

+ match of the I-th group, then it records in `REGS->start[J]' and

+ `REGS->end[J]' the last match (if it matched) of the J-th group.

+ For example, when you match the pattern `((a*)b)*' against the

+ string `abb', group 2 last matches the empty string, so you get

+ what it previously matched:

+ * 0 in `REGS->start[0]' and 3 in `REGS->end[0]'

+ * 2 in `REGS->start[1]' and 3 in `REGS->end[1]'

+ * 2 in `REGS->start[2]' and 2 in `REGS->end[2]'

+ When you match the pattern `((a)*b)*' against the string `abb',

+ group 2 doesn't participate in the last match, so you get:

+ * 0 in `REGS->start[0]' and 3 in `REGS->end[0]'

+ * 2 in `REGS->start[1]' and 3 in `REGS->end[1]'

+ * 0 in `REGS->start[2]' and 1 in `REGS->end[2]'

+ * If an I-th group contains a J-th group in turn not contained

+ within any other group within group I and the function sets

+ `REGS->start[I]' and `REGS->end[I]' to -1, then it also sets

+ `REGS->start[J]' and `REGS->end[J]' to -1.

+ For example, when you match the pattern `((a)*b)*c' against the

+ string `c', you get:

+ * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'

+ * -1 in `REGS->start[1]' and -1 in `REGS->end[1]'

+ * -1 in `REGS->start[2]' and -1 in `REGS->end[2]'

+File: regex.info, Node: Freeing GNU Pattern Buffers, Prev: Using Registers, Up: GNU Regex Functions

+Freeing GNU Pattern Buffers

+---------------------------

+ To free any allocated fields of a pattern buffer, you can use the

+POSIX function described in *Note Freeing POSIX Pattern Buffers::,

+since the type `regex_t'--the type for POSIX pattern buffers--is

+equivalent to the type `re_pattern_buffer'. After freeing a pattern

+buffer, you need to again compile a regular expression in it (*note GNU

+Regular Expression Compiling::.) before passing it to a matching or

+searching function.

+File: regex.info, Node: POSIX Regex Functions, Next: BSD Regex Functions, Prev: GNU Regex Functions, Up: Programming with Regex

+POSIX Regex Functions

+=====================

+ If you're writing code that has to be POSIX compatible, you'll need

+to use these functions. Their interfaces are as specified by POSIX,

+draft 1003.2/D11.2.

+* Menu:

+* POSIX Pattern Buffers:: The regex_t type.

+* POSIX Regular Expression Compiling:: regcomp ()

+* POSIX Matching:: regexec ()

+* Reporting Errors:: regerror ()

+* Using Byte Offsets:: The regmatch_t type.

+* Freeing POSIX Pattern Buffers:: regfree ()

+File: regex.info, Node: POSIX Pattern Buffers, Next: POSIX Regular Expression Compiling, Up: POSIX Regex Functions

+POSIX Pattern Buffers

+---------------------

+ To compile or match a given regular expression the POSIX way, you

+must supply a pattern buffer exactly the way you do for GNU (*note GNU

+Pattern Buffers::.). POSIX pattern buffers have type `regex_t', which

+is equivalent to the GNU pattern buffer type `re_pattern_buffer'.

+File: regex.info, Node: POSIX Regular Expression Compiling, Next: POSIX Matching, Prev: POSIX Pattern Buffers, Up: POSIX Regex Functions

+POSIX Regular Expression Compiling

+----------------------------------

+ With POSIX, you can only search for a given regular expression; you

+can't match it. To do this, you must first compile it in a pattern

+buffer, using `regcomp'.

+ To compile a pattern buffer, use:

+ int

+ regcomp (regex_t *PREG, const char *REGEX, int CFLAGS)

+PREG is the initialized pattern buffer's address, REGEX is the regular

+expression's address, and CFLAGS is the compilation flags, which Regex

+considers as a collection of bits. Here are the valid bits, as defined

+in `regex.h':

+`REG_EXTENDED'

+ says to use POSIX Extended Regular Expression syntax; if this isn't

+ set, then says to use POSIX Basic Regular Expression syntax.

+ `regcomp' sets PREG's `syntax' field accordingly.

+`REG_ICASE'

+ says to ignore case; `regcomp' sets PREG's `translate' field to a

+ translate table which ignores case, replacing anything you've put

+ there before.

+`REG_NOSUB'

+ says to set PREG's `no_sub' field; *note POSIX Matching::., for

+ what this means.

+`REG_NEWLINE'

+ says that a:

+ * match-any-character operator (*note Match-any-character

+ Operator::.) doesn't match a newline.

+ * nonmatching list not containing a newline (*note List

+ Operators::.) matches a newline.

+ * match-beginning-of-line operator (*note

+ Match-beginning-of-line Operator::.) matches the empty string

+ immediately after a newline, regardless of how `REG_NOTBOL'

+ is set (*note POSIX Matching::., for an explanation of

+ `REG_NOTBOL').

+ * match-end-of-line operator (*note Match-end-of-line

+ Operator::.) matches the empty string immediately before a

+ newline, regardless of how `REG_NOTEOL' is set (*note POSIX

+ Matching::., for an explanation of `REG_NOTEOL').

+ If `regcomp' successfully compiles the regular expression, it returns

+zero and sets `*PREG' to the compiled pattern. Except for

+`syntax' (which it sets as explained above), it also sets the same

+fields the same way as does the GNU compiling function (*note GNU

+Regular Expression Compiling::.).

+ If `regcomp' can't compile the regular expression, it returns one of

+the error codes listed here. (Except when noted differently, the

+syntax in all examples below is basic regular expression syntax.)

+`REG_BADRPT'

+ For example, the consecutive repetition operators `**' in `a**'

+ are invalid. As another example, if the syntax is extended

+ regular expression syntax, then the repetition operator `*' with

+ nothing on which to operate in `*' is invalid.

+`REG_BADBR'

+ For example, the COUNT `-1' in `a\{-1' is invalid.

+`REG_EBRACE'

+ For example, `a\{1' is missing a close-interval operator.

+`REG_EBRACK'

+ For example, `[a' is missing a close-list operator.

+`REG_ERANGE'

+ For example, the range ending point `a' that collates lower than

+ does its starting point `z' in `[z-a]' is invalid. Also, the

+ range with the character class `[:alpha:]' as its starting point in

+ `[[:alpha:]-|]'.

+`REG_ECTYPE'

+ For example, the character class name `foo' in `[[:foo:]]' is

+ invalid.

+`REG_EPAREN'

+ For example, `a\)' is missing an open-group operator and `\(a' is

+ missing a close-group operator.

+`REG_ESUBREG'

+ For example, the back reference `\2' that refers to a nonexistent

+ subexpression in `\(a\)\2' is invalid.

+`REG_EEND'

+ Returned when a regular expression causes no other more specific

+ error.

+`REG_EESCAPE'

+ For example, the trailing backslash `\' in `a\' is invalid, as is

+ the one in `\'.

+`REG_BADPAT'

+ For example, in the extended regular expression syntax, the empty

+ group `()' in `a()b' is invalid.

+`REG_ESIZE'

+ Returned when a regular expression needs a pattern buffer larger

+ than 65536 bytes.

+`REG_ESPACE'

+ Returned when a regular expression causes Regex to run out of

+ memory.

+File: regex.info, Node: POSIX Matching, Next: Reporting Errors, Prev: POSIX Regular Expression Compiling, Up: POSIX Regex Functions

+POSIX Matching

+--------------

+ Matching the POSIX way means trying to match a null-terminated string

+starting at its first character. Once you've compiled a pattern into a

+pattern buffer (*note POSIX Regular Expression Compiling::.), you can

+ask the matcher to match that pattern against a string using:

+ int

+ regexec (const regex_t *PREG, const char *STRING,

+ size_t NMATCH, regmatch_t PMATCH[], int EFLAGS)

+PREG is the address of a pattern buffer for a compiled pattern. STRING

+is the string you want to match.

+ *Note Using Byte Offsets::, for an explanation of PMATCH. If you

+pass zero for NMATCH or you compiled PREG with the compilation flag

+`REG_NOSUB' set, then `regexec' will ignore PMATCH; otherwise, you must

+allocate it to have at least NMATCH elements. `regexec' will record

+NMATCH byte offsets in PMATCH, and set to -1 any unused elements up to

+PMATCH`[NMATCH - 1]'.

+ EFLAGS specifies "execution flags"--namely, the two bits `REG_NOTBOL'

+and `REG_NOTEOL' (defined in `regex.h'). If you set `REG_NOTBOL', then

+the match-beginning-of-line operator (*note Match-beginning-of-line

+Operator::.) always fails to match. This lets you match against pieces

+of a line, as you would need to if, say, searching for repeated

+instances of a given pattern in a line; it would work correctly for

+patterns both with and without match-beginning-of-line operators.

+`REG_NOTEOL' works analogously for the match-end-of-line operator

+(*note Match-end-of-line Operator::.); it exists for symmetry.

+ `regexec' tries to find a match for PREG in STRING according to the

+syntax in PREG's `syntax' field. (*Note POSIX Regular Expression

+Compiling::, for how to set it.) The function returns zero if the

+compiled pattern matches STRING and `REG_NOMATCH' (defined in

+`regex.h') if it doesn't.

+File: regex.info, Node: Reporting Errors, Next: Using Byte Offsets, Prev: POSIX Matching, Up: POSIX Regex Functions

+Reporting Errors

+----------------

+ If either `regcomp' or `regexec' fails, it returns a nonzero error

+code, the possibilities for which are defined in `regex.h'. *Note

+POSIX Regular Expression Compiling::, and *Note POSIX Matching::, for

+what these codes mean. To get an error string corresponding to these

+codes, you can use:

+ size_t

+ regerror (int ERRCODE,

+ const regex_t *PREG,

+ char *ERRBUF,

+ size_t ERRBUF_SIZE)

+ERRCODE is an error code, PREG is the address of the pattern buffer

+which provoked the error, ERRBUF is the error buffer, and ERRBUF_SIZE

+is ERRBUF's size.

+ `regerror' returns the size in bytes of the error string

+corresponding to ERRCODE (including its terminating null). If ERRBUF

+and ERRBUF_SIZE are nonzero, it also returns in ERRBUF the first

+ERRBUF_SIZE - 1 characters of the error string, followed by a null.

+ERRBUF_SIZE must be a nonnegative number less than or equal to the size

+in bytes of ERRBUF.

+ You can call `regerror' with a null ERRBUF and a zero ERRBUF_SIZE to

+determine how large ERRBUF need be to accommodate `regerror''s error

+string.

+File: regex.info, Node: Using Byte Offsets, Next: Freeing POSIX Pattern Buffers, Prev: Reporting Errors, Up: POSIX Regex Functions

+Using Byte Offsets

+------------------

+ In POSIX, variables of type `regmatch_t' hold information analogous,

+but not identical, to GNU's registers (*note Using Registers::.).

+To get information about registers in POSIX, pass to `regexec' a

+nonzero PMATCH of type `regmatch_t', i.e., the address of a structure

+of this type, defined in `regex.h':

+ typedef struct

+ {

+ regoff_t rm_so;

+ regoff_t rm_eo;

+ } regmatch_t;

+ When reading in *Note Using Registers::, about how the matching

+function stores the information into the registers, substitute PMATCH

+for REGS, `PMATCH[I].rm_so' for `REGS->start[I]' and

+`PMATCH[I].rm_eo' for `REGS->end[I]'.

+File: regex.info, Node: Freeing POSIX Pattern Buffers, Prev: Using Byte Offsets, Up: POSIX Regex Functions

+Freeing POSIX Pattern Buffers

+-----------------------------

+ To free any allocated fields of a pattern buffer, use:

+ void

+ regfree (regex_t *PREG)

+PREG is the pattern buffer whose allocated fields you want freed.

+`regfree' also sets PREG's `allocated' and `used' fields to zero.

+After freeing a pattern buffer, you need to again compile a regular

+expression in it (*note POSIX Regular Expression Compiling::.) before

+passing it to the matching function (*note POSIX Matching::.).

+File: regex.info, Node: BSD Regex Functions, Prev: POSIX Regex Functions, Up: Programming with Regex

+BSD Regex Functions

+===================

+ If you're writing code that has to be Berkeley UNIX compatible,

+you'll need to use these functions whose interfaces are the same as

+those in Berkeley UNIX.

+* Menu:

+* BSD Regular Expression Compiling:: re_comp ()

+* BSD Searching:: re_exec ()

+File: regex.info, Node: BSD Regular Expression Compiling, Next: BSD Searching, Up: BSD Regex Functions

+BSD Regular Expression Compiling

+--------------------------------

+ With Berkeley UNIX, you can only search for a given regular

+expression; you can't match one. To search for it, you must first

+compile it. Before you compile it, you must indicate the regular

+expression syntax you want it compiled according to by setting the

+variable `re_syntax_options' (declared in `regex.h') to some syntax

+(*note Regular Expression Syntax::.).

+ To compile a regular expression use:

+ char *

+ re_comp (char *REGEX)

+REGEX is the address of a null-terminated regular expression.

+`re_comp' uses an internal pattern buffer, so you can use only the most

+recently compiled pattern buffer. This means that if you want to use a

+given regular expression that you've already compiled--but it isn't the

+latest one you've compiled--you'll have to recompile it. If you call

+`re_comp' with the null string (*not* the empty string) as the

+argument, it doesn't change the contents of the pattern buffer.

+ If `re_comp' successfully compiles the regular expression, it returns

+zero. If it can't compile the regular expression, it returns an error

+string. `re_comp''s error messages are identical to those of

+`re_compile_pattern' (*note GNU Regular Expression Compiling::.).

+File: regex.info, Node: BSD Searching, Prev: BSD Regular Expression Compiling, Up: BSD Regex Functions

+BSD Searching

+-------------

+ Searching the Berkeley UNIX way means searching in a string starting

+at its first character and trying successive positions within it to

+find a match. Once you've compiled a pattern using `re_comp' (*note

+BSD Regular Expression Compiling::.), you can ask Regex to search for

+that pattern in a string using:

+ int

+ re_exec (char *STRING)

+STRING is the address of the null-terminated string in which you want

+to search.

+ `re_exec' returns either 1 for success or 0 for failure. It

+automatically uses a GNU fastmap (*note Searching with Fastmaps::.).

+File: regex.info, Node: Copying, Next: Index, Prev: Programming with Regex, Up: Top

+GNU GENERAL PUBLIC LICENSE

+**************************

+ Version 2, June 1991

+ 675 Mass Ave, Cambridge, MA 02139, USA

+ Everyone is permitted to copy and distribute verbatim copies

+ of this license document, but changing it is not allowed.

+Preamble

+========

+ The licenses for most software are designed to take away your freedom

+to share and change it. By contrast, the GNU General Public License is

+intended to guarantee your freedom to share and change free

+software--to make sure the software is free for all its users. This

+General Public License applies to most of the Free Software

+Foundation's software and to any other program whose authors commit to

+using it. (Some other Free Software Foundation software is covered by

+the GNU Library General Public License instead.) You can apply it to

+your programs, too.

+ When we speak of free software, we are referring to freedom, not

+price. Our General Public Licenses are designed to make sure that you

+have the freedom to distribute copies of free software (and charge for

+this service if you wish), that you receive source code or can get it

+if you want it, that you can change the software or use pieces of it in

+new free programs; and that you know you can do these things.

+ To protect your rights, we need to make restrictions that forbid

+anyone to deny you these rights or to ask you to surrender the rights.

+These restrictions translate to certain responsibilities for you if you

+distribute copies of the software, or if you modify it.

+ For example, if you distribute copies of such a program, whether

+gratis or for a fee, you must give the recipients all the rights that

+you have. You must make sure that they, too, receive or can get the

+source code. And you must show them these terms so they know their

+rights.

+ We protect your rights with two steps: (1) copyright the software, and

+(2) offer you this license which gives you legal permission to copy,

+distribute and/or modify the software.

+ Also, for each author's protection and ours, we want to make certain

+that everyone understands that there is no warranty for this free

+software. If the software is modified by someone else and passed on, we

+want its recipients to know that what they have is not the original, so

+that any problems introduced by others will not reflect on the original

+authors' reputations.

+ Finally, any free program is threatened constantly by software

+patents. We wish to avoid the danger that redistributors of a free

+program will individually obtain patent licenses, in effect making the

+program proprietary. To prevent this, we have made it clear that any

+patent must be licensed for everyone's free use or not licensed at all.

+ The precise terms and conditions for copying, distribution and

+modification follow.

+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

+ 1. This License applies to any program or other work which contains a

+ notice placed by the copyright holder saying it may be distributed

+ under the terms of this General Public License. The "Program",

+ below, refers to any such program or work, and a "work based on

+ the Program" means either the Program or any derivative work under

+ copyright law: that is to say, a work containing the Program or a

+ portion of it, either verbatim or with modifications and/or

+ translated into another language. (Hereinafter, translation is

+ included without limitation in the term "modification".) Each

+ licensee is addressed as "you".

+ Activities other than copying, distribution and modification are

+ not covered by this License; they are outside its scope. The act

+ of running the Program is not restricted, and the output from the

+ Program is covered only if its contents constitute a work based on

+ the Program (independent of having been made by running the

+ Program). Whether that is true depends on what the Program does.

+ 2. You may copy and distribute verbatim copies of the Program's

+ source code as you receive it, in any medium, provided that you

+ conspicuously and appropriately publish on each copy an appropriate

+ copyright notice and disclaimer of warranty; keep intact all the

+ notices that refer to this License and to the absence of any

+ warranty; and give any other recipients of the Program a copy of

+ this License along with the Program.

+ You may charge a fee for the physical act of transferring a copy,

+ and you may at your option offer warranty protection in exchange

+ for a fee.

+ 3. You may modify your copy or copies of the Program or any portion

+ of it, thus forming a work based on the Program, and copy and

+ distribute such modifications or work under the terms of Section 1

+ above, provided that you also meet all of these conditions:

+ a. You must cause the modified files to carry prominent notices

+ stating that you changed the files and the date of any change.

+ b. You must cause any work that you distribute or publish, that

+ in whole or in part contains or is derived from the Program

+ or any part thereof, to be licensed as a whole at no charge

+ to all third parties under the terms of this License.

+ c. If the modified program normally reads commands interactively

+ when run, you must cause it, when started running for such

+ interactive use in the most ordinary way, to print or display

+ an announcement including an appropriate copyright notice and

+ a notice that there is no warranty (or else, saying that you

+ provide a warranty) and that users may redistribute the

+ program under these conditions, and telling the user how to

+ view a copy of this License. (Exception: if the Program

+ itself is interactive but does not normally print such an

+ announcement, your work based on the Program is not required

+ to print an announcement.)

+ These requirements apply to the modified work as a whole. If

+ identifiable sections of that work are not derived from the

+ Program, and can be reasonably considered independent and separate

+ works in themselves, then this License, and its terms, do not

+ apply to those sections when you distribute them as separate

+ works. But when you distribute the same sections as part of a

+ whole which is a work based on the Program, the distribution of

+ the whole must be on the terms of this License, whose permissions

+ for other licensees extend to the entire whole, and thus to each

+ and every part regardless of who wrote it.

+ Thus, it is not the intent of this section to claim rights or

+ contest your rights to work written entirely by you; rather, the

+ intent is to exercise the right to control the distribution of

+ derivative or collective works based on the Program.

+ In addition, mere aggregation of another work not based on the

+ Program with the Program (or with a work based on the Program) on

+ a volume of a storage or distribution medium does not bring the

+ other work under the scope of this License.

+ 4. You may copy and distribute the Program (or a work based on it,

+ under Section 2) in object code or executable form under the terms

+ of Sections 1 and 2 above provided that you also do one of the

+ following:

+ a. Accompany it with the complete corresponding machine-readable

+ source code, which must be distributed under the terms of

+ Sections 1 and 2 above on a medium customarily used for

+ software interchange; or,

+ b. Accompany it with a written offer, valid for at least three

+ years, to give any third party, for a charge no more than your

+ cost of physically performing source distribution, a complete

+ machine-readable copy of the corresponding source code, to be

+ distributed under the terms of Sections 1 and 2 above on a

+ medium customarily used for software interchange; or,

+ c. Accompany it with the information you received as to the offer

+ to distribute corresponding source code. (This alternative is

+ allowed only for noncommercial distribution and only if you

+ received the program in object code or executable form with

+ such an offer, in accord with Subsection b above.)

+ The source code for a work means the preferred form of the work for

+ making modifications to it. For an executable work, complete

+ source code means all the source code for all modules it contains,

+ plus any associated interface definition files, plus the scripts

+ used to control compilation and installation of the executable.

+ However, as a special exception, the source code distributed need

+ not include anything that is normally distributed (in either

+ source or binary form) with the major components (compiler,

+ kernel, and so on) of the operating system on which the executable

+ runs, unless that component itself accompanies the executable.

+ If distribution of executable or object code is made by offering

+ access to copy from a designated place, then offering equivalent

+ access to copy the source code from the same place counts as

+ distribution of the source code, even though third parties are not

+ compelled to copy the source along with the object code.

+ 5. You may not copy, modify, sublicense, or distribute the Program

+ except as expressly provided under this License. Any attempt

+ otherwise to copy, modify, sublicense or distribute the Program is

+ void, and will automatically terminate your rights under this

+ License. However, parties who have received copies, or rights,

+ from you under this License will not have their licenses

+ terminated so long as such parties remain in full compliance.

+ 6. You are not required to accept this License, since you have not

+ signed it. However, nothing else grants you permission to modify

+ or distribute the Program or its derivative works. These actions

+ are prohibited by law if you do not accept this License.

+ Therefore, by modifying or distributing the Program (or any work

+ based on the Program), you indicate your acceptance of this

+ License to do so, and all its terms and conditions for copying,

+ distributing or modifying the Program or works based on it.

+ 7. Each time you redistribute the Program (or any work based on the

+ Program), the recipient automatically receives a license from the

+ original licensor to copy, distribute or modify the Program

+ subject to these terms and conditions. You may not impose any

+ further restrictions on the recipients' exercise of the rights

+ granted herein. You are not responsible for enforcing compliance

+ by third parties to this License.

+ 8. If, as a consequence of a court judgment or allegation of patent

+ infringement or for any other reason (not limited to patent

+ issues), conditions are imposed on you (whether by court order,

+ agreement or otherwise) that contradict the conditions of this

+ License, they do not excuse you from the conditions of this

+ License. If you cannot distribute so as to satisfy simultaneously

+ your obligations under this License and any other pertinent

+ obligations, then as a consequence you may not distribute the

+ Program at all. For example, if a patent license would not permit

+ royalty-free redistribution of the Program by all those who

+ receive copies directly or indirectly through you, then the only

+ way you could satisfy both it and this License would be to refrain

+ entirely from distribution of the Program.

+ If any portion of this section is held invalid or unenforceable

+ under any particular circumstance, the balance of the section is

+ intended to apply and the section as a whole is intended to apply

+ in other circumstances.

+ It is not the purpose of this section to induce you to infringe any

+ patents or other property right claims or to contest validity of

+ any such claims; this section has the sole purpose of protecting

+ the integrity of the free software distribution system, which is

+ implemented by public license practices. Many people have made

+ generous contributions to the wide range of software distributed

+ through that system in reliance on consistent application of that

+ system; it is up to the author/donor to decide if he or she is

+ willing to distribute software through any other system and a

+ licensee cannot impose that choice.

+ This section is intended to make thoroughly clear what is believed

+ to be a consequence of the rest of this License.

+ 9. If the distribution and/or use of the Program is restricted in

+ certain countries either by patents or by copyrighted interfaces,

+ the original copyright holder who places the Program under this

+ License may add an explicit geographical distribution limitation

+ excluding those countries, so that distribution is permitted only

+ in or among countries not thus excluded. In such case, this

+ License incorporates the limitation as if written in the body of

+ this License.

+ 10. The Free Software Foundation may publish revised and/or new

+ versions of the General Public License from time to time. Such

+ new versions will be similar in spirit to the present version, but

+ may differ in detail to address new problems or concerns.

+ Each version is given a distinguishing version number. If the

+ Program specifies a version number of this License which applies

+ to it and "any later version", you have the option of following

+ the terms and conditions either of that version or of any later

+ version published by the Free Software Foundation. If the Program

+ does not specify a version number of this License, you may choose

+ any version ever published by the Free Software Foundation.

+ 11. If you wish to incorporate parts of the Program into other free

+ programs whose distribution conditions are different, write to the

+ author to ask for permission. For software which is copyrighted

+ by the Free Software Foundation, write to the Free Software

+ Foundation; we sometimes make exceptions for this. Our decision

+ will be guided by the two goals of preserving the free status of

+ all derivatives of our free software and of promoting the sharing

+ and reuse of software generally.

+ NO WARRANTY

+ 12. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO

+ WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE

+ LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT

+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT

+ WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT

+ NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

+ FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE

+ QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE

+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY

+ SERVICING, REPAIR OR CORRECTION.

+ 13. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN

+ WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY

+ MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE

+ LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,

+ INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR

+ INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF

+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU

+ OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY

+ OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN

+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

+ END OF TERMS AND CONDITIONS

+Appendix: How to Apply These Terms to Your New Programs

+=======================================================

+ If you develop a new program, and you want it to be of the greatest

+possible use to the public, the best way to achieve this is to make it

+free software which everyone can redistribute and change under these

+terms.

+ To do so, attach the following notices to the program. It is safest

+to attach them to the start of each source file to most effectively

+convey the exclusion of warranty; and each file should have at least

+the "copyright" line and a pointer to where the full notice is found.

+ ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.

+ Copyright (C) 19YY NAME OF AUTHOR

+ This program is free software; you can redistribute it and/or modify

+ it under the terms of the GNU General Public License as published by

+ the Free Software Foundation; either version 2 of the License, or

+ (at your option) any later version.

+ This program is distributed in the hope that it will be useful,

+ but WITHOUT ANY WARRANTY; without even the implied warranty of

+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+ GNU General Public License for more details.

+ You should have received a copy of the GNU General Public License

+ along with this program; if not, write to the Free Software

+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

+ Also add information on how to contact you by electronic and paper

+mail.

+ If the program is interactive, make it output a short notice like this

+when it starts in an interactive mode:

+ Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR

+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.

+ This is free software, and you are welcome to redistribute it

+ under certain conditions; type `show c' for details.

+ The hypothetical commands `show w' and `show c' should show the

+appropriate parts of the General Public License. Of course, the

+commands you use may be called something other than `show w' and `show

+c'; they could even be mouse-clicks or menu items--whatever suits your

+program.

+ You should also get your employer (if you work as a programmer) or

+your school, if any, to sign a "copyright disclaimer" for the program,

+if necessary. Here is a sample; alter the names:

+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program

+ `Gnomovision' (which makes passes at compilers) written by James Hacker.

+ SIGNATURE OF TY COON, 1 April 1989

+ Ty Coon, President of Vice

+ This General Public License does not permit incorporating your

+program into proprietary programs. If your program is a subroutine

+library, you may consider it more useful to permit linking proprietary

+applications with the library. If this is what you want to do, use the

+GNU Library General Public License instead of this License.

+File: regex.info, Node: Index, Prev: Copying, Up: Top

+Index

+*****

+* Menu:

+* $: Match-end-of-line Operator.

+* (: Grouping Operators.

+* ): Grouping Operators.

+* *: Match-zero-or-more Operator.

+* +: Match-one-or-more Operator.

+* -: List Operators.

+* .: Match-any-character Operator.

+* :] in regex: Character Class Operators.

+* ?: Match-zero-or-one Operator.

+* {: Interval Operators.

+* }: Interval Operators.

+* [: in regex: Character Class Operators.

+* [^: List Operators.

+* [: List Operators.

+* \': Match-end-of-buffer Operator.

+* \<: Match-beginning-of-word Operator.

+* \>: Match-end-of-word Operator.

+* \{: Interval Operators.

+* \}: Interval Operators.

+* \b: Match-word-boundary Operator.

+* \B: Match-within-word Operator.

+* \s: Match-syntactic-class Operator.

+* \S: Match-not-syntactic-class Operator.

+* \w: Match-word-constituent Operator.

+* \W: Match-non-word-constituent Operator.

+* \`: Match-beginning-of-buffer Operator.

+* \: List Operators.

+* ]: List Operators.

+* ^: List Operators.

+* allocated initialization: GNU Regular Expression Compiling.

+* alternation operator: Alternation Operator.

+* alternation operator and ^: Match-beginning-of-line Operator.

+* anchoring: Anchoring Operators.

+* anchors: Match-end-of-line Operator.

+* anchors: Match-beginning-of-line Operator.

+* Awk: Predefined Syntaxes.

+* back references: Back-reference Operator.

+* backtracking: Match-zero-or-more Operator.

+* backtracking: Alternation Operator.

+* beginning-of-line operator: Match-beginning-of-line Operator.

+* bracket expression: List Operators.

+* buffer field, set by re_compile_pattern: GNU Regular Expression Compiling.

+* buffer initialization: GNU Regular Expression Compiling.

+* character classes: Character Class Operators.

+* Egrep: Predefined Syntaxes.

+* Emacs: Predefined Syntaxes.

+* end in struct re_registers: Using Registers.

+* end-of-line operator: Match-end-of-line Operator.

+* fastmap initialization: GNU Regular Expression Compiling.

+* fastmaps: Searching with Fastmaps.

+* fastmap_accurate field, set by re_compile_pattern: GNU Regular Expression Compiling.

+* Grep: Predefined Syntaxes.

+* grouping: Grouping Operators.

+* ignoring case: POSIX Regular Expression Compiling.

+* interval expression: Interval Operators.

+* matching list: List Operators.

+* matching newline: List Operators.

+* matching with GNU functions: GNU Matching.

+* newline_anchor field in pattern buffer: Match-beginning-of-line Operator.

+* nonmatching list: List Operators.

+* not_bol field in pattern buffer: Match-beginning-of-line Operator.

+* num_regs in struct re_registers: Using Registers.

+* open-group operator and ^: Match-beginning-of-line Operator.

+* or operator: Alternation Operator.

+* parenthesizing: Grouping Operators.

+* pattern buffer initialization: GNU Regular Expression Compiling.

+* pattern buffer, definition of: GNU Pattern Buffers.

+* POSIX Awk: Predefined Syntaxes.

+* range argument to re_search: GNU Searching.

+* regex.c: Overview.

+* regex.h: Overview.

+* regexp anchoring: Anchoring Operators.

+* regmatch_t: Using Byte Offsets.

+* regs_allocated: Using Registers.

+* REGS_FIXED: Using Registers.

+* REGS_REALLOCATE: Using Registers.

+* REGS_UNALLOCATED: Using Registers.

+* regular expressions, syntax of: Regular Expression Syntax.

+* REG_EXTENDED: POSIX Regular Expression Compiling.

+* REG_ICASE: POSIX Regular Expression Compiling.

+* REG_NEWLINE: POSIX Regular Expression Compiling.

+* REG_NOSUB: POSIX Regular Expression Compiling.

+* RE_BACKSLASH_ESCAPE_IN_LIST: Syntax Bits.

+* RE_BK_PLUS_QM: Syntax Bits.

+* RE_CHAR_CLASSES: Syntax Bits.

+* RE_CONTEXT_INDEP_ANCHORS: Syntax Bits.

+* RE_CONTEXT_INDEP_ANCHORS (and ^): Match-beginning-of-line Operator.

+* RE_CONTEXT_INDEP_OPS: Syntax Bits.

+* RE_CONTEXT_INVALID_OPS: Syntax Bits.

+* RE_DOT_NEWLINE: Syntax Bits.

+* RE_DOT_NOT_NULL: Syntax Bits.

+* RE_INTERVALS: Syntax Bits.

+* RE_LIMITED_OPS: Syntax Bits.

+* RE_NEWLINE_ALT: Syntax Bits.

+* RE_NO_BK_BRACES: Syntax Bits.

+* RE_NO_BK_PARENS: Syntax Bits.

+* RE_NO_BK_REFS: Syntax Bits.

+* RE_NO_BK_VBAR: Syntax Bits.

+* RE_NO_EMPTY_RANGES: Syntax Bits.

+* re_nsub field, set by re_compile_pattern: GNU Regular Expression Compiling.

+* re_pattern_buffer definition: GNU Pattern Buffers.

+* re_registers: Using Registers.

+* re_syntax_options initialization: GNU Regular Expression Compiling.

+* RE_UNMATCHED_RIGHT_PAREN_ORD: Syntax Bits.

+* searching with GNU functions: GNU Searching.

+* start argument to re_search: GNU Searching.

+* start in struct re_registers: Using Registers.

+* struct re_pattern_buffer definition: GNU Pattern Buffers.

+* subexpressions: Grouping Operators.

+* syntax field, set by re_compile_pattern: GNU Regular Expression Compiling.

+* syntax bits: Syntax Bits.

+* syntax initialization: GNU Regular Expression Compiling.

+* syntax of regular expressions: Regular Expression Syntax.

+* translate initialization: GNU Regular Expression Compiling.

+* used field, set by re_compile_pattern: GNU Regular Expression Compiling.

+* word boundaries, matching: Match-word-boundary Operator.

+* \: The Backslash Character.

+* \(: Grouping Operators.

+* \): Grouping Operators.

+* \|: Alternation Operator.

+* ^: Match-beginning-of-line Operator.

+* |: Alternation Operator.

+Tag Table:

+Node: Top1064

+Node: Overview4562

+Node: Regular Expression Syntax6746

+Node: Syntax Bits7916

+Node: Predefined Syntaxes14018

+Node: Collating Elements vs. Characters17872

+Node: The Backslash Character18835

+Node: Common Operators21992

+Node: Match-self Operator23445

+Node: Match-any-character Operator23941

+Node: Concatenation Operator24520

+Node: Repetition Operators25017

+Node: Match-zero-or-more Operator25436

+Node: Match-one-or-more Operator27483

+Node: Match-zero-or-one Operator28341

+Node: Interval Operators29196

+Node: Alternation Operator30991

+Node: List Operators32489

+Node: Character Class Operators35272

+Node: Range Operator36901

+Node: Grouping Operators38930

+Node: Back-reference Operator40251

+Node: Anchoring Operators43073

+Node: Match-beginning-of-line Operator43447

+Node: Match-end-of-line Operator44779

+Node: GNU Operators45518

+Node: Word Operators45767

+Node: Non-Emacs Syntax Tables46391

+Node: Match-word-boundary Operator47465

+Node: Match-within-word Operator47858

+Node: Match-beginning-of-word Operator48255

+Node: Match-end-of-word Operator48588

+Node: Match-word-constituent Operator48908

+Node: Match-non-word-constituent Operator49234

+Node: Buffer Operators49545

+Node: Match-beginning-of-buffer Operator49952

+Node: Match-end-of-buffer Operator50264

+Node: GNU Emacs Operators50558

+Node: Syntactic Class Operators50901

+Node: Emacs Syntax Tables51307

+Node: Match-syntactic-class Operator51963

+Node: Match-not-syntactic-class Operator52560

+Node: What Gets Matched?53150

+Node: Programming with Regex53799

+Node: GNU Regex Functions54237

+Node: GNU Pattern Buffers55078

+Node: GNU Regular Expression Compiling58303

+Node: GNU Matching61181

+Node: GNU Searching63101

+Node: Matching/Searching with Split Data64913

+Node: Searching with Fastmaps66369

+Node: GNU Translate Tables68921

+Node: Using Registers70892

+Node: Freeing GNU Pattern Buffers77000

+Node: POSIX Regex Functions77593

+Node: POSIX Pattern Buffers78266

+Node: POSIX Regular Expression Compiling78709

+Node: POSIX Matching82836

+Node: Reporting Errors84791

+Node: Using Byte Offsets86048

+Node: Freeing POSIX Pattern Buffers86861

+Node: BSD Regex Functions87467

+Node: BSD Regular Expression Compiling87886

+Node: BSD Searching89258

+Node: Copying89960

+Node: Index109122

+End Tag Table

diff --git a/gnu/lib/libregex/doc/regex.texi b/gnu/lib/libregex/doc/regex.texi
new file mode 100644
index 000000000000..d93953ece20c
--- /dev/null
+++ b/gnu/lib/libregex/doc/regex.texi

@@ -0,0 +1,3138 @@

+\input texinfo

+@c %**start of header

+@setfilename regex.info

+@settitle Regex

+@c %**end of header

+@c \\{fill-paragraph} works better (for me, anyway) if the text in the

+@c source file isn't indented.

+@paragraphindent 2

+@c Define a new index for our magic constants.

+@defcodeindex cn

+@c Put everything in one index (arbitrarily chosen to be the concept index).

+@syncodeindex cn cp

+@syncodeindex ky cp

+@syncodeindex pg cp

+@syncodeindex tp cp

+@syncodeindex vr cp

+@c Here is what we use in the Info `dir' file:

+@c * Regex: (regex). Regular expression library.

+@ifinfo

+This file documents the GNU regular expression library.

+Permission is granted to make and distribute verbatim copies of this

+manual provided the copyright notice and this permission notice are

+preserved on all copies.

+@ignore

+Permission is granted to process this file through TeX and print the

+results, provided the printed document carries a copying permission

+notice identical to this one except for the removal of this paragraph

+(this paragraph not being relevant to the printed manual).

+@end ignore

+Permission is granted to copy and distribute modified versions of this

+manual under the conditions for verbatim copying, provided also that the

+section entitled ``GNU General Public License'' is included exactly as

+in the original, and provided that the entire resulting derived work is

+distributed under the terms of a permission notice identical to this one.

+Permission is granted to copy and distribute translations of this manual

+into another language, under the above conditions for modified versions,

+except that the section entitled ``GNU General Public License'' may be

+included in a translation approved by the Free Software Foundation

+instead of in the original English.

+@end ifinfo

+@titlepage

+@title Regex

+@subtitle edition 0.12a

+@subtitle 19 September 1992

+@author Kathryn A. Hargreaves

+@author Karl Berry

+@page

+@vskip 0pt plus 1filll

+Copyright @copyright{} 1992 Free Software Foundation.

+Permission is granted to make and distribute verbatim copies of this

+manual provided the copyright notice and this permission notice are

+preserved on all copies.

+Permission is granted to copy and distribute modified versions of this

+manual under the conditions for verbatim copying, provided also that the

+section entitled ``GNU General Public License'' is included exactly as

+in the original, and provided that the entire resulting derived work is

+distributed under the terms of a permission notice identical to this

+one.

+Permission is granted to copy and distribute translations of this manual

+into another language, under the above conditions for modified versions,

+except that the section entitled ``GNU General Public License'' may be

+included in a translation approved by the Free Software Foundation

+instead of in the original English.

+@end titlepage

+@ifinfo

+@node Top, Overview, (dir), (dir)

+@top Regular Expression Library

+This manual documents how to program with the GNU regular expression

+library. This is edition 0.12a of the manual, 19 September 1992.

+The first part of this master menu lists the major nodes in this Info

+document, including the index. The rest of the menu lists all the

+lower level nodes in the document.

+@menu

+* Overview::

+* Regular Expression Syntax::

+* Common Operators::

+* GNU Operators::

+* GNU Emacs Operators::

+* What Gets Matched?::

+* Programming with Regex::

+* Copying:: Copying and sharing Regex.

+* Index:: General index.

+ --- The Detailed Node Listing ---

+Regular Expression Syntax

+* Syntax Bits::

+* Predefined Syntaxes::

+* Collating Elements vs. Characters::

+* The Backslash Character::

+Common Operators

+* Match-self Operator:: Ordinary characters.

+* Match-any-character Operator:: .

+* Concatenation Operator:: Juxtaposition.

+* Repetition Operators:: * + ? @{@}

+* Alternation Operator:: |

+* List Operators:: [...] [^...]

+* Grouping Operators:: (...)

+* Back-reference Operator:: \digit

+* Anchoring Operators:: ^ $

+Repetition Operators

+* Match-zero-or-more Operator:: *

+* Match-one-or-more Operator:: +

+* Match-zero-or-one Operator:: ?

+* Interval Operators:: @{@}

+List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]})

+* Character Class Operators:: [:class:]

+* Range Operator:: start-end

+Anchoring Operators

+* Match-beginning-of-line Operator:: ^

+* Match-end-of-line Operator:: $

+GNU Operators

+* Word Operators::

+* Buffer Operators::

+Word Operators

+* Non-Emacs Syntax Tables::

+* Match-word-boundary Operator:: \b

+* Match-within-word Operator:: \B

+* Match-beginning-of-word Operator:: \<

+* Match-end-of-word Operator:: \>

+* Match-word-constituent Operator:: \w

+* Match-non-word-constituent Operator:: \W

+Buffer Operators

+* Match-beginning-of-buffer Operator:: \`

+* Match-end-of-buffer Operator:: \'

+GNU Emacs Operators

+* Syntactic Class Operators::

+Syntactic Class Operators

+* Emacs Syntax Tables::

+* Match-syntactic-class Operator:: \sCLASS

+* Match-not-syntactic-class Operator:: \SCLASS

+Programming with Regex

+* GNU Regex Functions::

+* POSIX Regex Functions::

+* BSD Regex Functions::

+GNU Regex Functions

+* GNU Pattern Buffers:: The re_pattern_buffer type.

+* GNU Regular Expression Compiling:: re_compile_pattern ()

+* GNU Matching:: re_match ()

+* GNU Searching:: re_search ()

+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()

+* Searching with Fastmaps:: re_compile_fastmap ()

+* GNU Translate Tables:: The `translate' field.

+* Using Registers:: The re_registers type and related fns.

+* Freeing GNU Pattern Buffers:: regfree ()

+POSIX Regex Functions

+* POSIX Pattern Buffers:: The regex_t type.

+* POSIX Regular Expression Compiling:: regcomp ()

+* POSIX Matching:: regexec ()

+* Reporting Errors:: regerror ()

+* Using Byte Offsets:: The regmatch_t type.

+* Freeing POSIX Pattern Buffers:: regfree ()

+BSD Regex Functions

+* BSD Regular Expression Compiling:: re_comp ()

+* BSD Searching:: re_exec ()

+@end menu

+@end ifinfo

+@node Overview, Regular Expression Syntax, Top, Top

+@chapter Overview

+A @dfn{regular expression} (or @dfn{regexp}, or @dfn{pattern}) is a text

+string that describes some (mathematical) set of strings. A regexp

+@var{r} @dfn{matches} a string @var{s} if @var{s} is in the set of

+strings described by @var{r}.

+Using the Regex library, you can:

+@itemize @bullet

+@item

+see if a string matches a specified pattern as a whole, and

+@item

+search within a string for a substring matching a specified pattern.

+@end itemize

+Some regular expressions match only one string, i.e., the set they

+describe has only one member. For example, the regular expression

+@samp{foo} matches the string @samp{foo} and no others. Other regular

+expressions match more than one string, i.e., the set they describe has

+more than one member. For example, the regular expression @samp{f*}

+matches the set of strings made up of any number (including zero) of

+@samp{f}s. As you can see, some characters in regular expressions match

+themselves (such as @samp{f}) and some don't (such as @samp{*}); the

+ones that don't match themselves instead let you specify patterns that

+describe many different strings.

+To either match or search for a regular expression with the Regex

+library functions, you must first compile it with a Regex pattern

+compiling function. A @dfn{compiled pattern} is a regular expression

+converted to the internal format used by the library functions. Once

+you've compiled a pattern, you can use it for matching or searching any

+number of times.

+The Regex library consists of two source files: @file{regex.h} and

+@file{regex.c}.

+@pindex regex.h

+@pindex regex.c

+Regex provides three groups of functions with which you can operate on

+regular expressions. One group---the @sc{gnu} group---is more powerful

+but not completely compatible with the other two, namely the @sc{posix}

+and Berkeley @sc{unix} groups; its interface was designed specifically

+for @sc{gnu}. The other groups have the same interfaces as do the

+regular expression functions in @sc{posix} and Berkeley

+@sc{unix}.

+We wrote this chapter with programmers in mind, not users of

+programs---such as Emacs---that use Regex. We describe the Regex

+library in its entirety, not how to write regular expressions that a

+particular program understands.

+@node Regular Expression Syntax, Common Operators, Overview, Top

+@chapter Regular Expression Syntax

+@cindex regular expressions, syntax of

+@cindex syntax of regular expressions

+@dfn{Characters} are things you can type. @dfn{Operators} are things in

+a regular expression that match one or more characters. You compose

+regular expressions from operators, which in turn you specify using one

+or more characters.

+Most characters represent what we call the match-self operator, i.e.,

+they match themselves; we call these characters @dfn{ordinary}. Other

+characters represent either all or parts of fancier operators; e.g.,

+@samp{.} represents what we call the match-any-character operator

+(which, no surprise, matches (almost) any character); we call these

+characters @dfn{special}. Two different things determine what

+characters represent what operators:

+@enumerate

+@item

+the regular expression syntax your program has told the Regex library to

+recognize, and

+@item

+the context of the character in the regular expression.

+@end enumerate

+In the following sections, we describe these things in more detail.

+@menu

+* Syntax Bits::

+* Predefined Syntaxes::

+* Collating Elements vs. Characters::

+* The Backslash Character::

+@end menu

+@node Syntax Bits, Predefined Syntaxes, , Regular Expression Syntax

+@section Syntax Bits

+@cindex syntax bits

+In any particular syntax for regular expressions, some characters are

+always special, others are sometimes special, and others are never

+special. The particular syntax that Regex recognizes for a given

+regular expression depends on the value in the @code{syntax} field of

+the pattern buffer of that regular expression.

+You get a pattern buffer by compiling a regular expression. @xref{GNU

+Pattern Buffers}, and @ref{POSIX Pattern Buffers}, for more information

+on pattern buffers. @xref{GNU Regular Expression Compiling}, @ref{POSIX

+Regular Expression Compiling}, and @ref{BSD Regular Expression

+Compiling}, for more information on compiling.

+Regex considers the value of the @code{syntax} field to be a collection

+of bits; we refer to these bits as @dfn{syntax bits}. In most cases,

+they affect what characters represent what operators. We describe the

+meanings of the operators to which we refer in @ref{Common Operators},

+@ref{GNU Operators}, and @ref{GNU Emacs Operators}.

+For reference, here is the complete list of syntax bits, in alphabetical

+order:

+@table @code

+@cnindex RE_BACKSLASH_ESCAPE_IN_LISTS

+@item RE_BACKSLASH_ESCAPE_IN_LISTS

+If this bit is set, then @samp{\} inside a list (@pxref{List Operators})

+quotes (makes ordinary, if it's special) the following character; if

+this bit isn't set, then @samp{\} is an ordinary character inside lists.

+(@xref{The Backslash Character}, for what @samp{\} does outside of lists.)

+@cnindex RE_BK_PLUS_QM

+@item RE_BK_PLUS_QM

+If this bit is set, then @samp{\+} represents the match-one-or-more

+operator and @samp{\?} represents the match-zero-or-one operator; if

+this bit isn't set, then @samp{+} represents the match-one-or-more

+operator and @samp{?} represents the match-zero-or-one operator. This

+bit is irrelevant if @code{RE_LIMITED_OPS} is set.

+@cnindex RE_CHAR_CLASSES

+@item RE_CHAR_CLASSES

+If this bit is set, then you can use character classes in lists; if this

+bit isn't set, then you can't.

+@cnindex RE_CONTEXT_INDEP_ANCHORS

+@item RE_CONTEXT_INDEP_ANCHORS

+If this bit is set, then @samp{^} and @samp{$} are special anywhere outside

+a list; if this bit isn't set, then these characters are special only in

+certain contexts. @xref{Match-beginning-of-line Operator}, and

+@ref{Match-end-of-line Operator}.

+@cnindex RE_CONTEXT_INDEP_OPS

+@item RE_CONTEXT_INDEP_OPS

+If this bit is set, then certain characters are special anywhere outside

+a list; if this bit isn't set, then those characters are special only in

+some contexts and are ordinary elsewhere. Specifically, if this bit

+isn't set then @samp{*}, and (if the syntax bit @code{RE_LIMITED_OPS}

+isn't set) @samp{+} and @samp{?} (or @samp{\+} and @samp{\?}, depending

+on the syntax bit @code{RE_BK_PLUS_QM}) represent repetition operators

+only if they're not first in a regular expression or just after an

+open-group or alternation operator. The same holds for @samp{@{} (or

+@samp{\@{}, depending on the syntax bit @code{RE_NO_BK_BRACES}) if

+it is the beginning of a valid interval and the syntax bit

+@code{RE_INTERVALS} is set.

+@cnindex RE_CONTEXT_INVALID_OPS

+@item RE_CONTEXT_INVALID_OPS

+If this bit is set, then repetition and alternation operators can't be

+in certain positions within a regular expression. Specifically, the

+regular expression is invalid if it has:

+@itemize @bullet

+@item

+a repetition operator first in the regular expression or just after a

+match-beginning-of-line, open-group, or alternation operator; or

+@item

+an alternation operator first or last in the regular expression, just

+before a match-end-of-line operator, or just after an alternation or

+open-group operator.

+@end itemize

+If this bit isn't set, then you can put the characters representing the

+repetition and alternation operators anywhere in a regular expression.

+Whether or not they will in fact be operators in certain positions

+depends on other syntax bits.

+@cnindex RE_DOT_NEWLINE

+@item RE_DOT_NEWLINE

+If this bit is set, then the match-any-character operator matches

+a newline; if this bit isn't set, then it doesn't.

+@cnindex RE_DOT_NOT_NULL

+@item RE_DOT_NOT_NULL

+If this bit is set, then the match-any-character operator doesn't match

+a null character; if this bit isn't set, then it does.

+@cnindex RE_INTERVALS

+@item RE_INTERVALS

+If this bit is set, then Regex recognizes interval operators; if this bit

+isn't set, then it doesn't.

+@cnindex RE_LIMITED_OPS

+@item RE_LIMITED_OPS

+If this bit is set, then Regex doesn't recognize the match-one-or-more,

+match-zero-or-one or alternation operators; if this bit isn't set, then

+it does.

+@cnindex RE_NEWLINE_ALT

+@item RE_NEWLINE_ALT

+If this bit is set, then newline represents the alternation operator; if

+this bit isn't set, then newline is ordinary.

+@cnindex RE_NO_BK_BRACES

+@item RE_NO_BK_BRACES

+If this bit is set, then @samp{@{} represents the open-interval operator

+and @samp{@}} represents the close-interval operator; if this bit isn't

+set, then @samp{\@{} represents the open-interval operator and

+@samp{\@}} represents the close-interval operator. This bit is relevant

+only if @code{RE_INTERVALS} is set.

+@cnindex RE_NO_BK_PARENS

+@item RE_NO_BK_PARENS

+If this bit is set, then @samp{(} represents the open-group operator and

+@samp{)} represents the close-group operator; if this bit isn't set, then

+@samp{\(} represents the open-group operator and @samp{\)} represents

+the close-group operator.

+@cnindex RE_NO_BK_REFS

+@item RE_NO_BK_REFS

+If this bit is set, then Regex doesn't recognize @samp{\}@var{digit} as

+the back reference operator; if this bit isn't set, then it does.

+@cnindex RE_NO_BK_VBAR

+@item RE_NO_BK_VBAR

+If this bit is set, then @samp{|} represents the alternation operator;

+if this bit isn't set, then @samp{\|} represents the alternation

+operator. This bit is irrelevant if @code{RE_LIMITED_OPS} is set.

+@cnindex RE_NO_EMPTY_RANGES

+@item RE_NO_EMPTY_RANGES

+If this bit is set, then a regular expression with a range whose ending

+point collates lower than its starting point is invalid; if this bit

+isn't set, then Regex considers such a range to be empty.

+@cnindex RE_UNMATCHED_RIGHT_PAREN_ORD

+@item RE_UNMATCHED_RIGHT_PAREN_ORD

+If this bit is set and the regular expression has no matching open-group

+operator, then Regex considers what would otherwise be a close-group

+operator (based on how @code{RE_NO_BK_PARENS} is set) to match @samp{)}.

+@end table

+@node Predefined Syntaxes, Collating Elements vs. Characters, Syntax Bits, Regular Expression Syntax

+@section Predefined Syntaxes

+If you're programming with Regex, you can set a pattern buffer's

+(@pxref{GNU Pattern Buffers}, and @ref{POSIX Pattern Buffers})

+@code{syntax} field either to an arbitrary combination of syntax bits

+(@pxref{Syntax Bits}) or else to the configurations defined by Regex.

+These configurations define the syntaxes used by certain

+programs---@sc{gnu} Emacs,

+@cindex Emacs

+@sc{posix} Awk,

+@cindex POSIX Awk

+traditional Awk,

+@cindex Awk

+Grep,

+@cindex Grep

+@cindex Egrep

+Egrep---in addition to syntaxes for @sc{posix} basic and extended

+regular expressions.

+The predefined syntaxes---taken directly from @file{regex.h}---are:

+@example

+#define RE_SYNTAX_EMACS 0

+#define RE_SYNTAX_AWK \

+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \

+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \

+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \

+ | RE_UNMATCHED_RIGHT_PAREN_ORD)

+#define RE_SYNTAX_POSIX_AWK \

+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)

+#define RE_SYNTAX_GREP \

+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \

+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \

+ | RE_NEWLINE_ALT)

+#define RE_SYNTAX_EGREP \

+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \

+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \

+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \

+ | RE_NO_BK_VBAR)

+#define RE_SYNTAX_POSIX_EGREP \

+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)

+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */

+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

+/* Syntax bits common to both basic and extended POSIX regex syntax. */

+#define _RE_SYNTAX_POSIX_COMMON \

+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \

+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)

+#define RE_SYNTAX_POSIX_BASIC \

+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes

+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this

+ isn't minimal, since other operators, such as \`, aren't disabled. */

+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \

+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

+#define RE_SYNTAX_POSIX_EXTENDED \

+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \

+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \

+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \

+ | RE_UNMATCHED_RIGHT_PAREN_ORD)

+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS

+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */

+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \

+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \

+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \

+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \

+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)

+@end example

+@node Collating Elements vs. Characters, The Backslash Character, Predefined Syntaxes, Regular Expression Syntax

+@section Collating Elements vs.@: Characters

+@sc{posix} generalizes the notion of a character to that of a

+collating element. It defines a @dfn{collating element} to be ``a

+sequence of one or more bytes defined in the current collating sequence

+as a unit of collation.''

+This generalizes the notion of a character in

+two ways. First, a single character can map into two or more collating

+elements. For example, the German

+@tex

+`\ss'

+@end tex

+@ifinfo

+``es-zet''

+@end ifinfo

+collates as the collating element @samp{s} followed by another collating

+element @samp{s}. Second, two or more characters can map into one

+collating element. For example, the Spanish @samp{ll} collates after

+@samp{l} and before @samp{m}.

+Since @sc{posix}'s ``collating element'' preserves the essential idea of

+a ``character,'' we use the latter, more familiar, term in this document.

+@node The Backslash Character, , Collating Elements vs. Characters, Regular Expression Syntax

+@section The Backslash Character

+@cindex \

+The @samp{\} character has one of four different meanings, depending on

+the context in which you use it and what syntax bits are set

+(@pxref{Syntax Bits}). It can: 1) stand for itself, 2) quote the next

+character, 3) introduce an operator, or 4) do nothing.

+@enumerate

+@item

+It stands for itself inside a list

+(@pxref{List Operators}) if the syntax bit

+@code{RE_BACKSLASH_ESCAPE_IN_LISTS} is not set. For example, @samp{[\]}

+would match @samp{\}.

+@item

+It quotes (makes ordinary, if it's special) the next character when you

+use it either:

+@itemize @bullet

+@item

+outside a list,@footnote{Sometimes

+you don't have to explicitly quote special characters to make

+them ordinary. For instance, most characters lose any special meaning

+inside a list (@pxref{List Operators}). In addition, if the syntax bits

+@code{RE_CONTEXT_INVALID_OPS} and @code{RE_CONTEXT_INDEP_OPS}

+aren't set, then (for historical reasons) the matcher considers special

+characters ordinary if they are in contexts where the operations they

+represent make no sense; for example, the match-zero-or-more

+operator (represented by @samp{*}) matches itself in the regular

+expression @samp{*foo} because there is no preceding expression on which

+it can operate. It is poor practice, however, to depend on this

+behavior; if you want a special character to be ordinary outside a list,

+it's better to always quote it, regardless.} or

+@item

+inside a list and the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is set.

+@end itemize

+@item

+It introduces an operator when followed by certain ordinary

+characters---sometimes only when certain syntax bits are set. See the

+cases @code{RE_BK_PLUS_QM}, @code{RE_NO_BK_BRACES}, @code{RE_NO_BK_VBAR},

+@code{RE_NO_BK_PARENS}, @code{RE_NO_BK_REFS} in @ref{Syntax Bits}. Also:

+@itemize @bullet

+@item

+@samp{\b} represents the match-word-boundary operator

+(@pxref{Match-word-boundary Operator}).

+@item

+@samp{\B} represents the match-within-word operator

+(@pxref{Match-within-word Operator}).

+@item

+@samp{\<} represents the match-beginning-of-word operator @*

+(@pxref{Match-beginning-of-word Operator}).

+@item

+@samp{\>} represents the match-end-of-word operator

+(@pxref{Match-end-of-word Operator}).

+@item

+@samp{\w} represents the match-word-constituent operator

+(@pxref{Match-word-constituent Operator}).

+@item

+@samp{\W} represents the match-non-word-constituent operator

+(@pxref{Match-non-word-constituent Operator}).

+@item

+@samp{\`} represents the match-beginning-of-buffer

+operator and @samp{\'} represents the match-end-of-buffer operator

+(@pxref{Buffer Operators}).

+@item

+If Regex was compiled with the C preprocessor symbol @code{emacs}

+defined, then @samp{\s@var{class}} represents the match-syntactic-class

+operator and @samp{\S@var{class}} represents the

+match-not-syntactic-class operator (@pxref{Syntactic Class Operators}).

+@end itemize

+@item

+In all other cases, Regex ignores @samp{\}. For example,

+@samp{\n} matches @samp{n}.

+@end enumerate

+@node Common Operators, GNU Operators, Regular Expression Syntax, Top

+@chapter Common Operators

+You compose regular expressions from operators. In the following

+sections, we describe the regular expression operators specified by

+@sc{posix}; @sc{gnu} also uses these. Most operators have more than one

+representation as characters. @xref{Regular Expression Syntax}, for

+what characters represent what operators under what circumstances.

+For most operators that can be represented in two ways, one

+representation is a single character and the other is that character

+preceded by @samp{\}. For example, either @samp{(} or @samp{\(}

+represents the open-group operator. Which one does depends on the

+setting of a syntax bit, in this case @code{RE_NO_BK_PARENS}. Why is

+this so? Historical reasons dictate some of the varying

+representations, while @sc{posix} dictates others.

+Finally, almost all characters lose any special meaning inside a list

+(@pxref{List Operators}).

+@menu

+* Match-self Operator:: Ordinary characters.

+* Match-any-character Operator:: .

+* Concatenation Operator:: Juxtaposition.

+* Repetition Operators:: * + ? @{@}

+* Alternation Operator:: |

+* List Operators:: [...] [^...]

+* Grouping Operators:: (...)

+* Back-reference Operator:: \digit

+* Anchoring Operators:: ^ $

+@end menu

+@node Match-self Operator, Match-any-character Operator, , Common Operators

+@section The Match-self Operator (@var{ordinary character})

+This operator matches the character itself. All ordinary characters

+(@pxref{Regular Expression Syntax}) represent this operator. For

+example, @samp{f} is always an ordinary character, so the regular

+expression @samp{f} matches only the string @samp{f}. In

+particular, it does @emph{not} match the string @samp{ff}.

+@node Match-any-character Operator, Concatenation Operator, Match-self Operator, Common Operators

+@section The Match-any-character Operator (@code{.})

+@cindex @samp{.}

+This operator matches any single printing or nonprinting character

+except it won't match a:

+@table @asis

+@item newline

+if the syntax bit @code{RE_DOT_NEWLINE} isn't set.

+@item null

+if the syntax bit @code{RE_DOT_NOT_NULL} is set.

+@end table

+The @samp{.} (period) character represents this operator. For example,

+@samp{a.b} matches any three-character string beginning with @samp{a}

+and ending with @samp{b}.

+@node Concatenation Operator, Repetition Operators, Match-any-character Operator, Common Operators

+@section The Concatenation Operator

+This operator concatenates two regular expressions @var{a} and @var{b}.

+No character represents this operator; you simply put @var{b} after

+@var{a}. The result is a regular expression that will match a string if

+@var{a} matches its first part and @var{b} matches the rest. For

+example, @samp{xy} (two match-self operators) matches @samp{xy}.

+@node Repetition Operators, Alternation Operator, Concatenation Operator, Common Operators

+@section Repetition Operators

+Repetition operators repeat the preceding regular expression a specified

+number of times.

+@menu

+* Match-zero-or-more Operator:: *

+* Match-one-or-more Operator:: +

+* Match-zero-or-one Operator:: ?

+* Interval Operators:: @{@}

+@end menu

+@node Match-zero-or-more Operator, Match-one-or-more Operator, , Repetition Operators

+@subsection The Match-zero-or-more Operator (@code{*})

+@cindex @samp{*}

+This operator repeats the smallest possible preceding regular expression

+as many times as necessary (including zero) to match the pattern.

+@samp{*} represents this operator. For example, @samp{o*}

+matches any string made up of zero or more @samp{o}s. Since this

+operator operates on the smallest preceding regular expression,

+@samp{fo*} has a repeating @samp{o}, not a repeating @samp{fo}. So,

+@samp{fo*} matches @samp{f}, @samp{fo}, @samp{foo}, and so on.

+Since the match-zero-or-more operator is a suffix operator, it may be

+useless as such when no regular expression precedes it. This is the

+case when it:

+@itemize @bullet

+@item

+is first in a regular expression, or

+@item

+follows a match-beginning-of-line, open-group, or alternation

+operator.

+@end itemize

+@noindent

+Three different things can happen in these cases:

+@enumerate

+@item

+If the syntax bit @code{RE_CONTEXT_INVALID_OPS} is set, then the

+regular expression is invalid.

+@item

+If @code{RE_CONTEXT_INVALID_OPS} isn't set, but

+@code{RE_CONTEXT_INDEP_OPS} is, then @samp{*} represents the

+match-zero-or-more operator (which then operates on the empty string).

+@item

+Otherwise, @samp{*} is ordinary.

+@end enumerate

+@cindex backtracking

+The matcher processes a match-zero-or-more operator by first matching as

+many repetitions of the smallest preceding regular expression as it can.

+Then it continues to match the rest of the pattern.

+If it can't match the rest of the pattern, it backtracks (as many times

+as necessary), each time discarding one of the matches until it can

+either match the entire pattern or be certain that it cannot get a

+match. For example, when matching @samp{ca*ar} against @samp{caaar},

+the matcher first matches all three @samp{a}s of the string with the

+@samp{a*} of the regular expression. However, it cannot then match the

+final @samp{ar} of the regular expression against the final @samp{r} of

+the string. So it backtracks, discarding the match of the last @samp{a}

+in the string. It can then match the remaining @samp{ar}.

+@node Match-one-or-more Operator, Match-zero-or-one Operator, Match-zero-or-more Operator, Repetition Operators

+@subsection The Match-one-or-more Operator (@code{+} or @code{\+})

+@cindex @samp{+}

+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't recognize

+this operator. Otherwise, if the syntax bit @code{RE_BK_PLUS_QM} isn't

+set, then @samp{+} represents this operator; if it is, then @samp{\+}

+does.

+This operator is similar to the match-zero-or-more operator except that

+it repeats the preceding regular expression at least once;

+@pxref{Match-zero-or-more Operator}, for what it operates on, how some

+syntax bits affect it, and how Regex backtracks to match it.

+For example, supposing that @samp{+} represents the match-one-or-more

+operator; then @samp{ca+r} matches, e.g., @samp{car} and

+@samp{caaaar}, but not @samp{cr}.

+@node Match-zero-or-one Operator, Interval Operators, Match-one-or-more Operator, Repetition Operators

+@subsection The Match-zero-or-one Operator (@code{?} or @code{\?})

+@cindex @samp{?}

+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't

+recognize this operator. Otherwise, if the syntax bit

+@code{RE_BK_PLUS_QM} isn't set, then @samp{?} represents this operator;

+if it is, then @samp{\?} does.

+This operator is similar to the match-zero-or-more operator except that

+it repeats the preceding regular expression once or not at all;

+@pxref{Match-zero-or-more Operator}, to see what it operates on, how

+some syntax bits affect it, and how Regex backtracks to match it.

+For example, supposing that @samp{?} represents the match-zero-or-one

+operator; then @samp{ca?r} matches both @samp{car} and @samp{cr}, but

+nothing else.

+@node Interval Operators, , Match-zero-or-one Operator, Repetition Operators

+@subsection Interval Operators (@code{@{} @dots{} @code{@}} or @code{\@{} @dots{} @code{\@}})

+@cindex interval expression

+@cindex @samp{@{}

+@cindex @samp{@}}

+@cindex @samp{\@{}

+@cindex @samp{\@}}

+If the syntax bit @code{RE_INTERVALS} is set, then Regex recognizes

+@dfn{interval expressions}. They repeat the smallest possible preceding

+regular expression a specified number of times.

+If the syntax bit @code{RE_NO_BK_BRACES} is set, @samp{@{} represents

+the @dfn{open-interval operator} and @samp{@}} represents the

+@dfn{close-interval operator}; otherwise, @samp{\@{} and @samp{\@}} do.

+Specifically, supposing that @samp{@{} and @samp{@}} represent the

+open-interval and close-interval operators; then:

+@table @code

+@item @{@var{count}@}

+matches exactly @var{count} occurrences of the preceding regular

+expression.

+@item @{@var{min,}@}

+matches @var{min} or more occurrences of the preceding regular

+expression.

+@item @{@var{min, max}@}

+matches at least @var{min} but no more than @var{max} occurrences of

+the preceding regular expression.

+@end table

+The interval expression (but not necessarily the regular expression that

+contains it) is invalid if:

+@itemize @bullet

+@item

+@var{min} is greater than @var{max}, or

+@item

+any of @var{count}, @var{min}, or @var{max} are outside the range

+zero to @code{RE_DUP_MAX} (which symbol @file{regex.h}

+defines).

+@end itemize

+If the interval expression is invalid and the syntax bit

+@code{RE_NO_BK_BRACES} is set, then Regex considers all the

+characters in the would-be interval to be ordinary. If that bit

+isn't set, then the regular expression is invalid.

+If the interval expression is valid but there is no preceding regular

+expression on which to operate, then if the syntax bit

+@code{RE_CONTEXT_INVALID_OPS} is set, the regular expression is invalid.

+If that bit isn't set, then Regex considers all the characters---other

+than backslashes, which it ignores---in the would-be interval to be

+ordinary.

+@node Alternation Operator, List Operators, Repetition Operators, Common Operators

+@section The Alternation Operator (@code{|} or @code{\|})

+@kindex |

+@kindex \|

+@cindex alternation operator

+@cindex or operator

+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't

+recognize this operator. Otherwise, if the syntax bit

+@code{RE_NO_BK_VBAR} is set, then @samp{|} represents this operator;

+otherwise, @samp{\|} does.

+Alternatives match one of a choice of regular expressions:

+if you put the character(s) representing the alternation operator between

+any two regular expressions @var{a} and @var{b}, the result matches

+the union of the strings that @var{a} and @var{b} match. For

+example, supposing that @samp{|} is the alternation operator, then

+@samp{foo|bar|quux} would match any of @samp{foo}, @samp{bar} or

+@samp{quux}.

+@ignore

+@c Nobody needs to disallow empty alternatives any more.

+If the syntax bit @code{RE_NO_EMPTY_ALTS} is set, then if either of the regular

+expressions @var{a} or @var{b} is empty, the

+regular expression is invalid. More precisely, if this syntax bit is

+set, then the alternation operator can't:

+@itemize @bullet

+@item

+be first or last in a regular expression;

+@item

+follow either another alternation operator or an open-group operator

+(@pxref{Grouping Operators}); or

+@item

+precede a close-group operator.

+@end itemize

+@noindent

+For example, supposing @samp{(} and @samp{)} represent the open and

+close-group operators, then @samp{|foo}, @samp{foo|}, @samp{foo||bar},

+@samp{foo(|bar)}, and @samp{(foo|)bar} would all be invalid.

+@end ignore

+The alternation operator operates on the @emph{largest} possible

+surrounding regular expressions. (Put another way, it has the lowest

+precedence of any regular expression operator.)

+Thus, the only way you can

+delimit its arguments is to use grouping. For example, if @samp{(} and

+@samp{)} are the open and close-group operators, then @samp{fo(o|b)ar}

+would match either @samp{fooar} or @samp{fobar}. (@samp{foo|bar} would

+match @samp{foo} or @samp{bar}.)

+@cindex backtracking

+The matcher usually tries all combinations of alternatives so as to

+match the longest possible string. For example, when matching

+@samp{(fooq|foo)*(qbarquux|bar)} against @samp{fooqbarquux}, it cannot

+take, say, the first (``depth-first'') combination it could match, since

+then it would be content to match just @samp{fooqbar}.

+@comment xx something about leftmost-longest

+@node List Operators, Grouping Operators, Alternation Operator, Common Operators

+@section List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]})

+@cindex matching list

+@cindex @samp{[}

+@cindex @samp{]}

+@cindex @samp{^}

+@cindex @samp{-}

+@cindex @samp{\}

+@cindex @samp{[^}

+@cindex nonmatching list

+@cindex matching newline

+@cindex bracket expression

+@dfn{Lists}, also called @dfn{bracket expressions}, are a set of one or

+more items. An @dfn{item} is a character,

+@ignore

+(These get added when they get implemented.)

+a collating symbol, an equivalence class expression,

+@end ignore

+a character class expression, or a range expression. The syntax bits

+affect which kinds of items you can put in a list. We explain the last

+two items in subsections below. Empty lists are invalid.

+A @dfn{matching list} matches a single character represented by one of

+the list items. You form a matching list by enclosing one or more items

+within an @dfn{open-matching-list operator} (represented by @samp{[})

+and a @dfn{close-list operator} (represented by @samp{]}).

+For example, @samp{[ab]} matches either @samp{a} or @samp{b}.

+@samp{[ad]*} matches the empty string and any string composed of just

+@samp{a}s and @samp{d}s in any order. Regex considers invalid a regular

+expression with a @samp{[} but no matching

+@samp{]}.

+@dfn{Nonmatching lists} are similar to matching lists except that they

+match a single character @emph{not} represented by one of the list

+items. You use an @dfn{open-nonmatching-list operator} (represented by

+@samp{[^}@footnote{Regex therefore doesn't consider the @samp{^} to be

+the first character in the list. If you put a @samp{^} character first

+in (what you think is) a matching list, you'll turn it into a

+nonmatching list.}) instead of an open-matching-list operator to start a

+nonmatching list.

+For example, @samp{[^ab]} matches any character except @samp{a} or

+@samp{b}.

+If the @code{posix_newline} field in the pattern buffer (@pxref{GNU

+Pattern Buffers}) is set, then nonmatching lists do not match a newline.

+Most characters lose any special meaning inside a list. The special

+characters inside a list follow.

+@table @samp

+@item ]

+ends the list if it's not the first list item. So, if you want to make

+the @samp{]} character a list item, you must put it first.

+@item \

+quotes the next character if the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is

+set.

+@ignore

+Put these in if they get implemented.

+@item [.

+represents the open-collating-symbol operator (@pxref{Collating Symbol

+Operators}).

+@item .]

+represents the close-collating-symbol operator.

+@item [=

+represents the open-equivalence-class operator (@pxref{Equivalence Class

+Operators}).

+@item =]

+represents the close-equivalence-class operator.

+@end ignore

+@item [:

+represents the open-character-class operator (@pxref{Character Class

+Operators}) if the syntax bit @code{RE_CHAR_CLASSES} is set and what

+follows is a valid character class expression.

+@item :]

+represents the close-character-class operator if the syntax bit

+@code{RE_CHAR_CLASSES} is set and what precedes it is an

+open-character-class operator followed by a valid character class name.

+@item -

+represents the range operator (@pxref{Range Operator}) if it's

+not first or last in a list or the ending point of a range.

+@end table

+@noindent

+All other characters are ordinary. For example, @samp{[.*]} matches

+@samp{.} and @samp{*}.

+@menu

+* Character Class Operators:: [:class:]

+* Range Operator:: start-end

+@end menu

+@ignore

+(If collating symbols and equivalence class expressions get implemented,

+then add this.)

+node Collating Symbol Operators

+subsubsection Collating Symbol Operators (@code{[.} @dots{} @code{.]})

+If the syntax bit @code{XX} is set, then you can represent

+collating symbols inside lists. You form a @dfn{collating symbol} by

+putting a collating element between an @dfn{open-collating-symbol

+operator} and a @dfn{close-collating-symbol operator}. @samp{[.}

+represents the open-collating-symbol operator and @samp{.]} represents

+the close-collating-symbol operator. For example, if @samp{ll} is a

+collating element, then @samp{[[.ll.]]} would match @samp{ll}.

+node Equivalence Class Operators

+subsubsection Equivalence Class Operators (@code{[=} @dots{} @code{=]})

+@cindex equivalence class expression in regex

+@cindex @samp{[=} in regex

+@cindex @samp{=]} in regex

+If the syntax bit @code{XX} is set, then Regex recognizes equivalence class

+expressions inside lists. An @dfn{equivalence class expression} is a set

+of collating elements which all belong to the same equivalence class.

+You form an equivalence class expression by putting a collating

+element between an @dfn{open-equivalence-class operator} and a

+@dfn{close-equivalence-class operator}. @samp{[=} represents the

+open-equivalence-class operator and @samp{=]} represents the

+close-equivalence-class operator. For example, if @samp{a} and @samp{A}

+were an equivalence class, then both @samp{[[=a=]]} and @samp{[[=A=]]}

+would match both @samp{a} and @samp{A}. If the collating element in an

+equivalence class expression isn't part of an equivalence class, then

+the matcher considers the equivalence class expression to be a collating

+symbol.

+@end ignore

+@node Character Class Operators, Range Operator, , List Operators

+@subsection Character Class Operators (@code{[:} @dots{} @code{:]})

+@cindex character classes

+@cindex @samp{[:} in regex

+@cindex @samp{:]} in regex

+If the syntax bit @code{RE_CHAR_CLASSES} is set, then Regex

+recognizes character class expressions inside lists. A @dfn{character

+class expression} matches one character from a given class. You form a

+character class expression by putting a character class name between an

+@dfn{open-character-class operator} (represented by @samp{[:}) and a

+@dfn{close-character-class operator} (represented by @samp{:]}). The

+character class names and their meanings are:

+@table @code

+@item alnum

+letters and digits

+@item alpha

+letters

+@item blank

+system-dependent; for @sc{gnu}, a space or tab

+@item cntrl

+control characters (in the @sc{ascii} encoding, code 0177 and codes

+less than 040)

+@item digit

+digits

+@item graph

+same as @code{print} except omits space

+@item lower

+lowercase letters

+@item print

+printable characters (in the @sc{ascii} encoding, space through

+tilde---codes 040 through 0176)

+@item punct

+neither control nor alphanumeric characters

+@item space

+space, tab, carriage return, newline, vertical tab, and form feed

+@item upper

+uppercase letters

+@item xdigit

+hexadecimal digits: @code{0}--@code{9}, @code{a}--@code{f}, @code{A}--@code{F}

+@end table

+@noindent

+These correspond to the definitions in the C library's @file{<ctype.h>}

+facility. For example, @samp{[:alpha:]} corresponds to the standard

+facility @code{isalpha}. Regex recognizes character class expressions

+only inside of lists; so @samp{[[:alpha:]]} matches any letter, but

+@samp{[:alpha:]} outside of a bracket expression and not followed by a

+repetition operator matches just itself.

+@node Range Operator, , Character Class Operators, List Operators

+@subsection The Range Operator (@code{-})

+Regex recognizes @dfn{range expressions} inside a list. They represent

+those characters

+that fall between two elements in the current collating sequence. You

+form a range expression by putting a @dfn{range operator} between two

+@ignore

+(If these get implemented, then substitute this for ``characters.'')

+of any of the following: characters, collating elements, collating symbols,

+and equivalence class expressions. The starting point of the range and

+the ending point of the range don't have to be the same kind of item,

+e.g., the starting point could be a collating element and the ending

+point could be an equivalence class expression. If a range's ending

+point is an equivalence class, then all the collating elements in that

+class will be in the range.

+@end ignore

+characters.@footnote{You can't use a character class for the starting

+or ending point of a range, since a character class is not a single

+character.} @samp{-} represents the range operator. For example,

+@samp{a-f} within a list represents all the characters from @samp{a}

+through @samp{f}

+inclusively.

+If the syntax bit @code{RE_NO_EMPTY_RANGES} is set, then if the range's

+ending point collates less than its starting point, the range (and the

+regular expression containing it) is invalid. For example, the regular

+expression @samp{[z-a]} would be invalid. If this bit isn't set, then

+Regex considers such a range to be empty.

+Since @samp{-} represents the range operator, if you want to make a

+@samp{-} character itself

+a list item, you must do one of the following:

+@itemize @bullet

+@item

+Put the @samp{-} either first or last in the list.

+@item

+Include a range whose starting point collates strictly lower than

+@samp{-} and whose ending point collates equal or higher. Unless a

+range is the first item in a list, a @samp{-} can't be its starting

+point, but @emph{can} be its ending point. That is because Regex

+considers @samp{-} to be the range operator unless it is preceded by

+another @samp{-}. For example, in the @sc{ascii} encoding, @samp{)},

+@samp{*}, @samp{+}, @samp{,}, @samp{-}, @samp{.}, and @samp{/} are

+contiguous characters in the collating sequence. You might think that

+@samp{[)-+--/]} has two ranges: @samp{)-+} and @samp{--/}. Rather, it

+has the ranges @samp{)-+} and @samp{+--}, plus the character @samp{/}, so

+it matches, e.g., @samp{,}, not @samp{.}.

+@item

+Put a range whose starting point is @samp{-} first in the list.

+@end itemize

+For example, @samp{[-a-z]} matches a lowercase letter or a hyphen (in

+English, in @sc{ascii}).

+@node Grouping Operators, Back-reference Operator, List Operators, Common Operators

+@section Grouping Operators (@code{(} @dots{} @code{)} or @code{\(} @dots{} @code{\)})

+@kindex (

+@kindex )

+@kindex \(

+@kindex \)

+@cindex grouping

+@cindex subexpressions

+@cindex parenthesizing

+A @dfn{group}, also known as a @dfn{subexpression}, consists of an

+@dfn{open-group operator}, any number of other operators, and a

+@dfn{close-group operator}. Regex treats this sequence as a unit, just

+as mathematics and programming languages treat a parenthesized

+expression as a unit.

+Therefore, using @dfn{groups}, you can:

+@itemize @bullet

+@item

+delimit the argument(s) to an alternation operator (@pxref{Alternation

+Operator}) or a repetition operator (@pxref{Repetition

+Operators}).

+@item

+keep track of the indices of the substring that matched a given group.

+@xref{Using Registers}, for a precise explanation.

+This lets you:

+@itemize @bullet

+@item

+use the back-reference operator (@pxref{Back-reference Operator}).

+@item

+use registers (@pxref{Using Registers}).

+@end itemize

+@end itemize

+If the syntax bit @code{RE_NO_BK_PARENS} is set, then @samp{(} represents

+the open-group operator and @samp{)} represents the

+close-group operator; otherwise, @samp{\(} and @samp{\)} do.

+If the syntax bit @code{RE_UNMATCHED_RIGHT_PAREN_ORD} is set and a

+close-group operator has no matching open-group operator, then Regex

+considers it to match @samp{)}.

+@node Back-reference Operator, Anchoring Operators, Grouping Operators, Common Operators

+@section The Back-reference Operator (@code{\}@var{digit})

+@cindex back references

+If the syntax bit @code{RE_NO_BK_REF} isn't set, then Regex recognizes

+back references. A back reference matches a specified preceding group.

+The back reference operator is represented by @samp{\@var{digit}}

+anywhere after the end of a regular expression's @w{@var{digit}-th}

+group (@pxref{Grouping Operators}).

+@var{digit} must be between @samp{1} and @samp{9}. The matcher assigns

+numbers 1 through 9 to the first nine groups it encounters. By using

+one of @samp{\1} through @samp{\9} after the corresponding group's

+close-group operator, you can match a substring identical to the

+one that the group does.

+Back references match according to the following (in all examples below,

+@samp{(} represents the open-group, @samp{)} the close-group, @samp{@{}

+the open-interval and @samp{@}} the close-interval operator):

+@itemize @bullet

+@item

+If the group matches a substring, the back reference matches an

+identical substring. For example, @samp{(a)\1} matches @samp{aa} and

+@samp{(bana)na\1bo\1} matches @samp{bananabanabobana}. Likewise,

+@samp{(.*)\1} matches any (newline-free if the syntax bit

+@code{RE_DOT_NEWLINE} isn't set) string that is composed of two

+identical halves; the @samp{(.*)} matches the first half and the

+@samp{\1} matches the second half.

+@item

+If the group matches more than once (as it might if followed

+by, e.g., a repetition operator), then the back reference matches the

+substring the group @emph{last} matched. For example,

+@samp{((a*)b)*\1\2} matches @samp{aabababa}; first @w{group 1} (the

+outer one) matches @samp{aab} and @w{group 2} (the inner one) matches

+@samp{aa}. Then @w{group 1} matches @samp{ab} and @w{group 2} matches

+@samp{a}. So, @samp{\1} matches @samp{ab} and @samp{\2} matches

+@samp{a}.

+@item

+If the group doesn't participate in a match, i.e., it is part of an

+alternative not taken or a repetition operator allows zero repetitions

+of it, then the back reference makes the whole match fail. For example,

+@samp{(one()|two())-and-(three\2|four\3)} matches @samp{one-and-three}

+and @samp{two-and-four}, but not @samp{one-and-four} or

+@samp{two-and-three}. For example, if the pattern matches

+@samp{one-and-}, then its @w{group 2} matches the empty string and its

+@w{group 3} doesn't participate in the match. So, if it then matches

+@samp{four}, then when it tries to back reference @w{group 3}---which it

+will attempt to do because @samp{\3} follows the @samp{four}---the match

+will fail because @w{group 3} didn't participate in the match.

+@end itemize

+You can use a back reference as an argument to a repetition operator. For

+example, @samp{(a(b))\2*} matches @samp{a} followed by one or more

+@samp{b}s. Similarly, @samp{(a(b))\2@{3@}} matches @samp{abbbb}.

+If there is no preceding @w{@var{digit}-th} subexpression, the regular

+expression is invalid.

+@node Anchoring Operators, , Back-reference Operator, Common Operators

+@section Anchoring Operators

+@cindex anchoring

+@cindex regexp anchoring

+These operators can constrain a pattern to match only at the beginning or

+end of the entire string or at the beginning or end of a line.

+@menu

+* Match-beginning-of-line Operator:: ^

+* Match-end-of-line Operator:: $

+@end menu

+@node Match-beginning-of-line Operator, Match-end-of-line Operator, , Anchoring Operators

+@subsection The Match-beginning-of-line Operator (@code{^})

+@kindex ^

+@cindex beginning-of-line operator

+@cindex anchors

+This operator can match the empty string either at the beginning of the

+string or after a newline character. Thus, it is said to @dfn{anchor}

+the pattern to the beginning of a line.

+In the cases following, @samp{^} represents this operator. (Otherwise,

+@samp{^} is ordinary.)

+@itemize @bullet

+@item

+It (the @samp{^}) is first in the pattern, as in @samp{^foo}.

+@cnindex RE_CONTEXT_INDEP_ANCHORS @r{(and @samp{^})}

+@item

+The syntax bit @code{RE_CONTEXT_INDEP_ANCHORS} is set, and it is outside

+a bracket expression.

+@cindex open-group operator and @samp{^}

+@cindex alternation operator and @samp{^}

+@item

+It follows an open-group or alternation operator, as in @samp{a\(^b\)}

+and @samp{a\|^b}. @xref{Grouping Operators}, and @ref{Alternation

+Operator}.

+@end itemize

+These rules imply that some valid patterns containing @samp{^} cannot be

+matched; for example, @samp{foo^bar} if @code{RE_CONTEXT_INDEP_ANCHORS}

+is set.

+@vindex not_bol @r{field in pattern buffer}

+If the @code{not_bol} field is set in the pattern buffer (@pxref{GNU

+Pattern Buffers}), then @samp{^} fails to match at the beginning of the

+string. @xref{POSIX Matching}, for when you might find this useful.

+@vindex newline_anchor @r{field in pattern buffer}

+If the @code{newline_anchor} field isn't set in the pattern buffer, then

+@samp{^} fails to match after a newline. This is useful when you do not

+regard the string to be matched as broken into lines.

+@node Match-end-of-line Operator, , Match-beginning-of-line Operator, Anchoring Operators

+@subsection The Match-end-of-line Operator (@code{$})

+@kindex $

+@cindex end-of-line operator

+@cindex anchors

+This operator can match the empty string either at the end of

+the string or before a newline character in the string. Thus, it is

+said to @dfn{anchor} the pattern to the end of a line.

+It is always represented by @samp{$}. For example, @samp{foo$} usually

+matches, e.g., @samp{foo} and, e.g., the first three characters of

+@samp{foo\nbar}.

+Its interaction with the syntax bits and pattern buffer fields is

+exactly the dual of @samp{^}'s; see the previous section. (That is,

+``beginning'' becomes ``end'', ``next'' becomes ``previous'', and

+``after'' becomes ``before''.)

+@node GNU Operators, GNU Emacs Operators, Common Operators, Top

+@chapter GNU Operators

+Following are operators that @sc{gnu} defines (and @sc{posix} doesn't).

+@menu

+* Word Operators::

+* Buffer Operators::

+@end menu

+@node Word Operators, Buffer Operators, , GNU Operators

+@section Word Operators

+The operators in this section require Regex to recognize parts of words.

+Regex uses a syntax table to determine whether or not a character is

+part of a word, i.e., whether or not it is @dfn{word-constituent}.

+@menu

+* Non-Emacs Syntax Tables::

+* Match-word-boundary Operator:: \b

+* Match-within-word Operator:: \B

+* Match-beginning-of-word Operator:: \<

+* Match-end-of-word Operator:: \>

+* Match-word-constituent Operator:: \w

+* Match-non-word-constituent Operator:: \W

+@end menu

+@node Non-Emacs Syntax Tables, Match-word-boundary Operator, , Word Operators

+@subsection Non-Emacs Syntax Tables

+A @dfn{syntax table} is an array indexed by the characters in your

+character set. In the @sc{ascii} encoding, therefore, a syntax table

+has 256 elements. Regex always uses a @code{char *} variable

+@code{re_syntax_table} as its syntax table. In some cases, it

+initializes this variable and in others it expects you to initialize it.

+@itemize @bullet

+@item

+If Regex is compiled with the preprocessor symbols @code{emacs} and

+@code{SYNTAX_TABLE} both undefined, then Regex allocates

+@code{re_syntax_table} and initializes an element @var{i} either to

+@code{Sword} (which it defines) if @var{i} is a letter, number, or

+@samp{_}, or to zero if it's not.

+@item

+If Regex is compiled with @code{emacs} undefined but @code{SYNTAX_TABLE}

+defined, then Regex expects you to define a @code{char *} variable

+@code{re_syntax_table} to be a valid syntax table.

+@item

+@xref{Emacs Syntax Tables}, for what happens when Regex is compiled with

+the preprocessor symbol @code{emacs} defined.

+@end itemize

+@node Match-word-boundary Operator, Match-within-word Operator, Non-Emacs Syntax Tables, Word Operators

+@subsection The Match-word-boundary Operator (@code{\b})

+@cindex @samp{\b}

+@cindex word boundaries, matching

+This operator (represented by @samp{\b}) matches the empty string at

+either the beginning or the end of a word. For example, @samp{\brat\b}

+matches the separate word @samp{rat}.

+@node Match-within-word Operator, Match-beginning-of-word Operator, Match-word-boundary Operator, Word Operators

+@subsection The Match-within-word Operator (@code{\B})

+@cindex @samp{\B}

+This operator (represented by @samp{\B}) matches the empty string within

+a word. For example, @samp{c\Brat\Be} matches @samp{crate}, but

+@samp{dirty \Brat} doesn't match @samp{dirty rat}.

+@node Match-beginning-of-word Operator, Match-end-of-word Operator, Match-within-word Operator, Word Operators

+@subsection The Match-beginning-of-word Operator (@code{\<})

+@cindex @samp{\<}

+This operator (represented by @samp{\<}) matches the empty string at the

+beginning of a word.

+@node Match-end-of-word Operator, Match-word-constituent Operator, Match-beginning-of-word Operator, Word Operators

+@subsection The Match-end-of-word Operator (@code{\>})

+@cindex @samp{\>}

+This operator (represented by @samp{\>}) matches the empty string at the

+end of a word.

+@node Match-word-constituent Operator, Match-non-word-constituent Operator, Match-end-of-word Operator, Word Operators

+@subsection The Match-word-constituent Operator (@code{\w})

+@cindex @samp{\w}

+This operator (represented by @samp{\w}) matches any word-constituent

+character.

+@node Match-non-word-constituent Operator, , Match-word-constituent Operator, Word Operators

+@subsection The Match-non-word-constituent Operator (@code{\W})

+@cindex @samp{\W}

+This operator (represented by @samp{\W}) matches any character that is

+not word-constituent.

+@node Buffer Operators, , Word Operators, GNU Operators

+@section Buffer Operators

+Following are operators which work on buffers. In Emacs, a @dfn{buffer}

+is, naturally, an Emacs buffer. For other programs, Regex considers the

+entire string to be matched as the buffer.

+@menu

+* Match-beginning-of-buffer Operator:: \`

+* Match-end-of-buffer Operator:: \'

+@end menu

+@node Match-beginning-of-buffer Operator, Match-end-of-buffer Operator, , Buffer Operators

+@subsection The Match-beginning-of-buffer Operator (@code{\`})

+@cindex @samp{\`}

+This operator (represented by @samp{\`}) matches the empty string at the

+beginning of the buffer.

+@node Match-end-of-buffer Operator, , Match-beginning-of-buffer Operator, Buffer Operators

+@subsection The Match-end-of-buffer Operator (@code{\'})

+@cindex @samp{\'}

+This operator (represented by @samp{\'}) matches the empty string at the

+end of the buffer.

+@node GNU Emacs Operators, What Gets Matched?, GNU Operators, Top

+@chapter GNU Emacs Operators

+Following are operators that @sc{gnu} defines (and @sc{posix} doesn't)

+that you can use only when Regex is compiled with the preprocessor

+symbol @code{emacs} defined.

+@menu

+* Syntactic Class Operators::

+@end menu

+@node Syntactic Class Operators, , , GNU Emacs Operators

+@section Syntactic Class Operators

+The operators in this section require Regex to recognize the syntactic

+classes of characters. Regex uses a syntax table to determine this.

+@menu

+* Emacs Syntax Tables::

+* Match-syntactic-class Operator:: \sCLASS

+* Match-not-syntactic-class Operator:: \SCLASS

+@end menu

+@node Emacs Syntax Tables, Match-syntactic-class Operator, , Syntactic Class Operators

+@subsection Emacs Syntax Tables

+A @dfn{syntax table} is an array indexed by the characters in your

+character set. In the @sc{ascii} encoding, therefore, a syntax table

+has 256 elements.

+If Regex is compiled with the preprocessor symbol @code{emacs} defined,

+then Regex expects you to define and initialize the variable

+@code{re_syntax_table} to be an Emacs syntax table. Emacs' syntax

+tables are more complicated than Regex's own (@pxref{Non-Emacs Syntax

+Tables}). @xref{Syntax, , Syntax, emacs, The GNU Emacs User's Manual},

+for a description of Emacs' syntax tables.

+@node Match-syntactic-class Operator, Match-not-syntactic-class Operator, Emacs Syntax Tables, Syntactic Class Operators

+@subsection The Match-syntactic-class Operator (@code{\s}@var{class})

+@cindex @samp{\s}

+This operator matches any character whose syntactic class is represented

+by a specified character. @samp{\s@var{class}} represents this operator

+where @var{class} is the character representing the syntactic class you

+want. For example, @samp{w} represents the syntactic

+class of word-constituent characters, so @samp{\sw} matches any

+word-constituent character.

+@node Match-not-syntactic-class Operator, , Match-syntactic-class Operator, Syntactic Class Operators

+@subsection The Match-not-syntactic-class Operator (@code{\S}@var{class})

+@cindex @samp{\S}

+This operator is similar to the match-syntactic-class operator except

+that it matches any character whose syntactic class is @emph{not}

+represented by the specified character. @samp{\S@var{class}} represents

+this operator. For example, @samp{w} represents the syntactic class of

+word-constituent characters, so @samp{\Sw} matches any character that is

+not word-constituent.

+@node What Gets Matched?, Programming with Regex, GNU Emacs Operators, Top

+@chapter What Gets Matched?

+Regex usually matches strings according to the ``leftmost longest''

+rule; that is, it chooses the longest of the leftmost matches. This

+does not mean that, for a regular expression containing subexpressions,

+it simply chooses the longest match for each subexpression, left to

+right; the overall match must also be the longest possible one.

+For example, @samp{(ac*)(c*d[ac]*)\1} matches @samp{acdacaaa}, not

+@samp{acdac}, as it would if it were to choose the longest match for the

+first subexpression.

+@node Programming with Regex, Copying, What Gets Matched?, Top

+@chapter Programming with Regex

+Here we describe how you use the Regex data structures and functions in

+C programs. Regex has three interfaces: one designed for @sc{gnu}, one

+compatible with @sc{posix} and one compatible with Berkeley @sc{unix}.

+@menu

+* GNU Regex Functions::

+* POSIX Regex Functions::

+* BSD Regex Functions::

+@end menu

+@node GNU Regex Functions, POSIX Regex Functions, , Programming with Regex

+@section GNU Regex Functions

+If you're writing code that doesn't need to be compatible with either

+@sc{posix} or Berkeley @sc{unix}, you can use these functions. They

+provide more options than the other interfaces.

+@menu

+* GNU Pattern Buffers:: The re_pattern_buffer type.

+* GNU Regular Expression Compiling:: re_compile_pattern ()

+* GNU Matching:: re_match ()

+* GNU Searching:: re_search ()

+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()

+* Searching with Fastmaps:: re_compile_fastmap ()

+* GNU Translate Tables:: The `translate' field.

+* Using Registers:: The re_registers type and related fns.

+* Freeing GNU Pattern Buffers:: regfree ()

+@end menu

+@node GNU Pattern Buffers, GNU Regular Expression Compiling, , GNU Regex Functions

+@subsection GNU Pattern Buffers

+@cindex pattern buffer, definition of

+@tindex re_pattern_buffer @r{definition}

+@tindex struct re_pattern_buffer @r{definition}

+To compile, match, or search for a given regular expression, you must

+supply a pattern buffer. A @dfn{pattern buffer} holds one compiled

+regular expression.@footnote{Regular expressions are also referred to as

+``patterns,'' hence the name ``pattern buffer.''}

+You can have several different pattern buffers simultaneously, each

+holding a compiled pattern for a different regular expression.

+@file{regex.h} defines the pattern buffer @code{struct} as follows:

+@example

+ /* Space that holds the compiled pattern. It is declared as

+ `unsigned char *' because its elements are

+ sometimes used as array indexes. */

+ unsigned char *buffer;

+ /* Number of bytes to which `buffer' points. */

+ unsigned long allocated;

+ /* Number of bytes actually used in `buffer'. */

+ unsigned long used;

+ /* Syntax setting with which the pattern was compiled. */

+ reg_syntax_t syntax;

+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses

+ the fastmap, if there is one, to skip over impossible

+ starting points for matches. */

+ char *fastmap;

+ /* Either a translate table to apply to all characters before

+ comparing them, or zero for no translation. The translation

+ is applied to a pattern when it is compiled and to a string

+ when it is matched. */

+ char *translate;

+ /* Number of subexpressions found by the compiler. */

+ size_t re_nsub;

+ /* Zero if this pattern cannot match the empty string, one else.

+ Well, in truth it's used only in `re_search_2', to see

+ whether or not we should use the fastmap, so we don't set

+ this absolutely perfectly; see `re_compile_fastmap' (the

+ `duplicate' case). */

+ unsigned can_be_null : 1;

+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure

+ for `max (RE_NREGS, re_nsub + 1)' groups.

+ If REGS_REALLOCATE, reallocate space if necessary.

+ If REGS_FIXED, use what's there. */

+#define REGS_UNALLOCATED 0

+#define REGS_REALLOCATE 1

+#define REGS_FIXED 2

+ unsigned regs_allocated : 2;

+ /* Set to zero when `regex_compile' compiles a pattern; set to one

+ by `re_compile_fastmap' if it updates the fastmap. */

+ unsigned fastmap_accurate : 1;

+ /* If set, `re_match_2' does not return information about

+ subexpressions. */

+ unsigned no_sub : 1;

+ /* If set, a beginning-of-line anchor doesn't match at the

+ beginning of the string. */

+ unsigned not_bol : 1;

+ /* Similarly for an end-of-line anchor. */

+ unsigned not_eol : 1;

+ /* If true, an anchor at a newline matches. */

+ unsigned newline_anchor : 1;

+@end example

+@node GNU Regular Expression Compiling, GNU Matching, GNU Pattern Buffers, GNU Regex Functions

+@subsection GNU Regular Expression Compiling

+In @sc{gnu}, you can both match and search for a given regular

+expression. To do either, you must first compile it in a pattern buffer

+(@pxref{GNU Pattern Buffers}).

+@cindex syntax initialization

+@vindex re_syntax_options @r{initialization}

+Regular expressions match according to the syntax with which they were

+compiled; with @sc{gnu}, you indicate what syntax you want by setting

+the variable @code{re_syntax_options} (declared in @file{regex.h} and

+defined in @file{regex.c}) before calling the compiling function,

+@code{re_compile_pattern} (see below). @xref{Syntax Bits}, and

+@ref{Predefined Syntaxes}.

+You can change the value of @code{re_syntax_options} at any time.

+Usually, however, you set its value once and then never change it.

+@cindex pattern buffer initialization

+@code{re_compile_pattern} takes a pattern buffer as an argument. You

+must initialize the following fields:

+@table @code

+@item translate

+@vindex translate @r{initialization}

+Initialize this to point to a translate table if you want one, or to

+zero if you don't. We explain translate tables in @ref{GNU Translate

+Tables}.

+@item fastmap

+@vindex fastmap @r{initialization}

+Initialize this to nonzero if you want a fastmap, or to zero if you

+don't.

+@item buffer

+@itemx allocated

+@vindex buffer @r{initialization}

+@vindex allocated @r{initialization}

+@findex malloc

+If you want @code{re_compile_pattern} to allocate memory for the

+compiled pattern, set both of these to zero. If you have an existing

+block of memory (allocated with @code{malloc}) you want Regex to use,

+set @code{buffer} to its address and @code{allocated} to its size (in

+bytes).

+@code{re_compile_pattern} uses @code{realloc} to extend the space for

+the compiled pattern as necessary.

+@end table

+To compile a pattern buffer, use:

+@findex re_compile_pattern

+@example

+char *

+re_compile_pattern (const char *@var{regex}, const int @var{regex_size},

+ struct re_pattern_buffer *@var{pattern_buffer})

+@end example

+@noindent

+@var{regex} is the regular expression's address, @var{regex_size} is its

+length, and @var{pattern_buffer} is the pattern buffer's address.

+If @code{re_compile_pattern} successfully compiles the regular

+expression, it returns zero and sets @code{*@var{pattern_buffer}} to the

+compiled pattern. It sets the pattern buffer's fields as follows:

+@table @code

+@item buffer

+@vindex buffer @r{field, set by @code{re_compile_pattern}}

+to the compiled pattern.

+@item used

+@vindex used @r{field, set by @code{re_compile_pattern}}

+to the number of bytes the compiled pattern in @code{buffer} occupies.

+@item syntax

+@vindex syntax @r{field, set by @code{re_compile_pattern}}

+to the current value of @code{re_syntax_options}.

+@item re_nsub

+@vindex re_nsub @r{field, set by @code{re_compile_pattern}}

+to the number of subexpressions in @var{regex}.

+@item fastmap_accurate

+@vindex fastmap_accurate @r{field, set by @code{re_compile_pattern}}

+to zero on the theory that the pattern you're compiling is different

+than the one previously compiled into @code{buffer}; in that case (since

+you can't make a fastmap without a compiled pattern),

+@code{fastmap} would either contain an incompatible fastmap, or nothing

+at all.

+@c xx what else?

+@end table

+If @code{re_compile_pattern} can't compile @var{regex}, it returns an

+error string corresponding to one of the errors listed in @ref{POSIX

+Regular Expression Compiling}.

+@node GNU Matching, GNU Searching, GNU Regular Expression Compiling, GNU Regex Functions

+@subsection GNU Matching

+@cindex matching with GNU functions

+Matching the @sc{gnu} way means trying to match as much of a string as

+possible starting at a position within it you specify. Once you've compiled

+a pattern into a pattern buffer (@pxref{GNU Regular Expression

+Compiling}), you can ask the matcher to match that pattern against a

+string using:

+@findex re_match

+@example

+int

+re_match (struct re_pattern_buffer *@var{pattern_buffer},

+ const char *@var{string}, const int @var{size},

+ const int @var{start}, struct re_registers *@var{regs})

+@end example

+@noindent

+@var{pattern_buffer} is the address of a pattern buffer containing a

+compiled pattern. @var{string} is the string you want to match; it can

+contain newline and null characters. @var{size} is the length of that

+string. @var{start} is the string index at which you want to

+begin matching; the first character of @var{string} is at index zero.

+@xref{Using Registers}, for an explanation of @var{regs}; you can safely

+pass zero.

+@code{re_match} matches the regular expression in @var{pattern_buffer}

+against the string @var{string} according to the syntax in

+@var{pattern_buffer}'s @code{syntax} field. (@xref{GNU Regular

+Expression Compiling}, for how to set it.) The function returns

+@math{-1} if the compiled pattern does not match any part of

+@var{string} and @math{-2} if an internal error happens; otherwise, it

+returns how many (possibly zero) characters of @var{string} the pattern

+matched.

+An example: suppose @var{pattern_buffer} points to a pattern buffer

+containing the compiled pattern for @samp{a*}, and @var{string} points

+to @samp{aaaaab} (whereupon @var{size} should be 6). Then if @var{start}

+is 2, @code{re_match} returns 3, i.e., @samp{a*} would have matched the

+last three @samp{a}s in @var{string}. If @var{start} is 0,

+@code{re_match} returns 5, i.e., @samp{a*} would have matched all the

+@samp{a}s in @var{string}. If @var{start} is either 5 or 6, it returns

+zero.

+If @var{start} is not between zero and @var{size}, then

+@code{re_match} returns @math{-1}.

+@node GNU Searching, Matching/Searching with Split Data, GNU Matching, GNU Regex Functions

+@subsection GNU Searching

+@cindex searching with GNU functions

+@dfn{Searching} means trying to match starting at successive positions

+within a string. The function @code{re_search} does this.

+Before calling @code{re_search}, you must compile your regular

+expression. @xref{GNU Regular Expression Compiling}.

+Here is the function declaration:

+@findex re_search

+@example

+int

+re_search (struct re_pattern_buffer *@var{pattern_buffer},

+ const char *@var{string}, const int @var{size},

+ const int @var{start}, const int @var{range},

+ struct re_registers *@var{regs})

+@end example

+@noindent

+@vindex start @r{argument to @code{re_search}}

+@vindex range @r{argument to @code{re_search}}

+whose arguments are the same as those to @code{re_match} (@pxref{GNU

+Matching}) except that the two arguments @var{start} and @var{range}

+replace @code{re_match}'s argument @var{start}.

+If @var{range} is positive, then @code{re_search} attempts a match

+starting first at index @var{start}, then at @math{@var{start} + 1} if

+that fails, and so on, up to @math{@var{start} + @var{range}}; if

+@var{range} is negative, then it attempts a match starting first at

+index @var{start}, then at @math{@var{start} - 1} if that fails, and so

+on.

+If @var{start} is not between zero and @var{size}, then @code{re_search}

+returns @math{-1}. When @var{range} is positive, @code{re_search}

+adjusts @var{range} so that @math{@var{start} + @var{range} - 1} is

+between zero and @var{size}, if necessary; that way it won't search

+outside of @var{string}. Similarly, when @var{range} is negative,

+@code{re_search} adjusts @var{range} so that @math{@var{start} +

+@var{range} + 1} is between zero and @var{size}, if necessary.

+If the @code{fastmap} field of @var{pattern_buffer} is zero,

+@code{re_search} matches starting at consecutive positions; otherwise,

+it uses @code{fastmap} to make the search more efficient.

+@xref{Searching with Fastmaps}.

+If no match is found, @code{re_search} returns @math{-1}. If

+a match is found, it returns the index where the match began. If an

+internal error happens, it returns @math{-2}.

+@node Matching/Searching with Split Data, Searching with Fastmaps, GNU Searching, GNU Regex Functions

+@subsection Matching and Searching with Split Data

+Using the functions @code{re_match_2} and @code{re_search_2}, you can

+match or search in data that is divided into two strings.

+The function:

+@findex re_match_2

+@example

+int

+re_match_2 (struct re_pattern_buffer *@var{buffer},

+ const char *@var{string1}, const int @var{size1},

+ const char *@var{string2}, const int @var{size2},

+ const int @var{start},

+ struct re_registers *@var{regs},

+ const int @var{stop})

+@end example

+@noindent

+is similar to @code{re_match} (@pxref{GNU Matching}) except that you

+pass @emph{two} data strings and sizes, and an index @var{stop} beyond

+which you don't want the matcher to try matching. As with

+@code{re_match}, if it succeeds, @code{re_match_2} returns how many

+characters of @var{string} it matched. Regard @var{string1} and

+@var{string2} as concatenated when you set the arguments @var{start} and

+@var{stop} and use the contents of @var{regs}; @code{re_match_2} never

+returns a value larger than @math{@var{size1} + @var{size2}}.

+The function:

+@findex re_search_2

+@example

+int

+re_search_2 (struct re_pattern_buffer *@var{buffer},

+ const char *@var{string1}, const int @var{size1},

+ const char *@var{string2}, const int @var{size2},

+ const int @var{start}, const int @var{range},

+ struct re_registers *@var{regs},

+ const int @var{stop})

+@end example

+@noindent

+is similarly related to @code{re_search}.

+@node Searching with Fastmaps, GNU Translate Tables, Matching/Searching with Split Data, GNU Regex Functions

+@subsection Searching with Fastmaps

+@cindex fastmaps

+If you're searching through a long string, you should use a fastmap.

+Without one, the searcher tries to match at consecutive positions in the

+string. Generally, most of the characters in the string could not start

+a match. It takes much longer to try matching at a given position in the

+string than it does to check in a table whether or not the character at

+that position could start a match. A @dfn{fastmap} is such a table.

+More specifically, a fastmap is an array indexed by the characters in

+your character set. Under the @sc{ascii} encoding, therefore, a fastmap

+has 256 elements. If you want the searcher to use a fastmap with a

+given pattern buffer, you must allocate the array and assign the array's

+address to the pattern buffer's @code{fastmap} field. You either can

+compile the fastmap yourself or have @code{re_search} do it for you;

+when @code{fastmap} is nonzero, it automatically compiles a fastmap the

+first time you search using a particular compiled pattern.

+To compile a fastmap yourself, use:

+@findex re_compile_fastmap

+@example

+int

+re_compile_fastmap (struct re_pattern_buffer *@var{pattern_buffer})

+@end example

+@noindent

+@var{pattern_buffer} is the address of a pattern buffer. If the

+character @var{c} could start a match for the pattern,

+@code{re_compile_fastmap} makes

+@code{@var{pattern_buffer}->fastmap[@var{c}]} nonzero. It returns

+@math{0} if it can compile a fastmap and @math{-2} if there is an

+internal error. For example, if @samp{|} is the alternation operator

+and @var{pattern_buffer} holds the compiled pattern for @samp{a|b}, then

+@code{re_compile_fastmap} sets @code{fastmap['a']} and

+@code{fastmap['b']} (and no others).

+@code{re_search} uses a fastmap as it moves along in the string: it

+checks the string's characters until it finds one that's in the fastmap.

+Then it tries matching at that character. If the match fails, it

+repeats the process. So, by using a fastmap, @code{re_search} doesn't

+waste time trying to match at positions in the string that couldn't

+start a match.

+If you don't want @code{re_search} to use a fastmap,

+store zero in the @code{fastmap} field of the pattern buffer before

+calling @code{re_search}.

+Once you've initialized a pattern buffer's @code{fastmap} field, you

+need never do so again---even if you compile a new pattern in

+it---provided the way the field is set still reflects whether or not you

+want a fastmap. @code{re_search} will still either do nothing if

+@code{fastmap} is null or, if it isn't, compile a new fastmap for the

+new pattern.

+@node GNU Translate Tables, Using Registers, Searching with Fastmaps, GNU Regex Functions

+@subsection GNU Translate Tables

+If you set the @code{translate} field of a pattern buffer to a translate

+table, then the @sc{gnu} Regex functions to which you've passed that

+pattern buffer use it to apply a simple transformation

+to all the regular expression and string characters at which they look.

+A @dfn{translate table} is an array indexed by the characters in your

+character set. Under the @sc{ascii} encoding, therefore, a translate

+table has 256 elements. The array's elements are also characters in

+your character set. When the Regex functions see a character @var{c},

+they use @code{translate[@var{c}]} in its place, with one exception: the

+character after a @samp{\} is not translated. (This ensures that the

+operators, e.g., @samp{\B} and @samp{\b}, are always distinguishable.)

+For example, a table that maps all lowercase letters to the

+corresponding uppercase ones would cause the matcher to ignore

+differences in case.@footnote{A table that maps all uppercase letters to

+the corresponding lowercase ones would work just as well for this

+purpose.} Such a table would map all characters except lowercase letters

+to themselves, and lowercase letters to the corresponding uppercase

+ones. Under the @sc{ascii} encoding, here's how you could initialize

+such a table (we'll call it @code{case_fold}):

+@example

+for (i = 0; i < 256; i++)

+ case_fold[i] = i;

+for (i = 'a'; i <= 'z'; i++)

+ case_fold[i] = i - ('a' - 'A');

+@end example

+You tell Regex to use a translate table on a given pattern buffer by

+assigning that table's address to the @code{translate} field of that

+buffer. If you don't want Regex to do any translation, put zero into

+this field. You'll get weird results if you change the table's contents

+anytime between compiling the pattern buffer, compiling its fastmap, and

+matching or searching with the pattern buffer.

+@node Using Registers, Freeing GNU Pattern Buffers, GNU Translate Tables, GNU Regex Functions

+@subsection Using Registers

+A group in a regular expression can match a (possibly empty) substring

+of the string that regular expression as a whole matched. The matcher

+remembers the beginning and end of the substring matched by

+each group.

+To find out what they matched, pass a nonzero @var{regs} argument to a

+@sc{gnu} matching or searching function (@pxref{GNU Matching} and

+@ref{GNU Searching}), i.e., the address of a structure of this type, as

+defined in @file{regex.h}:

+@c We don't bother to include this directly from regex.h,

+@c since it changes so rarely.

+@example

+@tindex re_registers

+@vindex num_regs @r{in @code{struct re_registers}}

+@vindex start @r{in @code{struct re_registers}}

+@vindex end @r{in @code{struct re_registers}}

+struct re_registers

+@{

+ unsigned num_regs;

+ regoff_t *start;

+ regoff_t *end;

+@};

+@end example

+Except for (possibly) the @var{num_regs}'th element (see below), the

+@var{i}th element of the @code{start} and @code{end} arrays records

+information about the @var{i}th group in the pattern. (They're declared

+as C pointers, but this is only because not all C compilers accept

+zero-length arrays; conceptually, it is simplest to think of them as

+arrays.)

+The @code{start} and @code{end} arrays are allocated in various ways,

+depending on the value of the @code{regs_allocated}

+@vindex regs_allocated

+field in the pattern buffer passed to the matcher.

+The simplest and perhaps most useful is to let the matcher (re)allocate

+enough space to record information for all the groups in the regular

+expression. If @code{regs_allocated} is @code{REGS_UNALLOCATED},

+@vindex REGS_UNALLOCATED

+the matcher allocates @math{1 + @var{re_nsub}} elements (@code{re_nsub}

+is another field in the pattern buffer; @pxref{GNU Pattern Buffers}),

+sets the extra element to @math{-1}, and sets @code{regs_allocated} to

+@code{REGS_REALLOCATE}.

+@vindex REGS_REALLOCATE

+Then on subsequent calls with the same pattern buffer and @var{regs}

+arguments, the matcher reallocates more space if necessary.

+It would perhaps be more logical to make the @code{regs_allocated} field

+part of the @code{re_registers} structure, instead of part of the

+pattern buffer. But in that case the caller would be forced to

+initialize the structure before passing it. Much existing code doesn't

+do this initialization, and it's arguably better to avoid it anyway.

+@code{re_compile_pattern} sets @code{regs_allocated} to

+@code{REGS_UNALLOCATED},

+so if you use the GNU regular expression

+functions, you get this behavior by default.

+@c xx document re_set_registers

+@sc{posix}, on the other hand, requires a different interface: the

+caller is supposed to pass in a fixed-length array which the matcher

+fills. Therefore, if @code{regs_allocated} is @code{REGS_FIXED},

+@vindex REGS_FIXED

+the matcher simply fills that array.

+The following examples illustrate the information recorded in the

+@code{re_registers} structure. (In all of them, @samp{(} represents the

+open-group and @samp{)} the close-group operator. The first character

+in the string @var{string} is at index 0.)

+@c xx i'm not sure this is all true anymore.

+@itemize @bullet

+@item

+If the regular expression has an @w{@var{i}-th}

+group not contained within another group that matches a

+substring of @var{string}, then the function sets

+@code{@w{@var{regs}->}start[@var{i}]} to the index in @var{string} where

+the substring matched by the @w{@var{i}-th} group begins, and

+@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that

+substring's end. The function sets @code{@w{@var{regs}->}start[0]} and

+@code{@w{@var{regs}->}end[0]} to analogous information about the entire

+pattern.

+For example, when you match @samp{((a)(b))} against @samp{ab}, you get:

+@itemize

+@item

+0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]}

+@item

+0 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]}

+@item

+0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]}

+@item

+1 in @code{@w{@var{regs}->}start[3]} and 2 in @code{@w{@var{regs}->}end[3]}

+@end itemize

+@item

+If a group matches more than once (as it might if followed by,

+e.g., a repetition operator), then the function reports the information

+about what the group @emph{last} matched.

+For example, when you match the pattern @samp{(a)*} against the string

+@samp{aa}, you get:

+@itemize

+@item

+0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]}

+@item

+1 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]}

+@end itemize

+@item

+If the @w{@var{i}-th} group does not participate in a

+successful match, e.g., it is an alternative not taken or a

+repetition operator allows zero repetitions of it, then the function

+sets @code{@w{@var{regs}->}start[@var{i}]} and

+@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}.

+For example, when you match the pattern @samp{(a)*b} against

+the string @samp{b}, you get:

+@itemize

+@item

+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}

+@item

+@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]}

+@end itemize

+@item

+If the @w{@var{i}-th} group matches a zero-length string, then the

+function sets @code{@w{@var{regs}->}start[@var{i}]} and

+@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that

+zero-length string.

+For example, when you match the pattern @samp{(a*)b} against the string

+@samp{b}, you get:

+@itemize

+@item

+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}

+@item

+0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]}

+@end itemize

+@ignore

+The function sets @code{@w{@var{regs}->}start[0]} and

+@code{@w{@var{regs}->}end[0]} to analogous information about the entire

+pattern.

+For example, when you match the pattern @samp{(a*)} against the empty

+string, you get:

+@itemize

+@item

+0 in @code{@w{@var{regs}->}start[0]} and 0 in @code{@w{@var{regs}->}end[0]}

+@item

+0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]}

+@end itemize

+@end ignore

+@item

+If an @w{@var{i}-th} group contains a @w{@var{j}-th} group

+in turn not contained within any other group within group @var{i} and

+the function reports a match of the @w{@var{i}-th} group, then it

+records in @code{@w{@var{regs}->}start[@var{j}]} and

+@code{@w{@var{regs}->}end[@var{j}]} the last match (if it matched) of

+the @w{@var{j}-th} group.

+For example, when you match the pattern @samp{((a*)b)*} against the

+string @samp{abb}, @w{group 2} last matches the empty string, so you

+get what it previously matched:

+@itemize

+@item

+0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]}

+@item

+2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]}

+@item

+2 in @code{@w{@var{regs}->}start[2]} and 2 in @code{@w{@var{regs}->}end[2]}

+@end itemize

+When you match the pattern @samp{((a)*b)*} against the string

+@samp{abb}, @w{group 2} doesn't participate in the last match, so you

+get:

+@itemize

+@item

+0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]}

+@item

+2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]}

+@item

+0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]}

+@end itemize

+@item

+If an @w{@var{i}-th} group contains a @w{@var{j}-th} group

+in turn not contained within any other group within group @var{i}

+and the function sets

+@code{@w{@var{regs}->}start[@var{i}]} and

+@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}, then it also sets

+@code{@w{@var{regs}->}start[@var{j}]} and

+@code{@w{@var{regs}->}end[@var{j}]} to @math{-1}.

+For example, when you match the pattern @samp{((a)*b)*c} against the

+string @samp{c}, you get:

+@itemize

+@item

+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}

+@item

+@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]}

+@item

+@math{-1} in @code{@w{@var{regs}->}start[2]} and @math{-1} in @code{@w{@var{regs}->}end[2]}

+@end itemize

+@node Freeing GNU Pattern Buffers, , Using Registers, GNU Regex Functions

+@subsection Freeing GNU Pattern Buffers

+To free any allocated fields of a pattern buffer, you can use the

+@sc{posix} function described in @ref{Freeing POSIX Pattern Buffers},

+since the type @code{regex_t}---the type for @sc{posix} pattern

+buffers---is equivalent to the type @code{re_pattern_buffer}. After

+freeing a pattern buffer, you need to again compile a regular expression

+in it (@pxref{GNU Regular Expression Compiling}) before passing it to

+a matching or searching function.

+@node POSIX Regex Functions, BSD Regex Functions, GNU Regex Functions, Programming with Regex

+@section POSIX Regex Functions

+If you're writing code that has to be @sc{posix} compatible, you'll need

+to use these functions. Their interfaces are as specified by @sc{posix},

+draft 1003.2/D11.2.

+@menu

+* POSIX Pattern Buffers:: The regex_t type.

+* POSIX Regular Expression Compiling:: regcomp ()

+* POSIX Matching:: regexec ()

+* Reporting Errors:: regerror ()

+* Using Byte Offsets:: The regmatch_t type.

+* Freeing POSIX Pattern Buffers:: regfree ()

+@end menu

+@node POSIX Pattern Buffers, POSIX Regular Expression Compiling, , POSIX Regex Functions

+@subsection POSIX Pattern Buffers

+To compile or match a given regular expression the @sc{posix} way, you

+must supply a pattern buffer exactly the way you do for @sc{gnu}

+(@pxref{GNU Pattern Buffers}). @sc{posix} pattern buffers have type

+@code{regex_t}, which is equivalent to the @sc{gnu} pattern buffer

+type @code{re_pattern_buffer}.

+@node POSIX Regular Expression Compiling, POSIX Matching, POSIX Pattern Buffers, POSIX Regex Functions

+@subsection POSIX Regular Expression Compiling

+With @sc{posix}, you can only search for a given regular expression; you

+can't match it. To do this, you must first compile it in a

+pattern buffer, using @code{regcomp}.

+@ignore

+Before calling @code{regcomp}, you must initialize this pattern buffer

+as you do for @sc{gnu} (@pxref{GNU Regular Expression Compiling}). See

+below, however, for how to choose a syntax with which to compile.

+@end ignore

+To compile a pattern buffer, use:

+@findex regcomp

+@example

+int

+regcomp (regex_t *@var{preg}, const char *@var{regex}, int @var{cflags})

+@end example

+@noindent

+@var{preg} is the initialized pattern buffer's address, @var{regex} is

+the regular expression's address, and @var{cflags} is the compilation

+flags, which Regex considers as a collection of bits. Here are the

+valid bits, as defined in @file{regex.h}:

+@table @code

+@item REG_EXTENDED

+@vindex REG_EXTENDED

+says to use @sc{posix} Extended Regular Expression syntax; if this isn't

+set, then says to use @sc{posix} Basic Regular Expression syntax.

+@code{regcomp} sets @var{preg}'s @code{syntax} field accordingly.

+@item REG_ICASE

+@vindex REG_ICASE

+@cindex ignoring case

+says to ignore case; @code{regcomp} sets @var{preg}'s @code{translate}

+field to a translate table which ignores case, replacing anything you've

+put there before.

+@item REG_NOSUB

+@vindex REG_NOSUB

+says to set @var{preg}'s @code{no_sub} field; @pxref{POSIX Matching},

+for what this means.

+@item REG_NEWLINE

+@vindex REG_NEWLINE

+says that a:

+@itemize @bullet

+@item

+match-any-character operator (@pxref{Match-any-character

+Operator}) doesn't match a newline.

+@item

+nonmatching list not containing a newline (@pxref{List

+Operators}) matches a newline.

+@item

+match-beginning-of-line operator (@pxref{Match-beginning-of-line

+Operator}) matches the empty string immediately after a newline,

+regardless of how @code{REG_NOTBOL} is set (@pxref{POSIX Matching}, for

+an explanation of @code{REG_NOTBOL}).

+@item

+match-end-of-line operator (@pxref{Match-end-of-line

+Operator}) matches the empty string immediately before a newline,

+regardless of how @code{REG_NOTEOL} is set (@pxref{POSIX Matching},

+for an explanation of @code{REG_NOTEOL}).

+@end itemize

+@end table

+If @code{regcomp} successfully compiles the regular expression, it

+returns zero and sets @code{*@var{preg}} to the compiled

+pattern. Except for @code{syntax} (which it sets as explained above), it

+also sets the same fields the same way as does the @sc{gnu} compiling

+function (@pxref{GNU Regular Expression Compiling}).

+If @code{regcomp} can't compile the regular expression, it returns one

+of the error codes listed here. (Except when noted differently, the

+syntax in all examples below is basic regular expression syntax.)

+@table @code

+@comment repetitions

+@item REG_BADRPT

+For example, the consecutive repetition operators @samp{**} in

+@samp{a**} are invalid. As another example, if the syntax is extended

+regular expression syntax, then the repetition operator @samp{*} with

+nothing on which to operate in @samp{*} is invalid.

+@item REG_BADBR

+For example, the @var{count} @samp{-1} in @samp{a\@{-1} is invalid.

+@item REG_EBRACE

+For example, @samp{a\@{1} is missing a close-interval operator.

+@comment lists

+@item REG_EBRACK

+For example, @samp{[a} is missing a close-list operator.

+@item REG_ERANGE

+For example, the range ending point @samp{z} that collates lower than

+does its starting point @samp{a} in @samp{[z-a]} is invalid. Also, the

+range with the character class @samp{[:alpha:]} as its starting point in

+@samp{[[:alpha:]-|]} is invalid.

+@item REG_ECTYPE

+For example, the character class name @samp{foo} in @samp{[[:foo:]]} is

+invalid.

+@comment groups

+@item REG_EPAREN

+For example, @samp{a\)} is missing an open-group operator and @samp{\(a}

+is missing a close-group operator.

+@item REG_ESUBREG

+For example, the back reference @samp{\2} that refers to a nonexistent

+subexpression in @samp{\(a\)\2} is invalid.

+@comment unfinished business

+@item REG_EEND

+Returned when a regular expression causes no other more specific error.

+@item REG_EESCAPE

+For example, the trailing backslash @samp{\} in @samp{a\} is invalid, as is the

+one in @samp{\}.

+@comment kitchen sink

+@item REG_BADPAT

+For example, in the extended regular expression syntax, the empty group

+@samp{()} in @samp{a()b} is invalid.

+@comment internal

+@item REG_ESIZE

+Returned when a regular expression needs a pattern buffer larger than

+65536 bytes.

+@item REG_ESPACE

+Returned when a regular expression causes Regex to run out of memory.

+@end table

+@node POSIX Matching, Reporting Errors, POSIX Regular Expression Compiling, POSIX Regex Functions

+@subsection POSIX Matching

+Matching the @sc{posix} way means trying to match a null-terminated

+string starting at its first character. Once you've compiled a pattern

+into a pattern buffer (@pxref{POSIX Regular Expression Compiling}), you

+can ask the matcher to match that pattern against a string using:

+@findex regexec

+@example

+int

+regexec (const regex_t *@var{preg}, const char *@var{string},

+ size_t @var{nmatch}, regmatch_t @var{pmatch}[], int @var{eflags})

+@end example

+@noindent

+@var{preg} is the address of a pattern buffer for a compiled pattern.

+@var{string} is the string you want to match.

+@xref{Using Byte Offsets}, for an explanation of @var{pmatch}. If you

+pass zero for @var{nmatch} or you compiled @var{preg} with the

+compilation flag @code{REG_NOSUB} set, then @code{regexec} will ignore

+@var{pmatch}; otherwise, you must allocate it to have at least

+@var{nmatch} elements. @code{regexec} will record @var{nmatch} byte

+offsets in @var{pmatch}, and set to @math{-1} any unused elements up to

+@code{@var{pmatch}[@var{nmatch} - 1]}.

+@var{eflags} specifies @dfn{execution flags}---namely, the two bits

+@code{REG_NOTBOL} and @code{REG_NOTEOL} (defined in @file{regex.h}). If

+you set @code{REG_NOTBOL}, then the match-beginning-of-line operator

+(@pxref{Match-beginning-of-line Operator}) always fails to match.

+This lets you match against pieces of a line, as you would need to if,

+say, searching for repeated instances of a given pattern in a line; it

+would work correctly for patterns both with and without

+match-beginning-of-line operators. @code{REG_NOTEOL} works analogously

+for the match-end-of-line operator (@pxref{Match-end-of-line

+Operator}); it exists for symmetry.

+@code{regexec} tries to find a match for @var{preg} in @var{string}

+according to the syntax in @var{preg}'s @code{syntax} field.

+(@xref{POSIX Regular Expression Compiling}, for how to set it.) The

+function returns zero if the compiled pattern matches @var{string} and

+@code{REG_NOMATCH} (defined in @file{regex.h}) if it doesn't.

+@node Reporting Errors, Using Byte Offsets, POSIX Matching, POSIX Regex Functions

+@subsection Reporting Errors

+If either @code{regcomp} or @code{regexec} fails, it returns a nonzero

+error code, the possibilities for which are defined in @file{regex.h}.

+@xref{POSIX Regular Expression Compiling}, and @ref{POSIX Matching}, for

+what these codes mean. To get an error string corresponding to these

+codes, you can use:

+@findex regerror

+@example

+size_t

+regerror (int @var{errcode},

+ const regex_t *@var{preg},

+ char *@var{errbuf},

+ size_t @var{errbuf_size})

+@end example

+@noindent

+@var{errcode} is an error code, @var{preg} is the address of the pattern

+buffer which provoked the error, @var{errbuf} is the error buffer, and

+@var{errbuf_size} is @var{errbuf}'s size.

+@code{regerror} returns the size in bytes of the error string

+corresponding to @var{errcode} (including its terminating null). If

+@var{errbuf} and @var{errbuf_size} are nonzero, it also returns in

+@var{errbuf} the first @math{@var{errbuf_size} - 1} characters of the

+error string, followed by a null.

+@var{errbuf_size} must be a nonnegative number less than or equal to the

+size in bytes of @var{errbuf}.

+You can call @code{regerror} with a null @var{errbuf} and a zero

+@var{errbuf_size} to determine how large @var{errbuf} need be to

+accommodate @code{regerror}'s error string.

+@node Using Byte Offsets, Freeing POSIX Pattern Buffers, Reporting Errors, POSIX Regex Functions

+@subsection Using Byte Offsets

+In @sc{posix}, variables of type @code{regmatch_t} hold information

+analogous, but not identical, to @sc{gnu}'s registers (@pxref{Using

+Registers}). To get information about registers in @sc{posix}, pass to

+@code{regexec} a nonzero @var{pmatch} of type @code{regmatch_t}, i.e.,

+the address of a structure of this type, defined in

+@file{regex.h}:

+@tindex regmatch_t

+@example

+typedef struct

+@{

+ regoff_t rm_so;

+ regoff_t rm_eo;

+@} regmatch_t;

+@end example

+When reading in @ref{Using Registers}, about how the matching function

+stores the information into the registers, substitute @var{pmatch} for

+@var{regs}, @code{@w{@var{pmatch}[@var{i}].}rm_so} for

+@code{@w{@var{regs}->}start[@var{i}]} and

+@code{@w{@var{pmatch}[@var{i}].}rm_eo} for

+@code{@w{@var{regs}->}end[@var{i}]}.

+@node Freeing POSIX Pattern Buffers, , Using Byte Offsets, POSIX Regex Functions

+@subsection Freeing POSIX Pattern Buffers

+To free any allocated fields of a pattern buffer, use:

+@findex regfree

+@example

+void

+regfree (regex_t *@var{preg})

+@end example

+@noindent

+@var{preg} is the pattern buffer whose allocated fields you want freed.

+@code{regfree} also sets @var{preg}'s @code{allocated} and @code{used}

+fields to zero. After freeing a pattern buffer, you need to again

+compile a regular expression in it (@pxref{POSIX Regular Expression

+Compiling}) before passing it to the matching function (@pxref{POSIX

+Matching}).

+@node BSD Regex Functions, , POSIX Regex Functions, Programming with Regex

+@section BSD Regex Functions

+If you're writing code that has to be Berkeley @sc{unix} compatible,

+you'll need to use these functions whose interfaces are the same as those

+in Berkeley @sc{unix}.

+@menu

+* BSD Regular Expression Compiling:: re_comp ()

+* BSD Searching:: re_exec ()

+@end menu

+@node BSD Regular Expression Compiling, BSD Searching, , BSD Regex Functions

+@subsection BSD Regular Expression Compiling

+With Berkeley @sc{unix}, you can only search for a given regular

+expression; you can't match one. To search for it, you must first

+compile it. Before you compile it, you must indicate the regular

+expression syntax you want it compiled according to by setting the

+variable @code{re_syntax_options} (declared in @file{regex.h}) to some

+syntax (@pxref{Regular Expression Syntax}).

+To compile a regular expression use:

+@findex re_comp

+@example

+char *

+re_comp (char *@var{regex})

+@end example

+@noindent

+@var{regex} is the address of a null-terminated regular expression.

+@code{re_comp} uses an internal pattern buffer, so you can use only the

+most recently compiled pattern buffer. This means that if you want to

+use a given regular expression that you've already compiled---but it

+isn't the latest one you've compiled---you'll have to recompile it. If

+you call @code{re_comp} with the null string (@emph{not} the empty

+string) as the argument, it doesn't change the contents of the pattern

+buffer.

+If @code{re_comp} successfully compiles the regular expression, it

+returns zero. If it can't compile the regular expression, it returns

+an error string. @code{re_comp}'s error messages are identical to those

+of @code{re_compile_pattern} (@pxref{GNU Regular Expression

+Compiling}).

+@node BSD Searching, , BSD Regular Expression Compiling, BSD Regex Functions

+@subsection BSD Searching

+Searching the Berkeley @sc{unix} way means searching in a string

+starting at its first character and trying successive positions within

+it to find a match. Once you've compiled a pattern using @code{re_comp}

+(@pxref{BSD Regular Expression Compiling}), you can ask Regex

+to search for that pattern in a string using:

+@findex re_exec

+@example

+int

+re_exec (char *@var{string})

+@end example

+@noindent

+@var{string} is the address of the null-terminated string in which you

+want to search.

+@code{re_exec} returns either 1 for success or 0 for failure. It

+automatically uses a @sc{gnu} fastmap (@pxref{Searching with Fastmaps}).

+@node Copying, Index, Programming with Regex, Top

+@appendix GNU GENERAL PUBLIC LICENSE

+@center Version 2, June 1991

+@display

+Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.

+675 Mass Ave, Cambridge, MA 02139, USA

+Everyone is permitted to copy and distribute verbatim copies

+of this license document, but changing it is not allowed.

+@end display

+@unnumberedsec Preamble

+ The licenses for most software are designed to take away your

+freedom to share and change it. By contrast, the GNU General Public

+License is intended to guarantee your freedom to share and change free

+software---to make sure the software is free for all its users. This

+General Public License applies to most of the Free Software

+Foundation's software and to any other program whose authors commit to

+using it. (Some other Free Software Foundation software is covered by

+the GNU Library General Public License instead.) You can apply it to

+your programs, too.

+ When we speak of free software, we are referring to freedom, not

+price. Our General Public Licenses are designed to make sure that you

+have the freedom to distribute copies of free software (and charge for

+this service if you wish), that you receive source code or can get it

+if you want it, that you can change the software or use pieces of it

+in new free programs; and that you know you can do these things.

+ To protect your rights, we need to make restrictions that forbid

+anyone to deny you these rights or to ask you to surrender the rights.

+These restrictions translate to certain responsibilities for you if you

+distribute copies of the software, or if you modify it.

+ For example, if you distribute copies of such a program, whether

+gratis or for a fee, you must give the recipients all the rights that

+you have. You must make sure that they, too, receive or can get the

+source code. And you must show them these terms so they know their

+rights.

+ We protect your rights with two steps: (1) copyright the software, and

+(2) offer you this license which gives you legal permission to copy,

+distribute and/or modify the software.

+ Also, for each author's protection and ours, we want to make certain

+that everyone understands that there is no warranty for this free

+software. If the software is modified by someone else and passed on, we

+want its recipients to know that what they have is not the original, so

+that any problems introduced by others will not reflect on the original

+authors' reputations.

+ Finally, any free program is threatened constantly by software

+patents. We wish to avoid the danger that redistributors of a free

+program will individually obtain patent licenses, in effect making the

+program proprietary. To prevent this, we have made it clear that any

+patent must be licensed for everyone's free use or not licensed at all.

+ The precise terms and conditions for copying, distribution and

+modification follow.

+@iftex

+@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

+@end iftex

+@ifinfo

+@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

+@end ifinfo

+@enumerate

+@item

+This License applies to any program or other work which contains

+a notice placed by the copyright holder saying it may be distributed

+under the terms of this General Public License. The ``Program'', below,

+refers to any such program or work, and a ``work based on the Program''

+means either the Program or any derivative work under copyright law:

+that is to say, a work containing the Program or a portion of it,

+either verbatim or with modifications and/or translated into another

+language. (Hereinafter, translation is included without limitation in

+the term ``modification''.) Each licensee is addressed as ``you''.

+Activities other than copying, distribution and modification are not

+covered by this License; they are outside its scope. The act of

+running the Program is not restricted, and the output from the Program

+is covered only if its contents constitute a work based on the

+Program (independent of having been made by running the Program).

+Whether that is true depends on what the Program does.

+@item

+You may copy and distribute verbatim copies of the Program's

+source code as you receive it, in any medium, provided that you

+conspicuously and appropriately publish on each copy an appropriate

+copyright notice and disclaimer of warranty; keep intact all the

+notices that refer to this License and to the absence of any warranty;

+and give any other recipients of the Program a copy of this License

+along with the Program.

+You may charge a fee for the physical act of transferring a copy, and

+you may at your option offer warranty protection in exchange for a fee.

+@item

+You may modify your copy or copies of the Program or any portion

+of it, thus forming a work based on the Program, and copy and

+distribute such modifications or work under the terms of Section 1

+above, provided that you also meet all of these conditions:

+@enumerate a

+@item

+You must cause the modified files to carry prominent notices

+stating that you changed the files and the date of any change.

+@item

+You must cause any work that you distribute or publish, that in

+whole or in part contains or is derived from the Program or any

+part thereof, to be licensed as a whole at no charge to all third

+parties under the terms of this License.

+@item

+If the modified program normally reads commands interactively

+when run, you must cause it, when started running for such

+interactive use in the most ordinary way, to print or display an

+announcement including an appropriate copyright notice and a

+notice that there is no warranty (or else, saying that you provide

+a warranty) and that users may redistribute the program under

+these conditions, and telling the user how to view a copy of this

+License. (Exception: if the Program itself is interactive but

+does not normally print such an announcement, your work based on

+the Program is not required to print an announcement.)

+@end enumerate

+These requirements apply to the modified work as a whole. If

+identifiable sections of that work are not derived from the Program,

+and can be reasonably considered independent and separate works in

+themselves, then this License, and its terms, do not apply to those

+sections when you distribute them as separate works. But when you

+distribute the same sections as part of a whole which is a work based

+on the Program, the distribution of the whole must be on the terms of

+this License, whose permissions for other licensees extend to the

+entire whole, and thus to each and every part regardless of who wrote it.

+Thus, it is not the intent of this section to claim rights or contest

+your rights to work written entirely by you; rather, the intent is to

+exercise the right to control the distribution of derivative or

+collective works based on the Program.

+In addition, mere aggregation of another work not based on the Program

+with the Program (or with a work based on the Program) on a volume of

+a storage or distribution medium does not bring the other work under

+the scope of this License.

+@item

+You may copy and distribute the Program (or a work based on it,

+under Section 2) in object code or executable form under the terms of

+Sections 1 and 2 above provided that you also do one of the following:

+@enumerate a

+@item

+Accompany it with the complete corresponding machine-readable

+source code, which must be distributed under the terms of Sections

+1 and 2 above on a medium customarily used for software interchange; or,

+@item

+Accompany it with a written offer, valid for at least three

+years, to give any third party, for a charge no more than your

+cost of physically performing source distribution, a complete

+machine-readable copy of the corresponding source code, to be

+distributed under the terms of Sections 1 and 2 above on a medium

+customarily used for software interchange; or,

+@item

+Accompany it with the information you received as to the offer

+to distribute corresponding source code. (This alternative is

+allowed only for noncommercial distribution and only if you

+received the program in object code or executable form with such

+an offer, in accord with Subsection b above.)

+@end enumerate

+The source code for a work means the preferred form of the work for

+making modifications to it. For an executable work, complete source

+code means all the source code for all modules it contains, plus any

+associated interface definition files, plus the scripts used to

+control compilation and installation of the executable. However, as a

+special exception, the source code distributed need not include

+anything that is normally distributed (in either source or binary

+form) with the major components (compiler, kernel, and so on) of the

+operating system on which the executable runs, unless that component

+itself accompanies the executable.

+If distribution of executable or object code is made by offering

+access to copy from a designated place, then offering equivalent

+access to copy the source code from the same place counts as

+distribution of the source code, even though third parties are not

+compelled to copy the source along with the object code.

+@item

+You may not copy, modify, sublicense, or distribute the Program

+except as expressly provided under this License. Any attempt

+otherwise to copy, modify, sublicense or distribute the Program is

+void, and will automatically terminate your rights under this License.

+However, parties who have received copies, or rights, from you under

+this License will not have their licenses terminated so long as such

+parties remain in full compliance.

+@item

+You are not required to accept this License, since you have not

+signed it. However, nothing else grants you permission to modify or

+distribute the Program or its derivative works. These actions are

+prohibited by law if you do not accept this License. Therefore, by

+modifying or distributing the Program (or any work based on the

+Program), you indicate your acceptance of this License to do so, and

+all its terms and conditions for copying, distributing or modifying

+the Program or works based on it.

+@item

+Each time you redistribute the Program (or any work based on the

+Program), the recipient automatically receives a license from the

+original licensor to copy, distribute or modify the Program subject to

+these terms and conditions. You may not impose any further

+restrictions on the recipients' exercise of the rights granted herein.

+You are not responsible for enforcing compliance by third parties to

+this License.

+@item

+If, as a consequence of a court judgment or allegation of patent

+infringement or for any other reason (not limited to patent issues),

+conditions are imposed on you (whether by court order, agreement or

+otherwise) that contradict the conditions of this License, they do not

+excuse you from the conditions of this License. If you cannot

+distribute so as to satisfy simultaneously your obligations under this

+License and any other pertinent obligations, then as a consequence you

+may not distribute the Program at all. For example, if a patent

+license would not permit royalty-free redistribution of the Program by

+all those who receive copies directly or indirectly through you, then

+the only way you could satisfy both it and this License would be to

+refrain entirely from distribution of the Program.

+If any portion of this section is held invalid or unenforceable under

+any particular circumstance, the balance of the section is intended to

+apply and the section as a whole is intended to apply in other

+circumstances.

+It is not the purpose of this section to induce you to infringe any

+patents or other property right claims or to contest validity of any

+such claims; this section has the sole purpose of protecting the

+integrity of the free software distribution system, which is

+implemented by public license practices. Many people have made

+generous contributions to the wide range of software distributed

+through that system in reliance on consistent application of that

+system; it is up to the author/donor to decide if he or she is willing

+to distribute software through any other system and a licensee cannot

+impose that choice.

+This section is intended to make thoroughly clear what is believed to

+be a consequence of the rest of this License.

+@item

+If the distribution and/or use of the Program is restricted in

+certain countries either by patents or by copyrighted interfaces, the

+original copyright holder who places the Program under this License

+may add an explicit geographical distribution limitation excluding

+those countries, so that distribution is permitted only in or among

+countries not thus excluded. In such case, this License incorporates

+the limitation as if written in the body of this License.

+@item

+The Free Software Foundation may publish revised and/or new versions

+of the General Public License from time to time. Such new versions will

+be similar in spirit to the present version, but may differ in detail to

+address new problems or concerns.

+Each version is given a distinguishing version number. If the Program

+specifies a version number of this License which applies to it and ``any

+later version'', you have the option of following the terms and conditions

+either of that version or of any later version published by the Free

+Software Foundation. If the Program does not specify a version number of

+this License, you may choose any version ever published by the Free Software

+Foundation.

+@item

+If you wish to incorporate parts of the Program into other free

+programs whose distribution conditions are different, write to the author

+to ask for permission. For software which is copyrighted by the Free

+Software Foundation, write to the Free Software Foundation; we sometimes

+make exceptions for this. Our decision will be guided by the two goals

+of preserving the free status of all derivatives of our free software and

+of promoting the sharing and reuse of software generally.

+@iftex

+@heading NO WARRANTY

+@end iftex

+@ifinfo

+@center NO WARRANTY

+@end ifinfo

+@item

+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY

+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN

+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES

+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED

+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF

+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS

+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE

+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,

+REPAIR OR CORRECTION.

+@item

+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING

+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR

+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,

+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING

+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED

+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY

+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER

+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE

+POSSIBILITY OF SUCH DAMAGES.

+@end enumerate

+@iftex

+@heading END OF TERMS AND CONDITIONS

+@end iftex

+@ifinfo

+@center END OF TERMS AND CONDITIONS

+@end ifinfo

+@page

+@unnumberedsec Appendix: How to Apply These Terms to Your New Programs

+ If you develop a new program, and you want it to be of the greatest

+possible use to the public, the best way to achieve this is to make it

+free software which everyone can redistribute and change under these terms.

+ To do so, attach the following notices to the program. It is safest

+to attach them to the start of each source file to most effectively

+convey the exclusion of warranty; and each file should have at least

+the ``copyright'' line and a pointer to where the full notice is found.

+@smallexample

+@var{one line to give the program's name and a brief idea of what it does.}

+Copyright (C) 19@var{yy} @var{name of author}

+This program is free software; you can redistribute it and/or modify

+it under the terms of the GNU General Public License as published by

+the Free Software Foundation; either version 2 of the License, or

+(at your option) any later version.

+This program is distributed in the hope that it will be useful,

+but WITHOUT ANY WARRANTY; without even the implied warranty of

+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+GNU General Public License for more details.

+You should have received a copy of the GNU General Public License

+along with this program; if not, write to the Free Software

+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

+@end smallexample

+Also add information on how to contact you by electronic and paper mail.

+If the program is interactive, make it output a short notice like this

+when it starts in an interactive mode:

+@smallexample

+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}

+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.

+This is free software, and you are welcome to redistribute it

+under certain conditions; type `show c' for details.

+@end smallexample

+The hypothetical commands @samp{show w} and @samp{show c} should show

+the appropriate parts of the General Public License. Of course, the

+commands you use may be called something other than @samp{show w} and

+@samp{show c}; they could even be mouse-clicks or menu items---whatever

+suits your program.

+You should also get your employer (if you work as a programmer) or your

+school, if any, to sign a ``copyright disclaimer'' for the program, if

+necessary. Here is a sample; alter the names:

+@example

+Yoyodyne, Inc., hereby disclaims all copyright interest in the program

+`Gnomovision' (which makes passes at compilers) written by James Hacker.

+@var{signature of Ty Coon}, 1 April 1989

+Ty Coon, President of Vice

+@end example

+This General Public License does not permit incorporating your program into

+proprietary programs. If your program is a subroutine library, you may

+consider it more useful to permit linking proprietary applications with the

+library. If this is what you want to do, use the GNU Library General

+Public License instead of this License.

+@node Index, , Copying, Top

+@unnumbered Index

+@printindex cp

+@contents

+@bye

diff --git a/gnu/lib/libregex/doc/xregex.texi b/gnu/lib/libregex/doc/xregex.texi
new file mode 100644
index 000000000000..9292b356ef75
--- /dev/null
+++ b/gnu/lib/libregex/doc/xregex.texi

@@ -0,0 +1,3021 @@