diff options
Diffstat (limited to 'gnu/libregex')
53 files changed, 0 insertions, 32109 deletions
diff --git a/gnu/libregex/AUTHORS b/gnu/libregex/AUTHORS deleted file mode 100644 index 058be996a272..000000000000 --- a/gnu/libregex/AUTHORS +++ /dev/null @@ -1,10 +0,0 @@ -Richard Stallman -- original version and continuing revisions of - regex.c and regex.h, and original version of the documentation. - -Karl Berry and Kathryn Hargreaves -- extensive modifications to above, - and all test files. - -Jim Blandy -- original version of re_set_registers, revisions to regex.c. - -Joe Arceneaux, David MacKenzie, Mike Haertel, Charles Hannum, and -probably others -- revisions to regex.c. diff --git a/gnu/libregex/COPYING b/gnu/libregex/COPYING deleted file mode 100644 index a43ea2126fb6..000000000000 --- a/gnu/libregex/COPYING +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. 
- - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. 
You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. 
- -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. 
However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. 
-You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - Appendix: How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) 19yy <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19yy name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. 
- - <signature of Ty Coon>, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. diff --git a/gnu/libregex/ChangeLog b/gnu/libregex/ChangeLog deleted file mode 100644 index ef919d276a76..000000000000 --- a/gnu/libregex/ChangeLog +++ /dev/null @@ -1,3030 +0,0 @@ -Fri Apr 2 17:31:59 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * Released version 0.12. - - * regex.c (regerror): If errcode is zero, that's not a valid - error code, according to POSIX, but return "Success." - - * regex.c (regerror): Remember to actually fetch the message - from re_error_msg. - - * regex.c (regex_compile): Don't use the trick for ".*\n" on - ".+\n". Since the latter involves laying an extra choice - point, the backward jump isn't adjusted properly. - -Thu Mar 25 21:35:18 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * regex.c (regex_compile): In the handle_open and handle_close - sections, clear pending_exact to zero. - -Tue Mar 9 12:03:07 1993 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu) - - * regex.c (re_search_2): In the loop which searches forward - using fastmap, don't forget to cast the character from the - string to an unsigned before using it as an index into the - translate map. - -Thu Jan 14 15:41:46 1993 David J. MacKenzie (djm@kropotkin.gnu.ai.mit.edu) - - * regex.h: Never define const; let the callers do it. - configure.in: Don't define USING_AUTOCONF. - -Wed Jan 6 20:49:29 1993 Jim Blandy (jimb@geech.gnu.ai.mit.edu) - - * regex.c (regerror): Abort if ERRCODE is out of range. - -Sun Dec 20 16:19:10 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * configure.in: Arrange to #define USING_AUTOCONF. 
- * regex.h: If USING_AUTOCONF is #defined, don't mess with - `const' at all; autoconf has taken care of it. - -Mon Dec 14 21:40:39 1992 David J. MacKenzie (djm@kropotkin.gnu.ai.mit.edu) - - * regex.h (RE_SYNTAX_AWK): Fix typo. From Arnold Robbins. - -Sun Dec 13 20:35:39 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * regex.c (compile_range): Fetch the range start and end by - casting the pattern pointer to an `unsigned char *' before - fetching through it. - -Sat Dec 12 09:41:01 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * regex.c: Undo change of 12/7/92; it's better for Emacs to - #define HAVE_CONFIG_H. - -Fri Dec 11 22:00:34 1992 Jim Meyering (meyering@hal.gnu.ai.mit.edu) - - * regex.c: Define and use isascii-protected ctype.h macros. - -Fri Dec 11 05:10:38 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * regex.c (re_match_2): Undo Karl's November 10th change; it - keeps the group in :\(.*\) from matching :/ properly. - -Mon Dec 7 19:44:56 1992 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu) - - * regex.c: #include config.h if either HAVE_CONFIG_H or emacs - is #defined. - -Tue Dec 1 13:33:17 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) - - * regex.c [HAVE_CONFIG_H]: Include config.h. - -Wed Nov 25 23:46:02 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) - - * regex.c (regcomp): Add parens around bitwise & for clarity. - Initialize preg->allocated to prevent segv. - -Tue Nov 24 09:22:29 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu) - - * regex.c: Use HAVE_STRING_H, not USG. - * configure.in: Check for string.h, not USG. - -Fri Nov 20 06:33:24 1992 Karl Berry (karl@cs.umb.edu) - - * regex.c (SIGN_EXTEND_CHAR) [VMS]: Back out of this change, - since Roland Roberts now says it was a localism. - -Mon Nov 16 07:01:36 1992 Karl Berry (karl@cs.umb.edu) - - * regex.h (const) [!HAVE_CONST]: Test another cpp symbol (from - Autoconf) before zapping const. 
- -Sun Nov 15 05:36:42 1992 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu) - - * regex.c, regex.h: Changes for VMS from Roland B Roberts - <roberts@nsrl31.nsrl.rochester.edu>. - -Thu Nov 12 11:31:15 1992 Karl Berry (karl@cs.umb.edu) - - * Makefile.in (distfiles): Include INSTALL. - -Tue Nov 10 09:29:23 1992 Karl Berry (karl@cs.umb.edu) - - * regex.c (re_match_2): At maybe_pop_jump, if at end of string - and pattern, just quit the matching loop. - - * regex.c (LETTER_P): Rename to `WORDCHAR_P'. - - * regex.c (AT_STRINGS_{BEG,END}): Take `d' as an arg; change - callers. - - * regex.c (re_match_2) [!emacs]: In wordchar and notwordchar - cases, advance d. - -Wed Nov 4 15:43:58 1992 Karl Berry (karl@hal.gnu.ai.mit.edu) - - * regex.h (const) [!__STDC__]: Don't define if it's already defined. - -Sat Oct 17 19:28:19 1992 Karl Berry (karl@cs.umb.edu) - - * regex.c (bcmp, bcopy, bzero): Only #define if they are not - already #defined. - - * configure.in: Use AC_CONST. - -Thu Oct 15 08:39:06 1992 Karl Berry (karl@cs.umb.edu) - - * regex.h (const) [!const]: Conditionalize. - -Fri Oct 2 13:31:42 1992 Karl Berry (karl@cs.umb.edu) - - * regex.h (RE_SYNTAX_ED): New definition. - -Sun Sep 20 12:53:39 1992 Karl Berry (karl@cs.umb.edu) - - * regex.[ch]: remove traces of `longest_p' -- dumb idea to put - this into the pattern buffer, as it means parallelism loses. - - * Makefile.in (config.status): use sh to run configure --no-create. - - * Makefile.in (realclean): OK, don't remove configure. - -Sat Sep 19 09:05:08 1992 Karl Berry (karl@hayley) - - * regex.c (PUSH_FAILURE_POINT, POP_FAILURE_POINT) [DEBUG]: keep - track of how many failure points we push and pop. - (re_match_2) [DEBUG]: declare variables for that, and print results. - (DEBUG_PRINT4): new macro. - - * regex.h (re_pattern_buffer): new field `longest_p' (to - eliminate backtracking if the user doesn't need it). - * regex.c (re_compile_pattern): initialize it (to 1). 
- (re_search_2): set it to zero if register information is not needed. - (re_match_2): if it's set, don't backtrack. - - * regex.c (re_search_2): update fastmap only after checking that - the pattern is anchored. - - * regex.c (re_match_2): do more debugging at maybe_pop_jump. - - * regex.c (re_search_2): cast result of TRANSLATE for use in - array subscript. - -Thu Sep 17 19:47:16 1992 Karl Berry (karl@geech.gnu.ai.mit.edu) - - * Version 0.11. - -Wed Sep 16 08:17:10 1992 Karl Berry (karl@hayley) - - * regex.c (INIT_FAIL_STACK): rewrite as statements instead of a - complicated comma expr, to avoid compiler warnings (and also - simplify). - (re_compile_fastmap, re_match_2): change callers. - - * regex.c (POP_FAILURE_POINT): cast pop of regstart and regend - to avoid compiler warnings. - - * regex.h (RE_NEWLINE_ORDINARY): remove this syntax bit, and - remove uses. - * regex.c (at_{beg,end}line_loc_p): go the last mile: remove - the RE_NEWLINE_ORDINARY case which made the ^ in \n^ be an anchor. - -Tue Sep 15 09:55:29 1992 Karl Berry (karl@hayley) - - * regex.c (at_begline_loc_p): new fn. - (at_endline_loc_p): simplify at_endline_op_p. - (regex_compile): in ^/$ cases, call the above. - - * regex.c (POP_FAILURE_POINT): rewrite the fn as a macro again, - as lord's profiling indicates the function is 20% of the time. - (re_match_2): callers changed. - - * configure.in (AC_MEMORY_H): remove, since we never use memcpy et al. - -Mon Sep 14 17:49:27 1992 Karl Berry (karl@hayley) - - * Makefile.in (makeargs): include MFLAGS. - -Sun Sep 13 07:41:45 1992 Karl Berry (karl@hayley) - - * regex.c (regex_compile): in \1..\9 case, make it always - invalid to use \<digit> if there is no preceding <digit>th subexpr. - * regex.h (RE_NO_MISSING_BK_REF): remove this syntax bit. - - * regex.c (regex_compile): remove support for invalid empty groups. - * regex.h (RE_NO_EMPTY_GROUPS): remove this syntax bit. 
- - * regex.c (FREE_VARIABLES) [!REGEX_MALLOC]: define as alloca (0), - to reclaim memory. - - * regex.h (RE_SYNTAX_POSIX_SED): don't bother with this. - -Sat Sep 12 13:37:21 1992 Karl Berry (karl@hayley) - - * README: incorporate emacs.diff. - - * regex.h (_RE_ARGS) [!__STDC__]: define as empty parens. - - * configure.in: add AC_ALLOCA. - - * Put test files in subdir test, documentation in subdir doc. - Adjust Makefile.in and configure.in accordingly. - -Thu Sep 10 10:29:11 1992 Karl Berry (karl@hayley) - - * regex.h (RE_SYNTAX_{POSIX_,}SED): new definitions. - -Wed Sep 9 06:27:09 1992 Karl Berry (karl@hayley) - - * Version 0.10. - -Tue Sep 8 07:32:30 1992 Karl Berry (karl@hayley) - - * xregex.texinfo: put the day of month into the date. - - * Makefile.in (realclean): remove Texinfo-generated files. - (distclean): remove empty sorted index files. - (clean): remove dvi files, etc. - - * configure.in: test for more Unix variants. - - * fileregex.c: new file. - Makefile.in (fileregex): new target. - - * iregex.c (main): move variable decls to smallest scope. - - * regex.c (FREE_VARIABLES): free reg_{,info_}dummy. - (re_match_2): check that the allocation for those two succeeded. - - * regex.c (FREE_VAR): replace FREE_NONNULL with this. - (FREE_VARIABLES): call it. - (re_match_2) [REGEX_MALLOC]: initialize all our vars to NULL. - - * tregress.c (do_match): generalize simple_match. - (SIMPLE_NONMATCH): new macro. - (SIMPLE_MATCH): change from routine. - - * Makefile.in (regex.texinfo): make file readonly, so we don't - edit it by mistake. - - * many files (re_default_syntax): rename to `re_syntax_options'; - call re_set_syntax instead of assigning to the variable where - possible. - -Mon Sep 7 10:12:16 1992 Karl Berry (karl@hayley) - - * syntax.skel: don't use prototypes. - - * {configure,Makefile}.in: new files. - - * regex.c: include <string.h> `#if USG || STDC_HEADERS'; remove - obsolete test for `POSIX', and test for BSRTING. 
- Include <strings.h> if we are not USG or STDC_HEADERS. - Do not include <unistd.h>. What did we ever need that for? - - * regex.h (RE_NO_EMPTY_ALTS): remove this. - (RE_SYNTAX_AWK): remove from here, too. - * regex.c (regex_compile): remove the check. - * xregex.texinfo (Alternation Operator): update. - * other.c (test_others): remove tests for this. - - * regex.h (RE_DUP_MAX): undefine if already defined. - - * regex.h: (RE_SYNTAX_POSIX*): redo to allow more operators, and - define new syntaxes with the minimal set. - - * syntax.skel (main): used sscanf instead of scanf. - - * regex.h (RE_SYNTAX_*GREP): new definitions from mike. - - * regex.c (regex_compile): initialize the upper bound of - intervals at the beginning of the interval, not the end. - (From pclink@qld.tne.oz.au.) - - * regex.c (handle_bar): rename to `handle_alt', for consistency. - - * regex.c ({store,insert}_{op1,op2}): new routines (except the last). - ({STORE,INSERT}_JUMP{,2}): macros to replace the old routines, - which took arguments in different orders, and were generally weird. - - * regex.c (PAT_PUSH*): rename to `BUF_PUSH*' -- we're not - appending info to the pattern! - -Sun Sep 6 11:26:49 1992 Karl Berry (karl@hayley) - - * regex.c (regex_compile): delete the variable - `following_left_brace', since we never use it. - - * regex.c (print_compiled_pattern): don't print the fastmap if - it's null. - - * regex.c (re_compile_fastmap): handle - `on_failure_keep_string_jump' like `on_failure_jump'. - - * regex.c (re_match_2): in `charset{,_not' case, cast the bit - count to unsigned, not unsigned char, in case we have a full - 32-byte bit list. - - * tregress.c (simple_match): remove. - (simple_test): rename as `simple_match'. - (simple_compile): print the error string if the compile failed. - - * regex.c (DO_RANGE): rewrite as a function, `compile_range', so - we can debug it. Change pattern characters to unsigned char - *'s, and change the range variable to an unsigned. 
- (regex_compile): change calls. - -Sat Sep 5 17:40:49 1992 Karl Berry (karl@hayley) - - * regex.h (_RE_ARGS): new macro to put in argument lists (if - ANSI) or omit them (if K&R); don't declare routines twice. - - * many files (obscure_syntax): rename to `re_default_syntax'. - -Fri Sep 4 09:06:53 1992 Karl Berry (karl@hayley) - - * GNUmakefile (extraclean): new target. - (realclean): delete the info files. - -Wed Sep 2 08:14:42 1992 Karl Berry (karl@hayley) - - * regex.h: doc fix. - -Sun Aug 23 06:53:15 1992 Karl Berry (karl@hayley) - - * regex.[ch] (re_comp): no const in the return type (from djm). - -Fri Aug 14 07:25:46 1992 Karl Berry (karl@hayley) - - * regex.c (DO_RANGE): declare variables as unsigned chars, not - signed chars (from jimb). - -Wed Jul 29 18:33:53 1992 Karl Berry (karl@claude.cs.umb.edu) - - * Version 0.9. - - * GNUmakefile (distclean): do not remove regex.texinfo. - (realclean): remove it here. - - * tregress.c (simple_test): initialize buf.buffer. - -Sun Jul 26 08:59:38 1992 Karl Berry (karl@hayley) - - * regex.c (push_dummy_failure): new opcode and corresponding - case in the various routines. Pushed at the end of - alternatives. - - * regex.c (jump_past_next_alt): rename to `jump_past_alt', for - brevity. - (no_pop_jump): rename to `jump'. - - * regex.c (regex_compile) [DEBUG]: terminate printing of pattern - with a newline. - - * NEWS: new file. - - * tregress.c (simple_{compile,match,test}): routines to simplify all - these little tests. - - * tregress.c: test for matching as much as possible. - -Fri Jul 10 06:53:32 1992 Karl Berry (karl@hayley) - - * Version 0.8. - -Wed Jul 8 06:39:31 1992 Karl Berry (karl@hayley) - - * regex.c (SIGN_EXTEND_CHAR): #undef any previous definition, as - ours should always work properly. - -Mon Jul 6 07:10:50 1992 Karl Berry (karl@hayley) - - * iregex.c (main) [DEBUG]: conditionalize the call to - print_compiled_pattern. - - * iregex.c (main): initialize buf.buffer to NULL. 
- * tregress.c (test_regress): likewise. - - * regex.c (alloca) [sparc]: #if on HAVE_ALLOCA_H instead. - - * tregress.c (test_regress): didn't have jla's test quite right. - -Sat Jul 4 09:02:12 1992 Karl Berry (karl@hayley) - - * regex.c (re_match_2): only REGEX_ALLOCATE all the register - vectors if the pattern actually has registers. - (match_end): new variable to avoid having to use best_regend[0]. - - * regex.c (IS_IN_FIRST_STRING): rename to FIRST_STRING_P. - - * regex.c: doc fixes. - - * tregress.c (test_regress): new fastmap test forwarded by rms. - - * tregress.c (test_regress): initialize the fastmap field. - - * tregress.c (test_regress): new test from jla that aborted - in re_search_2. - -Fri Jul 3 09:10:05 1992 Karl Berry (karl@hayley) - - * tregress.c (test_regress): add tests for translating charsets, - from kaoru. - - * GNUmakefile (common): add alloca.o. - * alloca.c: new file, copied from bison. - - * other.c (test_others): remove var `buf', since it's no longer used. - - * Below changes from ro@TechFak.Uni-Bielefeld.DE. - - * tregress.c (test_regress): initialize buf.allocated. - - * regex.c (re_compile_fastmap): initialize `succeed_n_p'. - - * GNUmakefile (regex): depend on $(common). - -Wed Jul 1 07:12:46 1992 Karl Berry (karl@hayley) - - * Version 0.7. - - * regex.c: doc fixes. - -Mon Jun 29 08:09:47 1992 Karl Berry (karl@fosse) - - * regex.c (pop_failure_point): change string vars to - `const char *' from `unsigned char *'. - - * regex.c: consolidate debugging stuff. - (print_partial_compiled_pattern): avoid enum clash. - -Mon Jun 29 07:50:27 1992 Karl Berry (karl@hayley) - - * xmalloc.c: new file. - * GNUmakefile (common): add it. - - * iregex.c (print_regs): new routine (from jimb). - (main): call it. - -Sat Jun 27 10:50:59 1992 Jim Blandy (jimb@pogo.cs.oberlin.edu) - - * xregex.c (re_match_2): When we have accepted a match and - restored d from best_regend[0], we need to set dend - appropriately as well.
- -Sun Jun 28 08:48:41 1992 Karl Berry (karl@hayley) - - * tregress.c: rename from regress.c. - - * regex.c (print_compiled_pattern): improve charset case to ease - byte-counting. - Also, don't distinguish between Emacs and non-Emacs - {not,}wordchar opcodes. - - * regex.c (print_fastmap): move here. - * test.c: from here. - * regex.c (print_{{partial,}compiled_pattern,double_string}): - rename from ..._printer. Change calls here and in test.c. - - * regex.c: create from xregex.c and regexinc.c for once and for - all, and change the debug fns to be extern, instead of static. - * GNUmakefile: remove traces of xregex.c. - * test.c: put in externs, instead of including regexinc.c. - - * xregex.c: move interactive main program and scanstring to iregex.c. - * iregex.c: new file. - * upcase.c, printchar.c: new files. - - * various doc fixes and other cosmetic changes throughout. - - * regexinc.c (compiled_pattern_printer): change variable name, - for consistency. - (partial_compiled_pattern_printer): print other info about the - compiled pattern, besides just the opcodes. - * xregex.c (regex_compile) [DEBUG]: print the compiled pattern - when we're done. - - * xregex.c (re_compile_fastmap): in the duplicate case, set - `can_be_null' and return. - Also, set `bufp->can_be_null' according to a new variable, - `path_can_be_null'. - Also, rewrite main while loop to not test `p != NULL', since - we never set it that way. - Also, eliminate special `can_be_null' value for the endline case. - (re_search_2): don't test for the special value. - * regex.h (struct re_pattern_buffer): remove the definition. - -Sat Jun 27 15:00:40 1992 Karl Berry (karl@hayley) - - * xregex.c (re_compile_fastmap): remove the `RE_' from - `REG_RE_MATCH_NULL_AT_END'. - Also, assert the fastmap in the pattern buffer is non-null. - Also, reset `succeed_n_p' after we've - paid attention to it, instead of every time through the loop. 
- Also, in the `anychar' case, only clear fastmap['\n'] if the - syntax says to, and don't return prematurely. - Also, rearrange cases in some semblance of a rational order. - * regex.h (REG_RE_MATCH_NULL_AT_END): remove the `RE_' from the name. - - * other.c: take bug reports from here. - * regress.c: new file for them. - * GNUmakefile (test): add it. - * main.c (main): new possible test. - * test.h (test_type): new value in enum. - -Thu Jun 25 17:37:43 1992 Karl Berry (karl@hayley) - - * xregex.c (scanstring) [test]: new function from jimb to allow some - escapes. - (main) [test]: call it (on the string, not the pattern). - - * xregex.c (main): make return type `int'. - -Wed Jun 24 10:43:03 1992 Karl Berry (karl@hayley) - - * xregex.c (pattern_offset_t): change to `int', for the benefit - of patterns which compile to more than 2^15 bytes. - - * xregex.c (GET_BUFFER_SPACE): remove spurious braces. - - * xregex.texinfo (Using Registers): put in a stub to ``document'' - the new function. - * regex.h (re_set_registers) [!__STDC__]: declare. - * xregex.c (re_set_registers): declare K&R style (also move to a - different place in the file). - -Mon Jun 8 18:03:28 1992 Jim Blandy (jimb@pogo.cs.oberlin.edu) - - * regex.h (RE_NREGS): Doc fix. - - * xregex.c (re_set_registers): New function. - * regex.h (re_set_registers): Declaration for new function. - -Fri Jun 5 06:55:18 1992 Karl Berry (karl@hayley) - - * main.c (main): `return 0' instead of `exit (0)'. (From Paul Eggert) - - * regexinc.c (SIGN_EXTEND_CHAR): cast to unsigned char. - (extract_number, EXTRACT_NUMBER): don't bother to cast here. - -Tue Jun 2 07:37:53 1992 Karl Berry (karl@hayley) - - * Version 0.6. - - * Change copyrights to `1985, 89, ...'. - - * regex.h (REG_RE_MATCH_NULL_AT_END): new macro. - * xregex.c (re_compile_fastmap): initialize `can_be_null' to - `p==pend', instead of in the test at the top of the loop (as - it was, it was always being set). 
- Also, set `can_be_null'=1 if we would jump to the end of the - pattern in the `on_failure_jump' cases. - (re_search_2): check if `can_be_null' is 1, not nonzero. This - was the original test in rms' regex; why did we change this? - - * xregex.c (re_compile_fastmap): rename `is_a_succeed_n' to - `succeed_n_p'. - -Sat May 30 08:09:08 1992 Karl Berry (karl@hayley) - - * xregex.c (re_compile_pattern): declare `regnum' as `unsigned', - not `regnum_t', for the benefit of those patterns with more - than 255 groups. - - * xregex.c: rename `failure_stack' to `fail_stack', for brevity; - likewise for `match_nothing' to `match_null'. - - * regexinc.c (REGEX_REALLOCATE): take both the new and old - sizes, and copy only the old bytes. - * xregex.c (DOUBLE_FAILURE_STACK): pass both old and new. - * This change from Thorsten Ohl. - -Fri May 29 11:45:22 1992 Karl Berry (karl@hayley) - - * regexinc.c (SIGN_EXTEND_CHAR): define as `(signed char) c' - instead of relying on __CHAR_UNSIGNED__, to work with - compilers other than GCC. From Per Bothner. - - * main.c (main): change return type to `int'. - -Mon May 18 06:37:08 1992 Karl Berry (karl@hayley) - - * regex.h (RE_SYNTAX_AWK): typo in RE_RE_UNMATCHED... - -Fri May 15 10:44:46 1992 Karl Berry (karl@hayley) - - * Version 0.5. - -Sun May 3 13:54:00 1992 Karl Berry (karl@hayley) - - * regex.h (struct re_pattern_buffer): now it's just `regs_allocated'. - (REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED): new constants. - * xregex.c (regexec, re_compile_pattern): set the field appropriately. - (re_match_2): and use it. bufp can't be const any more. - -Fri May 1 15:43:09 1992 Karl Berry (karl@hayley) - - * regexinc.c: unconditionally include <sys/types.h>, first. - - * regex.h (struct re_pattern_buffer): rename - `caller_allocated_regs' to `regs_allocated_p'. - * xregex.c (re_compile_pattern): same change here. - (regexec): and here. - (re_match_2): reallocate registers if necessary. 
- -Fri Apr 10 07:46:50 1992 Karl Berry (karl@hayley) - - * regex.h (RE_SYNTAX{_POSIX,}_AWK): new definitions from Arnold. - -Sun Mar 15 07:34:30 1992 Karl Berry (karl at hayley) - - * GNUmakefile (dist): versionize regex.{c,h,texinfo}. - -Tue Mar 10 07:05:38 1992 Karl Berry (karl at hayley) - - * Version 0.4. - - * xregex.c (PUSH_FAILURE_POINT): always increment the failure id. - (DEBUG_STATEMENT) [DEBUG]: execute the statement even if `debug'==0. - - * xregex.c (pop_failure_point): if the saved string location is - null, keep the current value. - (re_match_2): at fail, test for a dummy failure point by - checking the restored pattern value, not string value. - (re_match_2): new case, `on_failure_keep_string_jump'. - (regex_compile): output this opcode in the .*\n case. - * regexinc.c (re_opcode_t): define the opcode. - (partial_compiled_pattern_printer): add the new case. - -Mon Mar 9 09:09:27 1992 Karl Berry (karl at hayley) - - * xregex.c (regex_compile): optimize .*\n to output an - unconditional jump to the ., instead of pushing failure points - each time through the loop. - - * xregex.c (DOUBLE_FAILURE_STACK): compute the maximum size - ourselves (and correctly); change callers. - -Sun Mar 8 17:07:46 1992 Karl Berry (karl at hayley) - - * xregex.c (failure_stack_elt_t): change to `const char *', to - avoid warnings. - - * regex.h (re_set_syntax): declare this. - - * xregex.c (pop_failure_point) [DEBUG]: conditionally pass the - original strings and sizes; change callers. - -Thu Mar 5 16:35:35 1992 Karl Berry (karl at claude.cs.umb.edu) - - * xregex.c (regnum_t): new type for register/group numbers. - (compile_stack_elt_t, regex_compile): use it. - - * xregex.c (regexec): declare len as `int' to match re_search. - - * xregex.c (re_match_2): don't declare p1 twice. - - * xregex.c: change `while (1)' to `for (;;)' to avoid silly - compiler warnings. - - * regex.h [__STDC__]: use #if, not #ifdef.
- - * regexinc.c (REGEX_REALLOCATE): cast the result of alloca to - (char *), to avoid warnings. - - * xregex.c (regerror): declare variable as const. - - * xregex.c (re_compile_pattern, re_comp): define as returning a const - char *. - * regex.h (re_compile_pattern, re_comp): likewise. - -Thu Mar 5 15:57:56 1992 Karl Berry (karl@hal) - - * xregex.c (regcomp): declare `syntax' as unsigned. - - * xregex.c (re_match_2): try to avoid compiler warnings about - unsigned comparisons. - - * GNUmakefile (test-xlc): new target. - - * regex.h (reg_errcode_t): remove trailing comma from definition. - * regexinc.c (re_opcode_t): likewise. - -Thu Mar 5 06:56:07 1992 Karl Berry (karl at hayley) - - * GNUmakefile (dist): add version numbers automatically. - (versionfiles): new variable. - (regex.{c,texinfo}): don't add version numbers here. - * regex.h: put in placeholder instead of the version number. - -Fri Feb 28 07:11:33 1992 Karl Berry (karl at hayley) - - * xregex.c (re_error_msg): declare const, since it is. - -Sun Feb 23 05:41:57 1992 Karl Berry (karl at fosse) - - * xregex.c (PAT_PUSH{,_2,_3}, ...): cast args to avoid warnings. - (regex_compile, regexec): return REG_NOERROR, instead - of 0, on success. - (boolean): define as char, and #define false and true. - * regexinc.c (STREQ): cast the result. - -Sun Feb 23 07:45:38 1992 Karl Berry (karl at hayley) - - * GNUmakefile (test-cc, test-hc, test-pcc): new targets. - - * regex.inc (extract_number, extract_number_and_incr) [DEBUG]: - only define if we are debugging. - - * xregex.c [_AIX]: do #pragma alloca first if necessary. - * regexinc.c [_AIX]: remove the #pragma from here. - - * regex.h (reg_syntax_t): declare as unsigned, and redo the enum - as #define's again. Some compilers do stupid things with enums. - -Thu Feb 20 07:19:47 1992 Karl Berry (karl at hayley) - - * Version 0.3. - - * xregex.c, regex.h (newline_anchor_match_p): rename to - `newline_anchor'; dumb idea to change the name. 
- -Tue Feb 18 07:09:02 1992 Karl Berry (karl at hayley) - - * regexinc.c: go back to original, i.e., don't include - <string.h> or define strchr. - * xregex.c (regexec): don't bother with adding characters after - newlines to the fastmap; instead, just don't use a fastmap. - * xregex.c (regcomp): set the buffer and fastmap fields to zero. - - * xregex.texinfo (GNU r.e. compiling): have to initialize more - than two fields. - - * regex.h (struct re_pattern_buffer): rename `newline_anchor' to - `newline_anchor_match_p', as we're back to two cases. - * xregex.c (regcomp, re_compile_pattern, re_comp): change - accordingly. - (re_match_2): at begline and endline, POSIX is not a special - case anymore; just check newline_anchor_match_p. - -Thu Feb 13 16:29:33 1992 Karl Berry (karl at hayley) - - * xregex.c (*empty_string*): rename to *null_string*, for brevity. - -Wed Feb 12 06:36:22 1992 Karl Berry (karl at hayley) - - * xregex.c (re_compile_fastmap): at endline, don't set fastmap['\n']. - (re_match_2): rewrite the begline/endline cases to take account - of the new field newline_anchor. - -Tue Feb 11 14:34:55 1992 Karl Berry (karl at hayley) - - * regexinc.c [!USG etc.]: include <strings.h> and define strchr - as index. - - * xregex.c (re_search_2): when searching backwards, declare `c' - as a char and use casts when using it as an array subscript. - - * xregex.c (regcomp): if REG_NEWLINE, set - RE_HAT_LISTS_NOT_NEWLINE. Set the `newline_anchor' field - appropriately. - (regex_compile): compile [^...] as matching a \n according to - the syntax bit. - (regexec): if doing REG_NEWLINE stuff, compile a fastmap and add - characters after any \n's to the newline. - * regex.h (RE_HAT_LISTS_NOT_NEWLINE): new syntax bit. - (struct re_pattern_buffer): rename `posix_newline' to - `newline_anchor', define constants for its values. 
- -Mon Feb 10 07:22:50 1992 Karl Berry (karl at hayley) - - * xregex.c (re_compile_fastmap): combine the code at the top and - bottom of the loop, as it's essentially identical. - -Sun Feb 9 10:02:19 1992 Karl Berry (karl at hayley) - - * xregex.texinfo (POSIX Translate Tables): remove this, as it - doesn't match the spec. - - * xregex.c (re_compile_fastmap): if we finish off a path, go - back to the top (to set can_be_null) instead of returning - immediately. - - * xregex.texinfo: changes from bob. - -Sat Feb 1 07:03:25 1992 Karl Berry (karl at hayley) - - * xregex.c (re_search_2): doc fix (from rms). - -Fri Jan 31 09:52:04 1992 Karl Berry (karl at hayley) - - * xregex.texinfo (GNU Searching): clarify the range arg. - - * xregex.c (re_match_2, at_endline_op_p): add extra parens to - get rid of GCC 2's (silly, IMHO) warning about && within ||. - - * xregex.c (common_op_match_empty_string_p): use - MATCH_NOTHING_UNSET_VALUE, not -1. - -Thu Jan 16 08:43:02 1992 Karl Berry (karl at hayley) - - * xregex.c (SET_REGS_MATCHED): only set the registers from - lowest to highest. - - * regexinc.c (MIN): new macro. - * xregex.c (re_match_2): only check min (num_regs, - regs->num_regs) when we set the returned regs. - - * xregex.c (re_match_2): set registers after the first - num_regs to -1 before we return. - -Tue Jan 14 16:01:42 1992 Karl Berry (karl at hayley) - - * xregex.c (re_match_2): initialize max (RE_NREGS, re_nsub + 1) - registers (from rms). - - * xregex.c, regex.h: don't abbreviate `19xx' to `xx'. - - * regexinc.c [!emacs]: include <sys/types.h> before <unistd.h>. - (from ro@thp.Uni-Koeln.DE). - -Thu Jan 9 07:23:00 1992 Karl Berry (karl at hayley) - - * xregex.c (*unmatchable): rename to `match_empty_string_p'. - (CAN_MATCH_NOTHING): rename to `REG_MATCH_EMPTY_STRING_P'. - - * regexinc.c (malloc, realloc): remove prototypes, as they can - cause clashes (from rms). - -Mon Jan 6 12:43:24 1992 Karl Berry (karl at claude.cs.umb.edu) - - * Version 0.2. 
- -Sun Jan 5 10:50:38 1992 Karl Berry (karl at hayley) - - * xregex.texinfo: bring more or less up-to-date. - * GNUmakefile (regex.texinfo): generate from regex.h and - xregex.texinfo. - * include.awk: new file. - - * xregex.c: change all calls to the fn extract_number_and_incr - to the macro. - - * xregex.c (re_match_2) [emacs]: in at_dot, use PTR_CHAR_POS + 1, - instead of bf_* and sl_*. Cast d to unsigned char *, to match - the declaration in Emacs' buffer.h. - [emacs19]: in before_dot, at_dot, and after_dot, likewise. - - * regexinc.c: unconditionally include <sys/types.h>. - - * regexinc.c (alloca) [!alloca]: Emacs config files sometimes - define this, so don't define it if it's already defined. - -Sun Jan 5 06:06:53 1992 Karl Berry (karl at fosse) - - * xregex.c (re_comp): fix type conflicts with regex_compile (we - haven't been compiling this). - - * regexinc.c (SIGN_EXTEND_CHAR): use `__CHAR_UNSIGNED__', not - `CHAR_UNSIGNED'. - - * regexinc.c (NULL) [!NULL]: define it (as zero). - - * regexinc.c (extract_number): remove the temporaries. - -Sun Jan 5 07:50:14 1992 Karl Berry (karl at hayley) - - * regex.h (regerror) [!__STDC__]: return a size_t, not a size_t *. - - * xregex.c (PUSH_FAILURE_POINT, ...): declare `destination' as - `char *' instead of `void *', to match alloca declaration. - - * xregex.c (regerror): use `size_t' for the intermediate values - as well as the return type. - - * xregex.c (regexec): cast the result of malloc. - - * xregex.c (regexec): don't initialize `private_preg' in the - declaration, as old C compilers can't do that. - - * xregex.c (main) [test]: declare printchar void. - - * xregex.c (assert) [!DEBUG]: define this to do nothing, and - remove #ifdef DEBUG's from around asserts. - - * xregex.c (re_match_2): remove error message when not debugging. - -Sat Jan 4 09:45:29 1992 Karl Berry (karl at hayley) - - * other.c: test the bizarre duplicate case in re_compile_fastmap - that I just noticed. 
- - * test.c (general_test): don't test registers beyond the end of - correct_regs, as well as regs. - - * xregex.c (regex_compile): at handle_close, don't assign to - *inner_group_loc if we didn't push a start_memory (because the - group number was too big). In fact, don't push or pop the - inner_group_offset in that case. - - * regex.c: rename to xregex.c, since it's not the whole thing. - * regex.texinfo: likewise. - * GNUmakefile: change to match. - - * regex.c [DEBUG]: only include <stdio.h> if debugging. - - * regexinc.c (SIGN_EXTEND_CHAR) [CHAR_UNSIGNED]: if it's already - defined, don't redefine it. - - * regex.c: define _GNU_SOURCE at the beginning. - * regexinc.c (isblank) [!isblank]: define it. - (isgraph) [!isgraph]: change conditional to this, and remove the - sequent stuff. - - * regex.c (regex_compile): add `blank' character class. - - * regex.c (regex_compile): don't use a uchar variable to loop - through all characters. - - * regex.c (regex_compile): at '[', improve logic for checking - that we have enough space for the charset. - - * regex.h (struct re_pattern_buffer): declare translate as char - * again. We only use it as an array subscript once, I think. - - * regex.c (TRANSLATE): new macro to cast the data character - before subscripting. - (num_internal_regs): rename to `num_regs'. - -Fri Jan 3 07:58:01 1992 Karl Berry (karl at hayley) - - * regex.h (struct re_pattern_buffer): declare `allocated' and - `used' as unsigned long, since these are never negative. - - * regex.c (compile_stack_element): rename to compile_stack_elt_t. - (failure_stack_element): similarly. - - * regexinc.c (TALLOC, RETALLOC): new macros to simplify - allocation of arrays. - - * regex.h (re_*) [__STDC__]: don't declare string args unsigned - char *; that makes them incompatible with string constants. - (struct re_pattern_buffer): declare the pattern and translate - table as unsigned char *. - * regex.c (most routines): use unsigned char vs. char consistently. 
- - * regex.h (re_compile_pattern): do not declare the length arg as - const. - * regex.c (re_compile_pattern): likewise. - - * regex.c (POINTER_TO_REG): rename to `POINTER_TO_OFFSET'. - - * regex.h (re_registers): declare `start' and `end' as - `regoff_t', instead of `int'. - - * regex.c (regexec): if either of the malloc's for the register - information fail, return failure. - - * regex.h (RE_NREGS): define this again, as 30 (from jla). - (RE_ALLOCATE_REGISTERS): remove this. - (RE_SYNTAX_*): remove it from definitions. - (re_pattern_buffer): remove `return_default_num_regs', add - `caller_allocated_regs'. - * regex.c (re_compile_pattern): clear no_sub and - caller_allocated_regs in the pattern. - (regcomp): set caller_allocated_regs. - (re_match_2): do all register allocation at the end of the - match; implement new semantics. - - * regex.c (MAX_REGNUM): new macro. - (regex_compile): at handle_open and handle_close, if the group - number is too large, don't push the start/stop memory. - -Thu Jan 2 07:56:10 1992 Karl Berry (karl at hayley) - - * regex.c (re_match_2): if the back reference is to a group that - never matched, then goto fail, not really_fail. Also, don't - test if the pattern can match the empty string. Why did we - ever do that? - (really_fail): this label no longer needed. - - * regexinc.c [STDC_HEADERS]: use only this to test if we should - include <stdlib.h>. - - * regex.c (DO_RANGE, regex_compile): translate in all cases - except the single character after a \. - - * regex.h (RE_AWK_CLASS_HACK): rename to - RE_BACKSLASH_ESCAPE_IN_LISTS. - * regex.c (regex_compile): change use. - - * regex.c (re_compile_fastmap): do not translate the characters - again; we already translated them at compilation. (From ylo@ngs.fi.) - - * regex.c (re_match_2): in case for at_dot, invert sense of - comparison and find the character number properly. (From - worley@compass.com.) 
- (re_match_2) [emacs]: remove the cases for before_dot and - after_dot, since there's no way to specify them, and the code - is wrong (judging from this change). - -Wed Jan 1 09:13:38 1992 Karl Berry (karl at hayley) - - * psx-{interf,basic,extend}.c, other.c: set `t' as the first - thing, so that if we run them in succession, general_test's - kludge to see if we're doing POSIX tests works. - - * test.h (test_type): add `all_test'. - * main.c: add case for `all_test'. - - * regexinc.c (partial_compiled_pattern_printer, - double_string_printer): don't print anything if we're passed null. - - * regex.c (PUSH_FAILURE_POINT): do not scan for the highest and - lowest active registers. - (re_match_2): compute lowest/highest active regs at start_memory and - stop_memory. - (NO_{LOW,HIGH}EST_ACTIVE_REG): new sentinel values. - (pop_failure_point): return the lowest/highest active reg values - popped; change calls. - - * regex.c [DEBUG]: include <assert.h>. - (various routines) [DEBUG]: change conditionals to assertions. - - * regex.c (DEBUG_STATEMENT): new macro. - (PUSH_FAILURE_POINT): use it to increment num_regs_pushed. - (re_match_2) [DEBUG]: only declare num_regs_pushed if DEBUG. - - * regex.c (*can_match_nothing): rename to *unmatchable. - - * regex.c (re_match_2): at stop_memory, adjust argument reading. - - * regex.h (re_pattern_buffer): declare `can_be_null' as a 2-bit - bit field. - - * regex.h (re_pattern_buffer): declare `buffer' unsigned char *; - no, dumb idea. The pattern can have signed number. - - * regex.c (re_match_2): in maybe_pop_jump case, skip over the - right number of args to the group operators, and don't do - anything with endline if posix_newline is not set. - - * regex.c, regexinc.c (all the things we just changed): go back - to putting the inner group count after the start_memory, - because we need it in the on_failure_jump case in re_match_2.
- But leave it after the stop_memory also, since we need it - there in re_match_2, and we don't have any way of getting back - to the start_memory. - - * regexinc.c (partial_compiled_pattern_printer): adjust argument - reading for start/stop_memory. - * regex.c (re_compile_fastmap, group_can_match_nothing): likewise. - -Tue Dec 31 10:15:08 1991 Karl Berry (karl at hayley) - - * regex.c (bits list routines): remove these. - (re_match_2): get the number of inner groups from the pattern, - instead of keeping track of it at start and stop_memory. - Put the count after the stop_memory, not after the - start_memory. - (compile_stack_element): remove `fixup_inner_group' member, - since we now put it in when we can compute it. - (regex_compile): at handle_open, don't push the inner group - offset, and at handle_close, don't pop it. - - * regex.c (level routines): remove these, and their uses in - regex_compile. This was another manifestation of having to find - $'s that were endlines. - - * regex.c (regexec): this does searching, not matching (a - well-disguised part of the standard). So rewrite to use - `re_search' instead of `re_match'. - * psx-interf.c (test_regexec): add tests to, uh, match. - - * regex.h (RE_TIGHT_ALT): remove this; nobody uses it. - * regex.c: remove the code that was supposed to implement it. - - * other.c (test_others): ^ and $ never match newline characters; - RE_CONTEXT_INVALID_OPS doesn't affect anchors. - - * psx-interf.c (test_regerror): update for new error messages. - - * psx-extend.c: it's now ok to have an alternative be just a $, - so remove all the tests which supposed that was invalid. - -Wed Dec 25 09:00:05 1991 Karl Berry (karl at hayley) - - * regex.c (regex_compile): in handle_open, don't skip over ^ and - $ when checking for an empty group. POSIX has changed the - grammar. - * psx-extend.c (test_posix_extended): thus, move (^$) tests to - valid section. - - * regexinc.c (boolean): move from here to test.h and regex.c. 
- * test files: declare verbose, omit_register_tests, and - test_should_match as boolean. - - * psx-interf.c (test_posix_c_interface): remove the `c_'. - * main.c: likewise. - - * psx-basic.c (test_posix_basic): ^ ($) is an anchor after - (before) an open (close) group. - - * regex.c (re_match_2): in endline, correct precedence of - posix_newline condition. - -Tue Dec 24 06:45:11 1991 Karl Berry (karl at hayley) - - * test.h: incorporate private-tst.h. - * test files: include test.h, not private-tst.h. - - * test.c (general_test): set posix_newline to zero if we are - doing POSIX tests (unfortunately, it's difficult to call - regcomp in this case, which is what we should really be doing). - - * regex.h (reg_syntax_t): make this an enumeration type which - defines the syntax bits; renames re_syntax_t. - - * regex.c (at_endline_op_p): don't preincrement p; then if it's - not an empty string op, we lose. - - * regex.h (reg_errcode_t): new enumeration type of the error - codes. - * regex.c (regex_compile): return that type. - - * regex.c (regex_compile): in [, initialize - just_had_a_char_class to false; somehow I had changed this to - true. - - * regex.h (RE_NO_CONSECUTIVE_REPEATS): remove this, since we - don't use it, and POSIX doesn't require this behavior anymore. - * regex.c (regex_compile): remove it from here. - - * regex.c (regex_compile): remove the no_op insertions for - verify_and_adjust_endlines, since that doesn't exist anymore. - - * regex.c (regex_compile) [DEBUG]: use printchar to print the - pattern, so unprintable bytes will print properly. - - * regex.c: move re_error_msg back. - * test.c (general_test): print the compile error if the pattern - was invalid. - -Mon Dec 23 08:54:53 1991 Karl Berry (karl at hayley) - - * regexinc.c: move re_error_msg here. - - * regex.c (re_error_msg): the ``message'' for success must be - NULL, to keep the interface to re_compile_pattern the same. - (regerror): if the msg is null, use "Success". 
- - * rename most test files for consistency. Change Makefile - correspondingly. - - * test.c (most routines): add casts to (unsigned char *) when we - call re_{match,search}{,_2}. - -Sun Dec 22 09:26:06 1991 Karl Berry (karl at hayley) - - * regex.c (re_match_2): declare string args as unsigned char * - again; don't declare non-pointer args const; declare the - pattern buffer const. - (re_match): likewise. - (re_search_2, re_search): likewise, except don't declare the - pattern const, since we make a fastmap. - * regex.h [__STDC__]: change prototypes. - - * regex.c (regex_compile): return an error code, not a string. - (re_err_list): new table to map from error codes to string. - (re_compile_pattern): return an element of re_err_list. - (regcomp): don't test all the strings. - (regerror): just use the list. - (put_in_buffer): remove this. - - * regex.c (equivalent_failure_points): remove this. - - * regex.c (re_match_2): don't copy the string arguments into - non-const pointers. We never alter the data. - - * regex.c (re_match_2): move assignment to `is_a_jump_n' out of - the main loop. Just initialize it right before we do - something with it. - - * regex.[ch] (re_match_2): don't declare the int parameters const. - -Sat Dec 21 08:52:20 1991 Karl Berry (karl at hayley) - - * regex.h (re_syntax_t): new type; declare to be unsigned - (previously we used int, but since we do bit operations on - this, unsigned is better, according to H&S). - (obscure_syntax, re_pattern_buffer): use that type. - * regex.c (re_set_syntax, regex_compile): likewise. - - * regex.h (re_pattern_buffer): new field `posix_newline'. - * regex.c (re_comp, re_compile_pattern): set to zero. - (regcomp): set to REG_NEWLINE. - * regex.h (RE_HAT_LISTS_NOT_NEWLINE): remove this (we can just - check `posix_newline' instead.) - - * regex.c (op_list_type, op_list, add_op): remove these. - (verify_and_adjust_endlines): remove this. - (pattern_offset_list_type, *pattern_offset* routines): and these. 
- These things all implemented the nonleading/nontrailing position - code, which was very long, had a few remaining problems, and - is no longer needed. So... - - * regexinc.c (STREQ): new macro to abbreviate strcmp(,)==0, for - brevity. Change various places in regex.c to use it. - - * regex{,inc}.c (enum regexpcode): change to a typedef - re_opcode_t, for brevity. - - * regex.h (re_syntax_table) [SYNTAX_TABLE]: remove this; it - should only be in regex.c, I think, since we don't define it - in this case. Maybe it should be conditional on !SYNTAX_TABLE? - - * regexinc.c (partial_compiled_pattern_printer): simplify and - distinguish the emacs/not-emacs (not)wordchar cases. - -Fri Dec 20 08:11:38 1991 Karl Berry (karl at hayley) - - * regexinc.c (regexpcode) [emacs]: only define the Emacs opcodes - if we are ifdef emacs. - - * regex.c (BUF_PUSH*): rename to PAT_PUSH*. - - * regex.c (regex_compile): in $ case, go back to essentially the - original code for deciding endline op vs. normal char. - (at_endline_op_p): new routine. - * regex.h (RE_ANCHORS_ONLY_AT_ENDS, RE_CONTEXT_INVALID_ANCHORS, - RE_REPEATED_ANCHORS_AWAY, RE_NO_ANCHOR_AT_NEWLINE): remove - these. POSIX has simplified the rules for anchors in draft - 11.2. - (RE_NEWLINE_ORDINARY): new syntax bit. - (RE_CONTEXT_INDEP_ANCHORS): change description to be compatible - with POSIX. - * regex.texinfo (Syntax Bits): remove the descriptions. - -Mon Dec 16 08:12:40 1991 Karl Berry (karl at hayley) - - * regex.c (re_match_2): in jump_past_next_alt, unconditionally - goto no_pop. The only register we were finding was one which - enclosed the whole alternative expression, not one around an - individual alternative. So we were never doing what we - thought we were doing, and this way makes (|a) against the - empty string fail. - - * regex.c (regex_compile): remove `highest_ever_regnum', and - don't restore regnum from the stack; just put it into a - temporary to put into the stop_memory. 
Otherwise, groups - aren't numbered consecutively. - - * regex.c (is_in_compile_stack): rename to - `group_in_compile_stack'; remove unnecessary test for the - stack being empty. - - * regex.c (re_match_2): in on_failure_jump, skip no_op's before - checking for the start_memory, in case we were called from - succeed_n. - -Sun Dec 15 16:20:48 1991 Karl Berry (karl at hayley) - - * regex.c (regex_compile): in duplicate case, use - highest_ever_regnum instead of regnum, since the latter is - reverted at stop_memory. - - * regex.c (re_match_2): in on_failure_jump, if the * applied to - a group, save the information for that group and all inner - groups (by making it active), even though we're not inside it - yet. - -Sat Dec 14 09:50:59 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_ITEM, POP_FAILURE_ITEM): new macros. - Use them instead of copying the stack manipulating a zillion - times. - - * regex.c (PUSH_FAILURE_POINT, pop_failure_point) [DEBUG]: save - and restore a unique identification value for each failure point. - - * regexinc.c (partial_compiled_pattern_printer): don't print an - extra / after duplicate commands. - - * regex.c (regex_compile): in back-reference case, allow a back - reference to register `regnum'. Otherwise, even `\(\)\1' - fails, since regnum is 1 at the back-reference. - - * regex.c (re_match_2): in fail, don't examine the pattern if we - restored to pend. - - * test_private.h: rename to private_tst.h. Change includes. - - * regex.c (extend_bits_list): compute existing size for realloc - in bytes, not blocks. - - * regex.c (re_match_2): in jump_past_next_alt, the for loop was - missing its (empty) statement. Even so, some register tests - still fail, although in a different way than in the previous change. - -Fri Dec 13 15:55:08 1991 Karl Berry (karl at hayley) - - * regex.c (re_match_2): in jump_past_next_alt, unconditionally - goto no_pop, since we weren't properly detecting if the - alternative matched something anyway. 
No, we need to not jump - to keep the register values correct; just change to not look at - register zero and not test RE_NO_EMPTY_ALTS (which is a - compile-time thing). - - * regex.c (SET_REGS_MATCHED): start the loop at 1, since we never - care about register zero until the very end. (I think.) - - * regex.c (PUSH_FAILURE_POINT, pop_failure_point): go back to - pushing and popping the active registers, instead of only doing - the registers before a group: (fooq|fo|o)*qbar against fooqbar - fails, since we restore back into the middle of group 1, yet it - isn't active, because the previous restore clobbered the active flag. - -Thu Dec 12 17:25:36 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_POINT): do not call - `equivalent_failure_points' after all; it causes the registers - to be ``wrong'' (according to POSIX), and an infinite loop on - `((a*)*)*' against `ab'. - - * regex.c (re_compile_fastmap): don't push `pend' on the failure - stack. - -Tue Dec 10 10:30:03 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_POINT): if pushing same failure point that - is on the top of the stack, fail. - (equivalent_failure_points): new routine. - - * regex.c (re_match_2): add debug statements for every opcode we - execute. - - * regex.c (regex_compile/handle_close): restore - `fixup_inner_group_count' and `regnum' from the stack. - -Mon Dec 9 13:51:15 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_POINT): declare `this_reg' as int, so - unsigned arithmetic doesn't happen when we don't want to save - the registers. - -Tue Dec 3 08:11:10 1991 Karl Berry (karl at hayley) - - * regex.c (extend_bits_list): divide size by bits/block. - - * regex.c (init_bits_list): remove redundant assignment to - `bits_list_ptr'. - - * regexinc.c (partial_compiled_pattern_printer): don't do *p++ - twice in the same expr. - - * regex.c (re_match_2): at on_failure_jump, use the correct - pattern positions for getting the stuff following the start_memory.
- - * regex.c (struct register_info): remove the bits_list for the - inner groups; make that a separate variable. - -Mon Dec 2 10:42:07 1991 Karl Berry (karl at hayley) - - * regex.c (PUSH_FAILURE_POINT): don't pass `failure_stack' as an - arg; change callers. - - * regex.c (PUSH_FAILURE_POINT): print items in order they are - pushed. - (pop_failure_point): likewise. - - * regex.c (main): prompt for the pattern and string. - - * regex.c (FREE_VARIABLES) [!REGEX_MALLOC]: declare as nothing; - remove #ifdefs from around calls. - - * regex.c (extract_number, extract_number_and_incr): declare static. - - * regex.c: remove the canned main program. - * main.c: new file. - * Makefile (COMMON): add main.o. - -Tue Sep 24 06:26:51 1991 Kathy Hargreaves (kathy at fosse) - - * regex.c (re_match_2): Made `pend' and `dend' not register variables. - Only set string2 to string1 if string1 isn't null. - Send address of p, d, regstart, regend, and reg_info to - pop_failure_point. - Put in more debug statements. - - * regex.c [debug]: Added global variable. - (DEBUG_*PRINT*): Only print if `debug' is true. - (DEBUG_DOUBLE_STRING_PRINTER): Changed DEBUG_STRING_PRINTER's - name to this. - Changed some comments. - (PUSH_FAILURE_POINT): Moved and added some debugging statements. - Was saving regstart on the stack twice instead of saving both - regstart and regend; remedied this. - [NUM_REGS_ITEMS]: Changed from 3 to 4, as now save lowest and - highest active registers instead of highest used one. - [NUM_NON_REG_ITEMS]: Changed name of NUM_OTHER_ITEMS to this. - (NUM_FAILURE_ITEMS): Use active registers instead of number 0 - through highest used one. - (re_match_2): Have pop_failure_point put things in the variables. - (pop_failure_point): Have it do what the fail case in re_match_2 - did with the failure stack, instead of throwing away the stuff - popped off. re_match_2 can ignore results when it doesn't - need them. 
- - -Thu Sep 5 13:23:28 1991 Kathy Hargreaves (kathy at fosse) - - * regex.c (banner): Changed copyright years to be separate. - - * regex.c [CHAR_UNSIGNED]: Put __ at both ends of this name. - [DEBUG, debug_count, *debug_p, DEBUG_PRINT_1, DEBUG_PRINT_2, - DEBUG_COMPILED_PATTERN_PRINTER ,DEBUG_STRING_PRINTER]: - defined these for debugging. - (extract_number): Added this (debuggable) routine version of - the macro EXTRACT_NUMBER. Ditto for EXTRACT_NUMBER_AND_INCR. - (re_compile_pattern): Set return_default_num_regs if the - syntax bit RE_ALLOCATE_REGISTERS is set. - [REGEX_MALLOC]: Renamed USE_ALLOCA to this. - (BUF_POP): Got rid of this, as don't ever use it. - (regex_compile): Made the type of `pattern' not be register. - If DEBUG, print the pattern to compile. - (re_match_2): If had a `$' in the pattern before a `^' then - don't record the `^' as an anchor. - Put (enum regexpcode) before references to b, as suggested - [RE_NO_BK_BRACES]: Changed RE_NO_BK_CURLY_BRACES to this. - (remove_pattern_offset): Removed this unused routine. - (PUSH_FAILURE_POINT): Changed to only save active registers. - Put in debugging statements. - (re_compile_fastmap): Made `pattern' not a register variable. - Use routine for extracting numbers instead of macro. - (re_match_2): Made `p', `mcnt' and `mcnt2' not register variables. - Added `num_regs_pushed' for debugging. - Only malloc registers if the syntax bit RE_ALLOCATE_REGISTERS is set. - Put in debug statements. - Put the macro NOTE_INNER_GROUP's code inline, as it was the - only called in one place. - For debugging, extract numbers using routines instead of macros. - In case fail: only restore pushed active registers, and added - debugging statements. - (pop_failure_point): Test for underfull stack. - (group_can_match_nothing, common_op_can_match_nothing): For - debugging, extract numbers using routines instead of macros. - (regexec): Changed formal parameters to not be prototypes. 
- Don't initialize `regs' or `private_preg' in their declarations. - -Tue Jul 23 18:38:36 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h [RE_CONTEXT_INDEP_OPS]: Moved the anchor stuff out of - this bit. - [RE_UNMATCHED_RIGHT_PAREN_ORD]: Defined this bit. - [RE_CONTEXT_INVALID_ANCHORS]: Defined this bit. - [RE_CONTEXT_INDEP_ANCHORS]: Defined this bit. - Added RE_CONTEXT_INDEP_ANCHORS to all syntaxes which had - RE_CONTEXT_INDEP_OPS. - Took RE_ANCHORS_ONLY_AT_ENDS out of the POSIX basic syntax. - Added RE_UNMATCHED_RIGHT_PAREN_ORD to the POSIX extended - syntax. - Took RE_REPEATED_ANCHORS_AWAY out of the POSIX extended syntax. - Defined REG_NOERROR (which will probably have to go away again). - Changed the type `off_t' to `regoff_t'. - - * regex.c: Changed some comments. - (regex_compile): Added variable `had_an_endline' to keep track - of if hit a `$' since the beginning of the pattern or the last - alternative (if any). - Changed RE_CONTEXT_INVALID_OPS and RE_CONTEXT_INDEP_OPS to - RE_CONTEXT_INVALID_ANCHORS and RE_CONTEXT_INDEP_ANCHORS where - appropriate. - Put a `no_op' in the pattern if a repeat is only zero or one - times; in this case and if it is many times (whereupon a jump - backwards is pushed instead), keep track of the operator for - verify_and_adjust_endlines. - If RE_UNMATCHED_RIGHT_PAREN is set, make an unmatched - close-group operator match `)'. - Changed all error exits to exit (1). - (remove_pattern_offset): Added this routine, but don't use it. - (verify_and_adjust_endlines): At top of routine, if initialize - routines run out of memory, return true after setting - enough_memory false. - At end of endline, et al. case, don't set *p to no_op. - Repetition operators also set the level and active groups' - match statuses, unless RE_REPEATED_ANCHORS_AWAY is set. - (get_group_match_status): Put a return in front of call to get_bit. - (re_compile_fastmap): Changed is_a_succeed_n to a boolean.
- If at end of pattern, then if the failure stack isn't empty, - go back to the failure point. - In *jump* case, only pop the stack if what's on top of it is - where we've just jumped to. - (re_search_2): Return -2 instead of val if val is -2. - (group_can_match_nothing, alternative_can_match_nothing, - common_op_can_match_nothing): Now pass in reg_info for the - `duplicate' case. - (re_match_2): Don't skip over the next alternative also if - empty alternatives aren't allowed. - In fail case, if failed to a backwards jump that's part of a - repetition loop, pop the current failure point and use the - next one. - (pop_failure_point): Check that there's as many register items - on the failure stack as the stack says there are. - (common_op_can_match_nothing): Added variables `ret' and - `reg_no' so can set reg_info for the group encountered. - Also break without doing anything if hit a no_op or the other - kinds of `endline's. - If not done already, set reg_info in start_memory case. - Put in no_pop_jump for an optimized succeed_n of zero repetitions. - In succeed_n case, if the number isn't zero, then return false. - Added `duplicate' case. - -Sat Jul 13 11:27:38 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (REG_NOERROR): Added this error code definition. - - * regex.c: Took some redundant parens out of macros. - (enum regexpcode): Added jump_past_next_alt. - Wrapped some macros in `do..while (0)'. - Changed some comments. - (regex_compile): Use `fixup_alt_jump' instead of `fixup_jump'. - Use `maybe_pop_jump' instead of `maybe_pop_failure_jump'. - Use `jump_past_next_alt' instead of `no_pop_jump' when at the - end of an alternative. - (re_match_2): Used REGEX_ALLOCATE for the registers stuff. - In stop_memory case: Add more boolean tests to see if the - group is in a loop. - Added jump_past_next_alt case, which doesn't jump over the - next alternative if the last one didn't match anything.
- Unfortunately, to make this work with, e.g., `(a+?*|b)*' - against `bb', I also had to pop the alternative's failure - point, which in turn broke backtracking! - In fail case: Detect a dummy failure point by looking at - failure_stack.avail - 2, not stack[-2]. - (pop_failure_point): Only pop if the stack isn't empty; don't - give an error if it is. (Not sure yet this is correct.) - (group_can_match_nothing): Make it return a boolean instead of int. - Make it take an argument indicating the end of where it should look. - If find a group that can match nothing, set the pointer - argument to past the group in the pattern. - Took out cases which can share with alternative_can_match_nothing - and call common_op_can_match_nothing. - Took ++ out of switch, so could call common_op_can_match_nothing. - Wrote lots more for on_failure_jump case to handle alternatives. - Main loop now doesn't look for matching stop_memory, but - rather the argument END; return true if hit the matching - stop_memory; this way can call itself for inner groups. - (alternative_can_match_nothing): Added for alternatives. - (common_op_can_match_nothing): Added for previous two routines' - common operators. - (regerror): Returns a message saying there's no error if gets - sent REG_NOERROR. - -Wed Jul 3 10:43:15 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c: Removed unnecessary enclosing parens from several macros. - Put `do..while (0)' around a few. - Corrected some comments. - (INIT_FAILURE_STACK_SIZE): Deleted in favor of using - INIT_FAILURE_ALLOC. - (INIT_FAILURE_STACK, DOUBLE_FAILURE_STACK, PUSH_PATTERN_OP, - PUSH_FAILURE_POINT): Made routines of the same name (but with all - lowercase letters) into these macros, so could use `alloca' - when USE_ALLOCA is defined. The reason is stated below for - bits lists. Deleted analogous routines. - (re_compile_fastmap): Added variable void *destination for - PUSH_PATTERN_OP. - (re_match_2): Added variable void *destination for REGEX_REALLOCATE. 
- Used the failure stack macros in place of the routines. - Detected a dummy failure point by inspecting the failure stack's - (avail - 2)th element, not failure_stack.stack[-2]. This bug - arose when used the failure stack macros instead of the routines. - - * regex.c [USE_ALLOCA]: Put this conditional around previous - alloca stuff and defined these to work differently depending - on whether or not USE_ALLOCA is defined: - (REGEX_ALLOCATE): Uses either `alloca' or `malloc'. - (REGEX_REALLOCATE): Uses either `alloca' or `realloc'. - (INIT_BITS_LIST, EXTEND_BITS_LIST, SET_BIT_TO_VALUE): Defined - macro versions of routines with the same name (only with all - lowercase letters) so could use `alloc' in re_match_2. This - is to prevent core leaks when C-g is used in Emacs and to make - things faster and avoid storage fragmentation. These things - have to be macros because the results of `alloca' go away with - the routine by which it's called. - (BITS_BLOCK_SIZE, BITS_BLOCK, BITS_MASK): Moved to above the - above-mentioned macros instead of before the routines defined - below regex_compile. - (set_bit_to_value): Compacted some code. - (reg_info_type): Changed inner_groups field to be bits_list_type - so could be arbitrarily long and thus handle arbitrary nesting. - (NOTE_INNER_GROUP): Put `do...while (0)' around it so could - use as a statement. - Changed code to use bits lists. - Added variable void *destination for REGEX_REALLOCATE (whose call - is several levels in). - Changed variable name of `this_bit' to `this_reg'. - (FREE_VARIABLES): Only define and use if USE_ALLOCA is defined. - (re_match_2): Use REGEX_ALLOCATE instead of malloc. - Instead of setting INNER_GROUPS of reg_info to zero, have to - use INIT_BITS_LIST and return -2 (and free variables if - USE_ALLOCA isn't defined) if it fails. - -Fri Jun 28 13:45:07 1991 Karl Berry (karl at hayley) - - * regex.c (re_match_2): set value of `dend' when we restore `d'. - - * regex.c: remove declaration of alloca. 
- - * regex.c (MISSING_ISGRAPH): rename to `ISGRAPH_MISSING'. - - * regex.h [_POSIX_SOURCE]: remove these conditionals; always - define POSIX stuff. - * regex.c (_POSIX_SOURCE): change conditionals to use `POSIX' - instead. - -Sat Jun 1 16:56:50 1991 Kathy Hargreaves (kathy at hayley) - - * regex.*: Changed RE_CONTEXTUAL_* to RE_CONTEXT_*, - RE_TIGHT_VBAR to RE_TIGHT_ALT, RE_NEWLINE_OR to - RE_NEWLINE_ALT, and RE_DOT_MATCHES_NEWLINE to RE_DOT_NEWLINE. - -Wed May 29 09:24:11 1991 Karl Berry (karl at hayley) - - * regex.texinfo (POSIX Pattern Buffers): cross-reference the - correct node name (Match-beginning-of-line, not ..._line). - (Syntax Bits): put @code around all syntax bits. - -Sat May 18 16:29:58 1991 Karl Berry (karl at hayley) - - * regex.c (global): add casts to keep broken compilers from - complaining about malloc and realloc calls. - - * regex.c (isgraph) [MISSING_ISGRAPH]: change test to this, - instead of `#ifndef isgraph', since broken compilers can't - have both a macro and a symbol by the same name. - - * regex.c (re_comp, re_exec) [_POSIX_SOURCE]: do not define. - (regcomp, regfree, regexec, regerror) [_POSIX_SOURCE && !emacs]: - only define in this case. - -Mon May 6 17:37:04 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (re_search, re_search_2): Changed BUFFER to not be const. - - * regex.c (re_compile_pattern): `^' is in a leading position if - it precedes a newline. - (various routines): Added or changed header comments. - (double_pattern_offsets_list): Changed name from - `extend_pattern_offsets_list'. - (adjust_pattern_offsets_list): Changed return value from - unsigned to void. - (verify_and_adjust_endlines): Now returns `true' and `false' - instead of 1 and 0. - `$' is in a leading position if it follows a newline. - (set_bit_to_value, get_bit_value): Exit with error if POSITION < 0 - so now calling routines don't have to. 
- (init_failure_stack, inspect_failure_stack_top, - pop_failure_stack_top, push_pattern_op, double_failure_stack): - Now return value unsigned instead of boolean. - (re_search, re_search_2): Changed BUFP to not be const. - (re_search_2): Added variable const `private_bufp' to send to - re_match_2. - (push_failure_point): Made return value unsigned instead of boolean. - -Sat May 4 15:32:22 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (re_compile_fastmap): Added extern for this. - Changed some comments. - - * regex.c (re_compile_pattern): In case handle_bar: put invalid - pattern test before levels matching stuff. - Changed some comments. - Added optimizing test for detecting an empty alternative that - ends with a trailing '$' at the end of the pattern. - (re_compile_fastmap): Moved failure_stack stuff to before this - so could use it. Made its stack dynamic. - Made it return an int so that it could return -2 if its stack - couldn't be allocated. - Added to header comment (about the return values). - (init_failure_stack): Wrote so both re_match_2 and - re_compile_fastmap could use it for similar stacks. - (double_failure_stack): Added for above reasons. - (push_pattern_op): Wrote for re_compile_fastmap. - (re_search_2): Now return -2 if re_compile_fastmap does. - (re_match_2): Made regstart and regend type failure_stack_element*. - (push_failure_point): Made pattern_place and string_place type - failure_stack_element*. - Call double_failure_stack now. - Return true instead of 1. - -Wed May 1 12:57:21 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c (remove_intervening_anchors): Avoid erroneously making - ops into no_op's by making them no_op only when they're beglines. - (verify_and_adjust_endlines): Don't make '$' a normal character - if it's before a newline. - Look for the endline op in *p, not p[1]. - (failure_stack_element): Added this declaration. - (failure_stack_type): Added this declaration.
- (INIT_FAILURE_STACK_SIZE, FAILURE_STACK_EMPTY, - FAILURE_STACK_PTR_EMPTY, REMAINING_AVAIL_SLOTS): Added for - failure stack. - (FAILURE_ITEM_SIZE, PUSH_FAILURE_POINT): Deleted. - (FREE_VARIABLES): Now free failure_stack.stack instead of stackb. - (re_match_2): deleted variables `initial_stack', `stackb', - `stackp', and `stacke' and added `failure_stack' to replace them. - Replaced calls to PUSH_FAILURE_POINT with those to - push_failure_point. - (push_failure_point): Added for re_match_2. - (pop_failure_point): Rewrote to use a failure_stack_type of stack. - (can_match_nothing): Moved definition to below re_match_2. - (bcmp_translate): Moved definition to below re_match_2. - -Mon Apr 29 14:20:54 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c (enum regexpcode): Added codes endline_before_newline - and repeated_endline_before_newline so could detect these - types of endlines in the intermediate stages of a compiled - pattern. - (INIT_FAILURE_ALLOC): Renamed NFAILURES to this and set it to 5. - (BUF_PUSH): Put `do {...} while 0' around this. - (BUF_PUSH_2): Defined this to cut down on expansion of EXTEND_BUFFER. - (regex_compile): Changed some comments. - Now push endline_before_newline if find a `$' before a newline - in the pattern. - If a `$' might turn into an ordinary character, set laststart - to point to it. - In '^' case, if syntax bit RE_TIGHT_VBAR is set, then for `^' - to be in a leading position, it must be first in the pattern. - Don't have to check in one of the else clauses that it's not set. - If RE_CONTEXTUAL_INDEP_OPS isn't set but RE_ANCHORS_ONLY_AT_ENDS - is, make '^' a normal character if it isn't first in the pattern. - Can only detect at the end if a '$' after an alternation op is a - trailing one, so can't immediately detect empty alternatives - if a '$' follows a vbar. - Added a picture of the ``success jumps'' in alternatives. - Have to set bufp->used before calling verify_and_adjust_endlines. 
- Also do it before returning all error strings. - (remove_intervening_anchors): Now replaces the anchor with - repeated_endline_before_newline if it's an endline_before_newline. - (verify_and_adjust_endlines): Deleted SYNTAX parameter (could - use bufp's) and added GROUP_FORWARD_MATCH_STATUS so could - detect back references referring to empty groups. - Added variable `bend' to point past the end of the pattern buffer. - Added variable `previous_p' so wouldn't have to reinspect the - pattern buffer to see what op we just looked at. - Added endline_before_newline and repeated_endline_before_newline - cases. - When checking if in a trailing position, added case where '$' - has to be at the pattern's end if either of the syntax bits - RE_ANCHORS_ONLY_AT_ENDS or RE_TIGHT_VBAR are set. - Since `endline' can have the intermediate form `endline_in_repeat', - have to change it to `endline' if RE_REPEATED_ANCHORS_AWAY - isn't set. - Now disallow empty alternatives with trailing endlines in them - if RE_NO_EMPTY_ALTS is set. - Now don't make '$' an ordinary character if it precedes a newline. - Don't make it an ordinary character if it's before a newline. - Back references now affect the level matching something only if - they refer to nonempty groups. - (can_match_nothing): Now increment p1 in the switch, which - changes many of the cases, but makes the code more like what - it was derived from. - Adjust the return statement to reflect above. - (struct register_info): Made `can_match_nothing' field an int - instead of a bit so could have -1 in it if never set. - (MAX_FAILURE_ITEMS): Changed name from MAX_NUM_FAILURE_ITEMS. - (FAILURE_ITEM_SIZE): Defined how much space a failure items uses. - (PUSH_FAILURE_POINT): Changed variable `last_used_reg's name - to `highest_used_reg'. - Added variable `num_stack_items' and changed `len's name to - `stack_length'. - Test failure stack limit in terms of number of items in it, not - in terms of its length. 
rms' fix tested length against number - of items, which was a misunderstanding. - Use `realloc' instead of `alloca' to extend the failure stack. - Use shifts instead of multiplying by 2. - (FREE_VARIABLES): Free `stackb' instead of `initial_stack', as - it might have been reallocated. - (re_match_2): When mallocing `initial_stack', now multiply - the number of items wanted (what was there before) by - FAILURE_ITEM_SIZE. - (pop_failure_point): Need this procedure form of the macro of - the same name for debugging, so left it in and deleted the - macro. - (recomp): Don't free the pattern buffer's translate field. - -Mon Apr 15 09:47:47 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_DUP_MAX): Moved to outside of #ifdef _POSIX_SOURCE. - * regex.c (#include <sys/types.h>): Removed #ifdef _POSIX_SOURCE - condition. - (malloc, realloc): Made return type void* #ifdef __STDC__. - (enum regexpcode): Added endline_in_repeat for the compiler's - use; this never ends up on the final compiled pattern. - (INIT_PATTERN_OFFSETS_LIST_SIZE): Initial size for - pattern_offsets_list_type. - (pattern_offset_type): Type for pattern offsets. - (pattern_offsets_list_type): Type for keeping a list of - pattern offsets. - (anchor_list_type): Changed to above type. - (PATTERN_OFFSETS_LIST_PTR_FULL): Tests if a pattern offsets - list is full. - (ANCHOR_LIST_PTR_FULL): Changed to above. - (BIT_BLOCK_SIZE): Changed to BITS_BLOCK_SIZE and moved to - above bits list routines below regex_compile. - (op_list_type): Defined to be pattern_offsets_list_type. - (compile_stack_type): Changed offsets to be - pattern_offset_type instead of unsigned. - (pointer): Changed the name of all structure fields from this - to `avail'. - (COMPILE_STACK_FULL): Changed so the stack is full if `avail' - is equal to `size' instead of `size' - 1. - (GET_BUFFER_SPACE): Changed `>=' to `>' in the while statement.
- (regex_compile): Added variable `enough_memory' so could check - that routine that verifies '$' positions could return an - allocation error. - (group_count): Deleted this variable, as `regnum' already does - this work. - (op_list): Added this variable to keep track of operations - needed for verifying '$' positions. - (anchor_list): Now initialize using routine - `init_pattern_offsets_list'. - Consolidated the three bits_list initializations. - In case '$': Instead of trying to go past constructs which can - follow '$', merely detect the special case where it has to be - at the pattern's end, fix up any fixup jumps if necessary, - record the anchor if necessary and add an `endline' (and - possibly two `no-op's) to the pattern; will call a routine at - the end to verify if it's in a valid position or not. - (init_pattern_offsets_list): Added to initialize pattern - offsets lists. - (extend_anchor_list): Renamed this extend_pattern_offsets_list - and renamed parameters and internal variables appropriately. - (add_pattern_offset): Added this routine which both - record_anchor_position and add_op call. - (adjust_pattern_offsets_list): Add this routine to adjust by - some increment all the pattern offsets a list of such after a - given position. - (record_anchor_position): Now send in offset instead of - calculating it and just call add_pattern_offset. - (adjust_anchor_list): Replaced by above routine. - (remove_intervening_anchors): If the anchor is an `endline' - then replace it with `endline_in_repeat' instead of `no_op'. - (add_op): Added this routine to call in regex_compile - wherever push something relevant to verifying '$' positions. - (verify_and_adjust_endlines): Added routine to (1) verify that - '$'s in a pattern buffer (represented by `endline') were in - valid positions and (2) whether or not they were anchors. - (BITS_BLOCK_SIZE): Renamed BIT_BLOCK_SIZE and moved to right - above bits list routines. 
- (BITS_BLOCK): Defines which array element of a bits list the - bit corresponding to a given position is in. - (BITS_MASK): Has a 1 where the bit (in a bit list array element) - for a given position is. - -Mon Apr 1 12:09:06 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c (BIT_BLOCK_SIZE): Defined this for using with - bits_list_type, abstracted from level_list_type so could use - for more things than just the level match status. - (regex_compile): Renamed `level_list' variable to - `level_match_status'. - Added variable `group_match_status' of type bits_list_type. - Kept track of whether or not for all groups any of them - matched other than the empty string, so detect if a back - reference in front of a '^' made it nonleading or not. - Do this by setting a match status bit for all active groups - whenever leave a group that matches other than the empty string. - Could detect which groups are active by going through the - stack each time, but or-ing a bits list of active groups with - a bits list of group match status is faster, so make a bits - list of active groups instead. - Have to check that '^' isn't in a leading position before - going to normal_char. - Whenever set level match status of the current level, also set - the match status of all active groups. - Increase the group count and make that group active whenever - open a group. - When close a group, only set the next level down if the - current level matches other than the empty string, and make - the current group inactive. - At a back reference, only set a level's match status if the - group to which the back reference refers matches other than - the empty string. - (init_bits_list): Added to initialize a bits list. - (get_level_value): Deleted this. (Made into - get_level_match_status.) - (extend_bits_list): Added to extend a bits list. (Made this - from deleted routine `extend_level_list'.) - (get_bit): Added to get a bit value from a bits list. 
(Made - this from deleted routine `get_level_value'.) - (set_bit_to_value): Added to set a bit in a bits list. (Made - this from deleted routine `set_level_value'.) - (get_level_match_status): Added this to get the match status - of a given level. (Made from get_level_value.) - (set_this_level, set_next_lower_level): Made all routines - which set bits extend the bits list if necessary, thus they - now return an unsigned value to indicate whether or not the - reallocation failed. - (increase_level): No longer extends the level list. - (make_group_active): Added to mark as active a given group in - an active groups list. - (make_group_inactive): Added to mark as inactive a given group - in an active groups list. - (set_match_status_of_active_groups): Added to set the match - status of all currently active groups. - (get_group_match_status): Added to get a given group's match status. - (no_levels_match_anything): Removed the parameter LEVEL. - (PUSH_FAILURE_POINT): Added rms' bug fix and changed RE_NREGS - to num_internal_regs. - -Sun Mar 31 09:04:30 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_ANCHORS_ONLY_AT_ENDS): Added syntax so could - constrain '^' and '$' to only be anchors if at the beginning - and end of the pattern. - (RE_SYNTAX_POSIX_BASIC): Added the above bit. - - * regex.c (enum regexpcode): Changed `unused' to `no_op'. - (this_and_lower_levels_match_nothing): Deleted forward reference. - (regex_compile): case '^': if the syntax bit RE_ANCHORS_ONLY_AT_ENDS - is set, then '^' is only an anchor if at the beginning of the - pattern; only record anchor position if the syntax bit - RE_REPEATED_ANCHORS_AWAY is set; the '^' is a normal char if - the syntax bit RE_ANCHORS_ONLY_AT_ENDS is set and we're not at - the beginning of the pattern (and neither RE_CONTEXTUAL_INDEP_OPS - nor RE_CONTEXTUAL_INDEP_OPS syntax bits are set). - Only adjust the anchor list if the syntax bit - RE_REPEATED_ANCHORS_AWAY is set.
- - * regex.c (level_list_type): Use to detect when '^' is - in a leading position. - (regex_compile): Added level_list_type level_list variable in - which we keep track of whether or not a grouping level (in its - current or most recent incarnation) matches anything besides the - empty string. Set the bit for the i-th level when detect it - should match something other than the empty string and the bit - for the (i-1)-th level when leave the i-th group. Clear all - bits for the i-th and higher levels if none of 0--(i - 1)-th's - bits are set when encounter an alternation operator on that - level. If no levels are set when hit a '^', then it is in a - leading position. We keep track of which level we're at by - increasing a variable current_level whenever we encounter an - open-group operator and decreasing it whenever we encounter a - close-group operator. - Have to adjust the anchor list contents whenever insert - something ahead of them (such as on_failure_jump's) in the - pattern. - (adjust_anchor_list): Adjusts the offsets in an anchor list by - a given increment starting at a given start position. - (get_level_value): Returns the bit setting of a given level. - (set_level_value): Sets the bit of a given level to a given value. - (set_this_level): Sets (to 1) the bit of a given level. - (set_next_lower_level): Sets (to 1) the bit of (LEVEL - 1) for a - given LEVEL. - (clear_this_and_higher_levels): Clears the bits for a given - level and any higher levels. - (extend_level_list): Adds sizeof(unsigned) more bits to a level list. - (increase_level): Increases by 1 the value of a given level variable. - (decrease_level): Decreases by 1 the value of a given level variable. - (lower_levels_match_nothing): Checks if any levels lower than - the given one match anything. - (no_levels_match_anything): Checks if any levels match anything. - (re_match_2): At case wordbeg: before looking at d-1, check that - we're not at the string's beginning. 
- At case wordend: Added some illuminating parentheses. - -Mon Mar 25 13:58:51 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_ANCHOR_AT_NEWLINE): Changed syntax bit name - from RE_ANCHOR_NOT_NEWLINE because an anchor never matches the - newline itself, just the empty string either before or after it. - (RE_REPEATED_ANCHORS_AWAY): Added this syntax bit for ignoring - anchors inside groups which are operated on by repetition - operators. - (RE_DOT_MATCHES_NEWLINE): Added this bit so the match-any-character - operator could match a newline when it's set. - (RE_SYNTAX_POSIX_BASIC): Set RE_DOT_MATCHES_NEWLINE in this. - (RE_SYNTAX_POSIX_EXTENDED): Set RE_DOT_MATCHES_NEWLINE and - RE_REPEATED_ANCHORS_AWAY in this. - (regerror): Changed prototypes to new POSIX spec. - - * regex.c (anchor_list_type): Added so could null out anchors inside - repeated groups. - (ANCHOR_LIST_PTR_FULL): Added for above type. - (compile_stack_element): Changed name from stack_element. - (compile_stack_type): Changed name from compile_stack. - (INIT_COMPILE_STACK_SIZE): Changed name from INIT_STACK_SIZE. - (COMPILE_STACK_EMPTY): Changed name from STACK_EMPTY. - (COMPILE_STACK_FULL): Changed name from STACK_FULL. - (regex_compile): Changed SYNTAX parameter to non-const. - Changed variable name `stack' to `compile_stack'. - If syntax bit RE_REPEATED_ANCHORS_AWAY is set, then naively put - anchors in a list when encounter them and then set them to - `unused' when detect they are within a group operated on by a - repetition operator. Need something more sophisticated than - this, as they should only get set to `unused' if they are in - positions where they would be anchors. Also need a better way to - detect contextually invalid anchors. - Changed some comments. - (is_in_compile_stack): Changed name from `is_in_stack'. - (extend_anchor_list): Added to do anchor stuff. - (record_anchor_position): Added to do anchor stuff. - (remove_intervening_anchors): Added to do anchor stuff.
- (re_match_2): Now match a newline with the match-any-character - operator if RE_DOT_MATCHES_NEWLINE is set. - Compacted some code. - (regcomp): Added new POSIX newline information to the header - comment. - If REG_NEWLINE cflag is set, then now unset RE_DOT_MATCHES_NEWLINE - in syntax. - (put_in_buffer): Added to do new POSIX regerror spec. Called - by regerror. - (regerror): Changed to take a pattern buffer, error buffer and - its size, and return type `size_t', the size of the full error - message, and the first ERRBUF_SIZE - 1 characters of the full - error message in the error buffer. - -Wed Feb 27 16:38:33 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (#include <sys/types.h>): Removed this as new POSIX - standard has the user include it. - (RE_SYNTAX_POSIX_BASIC and RE_SYNTAX_POSIX_EXTENDED): Removed - RE_HAT_LISTS_NOT_NEWLINE as new POSIX standard has the cflag - REG_NEWLINE now set this. Similarly, added syntax bit - RE_ANCHOR_NOT_NEWLINE as this is now unset by REG_NEWLINE. - (RE_SYNTAX_POSIX_BASIC): Removed syntax bit - RE_NO_CONSECUTIVE_REPEATS as POSIX now allows them. - - * regex.c (#include <sys/types.h>): Added this as new POSIX - standard has the user include it instead of us putting it in - regex.h. - (extern char *re_syntax_table): Made into an extern so the - user could allocate it. - (DO_RANGE): If don't find a range end, now goto invalid_range_end - instead of unmatched_left_bracket. - (regex_compile): Made variable SYNTAX non-const.???? - Reformatted some code. - (re_compile_fastmap): Moved is_a_succeed_n's declaration to - inner braces. - Compacted some code. - (SET_NEWLINE_FLAG): Removed and put inline. - (regcomp): Made variable `syntax' non-const so can unset - RE_ANCHOR_NOT_NEWLINE syntax bit if cflag RE_NEWLINE is set. - If cflag RE_NEWLINE is set, set the RE_HAT_LISTS_NOT_NEWLINE - syntax bit and unset RE_ANCHOR_NOT_NEWLINE one of `syntax'. 
- -Wed Feb 20 16:33:38 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_CONSECUTIVE_REPEATS): Changed name from - RE_NO_CONSEC_REPEATS. - (REG_ENESTING): Deleted this POSIX return value, as the stack - is now unbounded. - (struct re_pattern_buffer): Changed some comments. - (re_compile_pattern): Changed a comment. - Deleted check on stack upper bound and corresponding error. - Now when there's no interval contents and it's the end of the - pattern, go to unmatched_left_curly_brace instead of end_of_pattern. - Removed nesting_too_deep error, as the stack is now unbounded. - (regcomp): Removed REG_ENESTING case, as the stack is now unbounded. - (regerror): Removed REG_ENESTING case, as the stack is now unbounded. - - * regex.c (MAX_STACK_SIZE): Deleted because don't need upper - bound on array indexed with an unsigned number. - -Sun Feb 17 15:50:24 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h: Changed and added some comments. - - * regex.c (init_syntax_once): Made `_' a word character. - (re_compile_pattern): Added a comment. - (re_match_2): Redid header comment. - (regexec): With header comment about PMATCH, corrected and - removed details found regex.h, adding a reference. - -Fri Feb 15 09:21:31 1991 Kathy Hargreaves (kathy at hayley) - - * regex.c (DO_RANGE): Removed argument parentheses. - Now get untranslated range start and end characters and set - list bits for the translated (if at all) versions of them and - all characters between them. - (re_match_2): Now use regs->num_regs instead of num_regs_wanted - wherever possible. - (regcomp): Now build case-fold translate table using isupper - and tolower facilities so will work on foreign language characters. - -Sat Feb 9 16:40:03 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_HAT_LISTS_NOT_NEWLINE): Changed syntax bit name - from RE_LISTS_NOT_NEWLINE as it only affects nonmatching lists. 
- Changed all references to the match-beginning-of-string - operator to match-beginning-of-line operator, as this is what - it does. - (RE_NO_CONSEC_REPEATS): Added this syntax bit. - (RE_SYNTAX_POSIX_BASIC): Added above bit to this. - (REG_PREMATURE_END): Changed name to REG_EEND. - (REG_EXCESS_NESTING): Changed name to REG_ENESTING. - (REG_TOO_BIG): Changed name to REG_ESIZE. - (REG_INVALID_PREV_RE): Deleted this return POSIX value. - Added and changed some comments. - - * regex.c (re_compile_pattern): Now sets the pattern buffer's - `return_default_num_regs' field. - (typedef struct stack_element, stack_type, INIT_STACK_SIZE, - MAX_STACK_SIZE, STACK_EMPTY, STACK_FULL): Added for regex_compile. - (INIT_BUF_SIZE): Changed value from 28 to 32. - (BUF_PUSH): Changed name from BUFPUSH. - (MAX_BUF_SIZE): Added so could use in many places. - (IS_CHAR_CLASS_STRING): Replaced is_char_class with this. - (regex_compile): Added a stack which could grow dynamically - and which has struct elements. - Go back to initializing `zero_times_ok' and `many_time_ok' to - 0 and |=ing them inside the loop. - Now disallow consecutive repetition operators if the syntax - bit RE_NO_CONSEC_REPEATS is set. - Now detect trailing backslash when the compiler is expecting a - `?' or a `+'. - Changed calls to GET_BUFFER_SPACE which asked for 6 to ask for - 3, as that's all they needed. - Now check for trailing backslash inside lists. - Now disallow an empty alternative right before an end-of-line - operator. - Now get buffer space before leaving space for a fixup jump. - Now check if at pattern end when at open-interval operator. - Added some comments. - Now check if non-interval repetition operators follow an - interval one if the syntax bit RE_NO_CONSEC_REPEATS is set. - Now only check if what precedes an interval repetition - operator isn't a regular expression which matches one - character if the syntax bit RE_NO_CONSEC_REPEATS is set. 
- Now return "Unmatched [ or [^" instead of "Unmatched [". - (is_in_stack): Added to check if a given register number is in - the stack. - (re_match_2): If initial variable allocations fail, return -2, - instead of -1. - Now set reg's `num_regs' field when allocating regs. - Now before allocating them, free regs->start and end if they - aren't NULL and return -2 if either allocation fails. - Now use regs->num_regs instead of num_regs_wanted to control - regs loops. - Now increment past the newline when matching it with an - end-of-line operator. - (recomp): Added to the header comment. - Now return REG_ESUBREG if regex_compile returns "Unmatched [ - or [^" instead of doing so if it returns "Unmatched [". - Now return REG_BADRPT if in addition to returning "Missing - preceding regular expression", regex_compile returns "Invalid - preceding regular expression". - Now return new return value names (see regex.h changes). - (regexec): Added to header comment. - Initialize regs structure. - Now match whole string. - Now always free regs.start and regs.end instead of just when - the string matched. - (regerror): Now return "Regex error: Unmatched [ or [^.\n" - instead of "Regex error: Unmatched [.\n". - Now return "Regex error: Preceding regular expression either - missing or not simple.\n" instead of "Regex error: Missing - preceding regular expression.\n". - Removed REG_INVALID_PREV_RE case (it got subsumed into the - REG_BADRPT case). - -Thu Jan 17 09:52:35 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h: Changed a comment. - - * regex.c: Changed and added large header comments. - (re_compile_pattern): Now if detect that `laststart' for an - interval points to a byte code for a regular expression which - matches more than one character, make it an internal error. - (regerror): Return error message, don't print it. - -Tue Jan 15 15:32:49 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (regcomp return codes): Added GNU ones. - Updated some comments. 
- - * regex.c (DO_RANGE): Changed `obscure_syntax' to `syntax'. - (regex_compile): Added `following_left_brace' to keep track of - where pseudo interval following a valid interval starts. - Changed some instances that returned "Invalid regular - expression" to instead return error strings coinciding with - POSIX error codes. - Changed some comments. - Now consider only things between `[:' and `:]' to be possible - character class names. - Now a character class expression can't end a pattern; at - least a `]' must close the list. - Now if the syntax bit RE_NO_BK_CURLY_BRACES is set, then a - valid interval must be followed by yet another to get an error - for preceding an interval (in this case, the second one) with - a regular expression that matches more than one character. - Now if what follows a valid interval begins with a open - interval operator but doesn't begin a valid interval, then set - following_left_bracket to it, put it in C and go to - normal_char label. - Added some comments. - Return "Invalid character class name" instead of "Invalid - character class". - (regerror): Return messages for all POSIX error codes except - REG_ECOLLATE and REG_NEWLINE, along with all GNU error codes. - Added `break's after all cases. - (main): Call re_set_syntax instead of setting `obscure_syntax' - directly. - -Sat Jan 12 13:37:59 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (Copyright): Updated date. - (#include <sys/types.h>): Include unconditionally. - (RE_CANNOT_MATCH_NEWLINE): Deleted this syntax bit. - (RE_SYNTAX_POSIX_BASIC, RE_SYNTAX_POSIX_EXTENDED): Removed - setting the RE_ANCHOR_NOT_NEWLINE syntax bit from these. - Changed and added some comments. - (struct re_pattern_buffer): Changed some flags from chars to bits. - Added field `syntax'; holds which syntax pattern was compiled with. - Added bit flag `return_default_num_regs'. - (externs for GNU and Berkeley UNIX routines): Added `const's to - parameter types to be compatible with POSIX. 
- (#define const): Added to support old C compilers. - - * regex.c (Copyright): Updated date. - (enum regexpcode): Deleted `newline'. - (regex_compile): Renamed re_compile_pattern to this, added a - syntax parameter so it can set the pattern buffer's `syntax' - field. - Made `pattern', and `size' `const's so could pass to POSIX - interface routines; also made `const' whatever interval - variables had to be to make this work. - Changed references to `obscure_syntax' to new parameter `syntax'. - Deleted putting `newline' in buffer when see `\n'. - Consider invalid character classes which have nothing wrong - except the character class name; if so, return character-class error. - (is_char_class): Added routine for regex_compile. - (re_compile_pattern): added a new one which calls - regex_compile with `obscure_syntax' as the actual parameter - for the formal `syntax'. - Gave this the old routine's header comments. - Made `pattern', and `size' `const's so could use POSIX interface - routine parameters. - (re_search, re_search_2, re_match, re_match_2): Changed - `pbufp' to `bufp'. - (re_search_2, re_match_2): Changed `mstop' to `stop'. - (re_search, re_search_2): Made all parameters except `regs' - `const's so could use POSIX interface routines parameters. - (re_search_2): Added private copies of `const' parameters so - could change their values. - (re_match_2): Made all parameters except `regs' `const's so - could use POSIX interface routines parameters. - Changed `size1' and `size2' parameters to `size1_arg' and - `size2_arg' and so could change; added local `size1' and - `size2' and set to these. - Added some comments. - Deleted `newline' case. - `begline' can also possibly match if `d' contains a newline; - if it does, we have to increment d to point past the newline. - Replaced references to `obscure_syntax' with `bufp->syntax'. - (re_comp, re_exec): Made parameter `s' a `const' so could use POSIX - interface routines parameters. 
- Now call regex_compile, passing `obscure_syntax' via the - `syntax' parameter. - (re_exec): Made local `len' a `const' so could pass to re_search. - (regcomp): Added header comment. - Added local `syntax' to set and pass to regex_compile rather - than setting global `obscure_syntax' and passing it. - Call regex_compile with its `syntax' parameter rather than - re_compile_pattern. - Return REG_ECTYPE if character-class error. - (regexec): Don't initialize `regs' to anything. - Made `private_preg' a nonpointer so could set to what the - constant `preg' points. - Initialize `private_preg's `return_default_num_regs' field to - zero because want to return `nmatch' registers, not however - many there are subexpressions in the pattern. - Also test if `nmatch' > 0 to see if should pass re_match `regs'. - -Tue Jan 8 15:57:17 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (struct re_pattern_buffer): Reworded comment. - - * regex.c (EXTEND_BUFFER): Also reset beg_interval. - (re_search_2): Return val if val = -2. - (NUM_REG_ITEMS): Listed items in comment. - (NUM_OTHER_ITEMS): Defined this for using in > 1 definition. - (MAX_NUM_FAILURE_ITEMS): Replaced `+ 2' with NUM_OTHER_ITEMS. - (NUM_FAILURE_ITEMS): As with definition above and added to - comment. - (PUSH_FAILURE_POINT): Replaced `* 2's with `<< 1's. - (re_match_2): Test with equality with 1 to see pbufp->bol and - pbufp->eol are set. - -Fri Jan 4 15:07:22 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (struct re_pattern_buffer): Reordered some fields. - Updated some comments. - Added not_bol and not_eol fields. - (extern regcomp, regexec, regerror): Added return types. - (extern regfree): Added `extern'. - - * regex.c (min): Deleted unused macro. - (re_match_2): Compacted some code. - Removed call to macro `min' from `for' loop. - Fixed so unused registers get filled with -1's. - Fail if the pattern buffer's `not_bol' field is set and - encounter a `begline'. 
- Fail if the pattern buffer's `not_eol' field is set and - encounter a `endline'. - Deleted redundant check for empty stack in fail case. - Don't free pattern buffer's components in re_comp. - (regexec): Initialize variable regs. - Added `private_preg' pattern buffer so could set `not_bol' and - `not_eol' fields and hand to re_match. - Deleted naive attempt to detect anchors. - Set private pattern buffer's `not_bol' and `not_eol' fields - according to eflags value. - `nmatch' must also be > 0 for us to bother allocating - registers to send to re_match and filling pmatch - with their results after the call to re_match. - Send private pattern buffer instead of argument to re_match. - If use the registers, always free them and then set them to NULL. - (regerror): Added this Posix routine. - (regfree): Added this Posix routine. - -Tue Jan 1 15:02:45 1991 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NREGS): Deleted this definition, as now the user - can choose how many registers to have. - (REG_NOTBOL, REG_NOTEOL): Defined these Posix eflag bits. - (REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, - REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, - REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_ENEWLINE): - Defined these return values for Posix's regcomp and regexec. - Updated some comments. - (struct re_pattern_buffer): Now typedef this as regex_t - instead of the other way around. - (struct re_registers): Added num_regs field. Made start and - end fields pointers to char instead of fixed size arrays. - (regmatch_t): Added this Posix register type. - (regcomp, regexec, regerror, regfree): Added externs for these - Posix routines. - - * regex.c (enum boolean): Typedefed this. - (re_pattern_buffer): Reformatted some comments. - (re_compile_pattern): Updated some comments. 
- Always push start_memory and its attendant number whenever - encounter a group, not just when its number is less than the - previous maximum number of registers; same for stop_memory. - Get 4 bytes of buffer space instead of 2 when pushing a - set_number_at. - (can_match_nothing): Added this to elaborate on and replace - code in re_match_2. - (reg_info_type): Made can_match_nothing field a bit instead of int. - (MIN): Added for re_match_2. - (re_match_2 macros): Changed all `for' loops which used - RE_NREGS to now use num_internal_regs as upper bounds. - (MAX_NUM_FAILURE_ITEMS): Use num_internal_regs instead of RE_NREGS. - (POP_FAILURE_POINT): Added check for empty stack. - (FREE_VARIABLES): Added this to free (and set to NULL) - variables allocated in re_match_2. - (re_match_2): Rearranged parameters to be in order. - Added variables num_regs_wanted (how many registers the user wants) - and num_internal_regs (how many groups there are). - Allocated initial_stack, regstart, regend, old_regstart, - old_regend, reginfo, best_regstart, and best_regend---all - which used to be fixed size arrays. Free them all and return - -1 if any fail. - Free above variables if starting position pos isn't valid. - Changed all `for' loops which used RE_NREGS to now use - num_internal_regs as upper bounds---except for the loops which - fill regs; then use num_regs_wanted. - Allocate regs if the user has passed it and wants more than 0 - registers filled. - Set regs->start[i] and regs->end[i] to -1 if either - regstart[i] or regend[i] equals -1, not just the first. - Free allocated variables before returning. - Updated some comments. - (regcomp): Return REG_ESPACE, REG_BADPAT, REG_EPAREN when - appropriate. - Free translate array. - (regexec): Added this Posix interface routine. - -Mon Dec 24 14:21:13 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: If _POSIX_SOURCE is defined then #include <sys/types.h>. - Added syntax bit RE_CANNOT_MATCH_NEWLINE. 
- Defined Posix cflags: REG_EXTENDED, REG_NEWLINE, REG_ICASE, and - REG_NOSUB. - Added fields re_nsub and no_sub to struct re_pattern_buffer. - Typedefed regex_t to be `struct re_pattern_buffer'. - - * regex.c (CHAR_SET_SIZE): Defined this to be 256 and replaced - incidences of this value with this constant. - (re_compile_pattern): Added switch case for `\n' and put - `newline' into the pattern buffer when encounter this. - Increment the pattern_buffer's `re_nsub' field whenever open a - group. - (re_match_2): Match a newline with `newline'---provided the - syntax bit RE_CANNOT_MATCH_NEWLINE isn't set. - (regcomp): Added this Posix interface routine. - (enum test_type): Added interface_test tag. - (main): Added Posix interface test. - -Tue Dec 18 12:58:12 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (struct re_pattern_buffer): reformatted so would fit - in texinfo documentation. - -Thu Nov 29 15:49:16 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_EMPTY_ALTS): Added this bit. - (RE_SYNTAX_POSIX_EXTENDED): Added above bit. - - * regex.c (re_compile_pattern): Disallow empty alternatives only - when RE_NO_EMPTY_ALTS is set, not when RE_CONTEXTUAL_INVALID_OPS is. - Changed RE_NO_BK_CURLY_BRACES to RE_NO_BK_PARENS when testing - for empty groups at label handle_open. - At label handle_bar: disallow empty alternatives if RE_NO_EMPTY_ALTS - is set. - Rewrote some comments. - - (re_compile_fastmap): cleaned up code. - - (re_search_2): Rewrote comment. - - (struct register_info): Added field `inner_groups'; it records - which groups are inside of the current one. - Added field can_match_nothing; it's set if the current group - can match nothing. - Added field ever_match_something; it's set if current group - ever matched something. - - (INNER_GROUPS): Added macro to access inner_groups field of - struct register_info. - - (CAN_MATCH_NOTHING): Added macro to access can_match_nothing - field of struct register_info. 
- - (EVER_MATCHED_SOMETHING): Added macro to access - ever_matched_something field of struct register_info. - - (NOTE_INNER_GROUP): Defined macro to record that a given group - is inside of all currently active groups. - - (re_match_2): Added variables *p1 and mcnt2 (multipurpose). - Added old_regstart and old_regend arrays to hold previous - register values if they need be restored. - Initialize added fields and variables. - case start_memory: Find out if the group can match nothing. - Save previous register values in old_restart and old_regend. - Record that current group is inside of all currently active - groups. - If the group is inside a loop and it ever matched anything, - restore its registers to values before the last failed match. - Restore the registers for the inner groups, too. - case duplicate: Can back reference to a group that never - matched if it can match nothing. - -Thu Nov 29 11:12:54 1990 Karl Berry (karl at hayley) - - * regex.c (bcopy, ...): define these if either _POSIX_SOURCE or - STDC_HEADERS is defined; same for including <stdlib.h>. - -Sat Oct 6 16:04:55 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (struct re_pattern_buffer): Changed field comments. - - * regex.c (re_compile_pattern): Allow a `$' to precede an - alternation operator (`|' or `\|'). - Disallow `^' and/or `$' in empty groups if the syntax bit - RE_NO_EMPTY_GROUPS is set. - Wait until have parsed a valid `\{...\}' interval expression - before testing RE_CONTEXTUAL_INVALID_OPS to see if it's - invalidated by that. - Don't use RE_NO_BK_CURLY_BRACES to test whether or not a validly - parsed interval expression is invalid if it has no preceding re; - rather, use RE_CONTEXTUAL_INVALID_OPS. 
- If an interval parses, but there is no preceding regular - expression, yet the syntax bit RE_CONTEXTUAL_INDEP_OPS is set, - then that interval can match the empty regular expression; if - the bit isn't set, then the characters in the interval - expression are parsed as themselves (sans the backslashes). - In unfetch_interval case: Moved PATFETCH to above the test for - RE_NO_BK_CURLY_BRACES being set, which would force a goto - normal_backslash; the code at both normal_backsl and normal_char - expect a character in `c.' - -Sun Sep 30 11:13:48 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Changed some comments to use the terms used in the - documentation. - (RE_CONTEXTUAL_INDEP_OPS): Changed name from `RE_CONTEXT_INDEP_OPS'. - (RE_LISTS_NOT_NEWLINE): Changed name from `RE_HAT_NOT_NEWLINE.' - (RE_ANCHOR_NOT_NEWLINE): Added this syntax bit. - (RE_NO_EMPTY_GROUPS): Added this syntax bit. - (RE_NO_HYPHEN_RANGE_END): Deleted this syntax bit. - (RE_SYNTAX_...): Reformatted. - (RE_SYNTAX_POSIX_BASIC, RE_SYNTAX_EXTENDED): Added syntax bits - RE_ANCHOR_NOT_NEWLINE and RE_NO_EMPTY_GROUPS, and deleted - RE_NO_HYPHEN_RANGE_END. - (RE_SYNTAX_POSIX_EXTENDED): Added syntax bit RE_DOT_NOT_NULL. - - * regex.c (bcopy, bcmp, bzero): Define if _POSIX_SOURCE is defined. - (_POSIX_SOURCE): ifdef this, #include <stdlib.h> - (#ifdef emacs): Changed comment of the #endif for the its #else - clause to be `not emacs', not `emacs.' - (no_pop_jump): Changed name from `jump'. - (pop_failure_jump): Changed name from `finalize_jump.' - (maybe_pop_failure_jump): Changed name from `maybe_finalize_jump'. - (no_pop_jump_n): Changed name from `jump_n.' - (EXTEND_BUFFER): Use shift instead of multiplication to double - buf->allocated. - (DO_RANGE, recompile_pattern): Added macro to set the list bits - for a range. - (re_compile_pattern): Fixed grammar problems in some comments. - Checked that RE_NO_BK_VBAR is set to make `$' valid before a `|' - and not set to make it valid before a `\|'. 
- Checked that RE_NO_BK_PARENS is set to make `$' valid before a ')' - and not set to make it valid before a `\)'. - Disallow ranges starting with `-', unless the range is the - first item in a list, rather than disallowing ranges which end - with `-'. - Disallow empty groups if the syntax bit RE_NO_EMPTY_GROUPS is set. - Disallow nothing preceding `{' and `\{' if they represent the - open-interval operator and RE_CONTEXTUAL_INVALID_OPS is set. - (register_info_type): typedef-ed this using `struct register_info.' - (SET_REGS_MATCHED): Compacted the code. - (re_match_2): Made it fail if back reference a group which we've - never matched. - Made `^' not match a newline if the syntax bit - RE_ANCHOR_NOT_NEWLINE is set. - (really_fail): Added this label so could force a final fail that - would not try to use the failure stack to recover. - -Sat Aug 25 14:23:01 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_CONTEXTUAL_OPS): Changed name from RE_CONTEXT_OPS. - (global): Rewrote comments and rebroke some syntax #define lines. - - * regex.c (isgraph): Added definition for sequents. - (global): Now refer to character set lists as ``lists.'' - Rewrote comments containing ``\('' or ``\)'' to now refer to - ``groups.'' - (RE_CONTEXTUAL_OPS): Changed name from RE_CONTEXT_OPS. - - (re_compile_pattern): Expanded header comment. - -Sun Jul 15 14:50:25 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_CONTEX_INDEP_OPS): the comment's sense got turned - around when we changed how it read; changed it to be correct. - -Sat Jul 14 16:38:06 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_EMPTY_BK_REF): changed name to - RE_NO_MISSING_BK_REF, as this describes it better. - - * regex.c (re_compile_pattern): changed RE_NO_EMPTY_BK_REF - to RE_NO_MISSING_BK_REF, as above. 
- -Thu Jul 12 11:45:05 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h (RE_NO_EMPTY_BRACKETS): removed this syntax bit, as - bracket expressions should *never* be empty regardless of the - syntax. Removes this bit from RE_SYNTAX_POSIX_BASIC and - RE_SYNTAX_POSIX_EXTENDED. - - * regex.c (SET_LIST_BIT): in the comment, now refer to character - sets as (non)matching sets, as bracket expressions can now match - other things in addition to characters. - (re_compile_pattern): refer to groups as such instead of `\(...\)' - or somesuch, because groups can now be enclosed in either plain - parens or backslashed ones, depending on the syntax. - In the '[' case, added a boolean just_had_a_char_class to detect - whether or not a character class begins a range (which is invalid). - Restore way of breaking out of a bracket expression to original way. - Add way to detect a range if the last thing in a bracket - expression was a character class. - Took out check for c != ']' at the end of a character class in - the else clause, as it had already been checked in the if part - that also checked the validity of the string. - Set or clear just_had_a_char_class as appropriate. - Added some comments. Changed references to character sets to - ``(non)matching lists.'' - -Sun Jul 1 12:11:29 1990 Karl Berry (karl at hayley) - - * regex.h (BYTEWIDTH): moved back to regex.c. - - * regex.h (re_compile_fastmap): removed declaration; this - shouldn't be advertised. - -Mon May 28 15:27:53 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (ifndef Sword): Made comments more specific. - (global): include <stdio.h> so can write fatal messages on - standard error. Replaced calls to assert with fprintfs to - stderr and exit (1)'s. - (PREFETCH): Reformatted to make more readable. - (AT_STRINGS_BEG): Defined to test if we're at the beginning of - the virtual concatenation of string1 and string2. 
- (AT_STRINGS_END): Defined to test if at the end of the virtual - concatenation of string1 and string2. - (AT_WORD_BOUNDARY): Defined to test if are at a word boundary. - (IS_A_LETTER(d)): Defined to test if the contents of the pointer D - is a letter. - (re_match_2): Rewrote the wordbound, notwordbound, wordbeg, wordend, - begbuf, and endbuf cases in terms of the above four new macros. - Called SET_REGS_MATCHED in the matchsyntax, matchnotsyntax, - wordchar, and notwordchar cases. - -Mon May 14 14:49:13 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (re_search_2): Fixed RANGE to not ever take STARTPOS - outside of virtual concatenation of STRING1 and STRING2. - Updated header comment as to this. - (re_match_2): Clarified comment about MSTOP in header. - -Sat May 12 15:39:00 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (re_search_2): Checked for out-of-range STARTPOS. - Added comments. - When searching backwards, not only get the character with which - to compare to the fastmap from string2 if the starting position - >= size1, but also if size1 is zero; this is so won't get a - segmentation fault if string1 is null. - Reformatted code at label advance. - -Thu Apr 12 20:26:21 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Added #pragma once and #ifdef...endif __REGEXP_LIBRARY. - (RE_EXACTN_VALUE): Added for search.c to use. - Reworded some comments. - - regex.c: Punctuated some comments correctly. - (NULL): Removed this. - (RE_EXACTN_VALUE): Added for search.c to use. - (<ctype.h>): Moved this include to top of file. - (<assert.h>): Added this include. - (struct regexpcode): Assigned 0 to unused and 1 to exactn - because of RE_EXACTN_VALUE. - Added comment. - (various macros): Lined up backslashes near end of line. - (insert_jump): Cleaned up the header comment. - (re_search): Corrected the header comment. - (re_search_2): Cleaned up and completed the header comment. - (re_max_failures): Updated comment. 
- (struct register_info): Constructed as bits so as to save space - on the stack when pushing register information. - (IS_ACTIVE): Macro for struct register_info. - (MATCHED_SOMETHING): Macro for struct register_info. - (NUM_REG_ITEMS): How many register information items for each - register we have to push on the stack at each failure. - (MAX_NUM_FAILURE_ITEMS): If push all the registers on failure, - this is how many items we push on the stack. - (PUSH_FAILURE_POINT): Now pushes whether or not the register is - currently active, and whether or not it matched something. - Checks that there's enough space allocated to accommodate all the - items we currently want to push. (Before, a test for an empty - stack sufficed because we always pushed and popped the same - number of items). - Replaced ``2'' with MAX_NUM_FAILURE_POINTS when ``2'' refers - to how many things get pushed on the stack each time. - When copy the stack into the newly allocated storage, now only copy - the area in use. - Clarified comment. - (POP_FAILURE_POINT): Defined to use in places where put number - of registers on the stack into a variable before using it to - decrement the stack, so as to not confuse the compiler. - (IS_IN_FIRST_STRING): Defined to check if a pointer points into - the first string. - (SET_REGS_MATCHED): Changed to use the struct register_info - bits; also set the matched-something bit to false if the - register isn't currently active. (This is a redundant setting.) - (re_match_2): Cleaned up and completed the header comment. - Updated the failure stack comment. - Replaced the ``2'' with MAX_NUM_FAILURE_ITEMS in the static - allocation of initial_stack, because now more than two (now up - to MAX_FAILURE_ITEMS) items get pushed on the failure stack each - time. - Ditto for stackb. 
- Trashed restart_seg1, regend_seg1, best_regstart_seg1, and - best_regend_seg1 because they could have erroneous information - in them, such as when matching ``a'' (in string1) and ``ab'' (in - string2) with ``(a)*ab''; before using IS_IN_FIRST_STRING to see - whether or not the register starts or ends in string1, - regstart[1] pointed past the end of string1, yet regstart_seg1 - was 0! - Added variable reg_info of type struct register_info to keep - track of currently active registers and whether or not they - currently match anything. - Commented best_regs_set. - Trashed reg_active and reg_matched_something and put the - information they held into reg_info; saves space on the stack. - Replaced NULL with '\000'. - In begline case, compacted the code. - Used assert to exit if had an internal error. - In begbuf case, because now force the string we're working on - into string2 if there aren't two strings, now allow d == string2 - if there is no string1 (and the check for that is size1 == 0!); - also now succeeds if there aren't any strings at all. - (main, ifdef canned): Put test type into a variable so could - change it while debugging. - -Sat Mar 24 12:24:13 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (GET_UNSIGNED_NUMBER): Deleted references to num_fetches. - (re_compile_pattern): Deleted num_fetches because could keep - track of the number of fetches done by saving a pointer into the - pattern. - Added variable beg_interval to be used as a pointer, as above. - Assert that beg_interval points to something when it's used as above. - Initialize succeed_n's to lower_bound because re_compile_fastmap - needs to know it. - (re_compile_fastmap): Deleted unnecessary variable is_a_jump_n. - Added comment. - (re_match_2): Put number of registers on the stack into a - variable before using it to decrement the stack, so as to not - confuse the compiler. - Updated comments. - Used error routine instead of printf and exit. 
- In exactn case, restored longer code from ``original'' regex.c - which doesn't test translate inside a loop. - - * regex.h: Moved #define NULL and the enum regexpcode definition - to regex.c. Changed some comments. - - regex.c (global): Updated comments about compiling and for the - re_compile_pattern jump routines. - Added #define NULL and the enum regexpcode definition (from - regex.h). - (enum regexpcode): Added set_number_at to reset the n's of - succeed_n's and jump_n's. - (re_set_syntax): Updated its comment. - (re_compile_pattern): Moved its heading comment to after its macros. - Moved its include statement to the top of the file. - Commented or added to comments of its macros. - In start_memory case: Push laststart value before adding - start_memory and its register number to the buffer, as they - might not get added. - Added code to put a set_number_at before each succeed_n and one - after each jump_n; rewrote code in what seemed a more - straightforward manner to put all these things in the pattern so - the succeed_n's would correctly jump to the set_number_at's of - the matching jump_n's, and so the jump_n's would correctly jump - to after the set_number_at's of the matching succeed_n's. - Initialize succeed_n n's to -1. - (insert_op_2): Added this to insert an operation followed by - two integers. - (re_compile_fastmap): Added set_number_at case. - (re_match_2): Moved heading comment to after macros. - Added mention of REGS to heading comment. - No longer turn a succeed_n with n = 0 into an on_failure_jump, - because n needs to be reset each time through a loop. - Check to see if a succeed_n's n is set by its set_number_at. - Added set_number_at case. - Updated some comments. - (main): Added another main to run posix tests, which is compiled - ifdef both test and canned. (Old main is still compiled ifdef - test only).
- -Tue Mar 19 09:22:55 1990 Kathy Hargreaves (kathy at hayley) - - * regex.[hc]: Change all instances of the word ``legal'' to - ``valid'' and all instances of ``illegal'' to ``invalid.'' - -Sun Mar 4 12:11:31 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Added syntax bit RE_NO_EMPTY_RANGES which is set if - an ending range point has to collate higher or equal to the - starting range point. - Added syntax bit RE_NO_HYPHEN_RANGE_END which is set if a hyphen - can't be an ending range point. - Set to two above bits in RE_SYNTAX_POSIX_BASIC and - RE_SYNTAX_POSIX_EXTENDED. - - regex.c: (re_compile_pattern): Don't allow empty ranges if the - RE_NO_EMPTY_RANGES syntax bit is set. - Don't let a hyphen be a range end if the RE_NO_HYPHEN_RANGE_END - syntax bit is set. - (ESTACK_PUSH_2): renamed this PUSH_FAILURE_POINT and made it - push all the used registers on the stack, as well as the number - of the highest numbered register used, and (as before) the two - failure points. - (re_match_2): Fixed up comments. - Added arrays best_regstart[], best_regstart_seg1[], best_regend[], - and best_regend_seg1[] to keep track of the best match so far - whenever reach the end of the pattern but not the end of the - string, and there are still failure points on the stack with - which to backtrack; if so, do the saving and force a fail. - If reach the end of the pattern but not the end of the string, - but there are no more failure points to try, restore the best - match so far, set the registers and return. - Compacted some code. - In stop_memory case, if the subexpression we've just left is in - a loop, push onto the stack the loop's on_failure_jump failure - point along with the current pointer into the string (d). - In finalize_jump case, in addition to popping the failure - points, pop the saved registers. - In the fail case, restore the registers, as well as the failure - points. 
- -Sun Feb 18 15:08:10 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c: (global): Defined a macro GET_BUFFER_SPACE which - makes sure you have a specified number of buffer bytes - allocated. - Redefined the macro BUFPUSH to use this. - Added comments. - - (re_compile_pattern): Call GET_BUFFER_SPACE before storing or - inserting any jumps. - - (re_match_2): Set d to string1 + pos and dend to end_match_1 - only if string1 isn't null. - Force exit from a loop if it's around empty parentheses. - In stop_memory case, if found some jumps, increment p2 before - extracting address to which to jump. Also, don't need to know - how many more times can jump_n. - In begline case, d must equal string1 or string2, in that order, - only if they are not null. - In maybe_finalize_jump case, skip over start_memorys' and - stop_memorys' register numbers, too. - -Thu Feb 15 15:53:55 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c (BUFPUSH): off by one goof in deciding whether to - EXTEND_BUFFER. - -Wed Jan 24 17:07:46 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Moved definition of NULL to here. - Got rid of ``In other words...'' comment. - Added to some comments. - - regex.c: (re_compile_pattern): Tried to bulletproof some code, - i.e., checked if backward references (e.g., p[-1]) were within - the range of pattern. - - (re_compile_fastmap): Fixed a bug in succeed_n part where was - getting the amount to jump instead of how many times to jump. - - (re_search_2): Changed the name of the variable ``total'' to - ``total_size.'' - Condensed some code. - - (re_match_2): Moved the comment about duplicate from above the - start_memory case to above duplicate case. - - (global): Rewrote some comments. - Added commandline arguments to testing. - -Wed Jan 17 11:47:27 1990 Kathy Hargreaves (kathy at hayley) - - * regex.c: (global): Defined a macro STORE_NUMBER which stores a - number into two contiguous bytes. 
Also defined STORE_NUMBER_AND_INCR - which does the same thing and then increments the pointer to the - storage place to point after the number. - Defined a macro EXTRACT_NUMBER which extracts a number from two - contiguous bytes. Also defined EXTRACT_NUMBER_AND_INCR which - does the same thing and then increments the pointer to the - source to point to after where the number was. - -Tue Jan 16 12:09:19 1990 Kathy Hargreaves (kathy at hayley) - - * regex.h: Incorporated rms' changes. - Defined RE_NO_BK_REFS syntax bit which is set when want to - interpret back reference patterns as literals. - Defined RE_NO_EMPTY_BRACKETS syntax bit which is set when want - empty bracket expressions to be illegal. - Defined RE_CONTEXTUAL_ILLEGAL_OPS syntax bit which is set when want - it to be illegal for *, +, ? and { to be first in an re or come - immediately after a | or a (, and for ^ not to appear in a - nonleading position and $ in a nontrailing position (outside of - bracket expressions, that is). - Defined RE_LIMITED_OPS syntax bit which is set when want +, ? - and | to always be literals instead of ops. - Fixed up the Posix syntax. - Changed the syntax bit comments from saying, e.g., ``0 means...'' - to ``If this bit is set, it means...''. - Changed the syntax bit defines to use shifts instead of integers. - - * regex.c: (global): Incorporated rms' changes. - - (re_compile_pattern): Incorporated rms' changes. - Made it illegal for a $ to appear anywhere but inside a bracket - expression or at the end of an re when RE_CONTEXTUAL_ILLEGAL_OPS - is set. Made the same hold for ^ except it has to be at the - beginning of an re instead of the end. - Made the re "[]" illegal if RE_NO_EMPTY_BRACKETS is set. - Made it illegal for | to be first or last in an re, or immediately - follow another | or a (. - Added and embellished some comments. - Allowed \{ to be interpreted as a literal if RE_NO_BK_CURLY_BRACES - is set.
- Made it illegal for *, +, ?, and { to appear first in an re, or - immediately follow a | or a ( when RE_CONTEXTUAL_ILLEGAL_OPS is set. - Made back references interpreted as literals if RE_NO_BK_REFS is set. - Made recursive intervals either illegal (if RE_NO_BK_CURLY_BRACES - isn't set) or interpreted as literals (if is set), if RE_INTERVALS - is set. - Made it treat +, ? and | as literals if RE_LIMITED_OPS is set. - Cleaned up some code. - -Thu Dec 21 15:31:32 1989 Kathy Hargreaves (kathy at hayley) - - * regex.c: (global): Moved RE_DUP_MAX to regex.h and made it - equal 2^15 - 1 instead of 1000. - Defined NULL to be zero. - Moved the definition of BYTEWIDTH to regex.h. - Made the global variable obscure_syntax nonstatic so the tests in - another file could use it. - - (re_compile_pattern): Defined a maximum length (CHAR_CLASS_MAX_LENGTH) - for character class strings (i.e., what's between the [: and the - :]'s). - Defined a macro SET_LIST_BIT(c) which sets the bit for C in a - character set list. - Took out comments that EXTEND_BUFFER clobbers C. - Made the string "^" match itself, if not RE_CONTEXT_IND_OPS. - Added character classes to bracket expressions. - Change the laststart pointer saved with the start of each - subexpression to point to start_memory instead of after the - following register number. This is because the subexpression - might be in a loop. - Added comments and compacted some code. - Made intervals only work if preceded by an re matching a single - character or a subexpression. - Made back references to nonexistent subexpressions illegal if - using POSIX syntax. - Made intervals work on the last preceding character of a - concatenation of characters, e.g., ab{0,} matches abbb, not abab. - Moved macro PREFETCH to outside the routine. - - (re_compile_fastmap): Added succeed_n to work analogously to - on_failure_jump if n is zero and jump_n to work analogously to - the other backward jumps. 
- - (re_match_2): Defined macro SET_REGS_MATCHED to set which - current subexpressions had matches within them. - Changed some comments. - Added reg_active and reg_matched_something arrays to keep track - of in which subexpressions currently have matched something. - Defined MATCHING_IN_FIRST_STRING and replaced ``dend == end_match_1'' - with it to make code easier to understand. - Fixed so can apply * and intervals to arbitrarily nested - subexpressions. (Lots of previous bugs here.) - Changed so won't match a newline if syntax bit RE_DOT_NOT_NULL is set. - Made the upcase array nonstatic so the testing file could use it also. - - (main.c): Moved the tests out to another file. - - (tests.c): Moved all the testing stuff here. - -Sat Nov 18 19:30:30 1989 Kathy Hargreaves (kathy at hayley) - - * regex.c: (re_compile_pattern): Defined RE_DUP_MAX, the maximum - number of times an interval can match a pattern. - Added macro GET_UNSIGNED_NUMBER (used to get below): - Added variables lower_bound and upper_bound for upper and lower - bounds of intervals. - Added variable num_fetches so intervals could do backtracking. - Added code to handle '{' and "\{" and intervals. - Added to comments. - - (store_jump_n): (Added) Stores a jump with a number following the - relative address (for intervals). - - (insert_jump_n): (Added) Inserts a jump_n. - - (re_match_2): Defined a macro ESTACK_PUSH_2 for the error stack; - it checks for overflow and reallocates if necessary. - - * regex.h: Added bits (RE_INTERVALS and RE_NO_BK_CURLY_BRACES) - to obscure syntax to indicate whether or not - a syntax handles intervals and recognizes either \{ and - \} or { and } as operators. Also added two syntaxes - RE_SYNTAX_POSIX_BASIC and RE_POSIX_EXTENDED and two command codes - to the enumeration regexpcode; they are succeed_n and jump_n. 
- -Sat Nov 18 19:30:30 1989 Kathy Hargreaves (kathy at hayley) - - * regex.c: (re_compile_pattern): Defined INIT_BUFF_SIZE to get rid - of repeated constants in code. Tested with value 1. - Renamed PATPUSH as BUFPUSH, since it pushes things onto the - buffer, not the pattern. Also made this macro extend the buffer - if it's full (so could do the following): - Took out code at top of loop that checks to see if buffer is going - to be full after 10 additions (and reallocates if necessary). - - (insert_jump): Rearranged declaration lines so comments would read - better. - - (re_match_2): Compacted exactn code and added more comments. - - (main): Defined macros TEST_MATCH and MATCH_SELF to do - testing; took out loop so could use these instead. - -Tue Oct 24 20:57:18 1989 Kathy Hargreaves (kathy at hayley) - - * regex.c (re_set_syntax): Gave argument `syntax' a type. - (store_jump, insert_jump): made them void functions. - -Local Variables: -mode: indented-text -left-margin: 8 -version-control: never -End: diff --git a/gnu/libregex/INSTALL b/gnu/libregex/INSTALL deleted file mode 100644 index 014e0f728ad5..000000000000 --- a/gnu/libregex/INSTALL +++ /dev/null @@ -1,117 +0,0 @@ -This is a generic INSTALL file for utilities distributions. -If this package does not come with, e.g., installable documentation or -data files, please ignore the references to them below. - -To compile this package: - -1. Configure the package for your system. In the directory that this -file is in, type `./configure'. If you're using `csh' on an old -version of System V, you might need to type `sh configure' instead to -prevent `csh' from trying to execute `configure' itself. - -The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation, and -creates the Makefile(s) (one in each subdirectory of the source -directory). In some packages it creates a C header file containing -system-dependent definitions. 
It also creates a file `config.status' -that you can run in the future to recreate the current configuration. - -Running `configure' takes a minute or two. While it is running, it -prints some messages that tell what it is doing. If you don't want to -see the messages, run `configure' with its standard output redirected -to `/dev/null'; for example, `./configure >/dev/null'. - -To compile the package in a different directory from the one -containing the source code, you must use a version of `make' that -supports the VPATH variable, such as GNU `make'. `cd' to the directory -where you want the object files and executables to go and run -`configure'. `configure' automatically checks for the source code in -the directory that `configure' is in and in `..'. If for some reason -`configure' is not in the source code directory that you are -configuring, then it will report that it can't find the source code. -In that case, run `configure' with the option `--srcdir=DIR', where -DIR is the directory that contains the source code. - -By default, `make install' will install the package's files in -/usr/local/bin, /usr/local/lib, /usr/local/man, etc. You can specify -an installation prefix other than /usr/local by giving `configure' the -option `--prefix=PATH'. Alternately, you can do so by giving a value -for the `prefix' variable when you run `make', e.g., - make prefix=/usr/gnu - -You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If -you give `configure' the option `--exec-prefix=PATH' or set the -`make' variable `exec_prefix' to PATH, the package will use PATH as -the prefix for installing programs and libraries. Data files and -documentation will still use the regular prefix. Normally, all files -are installed using the regular prefix. - -Another `configure' option is useful mainly in `Makefile' rules for -updating `config.status' and `Makefile'. 
The `--no-create' option -figures out the configuration for your system and records it in -`config.status', without actually configuring the package (creating -`Makefile's and perhaps a configuration header file). Later, you can -run `./config.status' to actually configure the package. You can also -give `config.status' the `--recheck' option, which makes it re-run -`configure' with the same arguments you used before. This option is -useful if you change `configure'. - -Some packages pay attention to `--with-PACKAGE' options to `configure', -where PACKAGE is something like `gnu-libc' or `x' (for the X Window System). -The README should mention any --with- options that the package recognizes. - -`configure' ignores any other arguments that you give it. - -If your system requires unusual options for compilation or linking -that `configure' doesn't know about, you can give `configure' initial -values for some variables by setting them in the environment. In -Bourne-compatible shells, you can do that on the command line like -this: - CC='gcc -traditional' DEFS=-D_POSIX_SOURCE ./configure - -The `make' variables that you might want to override with environment -variables when running `configure' are: - -(For these variables, any value given in the environment overrides the -value that `configure' would choose:) -CC C compiler program. - Default is `cc', or `gcc' if `gcc' is in your PATH. -INSTALL Program to use to install files. - Default is `install' if you have it, `cp' otherwise. - -(For these variables, any value given in the environment is added to -the value that `configure' chooses:) -DEFS Configuration options, in the form `-Dfoo -Dbar ...' - Do not use this variable in packages that create a - configuration header file. -LIBS Libraries to link with, in the form `-lfoo -lbar ...' 
- -If you need to do unusual things to compile the package, we encourage -you to figure out how `configure' could check whether to do them, and -mail diffs or instructions to the address given in the README so we -can include them in the next release. - -2. Type `make' to compile the package. If you want, you can override -the `make' variables CFLAGS and LDFLAGS like this: - - make CFLAGS=-O2 LDFLAGS=-s - -3. If the package comes with self-tests and you want to run them, -type `make check'. If you're not sure whether there are any, try it; -if `make' responds with something like - make: *** No way to make target `check'. Stop. -then the package does not come with self-tests. - -4. Type `make install' to install programs, data files, and -documentation. - -5. You can remove the program binaries and object files from the -source directory by typing `make clean'. To also remove the -Makefile(s), the header file containing system-dependent definitions -(if the package uses one), and `config.status' (all the files that -`configure' created), type `make distclean'. - -The file `configure.in' is used as a template to create `configure' by -a program called `autoconf'. You will only need it if you want to -regenerate `configure' using a newer version of `autoconf'. diff --git a/gnu/libregex/Makefile b/gnu/libregex/Makefile deleted file mode 100644 index b0c6819a214c..000000000000 --- a/gnu/libregex/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# $Header: /a/cvs/386BSD/src/gnu/libregex/Makefile,v 1.3 1993/08/23 05:26:19 rgrimes Exp $ - -LIB= gnuregex -CFLAGS+=-DHAVE_STRING_H=1 -SRCS= regex.c -NOMAN= noman - -beforeinstall: - install -c -o ${BINOWN} -g ${BINGRP} -m 444 ${.CURDIR}/regex.h \ - ${DESTDIR}/usr/include - -.include <bsd.lib.mk> diff --git a/gnu/libregex/Makefile.gnu b/gnu/libregex/Makefile.gnu deleted file mode 100644 index 0976aa8bdc48..000000000000 --- a/gnu/libregex/Makefile.gnu +++ /dev/null @@ -1,99 +0,0 @@ -# Generated automatically from Makefile.in by configure. 
-# Makefile for regex. -# -# Copyright (C) 1992, 1993 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -version = 0.12 - -# You can define CPPFLAGS on the command line. Aside from system-specific -# flags, you can define: -# -DREGEX_MALLOC to use malloc/realloc/free instead of alloca. -# -DDEBUG to enable the compiled pattern disassembler and execution -# tracing; code runs substantially slower. -# -DEXTRACT_MACROS to use the macros EXTRACT_* (as opposed to -# the corresponding C procedures). If not -DDEBUG, the macros -# are used. -CPPFLAGS = - -# Likewise, you can override CFLAGS to optimize, use -Wall, etc. -CFLAGS = -g - -# Ditto for LDFLAGS and LOADLIBES. -LDFLAGS = -LOADLIBES = - -srcdir = . -VPATH = . - -CC = gcc -DEFS = -DHAVE_STRING_H=1 - -SHELL = /bin/sh - -subdirs = doc test - -default all:: regex.o -.PHONY: default all - -regex.o: regex.c regex.h - $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) -I. 
-I$(srcdir) -c $< - -clean mostlyclean:: - rm -f *.o - -distclean:: clean - rm -f Makefile config.status - -extraclean:: distclean - rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out - -configure: configure.in - autoconf - -config.status: configure - sh configure --no-create - -Makefile: Makefile.in config.status - sh config.status - -makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' \ -DEFS='$(DEFS)' LDFLAGS='$(LDFLAGS)' LOADLIBES='$(LOADLIBES)' - -default all install \ -mostlyclean clean distclean extraclean realclean \ -TAGS check:: - for d in $(subdirs); do (cd $$d; $(MAKE) $(makeargs) $@); done -.PHONY: install mostlyclean clean distclean extraclean realclean TAGS check - -# Prevent GNU make 3 from overflowing arg limit on system V. -.NOEXPORT: - -distfiles = AUTHORS ChangeLog COPYING INSTALL NEWS README \ - *.in configure regex.c regex.h -distdir = regex-$(version) -distargs = version=$(version) distdir=../$(distdir)/$$d -dist: TAGS configure - @echo "Version numbers in: Makefile.in, ChangeLog, NEWS," - @echo " regex.c, regex.h," - @echo " and doc/xregex.texi (if modified)." - rm -rf $(distdir) - mkdir $(distdir) - ln $(distfiles) $(distdir) - for d in $(subdirs); do (cd $$d; $(MAKE) $(distargs) dist); done - tar czhf $(distdir).tar.Z $(distdir) - rm -rf $(distdir) -.PHONY: dist diff --git a/gnu/libregex/Makefile.in b/gnu/libregex/Makefile.in deleted file mode 100644 index 836e6de0ba5a..000000000000 --- a/gnu/libregex/Makefile.in +++ /dev/null @@ -1,98 +0,0 @@ -# Makefile for regex. -# -# Copyright (C) 1992, 1993 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -version = 0.12 - -# You can define CPPFLAGS on the command line. Aside from system-specific -# flags, you can define: -# -DREGEX_MALLOC to use malloc/realloc/free instead of alloca. -# -DDEBUG to enable the compiled pattern disassembler and execution -# tracing; code runs substantially slower. -# -DEXTRACT_MACROS to use the macros EXTRACT_* (as opposed to -# the corresponding C procedures). If not -DDEBUG, the macros -# are used. -CPPFLAGS = - -# Likewise, you can override CFLAGS to optimize, use -Wall, etc. -CFLAGS = -g - -# Ditto for LDFLAGS and LOADLIBES. -LDFLAGS = -LOADLIBES = - -srcdir = @srcdir@ -VPATH = @srcdir@ - -CC = @CC@ -DEFS = @DEFS@ - -SHELL = /bin/sh - -subdirs = doc test - -default all:: regex.o -.PHONY: default all - -regex.o: regex.c regex.h - $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) -I. 
-I$(srcdir) -c $< - -clean mostlyclean:: - rm -f *.o - -distclean:: clean - rm -f Makefile config.status - -extraclean:: distclean - rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out - -configure: configure.in - autoconf - -config.status: configure - sh configure --no-create - -Makefile: Makefile.in config.status - sh config.status - -makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' \ -DEFS='$(DEFS)' LDFLAGS='$(LDFLAGS)' LOADLIBES='$(LOADLIBES)' - -default all install \ -mostlyclean clean distclean extraclean realclean \ -TAGS check:: - for d in $(subdirs); do (cd $$d; $(MAKE) $(makeargs) $@); done -.PHONY: install mostlyclean clean distclean extraclean realclean TAGS check - -# Prevent GNU make 3 from overflowing arg limit on system V. -.NOEXPORT: - -distfiles = AUTHORS ChangeLog COPYING INSTALL NEWS README \ - *.in configure regex.c regex.h -distdir = regex-$(version) -distargs = version=$(version) distdir=../$(distdir)/$$d -dist: TAGS configure - @echo "Version numbers in: Makefile.in, ChangeLog, NEWS," - @echo " regex.c, regex.h," - @echo " and doc/xregex.texi (if modified)." - rm -rf $(distdir) - mkdir $(distdir) - ln $(distfiles) $(distdir) - for d in $(subdirs); do (cd $$d; $(MAKE) $(distargs) dist); done - tar czhf $(distdir).tar.Z $(distdir) - rm -rf $(distdir) -.PHONY: dist diff --git a/gnu/libregex/NEWS b/gnu/libregex/NEWS deleted file mode 100644 index b3a899b4568c..000000000000 --- a/gnu/libregex/NEWS +++ /dev/null @@ -1,62 +0,0 @@ -Version 0.12 - -* regex.c does not #define bcmp/bcopy/bzero if they already are. - -* regex.h does not redefine `const' if it is already defined, even if - __STDC__ is not defined. - -* RE_SYNTAX_ED added (same as POSIX BRE's). - -* The following bugs have been fixed, among others: - * The pattern \w+ doesn't infinite loop. - * The pattern ".+\n" is compiled correctly. - * Expressions with more than MAX_REGNUM groups are compiled correctly. 
- -* Patterns that end in a repetition operator (e.g., `*') match - slightly faster if no looping is actually necessary. - -Version 0.11 (17 Sep 92) - -* Back-references to nonexistent subexpressions, as in the r.e. `abc\1', - are always invalid. Previously, they could match the literal digit, - e.g., the stated r.e. might have matched `abc1'. - -* Empty subexpressions are always valid (POSIX leaves this undefined). - -* Simplified rules for ^ and $ being anchors. - -* One minor speedup (rewriting the C procedure `pop_failure_point' as a - macro again). - -* Bug fixes involving: - - Declarations in regex.h and non-ANSI compilers. - - Bracket expressions with characters between 0x80-0xff. - - Memory leak in re_match_2 on systems requiring `alloca (0)' to - free alloca'd storage. - -* Test and documentation files moved into subdirectories. - -Version 0.10 (9 Sep 92) - -* `obscure_syntax' is now called `re_default_syntax'. - -* `re_comp's return type is no longer `const', for compatibility with BSD. - -* POSIX syntaxes now include as much functionality as possible - (consistent with the standard). - -* Compilation conditionals normalized to what the rest of GNU is - migrating towards these days. - -* Bug fixes involving: - - Ranges with characters between 0x80 and 0xff, e.g., [\001-\377]. - - `re_compile_fastmap' and the sequence `.*\n'. - - Intervals with exact counts, e.g., a{5}. - -* Changed distribution to use a standard Makefile, install the info - files, use a configure script, etc. - -Version 0.9 - -* The longest match was not always chosen: `a*|ab' didn't match `aab'. - diff --git a/gnu/libregex/README b/gnu/libregex/README deleted file mode 100644 index 918e1a03b821..000000000000 --- a/gnu/libregex/README +++ /dev/null @@ -1,60 +0,0 @@ -This directory contains the GNU regex library. It is compliant with -POSIX.2, except for internationalization features. - -See the file NEWS for a list of major changes in the current release. 
- -See the file INSTALL for compilation instructions. (The only thing -installed is the documentation; regex.c is compiled into regex.o, but -not installed anywhere.) - -The subdirectory `doc' contains a (programmers') manual for the library. -It's probably out-of-date. Improvements are welcome. - -The subdirectory `test' contains the various tests we've written. - -We know this code is not as fast as it might be. If you have specific -suggestions, profiling results, or other such useful information to -report, please do. - -Emacs 18 is not going use this revised regex (but Emacs 19 will). If -you want to try it with Emacs 18, apply the patch at the end of this -file first. - -Mail bug reports to bug-gnu-utils@prep.ai.mit.edu. - -Please include an actual regular expression that fails (and the syntax -used to compile it); without that, there's no way to reproduce the bug, -so there's no way we can fix it. Even if you include a patch, also -include the regular expression in error; otherwise, we can't know for -sure what you're trying to fix. - -Here is the patch to make this version of regex work with Emacs 18. - -*** ORIG/search.c Tue Jan 8 13:04:55 1991 ---- search.c Sun Jan 5 10:57:00 1992 -*************** -*** 25,26 **** ---- 25,28 ---- - #include "commands.h" -+ -+ #include <sys/types.h> - #include "regex.h" -*************** -*** 477,479 **** - /* really needed. */ -! && *(searchbuf.buffer) == (char) exactn /* first item is "exact match" */ - && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */ ---- 479,482 ---- - /* really needed. */ -! /* first item is "exact match" */ -! && *(searchbuf.buffer) == (char) RE_EXACTN_VALUE - && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */ -*************** -*** 1273,1275 **** - searchbuf.allocated = 100; -! searchbuf.buffer = (char *) malloc (searchbuf.allocated); - searchbuf.fastmap = search_fastmap; ---- 1276,1278 ---- - searchbuf.allocated = 100; -! 
searchbuf.buffer = (unsigned char *) malloc (searchbuf.allocated); - searchbuf.fastmap = search_fastmap; diff --git a/gnu/libregex/VERSION b/gnu/libregex/VERSION deleted file mode 100644 index 7182be23ebdd..000000000000 --- a/gnu/libregex/VERSION +++ /dev/null @@ -1,3 +0,0 @@ -GNU regex version 0.12 - -complete, unmodified regex sources are available from prep.ai.mit.edu. diff --git a/gnu/libregex/config.status b/gnu/libregex/config.status deleted file mode 100644 index 1b276aacf34c..000000000000 --- a/gnu/libregex/config.status +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/sh -# Generated automatically by configure. -# Run this file to recreate the current configuration. -# This directory was configured as follows, -# on host sun-lamp.cs.berkeley.edu: -# -# configure - -for arg -do - case "$arg" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - exec /bin/sh configure ;; - *) echo "Usage: config.status --recheck" 2>&1; exit 1 ;; - esac -done - -trap 'rm -f Makefile doc/Makefile test/Makefile; exit 1' 1 3 15 -CC='gcc' -INSTALL='/usr/bin/install -c' -INSTALL_PROGRAM='$(INSTALL)' -INSTALL_DATA='$(INSTALL) -m 644' -CPP='${CC-cc} -E' -ALLOCA='' -LIBS='' -srcdir='.' -DEFS=' -DHAVE_STRING_H=1' -prefix='/usr' -exec_prefix='${prefix}' -prsub='s%^prefix\([ ]*\)=\([ ]*\).*$%prefix\1=\2/usr% -s%^exec_prefix\([ ]*\)=\([ ]*\).*$%exec_prefix\1=\2${prefix}%' - -top_srcdir=$srcdir -for file in .. Makefile doc/Makefile test/Makefile; do if [ "x$file" != "x.." ]; then - srcdir=$top_srcdir - # Remove last slash and all that follows it. Not all systems have dirname. - dir=`echo $file|sed 's%/[^/][^/]*$%%'` - if test "$dir" != "$file"; then - test "$top_srcdir" != . && srcdir=$top_srcdir/$dir - test ! -d $dir && mkdir $dir - fi - echo creating $file - rm -f $file - echo "# Generated automatically from `echo $file|sed 's|.*/||'`.in by configure." 
> $file - sed -e " -$prsub -s%@CC@%$CC%g -s%@INSTALL@%$INSTALL%g -s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g -s%@INSTALL_DATA@%$INSTALL_DATA%g -s%@CPP@%$CPP%g -s%@ALLOCA@%$ALLOCA%g -s%@LIBS@%$LIBS%g -s%@srcdir@%$srcdir%g -s%@DEFS@%$DEFS% -" $top_srcdir/${file}.in >> $file -fi; done - -exit 0 diff --git a/gnu/libregex/configure b/gnu/libregex/configure deleted file mode 100644 index 29c5b803c329..000000000000 --- a/gnu/libregex/configure +++ /dev/null @@ -1,462 +0,0 @@ -#!/bin/sh -# Guess values for system-dependent variables and create Makefiles. -# Generated automatically using autoconf. -# Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -# Usage: configure [--srcdir=DIR] [--host=HOST] [--gas] [--nfp] [--no-create] -# [--prefix=PREFIX] [--exec-prefix=PREFIX] [--with-PACKAGE] [TARGET] -# Ignores all args except --srcdir, --prefix, --exec-prefix, --no-create, and -# --with-PACKAGE unless this script has special code to handle it. - - -for arg -do - # Handle --exec-prefix with a space before the argument. - if test x$next_exec_prefix = xyes; then exec_prefix=$arg; next_exec_prefix= - # Handle --host with a space before the argument. - elif test x$next_host = xyes; then next_host= - # Handle --prefix with a space before the argument. 
- elif test x$next_prefix = xyes; then prefix=$arg; next_prefix= - # Handle --srcdir with a space before the argument. - elif test x$next_srcdir = xyes; then srcdir=$arg; next_srcdir= - else - case $arg in - # For backward compatibility, also recognize exact --exec_prefix. - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* | --exec=* | --exe=* | --ex=* | --e=*) - exec_prefix=`echo $arg | sed 's/[-a-z_]*=//'` ;; - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- | --exec | --exe | --ex | --e) - next_exec_prefix=yes ;; - - -gas | --gas | --ga | --g) ;; - - -host=* | --host=* | --hos=* | --ho=* | --h=*) ;; - -host | --host | --hos | --ho | --h) - next_host=yes ;; - - -nfp | --nfp | --nf) ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre | --no-cr | --no-c | --no- | --no) - no_create=1 ;; - - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix=`echo $arg | sed 's/[-a-z_]*=//'` ;; - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - next_prefix=yes ;; - - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=* | --s=*) - srcdir=`echo $arg | sed 's/[-a-z_]*=//'` ;; - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr | --s) - next_srcdir=yes ;; - - -with-* | --with-*) - package=`echo $arg|sed 's/-*with-//'` - # Delete all the valid chars; see if any are left. 
- if test -n "`echo $package|sed 's/[-a-zA-Z0-9_]*//g'`"; then - echo "configure: $package: invalid package name" >&2; exit 1 - fi - eval "with_`echo $package|sed s/-/_/g`=1" ;; - - *) ;; - esac - fi -done - -trap 'rm -f conftest* core; exit 1' 1 3 15 - -rm -f conftest* -compile='${CC-cc} $CFLAGS $DEFS conftest.c -o conftest $LIBS >/dev/null 2>&1' - -# A filename unique to this package, relative to the directory that -# configure is in, which we can look for to find out if srcdir is correct. -unique_file=regex.c - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - srcdirdefaulted=yes - # Try the directory containing this script, then `..'. - prog=$0 - confdir=`echo $prog|sed 's%/[^/][^/]*$%%'` - test "X$confdir" = "X$prog" && confdir=. - srcdir=$confdir - if test ! -r $srcdir/$unique_file; then - srcdir=.. - fi -fi -if test ! -r $srcdir/$unique_file; then - if test x$srcdirdefaulted = xyes; then - echo "configure: Can not find sources in \`${confdir}' or \`..'." 1>&2 - else - echo "configure: Can not find sources in \`${srcdir}'." 1>&2 - fi - exit 1 -fi -# Preserve a srcdir of `.' to avoid automounter screwups with pwd. -# But we can't avoid them for `..', to make subdirectories work. -case $srcdir in - .|/*|~*) ;; - *) srcdir=`cd $srcdir; pwd` ;; # Make relative path absolute. -esac - - -if test -z "$CC"; then - echo checking for gcc - saveifs="$IFS"; IFS="${IFS}:" - for dir in $PATH; do - test -z "$dir" && dir=. - if test -f $dir/gcc; then - CC="gcc" - break - fi - done - IFS="$saveifs" -fi -test -z "$CC" && CC="cc" - -# Find out if we are using GNU C, under whatever name. -cat > conftest.c <<EOF -#ifdef __GNUC__ - yes -#endif -EOF -${CC-cc} -E conftest.c > conftest.out 2>&1 -if egrep yes conftest.out >/dev/null 2>&1; then - GCC=1 # For later tests. 
-fi -rm -f conftest* - -# Make sure to not get the incompatible SysV /etc/install and -# /usr/sbin/install, which might be in PATH before a BSD-like install, -# or the SunOS /usr/etc/install directory, or the AIX /bin/install, -# or the AFS install, which mishandles nonexistent args. (Sigh.) -if test -z "$INSTALL"; then - echo checking for install - saveifs="$IFS"; IFS="${IFS}:" - for dir in $PATH; do - test -z "$dir" && dir=. - case $dir in - /etc|/usr/sbin|/usr/etc|/usr/afsws/bin) ;; - *) - if test -f $dir/install; then - if grep dspmsg $dir/install >/dev/null 2>&1; then - : # AIX - else - INSTALL="$dir/install -c" - INSTALL_PROGRAM='$(INSTALL)' - INSTALL_DATA='$(INSTALL) -m 644' - break - fi - fi - ;; - esac - done - IFS="$saveifs" -fi -INSTALL=${INSTALL-cp} -INSTALL_PROGRAM=${INSTALL_PROGRAM-'$(INSTALL)'} -INSTALL_DATA=${INSTALL_DATA-'$(INSTALL)'} - - -echo checking for AIX -echo checking how to run the C preprocessor -if test -z "$CPP"; then - CPP='${CC-cc} -E' - cat > conftest.c <<EOF -#include <stdio.h> -EOF -err=`eval "$CPP $DEFS conftest.c 2>&1 >/dev/null"` -if test -z "$err"; then - : -else - CPP=/lib/cpp -fi -rm -f conftest* -fi - -cat > conftest.c <<EOF -#ifdef _AIX - yes -#endif - -EOF -eval "$CPP $DEFS conftest.c > conftest.out 2>&1" -if egrep "yes" conftest.out >/dev/null 2>&1; then - DEFS="$DEFS -D_ALL_SOURCE=1" -fi -rm -f conftest* - - -echo checking for DYNIX/ptx libseq -cat > conftest.c <<EOF -#if defined(_SEQUENT_) - yes -#endif - -EOF -eval "$CPP $DEFS conftest.c > conftest.out 2>&1" -if egrep "yes" conftest.out >/dev/null 2>&1; then - SEQUENT=1 -fi -rm -f conftest* - -test -n "$SEQUENT" && test -f /usr/lib/libseq.a && - LIBS="$LIBS -lseq" - -echo checking for POSIXized ISC -if test -d /etc/conf/kconfig.d && - grep _POSIX_VERSION /usr/include/sys/unistd.h >/dev/null 2>&1 -then - ISC=1 # If later tests want to check for ISC. 
- DEFS="$DEFS -D_POSIX_SOURCE=1" - if test -n "$GCC"; then - CC="$CC -posix" - else - CC="$CC -Xp" - fi -fi - -echo checking for minix/config.h -cat > conftest.c <<EOF -#include <minix/config.h> -EOF -err=`eval "$CPP $DEFS conftest.c 2>&1 >/dev/null"` -if test -z "$err"; then - MINIX=1 -fi -rm -f conftest* - -# The Minix shell can't assign to the same variable on the same line! -if test -n "$MINIX"; then - DEFS="$DEFS -D_POSIX_SOURCE=1" - DEFS="$DEFS -D_POSIX_1_SOURCE=2" - DEFS="$DEFS -D_MINIX=1" -fi - - -echo checking for ANSI C header files -cat > conftest.c <<EOF -#include <stdlib.h> -#include <stdarg.h> -#include <string.h> -#include <float.h> -EOF -err=`eval "$CPP $DEFS conftest.c 2>&1 >/dev/null"` -if test -z "$err"; then - # SunOS string.h does not declare mem*, contrary to ANSI. -echo '#include <string.h>' > conftest.c -eval "$CPP $DEFS conftest.c > conftest.out 2>&1" -if egrep "memchr" conftest.out >/dev/null 2>&1; then - # SGI's /bin/cc from Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. -cat > conftest.c <<EOF -#include <ctype.h> -#define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -#define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) -#define XOR(e,f) (((e) && !(f)) || (!(e) && (f))) -int main () { int i; for (i = 0; i < 256; i++) -if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); -exit (0); } - -EOF -eval $compile -if test -s conftest && (./conftest; exit) 2>/dev/null; then - DEFS="$DEFS -DSTDC_HEADERS=1" -fi -rm -f conftest* -fi -rm -f conftest* - -fi -rm -f conftest* - -for hdr in string.h -do -trhdr=HAVE_`echo $hdr | tr '[a-z]./' '[A-Z]__'` -echo checking for ${hdr} -cat > conftest.c <<EOF -#include <${hdr}> -EOF -err=`eval "$CPP $DEFS conftest.c 2>&1 >/dev/null"` -if test -z "$err"; then - DEFS="$DEFS -D${trhdr}=1" -fi -rm -f conftest* -done - - -# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works -# for constant arguments. Useless! 
-echo checking for working alloca.h -cat > conftest.c <<EOF -#include <alloca.h> -main() { exit(0); } -t() { char *p = alloca(2 * sizeof(int)); } -EOF -if eval $compile; then - DEFS="$DEFS -DHAVE_ALLOCA_H=1" -fi -rm -f conftest* - -decl="#ifdef __GNUC__ -#define alloca __builtin_alloca -#else -#if HAVE_ALLOCA_H -#include <alloca.h> -#else -#ifdef _AIX - #pragma alloca -#else -char *alloca (); -#endif -#endif -#endif -" -echo checking for alloca -cat > conftest.c <<EOF -$decl -main() { exit(0); } -t() { char *p = (char *) alloca(1); } -EOF -if eval $compile; then - : -else - alloca_missing=1 -fi -rm -f conftest* - -if test -n "$alloca_missing"; then - # The SVR3 libPW and SVR4 libucb both contain incompatible functions - # that cause trouble. Some versions do not even contain alloca or - # contain a buggy version. If you still want to use their alloca, - # use ar to extract alloca.o from them instead of compiling alloca.c. - ALLOCA=alloca.o -fi - -prog='/* Ultrix mips cc rejects this. */ -typedef int charset[2]; const charset x; -/* SunOS 4.1.1 cc rejects this. */ -char const *const *p; -char **p2; -/* HPUX 7.0 cc rejects these. */ -++p; -p2 = (char const* const*) p;' -echo checking for working const -cat > conftest.c <<EOF - -main() { exit(0); } -t() { $prog } -EOF -if eval $compile; then - : -else - DEFS="$DEFS -Dconst=" -fi -rm -f conftest* - - -if test -z "$prefix" -then - echo checking for gcc to derive installation directory prefix - saveifs="$IFS"; IFS="$IFS:" - for dir in $PATH; do - test -z "$dir" && dir=. - if test $dir != . && test -f $dir/gcc; then - # Not all systems have dirname. 
- prefix=`echo $dir|sed 's%/[^/][^/]*$%%'` - break - fi - done - IFS="$saveifs" -fi - - -if test -n "$prefix"; then - test -z "$exec_prefix" && exec_prefix='${prefix}' - prsub="s%^prefix\\([ ]*\\)=\\([ ]*\\).*$%prefix\\1=\\2$prefix%" -fi -if test -n "$exec_prefix"; then - prsub="$prsub -s%^exec_prefix\\([ ]*\\)=\\([ ]*\\).*$%\ -exec_prefix\\1=\\2$exec_prefix%" -fi - -trap 'rm -f config.status; exit 1' 1 3 15 -echo creating config.status -rm -f config.status -cat > config.status <<EOF -#!/bin/sh -# Generated automatically by configure. -# Run this file to recreate the current configuration. -# This directory was configured as follows, -# on host `(hostname || uname -n) 2>/dev/null`: -# -# $0 $* - -for arg -do - case "\$arg" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - exec /bin/sh $0 $* ;; - *) echo "Usage: config.status --recheck" 2>&1; exit 1 ;; - esac -done - -trap 'rm -f Makefile doc/Makefile test/Makefile; exit 1' 1 3 15 -CC='$CC' -INSTALL='$INSTALL' -INSTALL_PROGRAM='$INSTALL_PROGRAM' -INSTALL_DATA='$INSTALL_DATA' -CPP='$CPP' -ALLOCA='$ALLOCA' -LIBS='$LIBS' -srcdir='$srcdir' -DEFS='$DEFS' -prefix='$prefix' -exec_prefix='$exec_prefix' -prsub='$prsub' -EOF -cat >> config.status <<\EOF - -top_srcdir=$srcdir -for file in .. Makefile doc/Makefile test/Makefile; do if [ "x$file" != "x.." ]; then - srcdir=$top_srcdir - # Remove last slash and all that follows it. Not all systems have dirname. - dir=`echo $file|sed 's%/[^/][^/]*$%%'` - if test "$dir" != "$file"; then - test "$top_srcdir" != . && srcdir=$top_srcdir/$dir - test ! -d $dir && mkdir $dir - fi - echo creating $file - rm -f $file - echo "# Generated automatically from `echo $file|sed 's|.*/||'`.in by configure." 
> $file - sed -e " -$prsub -s%@CC@%$CC%g -s%@INSTALL@%$INSTALL%g -s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g -s%@INSTALL_DATA@%$INSTALL_DATA%g -s%@CPP@%$CPP%g -s%@ALLOCA@%$ALLOCA%g -s%@LIBS@%$LIBS%g -s%@srcdir@%$srcdir%g -s%@DEFS@%$DEFS% -" $top_srcdir/${file}.in >> $file -fi; done - -exit 0 -EOF -chmod +x config.status -test -n "$no_create" || ./config.status - diff --git a/gnu/libregex/configure.in b/gnu/libregex/configure.in deleted file mode 100644 index f0fc78025095..000000000000 --- a/gnu/libregex/configure.in +++ /dev/null @@ -1,23 +0,0 @@ -dnl Process this file with autoconf to produce a configure script. -AC_INIT(regex.c) - -AC_PROG_CC -AC_PROG_INSTALL - -dnl I'm not sure if AC_AIX and AC_DYNIX_SEQ are really necessary. The -dnl Autoconf documentation isn't specific about which BSD functions they -dnl provide. -AC_AIX -AC_DYNIX_SEQ -AC_ISC_POSIX -AC_MINIX - -AC_STDC_HEADERS -AC_HAVE_HEADERS(string.h) - -AC_ALLOCA -AC_CONST - -AC_PREFIX(gcc) - -AC_OUTPUT(Makefile doc/Makefile test/Makefile) diff --git a/gnu/libregex/doc/Makefile b/gnu/libregex/doc/Makefile deleted file mode 100644 index 13753ae8ff1d..000000000000 --- a/gnu/libregex/doc/Makefile +++ /dev/null @@ -1,93 +0,0 @@ -# Generated automatically from Makefile.in by configure. -# Makefile for regex documentation. -# -# Copyright (C) 1992 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -# Installation directories. -prefix = /usr -infodir = $(prefix)/info - -srcdir = . -VPATH = .:../. - -INSTALL = /usr/bin/install -c -INSTALL_DATA = $(INSTALL) -m 644 - -MAKEINFO = makeinfo --no-split -SHELL = /bin/sh -TEX = tex -TEXINDEX = texindex - -default all: regex.info regex.dvi -.PHONY: default all - -# We need to include some code from regex.h. -regex.texi: xregex.texi - rm -f $@ - gawk -f include.awk -vsource=../$(srcdir)/regex.h \ - <../$(srcdir)/doc/xregex.texi \ - | expand >$@ - chmod a-w $@ - -regex.dvi: regex.cps - $(TEX) regex.texi -regex.cps: regex.cp - $(TEXINDEX) regex.?? -regex.cp: regex.texi - $(TEX) ../$(srcdir)/doc/regex.texi - -regex.info: regex.texi - $(MAKEINFO) ../$(srcdir)/doc/regex.texi - -# I know of no way to make a good TAGS file from Texinfo source. -TAGS: - -check: -.PHONY: check - -install: regex.info - -mkdir $(prefix) $(infodir) - for i in *.info*; do $(INSTALL_DATA) $$i $(infodir)/$$i; done -.PHONY: install - -clean mostlyclean: - rm -f regex.?? *.dvi *.log *.toc - -distclean: clean - rm -f Makefile - for f in regex.??s; do if test -z "`cat $$f`"; then rm -f $$f; fi; done - -realclean: distclean - rm -f *.info* regex.??? regex.texi TAGS - -extraclean: distclean - rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out -.PHONY: mostlyclean clean distclean realclean extraclean - -Makefile: Makefile.in ../config.status - (cd ..; sh config.status) - -# Prevent GNU make 3 from overflowing arg limit on system V. -.NOEXPORT: - -# Assumes $(distdir) is the place to put our files. 
-distfiles = Makefile.in *.texi texinfo.tex include.awk \ - regex.info* regex.aux regex.cps -dist: Makefile regex.info regex.cps - mkdir $(distdir) - ln $(distfiles) $(distdir) -.PHONY: dist diff --git a/gnu/libregex/doc/Makefile.in b/gnu/libregex/doc/Makefile.in deleted file mode 100644 index 2f5d382c06e5..000000000000 --- a/gnu/libregex/doc/Makefile.in +++ /dev/null @@ -1,92 +0,0 @@ -# Makefile for regex documentation. -# -# Copyright (C) 1992 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -# Installation directories. -prefix = /usr/local -infodir = $(prefix)/info - -srcdir = @srcdir@ -VPATH = @srcdir@:../@srcdir@ - -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ - -MAKEINFO = makeinfo --no-split -SHELL = /bin/sh -TEX = tex -TEXINDEX = texindex - -default all: regex.info regex.dvi -.PHONY: default all - -# We need to include some code from regex.h. -regex.texi: xregex.texi - rm -f $@ - gawk -f include.awk -vsource=../$(srcdir)/regex.h \ - <../$(srcdir)/doc/xregex.texi \ - | expand >$@ - chmod a-w $@ - -regex.dvi: regex.cps - $(TEX) regex.texi -regex.cps: regex.cp - $(TEXINDEX) regex.?? -regex.cp: regex.texi - $(TEX) ../$(srcdir)/doc/regex.texi - -regex.info: regex.texi - $(MAKEINFO) ../$(srcdir)/doc/regex.texi - -# I know of no way to make a good TAGS file from Texinfo source. 
-TAGS: - -check: -.PHONY: check - -install: regex.info - -mkdir $(prefix) $(infodir) - for i in *.info*; do $(INSTALL_DATA) $$i $(infodir)/$$i; done -.PHONY: install - -clean mostlyclean: - rm -f regex.?? *.dvi *.log *.toc - -distclean: clean - rm -f Makefile - for f in regex.??s; do if test -z "`cat $$f`"; then rm -f $$f; fi; done - -realclean: distclean - rm -f *.info* regex.??? regex.texi TAGS - -extraclean: distclean - rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out -.PHONY: mostlyclean clean distclean realclean extraclean - -Makefile: Makefile.in ../config.status - (cd ..; sh config.status) - -# Prevent GNU make 3 from overflowing arg limit on system V. -.NOEXPORT: - -# Assumes $(distdir) is the place to put our files. -distfiles = Makefile.in *.texi texinfo.tex include.awk \ - regex.info* regex.aux regex.cps -dist: Makefile regex.info regex.cps - mkdir $(distdir) - ln $(distfiles) $(distdir) -.PHONY: dist diff --git a/gnu/libregex/doc/include.awk b/gnu/libregex/doc/include.awk deleted file mode 100644 index a1df3f84634f..000000000000 --- a/gnu/libregex/doc/include.awk +++ /dev/null @@ -1,19 +0,0 @@ -# Assume `source' is set with -vsource=filename on the command line. -# -/^\[\[\[/ { inclusion = $2; # name of the thing to include. 
- printing = 0; - while ((getline line < source) > 0) - { - if (match (line, "\\[\\[\\[end " inclusion "\\]\\]\\]")) - printing = 0; - - if (printing) - print line; - - if (match (line,"\\[\\[\\[begin " inclusion "\\]\\]\\]")) - printing = 1; - } - close (source); - next; - } - { print } diff --git a/gnu/libregex/doc/regex.aux b/gnu/libregex/doc/regex.aux deleted file mode 100644 index fd6a245eb111..000000000000 --- a/gnu/libregex/doc/regex.aux +++ /dev/null @@ -1,136 +0,0 @@ -'xrdef {Overview-pg}{1} -'xrdef {Overview-snt}{Chapter'tie1} -'xrdef {Regular Expression Syntax-pg}{2} -'xrdef {Regular Expression Syntax-snt}{Chapter'tie2} -'xrdef {Syntax Bits-pg}{2} -'xrdef {Syntax Bits-snt}{Section'tie2.1} -'xrdef {Predefined Syntaxes-pg}{5} -'xrdef {Predefined Syntaxes-snt}{Section'tie2.2} -'xrdef {Collating Elements vs. Characters-pg}{6} -'xrdef {Collating Elements vs. Characters-snt}{Section'tie2.3} -'xrdef {The Backslash Character-pg}{7} -'xrdef {The Backslash Character-snt}{Section'tie2.4} -'xrdef {Common Operators-pg}{9} -'xrdef {Common Operators-snt}{Chapter'tie3} -'xrdef {Match-self Operator-pg}{9} -'xrdef {Match-self Operator-snt}{Section'tie3.1} -'xrdef {Match-any-character Operator-pg}{9} -'xrdef {Match-any-character Operator-snt}{Section'tie3.2} -'xrdef {Concatenation Operator-pg}{10} -'xrdef {Concatenation Operator-snt}{Section'tie3.3} -'xrdef {Repetition Operators-pg}{10} -'xrdef {Repetition Operators-snt}{Section'tie3.4} -'xrdef {Match-zero-or-more Operator-pg}{10} -'xrdef {Match-zero-or-more Operator-snt}{Section'tie3.4.1} -'xrdef {Match-one-or-more Operator-pg}{11} -'xrdef {Match-one-or-more Operator-snt}{Section'tie3.4.2} -'xrdef {Match-zero-or-one Operator-pg}{11} -'xrdef {Match-zero-or-one Operator-snt}{Section'tie3.4.3} -'xrdef {Interval Operators-pg}{12} -'xrdef {Interval Operators-snt}{Section'tie3.4.4} -'xrdef {Alternation Operator-pg}{13} -'xrdef {Alternation Operator-snt}{Section'tie3.5} -'xrdef {List Operators-pg}{13} -'xrdef {List 
Operators-snt}{Section'tie3.6} -'xrdef {Character Class Operators-pg}{14} -'xrdef {Character Class Operators-snt}{Section'tie3.6.1} -'xrdef {Range Operator-pg}{15} -'xrdef {Range Operator-snt}{Section'tie3.6.2} -'xrdef {Grouping Operators-pg}{16} -'xrdef {Grouping Operators-snt}{Section'tie3.7} -'xrdef {Back-reference Operator-pg}{17} -'xrdef {Back-reference Operator-snt}{Section'tie3.8} -'xrdef {Anchoring Operators-pg}{18} -'xrdef {Anchoring Operators-snt}{Section'tie3.9} -'xrdef {Match-beginning-of-line Operator-pg}{18} -'xrdef {Match-beginning-of-line Operator-snt}{Section'tie3.9.1} -'xrdef {Match-end-of-line Operator-pg}{18} -'xrdef {Match-end-of-line Operator-snt}{Section'tie3.9.2} -'xrdef {GNU Operators-pg}{20} -'xrdef {GNU Operators-snt}{Chapter'tie4} -'xrdef {Word Operators-pg}{20} -'xrdef {Word Operators-snt}{Section'tie4.1} -'xrdef {Non-Emacs Syntax Tables-pg}{20} -'xrdef {Non-Emacs Syntax Tables-snt}{Section'tie4.1.1} -'xrdef {Match-word-boundary Operator-pg}{20} -'xrdef {Match-word-boundary Operator-snt}{Section'tie4.1.2} -'xrdef {Match-within-word Operator-pg}{20} -'xrdef {Match-within-word Operator-snt}{Section'tie4.1.3} -'xrdef {Match-beginning-of-word Operator-pg}{21} -'xrdef {Match-beginning-of-word Operator-snt}{Section'tie4.1.4} -'xrdef {Match-end-of-word Operator-pg}{21} -'xrdef {Match-end-of-word Operator-snt}{Section'tie4.1.5} -'xrdef {Match-word-constituent Operator-pg}{21} -'xrdef {Match-word-constituent Operator-snt}{Section'tie4.1.6} -'xrdef {Match-non-word-constituent Operator-pg}{21} -'xrdef {Match-non-word-constituent Operator-snt}{Section'tie4.1.7} -'xrdef {Buffer Operators-pg}{21} -'xrdef {Buffer Operators-snt}{Section'tie4.2} -'xrdef {Match-beginning-of-buffer Operator-pg}{21} -'xrdef {Match-beginning-of-buffer Operator-snt}{Section'tie4.2.1} -'xrdef {Match-end-of-buffer Operator-pg}{21} -'xrdef {Match-end-of-buffer Operator-snt}{Section'tie4.2.2} -'xrdef {GNU Emacs Operators-pg}{22} -'xrdef {GNU Emacs Operators-snt}{Chapter'tie5} 
-'xrdef {Syntactic Class Operators-pg}{22} -'xrdef {Syntactic Class Operators-snt}{Section'tie5.1} -'xrdef {Emacs Syntax Tables-pg}{22} -'xrdef {Emacs Syntax Tables-snt}{Section'tie5.1.1} -'xrdef {Match-syntactic-class Operator-pg}{22} -'xrdef {Match-syntactic-class Operator-snt}{Section'tie5.1.2} -'xrdef {Match-not-syntactic-class Operator-pg}{22} -'xrdef {Match-not-syntactic-class Operator-snt}{Section'tie5.1.3} -'xrdef {What Gets Matched?-pg}{23} -'xrdef {What Gets Matched?-snt}{Chapter'tie6} -'xrdef {Programming with Regex-pg}{24} -'xrdef {Programming with Regex-snt}{Chapter'tie7} -'xrdef {GNU Regex Functions-pg}{24} -'xrdef {GNU Regex Functions-snt}{Section'tie7.1} -'xrdef {GNU Pattern Buffers-pg}{24} -'xrdef {GNU Pattern Buffers-snt}{Section'tie7.1.1} -'xrdef {GNU Regular Expression Compiling-pg}{26} -'xrdef {GNU Regular Expression Compiling-snt}{Section'tie7.1.2} -'xrdef {GNU Matching-pg}{27} -'xrdef {GNU Matching-snt}{Section'tie7.1.3} -'xrdef {GNU Searching-pg}{28} -'xrdef {GNU Searching-snt}{Section'tie7.1.4} -'xrdef {Matching/Searching with Split Data-pg}{29} -'xrdef {Matching/Searching with Split Data-snt}{Section'tie7.1.5} -'xrdef {Searching with Fastmaps-pg}{30} -'xrdef {Searching with Fastmaps-snt}{Section'tie7.1.6} -'xrdef {GNU Translate Tables-pg}{31} -'xrdef {GNU Translate Tables-snt}{Section'tie7.1.7} -'xrdef {Using Registers-pg}{32} -'xrdef {Using Registers-snt}{Section'tie7.1.8} -'xrdef {Freeing GNU Pattern Buffers-pg}{34} -'xrdef {Freeing GNU Pattern Buffers-snt}{Section'tie7.1.9} -'xrdef {POSIX Regex Functions-pg}{35} -'xrdef {POSIX Regex Functions-snt}{Section'tie7.2} -'xrdef {POSIX Pattern Buffers-pg}{35} -'xrdef {POSIX Pattern Buffers-snt}{Section'tie7.2.1} -'xrdef {POSIX Regular Expression Compiling-pg}{35} -'xrdef {POSIX Regular Expression Compiling-snt}{Section'tie7.2.2} -'xrdef {POSIX Matching-pg}{37} -'xrdef {POSIX Matching-snt}{Section'tie7.2.3} -'xrdef {Reporting Errors-pg}{38} -'xrdef {Reporting Errors-snt}{Section'tie7.2.4} 
-'xrdef {Using Byte Offsets-pg}{39} -'xrdef {Using Byte Offsets-snt}{Section'tie7.2.5} -'xrdef {Freeing POSIX Pattern Buffers-pg}{39} -'xrdef {Freeing POSIX Pattern Buffers-snt}{Section'tie7.2.6} -'xrdef {BSD Regex Functions-pg}{40} -'xrdef {BSD Regex Functions-snt}{Section'tie7.3} -'xrdef {BSD Regular Expression Compiling-pg}{40} -'xrdef {BSD Regular Expression Compiling-snt}{Section'tie7.3.1} -'xrdef {BSD Searching-pg}{40} -'xrdef {BSD Searching-snt}{Section'tie7.3.2} -'xrdef {Copying-pg}{42} -'xrdef {Copying-snt}{Appendix'tie'char65{}} -'xrdef {Copying-pg}{42} -'xrdef {Copying-snt}{} -'xrdef {Copying-pg}{43} -'xrdef {Copying-snt}{} -'xrdef {Copying-pg}{48} -'xrdef {Copying-snt}{} -'xrdef {Index-pg}{50} -'xrdef {Index-snt}{} diff --git a/gnu/libregex/doc/regex.cps b/gnu/libregex/doc/regex.cps deleted file mode 100644 index 8b2e57c64e47..000000000000 --- a/gnu/libregex/doc/regex.cps +++ /dev/null @@ -1,152 +0,0 @@ -\initial {$} -\entry {\code {$}}{18} -\initial {(} -\entry {\code {(}}{16} -\initial {)} -\entry {\code {)}}{16} -\initial {*} -\entry {\samp {*}}{10} -\initial {-} -\entry {\samp {-}}{13} -\initial {.} -\entry {\samp {.}}{9} -\initial {:} -\entry {\samp {:]} in regex}{14} -\initial {?} -\entry {\samp {?}}{11} -\initial {[} -\entry {\samp {[}}{13} -\entry {\samp {[:} in regex}{14} -\entry {\samp {[{\tt\hat}}}{13} -\initial {]} -\entry {\samp {]}}{13} -\initial {{\tt\char'173}} -\entry {\samp {{\tt\char'173}}}{12} -\initial {{\tt\char'174}} -\entry {\code {{\tt\char'174}}}{13} -\initial {{\tt\char'175}} -\entry {\samp {{\tt\char'175}}}{12} -\initial {{\tt\char43}} -\entry {\samp {{\tt\char43}}}{11} -\initial {{\tt\hat}} -\entry {\samp {{\tt\hat}}}{13} -\entry {\code {{\tt\hat}}}{18} -\initial {{\tt\indexbackslash }} -\entry {{\tt\indexbackslash }}{7} -\entry {\samp {{\tt\indexbackslash }}}{13} -\entry {\samp {{\tt\indexbackslash }'}}{21} -\entry {\code {{\tt\indexbackslash }(}}{16} -\entry {\code {{\tt\indexbackslash })}}{16} -\entry {\samp 
{{\tt\indexbackslash }`}}{21} -\entry {\samp {{\tt\indexbackslash }{\tt\char'173}}}{12} -\entry {\code {{\tt\indexbackslash }{\tt\char'174}}}{13} -\entry {\samp {{\tt\indexbackslash }{\tt\char'175}}}{12} -\entry {\samp {{\tt\indexbackslash }{\tt\gtr}}}{21} -\entry {\samp {{\tt\indexbackslash }{\tt\less}}}{21} -\entry {\samp {{\tt\indexbackslash }b}}{20} -\entry {\samp {{\tt\indexbackslash }B}}{20} -\entry {\samp {{\tt\indexbackslash }s}}{22} -\entry {\samp {{\tt\indexbackslash }S}}{22} -\entry {\samp {{\tt\indexbackslash }w}}{21} -\entry {\samp {{\tt\indexbackslash }W}}{21} -\initial {A} -\entry {\code {allocated \r {initialization}}}{26} -\entry {alternation operator}{13} -\entry {alternation operator and \samp {{\tt\hat}}}{18} -\entry {anchoring}{18} -\entry {anchors}{18} -\entry {Awk}{5} -\initial {B} -\entry {back references}{17} -\entry {backtracking}{10, 13} -\entry {beginning-of-line operator}{18} -\entry {bracket expression}{13} -\entry {\code {buffer \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {\code {buffer \r {initialization}}}{26} -\initial {C} -\entry {character classes}{14} -\initial {E} -\entry {Egrep}{5} -\entry {Emacs}{5} -\entry {end-of-line operator}{18} -\entry {\code {end\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\initial {F} -\entry {\code {fastmap \r {initialization}}}{26} -\entry {\code {fastmap{\_}accurate \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {fastmaps}{30} -\initial {G} -\entry {Grep}{5} -\entry {grouping}{16} -\initial {I} -\entry {ignoring case}{35} -\entry {interval expression}{12} -\initial {M} -\entry {matching list}{13} -\entry {matching newline}{13} -\entry {matching with GNU functions}{27} -\initial {N} -\entry {\code {newline{\_}anchor \r {field in pattern buffer}}}{18} -\entry {nonmatching list}{13} -\entry {\code {not{\_}bol \r {field in pattern buffer}}}{18} -\entry {\code 
{num_regs\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\initial {O} -\entry {open-group operator and \samp {{\tt\hat}}}{18} -\entry {or operator}{13} -\initial {P} -\entry {parenthesizing}{16} -\entry {pattern buffer initialization}{26} -\entry {pattern buffer, definition of}{24} -\entry {POSIX Awk}{5} -\initial {R} -\entry {\code {range \r {argument to \code {re{\_}search}}}}{28} -\entry {\code {re_registers}}{32} -\entry {\code {RE{\_}BACKSLASH{\_}ESCAPE{\_}IN{\_}LIST}}{3} -\entry {\code {RE{\_}BK{\_}PLUS{\_}QM}}{3} -\entry {\code {RE{\_}CHAR{\_}CLASSES}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS \r {(and \samp {{\tt\hat}})}}}{18} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}OPS}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INVALID{\_}OPS}}{3} -\entry {\code {RE{\_}DOT{\_}NEWLINE}}{3} -\entry {\code {RE{\_}DOT{\_}NOT{\_}NULL}}{4} -\entry {\code {RE{\_}INTERVALS}}{4} -\entry {\code {RE{\_}LIMITED{\_}OPS}}{4} -\entry {\code {RE{\_}NEWLINE{\_}ALT}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}BRACES}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}PARENS}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}REFS}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}VBAR}}{4} -\entry {\code {RE{\_}NO{\_}EMPTY{\_}RANGES}}{4} -\entry {\code {re{\_}nsub \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {\code {re{\_}pattern{\_}buffer \r {definition}}}{24} -\entry {\code {re{\_}syntax{\_}options \r {initialization}}}{26} -\entry {\code {RE{\_}UNMATCHED{\_}RIGHT{\_}PAREN{\_}ORD}}{4} -\entry {\code {REG{\_}EXTENDED}}{35} -\entry {\code {REG{\_}ICASE}}{35} -\entry {\code {REG{\_}NEWLINE}}{36} -\entry {\code {REG{\_}NOSUB}}{35} -\entry {\code {regex.c}}{1} -\entry {\code {regex.h}}{1} -\entry {regexp anchoring}{18} -\entry {\code {regmatch{\_}t}}{39} -\entry {\code {regs{\_}allocated}}{32} -\entry {\code {REGS{\_}FIXED}}{33} -\entry {\code 
{REGS{\_}REALLOCATE}}{32} -\entry {\code {REGS{\_}UNALLOCATED}}{32} -\entry {regular expressions, syntax of}{2} -\initial {S} -\entry {searching with GNU functions}{28} -\entry {\code {start \r {argument to \code {re{\_}search}}}}{28} -\entry {\code {start\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\entry {\code {struct re{\_}pattern{\_}buffer \r {definition}}}{24} -\entry {subexpressions}{16} -\entry {syntax bits}{2} -\entry {\code {syntax \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {syntax initialization}{26} -\entry {syntax of regular expressions}{2} -\initial {T} -\entry {\code {translate \r {initialization}}}{26} -\initial {U} -\entry {\code {used \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\initial {W} -\entry {word boundaries, matching}{20} diff --git a/gnu/libregex/doc/regex.info b/gnu/libregex/doc/regex.info deleted file mode 100644 index 90deedeaf44f..000000000000 --- a/gnu/libregex/doc/regex.info +++ /dev/null @@ -1,2836 +0,0 @@ -This is Info file regex.info, produced by Makeinfo-1.52 from the input -file .././doc/regex.texi. - - This file documents the GNU regular expression library. - - Copyright (C) 1992, 1993 Free Software Foundation, Inc. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - - Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled "GNU General Public License" is included exactly as in -the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this -one. 
- - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that the section entitled "GNU General Public License" -may be included in a translation approved by the Free Software -Foundation instead of in the original English. - - -File: regex.info, Node: Top, Next: Overview, Prev: (dir), Up: (dir) - -Regular Expression Library -************************** - - This manual documents how to program with the GNU regular expression -library. This is edition 0.12a of the manual, 19 September 1992. - - The first part of this master menu lists the major nodes in this Info -document, including the index. The rest of the menu lists all the -lower level nodes in the document. - -* Menu: - -* Overview:: -* Regular Expression Syntax:: -* Common Operators:: -* GNU Operators:: -* GNU Emacs Operators:: -* What Gets Matched?:: -* Programming with Regex:: -* Copying:: Copying and sharing Regex. -* Index:: General index. - -- The Detailed Node Listing -- - -Regular Expression Syntax - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - -Common Operators - -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? {} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - -Repetition Operators - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: {} - -List Operators (`[' ... `]' and `[^' ... 
`]') - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - -Anchoring Operators - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - -GNU Operators - -* Word Operators:: -* Buffer Operators:: - -Word Operators - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - -Buffer Operators - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - -GNU Emacs Operators - -* Syntactic Class Operators:: - -Syntactic Class Operators - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - -Programming with Regex - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - -GNU Regex Functions - -* GNU Pattern Buffers:: The re_pattern_buffer type. -* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - -POSIX Regex Functions - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. 
-* Freeing POSIX Pattern Buffers:: regfree () - -BSD Regex Functions - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () - - -File: regex.info, Node: Overview, Next: Regular Expression Syntax, Prev: Top, Up: Top - -Overview -******** - - A "regular expression" (or "regexp", or "pattern") is a text string -that describes some (mathematical) set of strings. A regexp R -"matches" a string S if S is in the set of strings described by R. - - Using the Regex library, you can: - - * see if a string matches a specified pattern as a whole, and - - * search within a string for a substring matching a specified - pattern. - - Some regular expressions match only one string, i.e., the set they -describe has only one member. For example, the regular expression -`foo' matches the string `foo' and no others. Other regular -expressions match more than one string, i.e., the set they describe has -more than one member. For example, the regular expression `f*' matches -the set of strings made up of any number (including zero) of `f's. As -you can see, some characters in regular expressions match themselves -(such as `f') and some don't (such as `*'); the ones that don't match -themselves instead let you specify patterns that describe many -different strings. - - To either match or search for a regular expression with the Regex -library functions, you must first compile it with a Regex pattern -compiling function. A "compiled pattern" is a regular expression -converted to the internal format used by the library functions. Once -you've compiled a pattern, you can use it for matching or searching any -number of times. - - The Regex library consists of two source files: `regex.h' and -`regex.c'. Regex provides three groups of functions with which you can -operate on regular expressions. 
One group--the GNU group--is more -powerful but not completely compatible with the other two, namely the -POSIX and Berkeley UNIX groups; its interface was designed specifically -for GNU. The other groups have the same interfaces as do the regular -expression functions in POSIX and Berkeley UNIX. - - We wrote this chapter with programmers in mind, not users of -programs--such as Emacs--that use Regex. We describe the Regex library -in its entirety, not how to write regular expressions that a particular -program understands. - - -File: regex.info, Node: Regular Expression Syntax, Next: Common Operators, Prev: Overview, Up: Top - -Regular Expression Syntax -************************* - - "Characters" are things you can type. "Operators" are things in a -regular expression that match one or more characters. You compose -regular expressions from operators, which in turn you specify using one -or more characters. - - Most characters represent what we call the match-self operator, i.e., -they match themselves; we call these characters "ordinary". Other -characters represent either all or parts of fancier operators; e.g., -`.' represents what we call the match-any-character operator (which, no -surprise, matches (almost) any character); we call these characters -"special". Two different things determine what characters represent -what operators: - - 1. the regular expression syntax your program has told the Regex - library to recognize, and - - 2. the context of the character in the regular expression. - - In the following sections, we describe these things in more detail. - -* Menu: - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - - -File: regex.info, Node: Syntax Bits, Next: Predefined Syntaxes, Up: Regular Expression Syntax - -Syntax Bits -=========== - - In any particular syntax for regular expressions, some characters are -always special, others are sometimes special, and others are never -special. 
The particular syntax that Regex recognizes for a given -regular expression depends on the value in the `syntax' field of the -pattern buffer of that regular expression. - - You get a pattern buffer by compiling a regular expression. *Note -GNU Pattern Buffers::, and *Note POSIX Pattern Buffers::, for more -information on pattern buffers. *Note GNU Regular Expression -Compiling::, *Note POSIX Regular Expression Compiling::, and *Note BSD -Regular Expression Compiling::, for more information on compiling. - - Regex considers the value of the `syntax' field to be a collection of -bits; we refer to these bits as "syntax bits". In most cases, they -affect what characters represent what operators. We describe the -meanings of the operators to which we refer in *Note Common Operators::, -*Note GNU Operators::, and *Note GNU Emacs Operators::. - - For reference, here is the complete list of syntax bits, in -alphabetical order: - -`RE_BACKSLASH_ESCAPE_IN_LISTS' - If this bit is set, then `\' inside a list (*note List Operators::.) - quotes (makes ordinary, if it's special) the following character; - if this bit isn't set, then `\' is an ordinary character inside - lists. (*Note The Backslash Character::, for what `\' does - outside of lists.) - -`RE_BK_PLUS_QM' - If this bit is set, then `\+' represents the match-one-or-more - operator and `\?' represents the match-zero-or-one operator; if - this bit isn't set, then `+' represents the match-one-or-more - operator and `?' represents the match-zero-or-one operator. This - bit is irrelevant if `RE_LIMITED_OPS' is set. - -`RE_CHAR_CLASSES' - If this bit is set, then you can use character classes in lists; - if this bit isn't set, then you can't. - -`RE_CONTEXT_INDEP_ANCHORS' - If this bit is set, then `^' and `$' are special anywhere outside - a list; if this bit isn't set, then these characters are special - only in certain contexts. *Note Match-beginning-of-line - Operator::, and *Note Match-end-of-line Operator::.
- -`RE_CONTEXT_INDEP_OPS' - If this bit is set, then certain characters are special anywhere - outside a list; if this bit isn't set, then those characters are - special only in some contexts and are ordinary elsewhere. - Specifically, if this bit isn't set then `*', and (if the syntax - bit `RE_LIMITED_OPS' isn't set) `+' and `?' (or `\+' and `\?', - depending on the syntax bit `RE_BK_PLUS_QM') represent repetition - operators only if they're not first in a regular expression or - just after an open-group or alternation operator. The same holds - for `{' (or `\{', depending on the syntax bit `RE_NO_BK_BRACES') if - it is the beginning of a valid interval and the syntax bit - `RE_INTERVALS' is set. - -`RE_CONTEXT_INVALID_OPS' - If this bit is set, then repetition and alternation operators - can't be in certain positions within a regular expression. - Specifically, the regular expression is invalid if it has: - - * a repetition operator first in the regular expression or just - after a match-beginning-of-line, open-group, or alternation - operator; or - - * an alternation operator first or last in the regular - expression, just before a match-end-of-line operator, or just - after an alternation or open-group operator. - - If this bit isn't set, then you can put the characters - representing the repetition and alternation characters anywhere in - a regular expression. Whether or not they will in fact be - operators in certain positions depends on other syntax bits. - -`RE_DOT_NEWLINE' - If this bit is set, then the match-any-character operator matches - a newline; if this bit isn't set, then it doesn't. - -`RE_DOT_NOT_NULL' - If this bit is set, then the match-any-character operator doesn't - match a null character; if this bit isn't set, then it does. - -`RE_INTERVALS' - If this bit is set, then Regex recognizes interval operators; if - this bit isn't set, then it doesn't. 
- -`RE_LIMITED_OPS' - If this bit is set, then Regex doesn't recognize the - match-one-or-more, match-zero-or-one or alternation operators; if - this bit isn't set, then it does. - -`RE_NEWLINE_ALT' - If this bit is set, then newline represents the alternation - operator; if this bit isn't set, then newline is ordinary. - -`RE_NO_BK_BRACES' - If this bit is set, then `{' represents the open-interval operator - and `}' represents the close-interval operator; if this bit isn't - set, then `\{' represents the open-interval operator and `\}' - represents the close-interval operator. This bit is relevant only - if `RE_INTERVALS' is set. - -`RE_NO_BK_PARENS' - If this bit is set, then `(' represents the open-group operator and - `)' represents the close-group operator; if this bit isn't set, - then `\(' represents the open-group operator and `\)' represents - the close-group operator. - -`RE_NO_BK_REFS' - If this bit is set, then Regex doesn't recognize `\'DIGIT as the - back reference operator; if this bit isn't set, then it does. - -`RE_NO_BK_VBAR' - If this bit is set, then `|' represents the alternation operator; - if this bit isn't set, then `\|' represents the alternation - operator. This bit is irrelevant if `RE_LIMITED_OPS' is set. - -`RE_NO_EMPTY_RANGES' - If this bit is set, then a regular expression with a range whose - ending point collates lower than its starting point is invalid; if - this bit isn't set, then Regex considers such a range to be empty. - -`RE_UNMATCHED_RIGHT_PAREN_ORD' - If this bit is set and the regular expression has no matching - open-group operator, then Regex considers what would otherwise be - a close-group operator (based on how `RE_NO_BK_PARENS' is set) to - match `)'. - - -File: regex.info, Node: Predefined Syntaxes, Next: Collating Elements vs. 
Characters, Prev: Syntax Bits, Up: Regular Expression Syntax - -Predefined Syntaxes -=================== - - If you're programming with Regex, you can set a pattern buffer's -(*note GNU Pattern Buffers::., and *Note POSIX Pattern Buffers::) -`syntax' field either to an arbitrary combination of syntax bits (*note -Syntax Bits::.) or else to the configurations defined by Regex. These -configurations define the syntaxes used by certain programs--GNU Emacs, -POSIX Awk, traditional Awk, Grep, Egrep--in addition to syntaxes for -POSIX basic and extended regular expressions. - - The predefined syntaxes-taken directly from `regex.h'--are: - - #define RE_SYNTAX_EMACS 0 - - #define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - - #define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - - #define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - - #define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - - #define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - - /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ - #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - - #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - - /* Syntax bits common to both basic and extended POSIX regex syntax. */ - #define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - - #define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - - /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. 
Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ - #define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - - #define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - - /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ - #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) - - -File: regex.info, Node: Collating Elements vs. Characters, Next: The Backslash Character, Prev: Predefined Syntaxes, Up: Regular Expression Syntax - -Collating Elements vs. Characters -================================= - - POSIX generalizes the notion of a character to that of a collating -element. It defines a "collating element" to be "a sequence of one or -more bytes defined in the current collating sequence as a unit of -collation." - - This generalizes the notion of a character in two ways. First, a -single character can map into two or more collating elements. For -example, the German "es-zet" collates as the collating element `s' -followed by another collating element `s'. Second, two or more -characters can map into one collating element. For example, the -Spanish `ll' collates after `l' and before `m'. - - Since POSIX's "collating element" preserves the essential idea of a -"character," we use the latter, more familiar, term in this document. - - -File: regex.info, Node: The Backslash Character, Prev: Collating Elements vs. 
Characters, Up: Regular Expression Syntax - -The Backslash Character -======================= - - The `\' character has one of four different meanings, depending on -the context in which you use it and what syntax bits are set (*note -Syntax Bits::.). It can: 1) stand for itself, 2) quote the next -character, 3) introduce an operator, or 4) do nothing. - - 1. It stands for itself inside a list (*note List Operators::.) if - the syntax bit `RE_BACKSLASH_ESCAPE_IN_LISTS' is not set. For - example, `[\]' would match `\'. - - 2. It quotes (makes ordinary, if it's special) the next character - when you use it either: - - * outside a list,(1) or - - * inside a list and the syntax bit - `RE_BACKSLASH_ESCAPE_IN_LISTS' is set. - - 3. It introduces an operator when followed by certain ordinary - characters--sometimes only when certain syntax bits are set. See - the cases `RE_BK_PLUS_QM', `RE_NO_BK_BRACES', `RE_NO_BK_VBAR', - `RE_NO_BK_PARENS', `RE_NO_BK_REFS' in *Note Syntax Bits::. Also: - - * `\b' represents the match-word-boundary operator (*note - Match-word-boundary Operator::.). - - * `\B' represents the match-within-word operator (*note - Match-within-word Operator::.). - - * `\<' represents the match-beginning-of-word operator - (*note Match-beginning-of-word Operator::.). - - * `\>' represents the match-end-of-word operator (*note - Match-end-of-word Operator::.). - - * `\w' represents the match-word-constituent operator (*note - Match-word-constituent Operator::.). - - * `\W' represents the match-non-word-constituent operator - (*note Match-non-word-constituent Operator::.). - - * `\`' represents the match-beginning-of-buffer operator and - `\'' represents the match-end-of-buffer operator (*note - Buffer Operators::.). - - * If Regex was compiled with the C preprocessor symbol `emacs' - defined, then `\sCLASS' represents the match-syntactic-class - operator and `\SCLASS' represents the - match-not-syntactic-class operator (*note Syntactic Class - Operators::.).
- - 4. In all other cases, Regex ignores `\'. For example, `\n' matches - `n'. - - - ---------- Footnotes ---------- - - (1) Sometimes you don't have to explicitly quote special characters -to make them ordinary. For instance, most characters lose any special -meaning inside a list (*note List Operators::.). In addition, if the -syntax bits `RE_CONTEXT_INVALID_OPS' and `RE_CONTEXT_INDEP_OPS' aren't -set, then (for historical reasons) the matcher considers special -characters ordinary if they are in contexts where the operations they -represent make no sense; for example, then the match-zero-or-more -operator (represented by `*') matches itself in the regular expression -`*foo' because there is no preceding expression on which it can -operate. It is poor practice, however, to depend on this behavior; if -you want a special character to be ordinary outside a list, it's better -to always quote it, regardless. - - -File: regex.info, Node: Common Operators, Next: GNU Operators, Prev: Regular Expression Syntax, Up: Top - -Common Operators -**************** - - You compose regular expressions from operators. In the following -sections, we describe the regular expression operators specified by -POSIX; GNU also uses these. Most operators have more than one -representation as characters. *Note Regular Expression Syntax::, for -what characters represent what operators under what circumstances. - - For most operators that can be represented in two ways, one -representation is a single character and the other is that character -preceded by `\'. For example, either `(' or `\(' represents the -open-group operator. Which one does depends on the setting of a syntax -bit, in this case `RE_NO_BK_PARENS'. Why is this so? Historical -reasons dictate some of the varying representations, while POSIX -dictates others. - - Finally, almost all characters lose any special meaning inside a list -(*note List Operators::.). - -* Menu: - -* Match-self Operator:: Ordinary characters. 
-* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? {} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - - -File: regex.info, Node: Match-self Operator, Next: Match-any-character Operator, Up: Common Operators - -The Match-self Operator (ORDINARY CHARACTER) -============================================ - - This operator matches the character itself. All ordinary characters -(*note Regular Expression Syntax::.) represent this operator. For -example, `f' is always an ordinary character, so the regular expression -`f' matches only the string `f'. In particular, it does *not* match -the string `ff'. - - -File: regex.info, Node: Match-any-character Operator, Next: Concatenation Operator, Prev: Match-self Operator, Up: Common Operators - -The Match-any-character Operator (`.') -====================================== - - This operator matches any single printing or nonprinting character -except it won't match a: - -newline - if the syntax bit `RE_DOT_NEWLINE' isn't set. - -null - if the syntax bit `RE_DOT_NOT_NULL' is set. - - The `.' (period) character represents this operator. For example, -`a.b' matches any three-character string beginning with `a' and ending -with `b'. - - -File: regex.info, Node: Concatenation Operator, Next: Repetition Operators, Prev: Match-any-character Operator, Up: Common Operators - -The Concatenation Operator -========================== - - This operator concatenates two regular expressions A and B. No -character represents this operator; you simply put B after A. The -result is a regular expression that will match a string if A matches -its first part and B matches the rest. For example, `xy' (two -match-self operators) matches `xy'. 
- - -File: regex.info, Node: Repetition Operators, Next: Alternation Operator, Prev: Concatenation Operator, Up: Common Operators - -Repetition Operators -==================== - - Repetition operators repeat the preceding regular expression a -specified number of times. - -* Menu: - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: {} - - -File: regex.info, Node: Match-zero-or-more Operator, Next: Match-one-or-more Operator, Up: Repetition Operators - -The Match-zero-or-more Operator (`*') -------------------------------------- - - This operator repeats the smallest possible preceding regular -expression as many times as necessary (including zero) to match the -pattern. `*' represents this operator. For example, `o*' matches any -string made up of zero or more `o's. Since this operator operates on -the smallest preceding regular expression, `fo*' has a repeating `o', -not a repeating `fo'. So, `fo*' matches `f', `fo', `foo', and so on. - - Since the match-zero-or-more operator is a suffix operator, it may be -useless as such when no regular expression precedes it. This is the -case when it: - - * is first in a regular expression, or - - * follows a match-beginning-of-line, open-group, or alternation - operator. - -Three different things can happen in these cases: - - 1. If the syntax bit `RE_CONTEXT_INVALID_OPS' is set, then the - regular expression is invalid. - - 2. If `RE_CONTEXT_INVALID_OPS' isn't set, but `RE_CONTEXT_INDEP_OPS' - is, then `*' represents the match-zero-or-more operator (which - then operates on the empty string). - - 3. Otherwise, `*' is ordinary. - - - The matcher processes a match-zero-or-more operator by first matching -as many repetitions of the smallest preceding regular expression as it -can. Then it continues to match the rest of the pattern. 
- - If it can't match the rest of the pattern, it backtracks (as many -times as necessary), each time discarding one of the matches until it -can either match the entire pattern or be certain that it cannot get a -match. For example, when matching `ca*ar' against `caaar', the matcher -first matches all three `a's of the string with the `a*' of the regular -expression. However, it cannot then match the final `ar' of the -regular expression against the final `r' of the string. So it -backtracks, discarding the match of the last `a' in the string. It can -then match the remaining `ar'. - - -File: regex.info, Node: Match-one-or-more Operator, Next: Match-zero-or-one Operator, Prev: Match-zero-or-more Operator, Up: Repetition Operators - -The Match-one-or-more Operator (`+' or `\+') --------------------------------------------- - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM' -isn't set, then `+' represents this operator; if it is, then `\+' does. - - This operator is similar to the match-zero-or-more operator except -that it repeats the preceding regular expression at least once; *note -Match-zero-or-more Operator::., for what it operates on, how some -syntax bits affect it, and how Regex backtracks to match it. - - For example, supposing that `+' represents the match-one-or-more -operator; then `ca+r' matches, e.g., `car' and `caaaar', but not `cr'. - - -File: regex.info, Node: Match-zero-or-one Operator, Next: Interval Operators, Prev: Match-one-or-more Operator, Up: Repetition Operators - -The Match-zero-or-one Operator (`?' or `\?') --------------------------------------------- - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM' -isn't set, then `?' represents this operator; if it is, then `\?' does. 
- - This operator is similar to the match-zero-or-more operator except -that it repeats the preceding regular expression once or not at all; -*note Match-zero-or-more Operator::., to see what it operates on, how -some syntax bits affect it, and how Regex backtracks to match it. - - For example, supposing that `?' represents the match-zero-or-one -operator; then `ca?r' matches both `car' and `cr', but nothing else. - - -File: regex.info, Node: Interval Operators, Prev: Match-zero-or-one Operator, Up: Repetition Operators - -Interval Operators (`{' ... `}' or `\{' ... `\}') -------------------------------------------------- - - If the syntax bit `RE_INTERVALS' is set, then Regex recognizes -"interval expressions". They repeat the smallest possible preceding -regular expression a specified number of times. - - If the syntax bit `RE_NO_BK_BRACES' is set, `{' represents the -"open-interval operator" and `}' represents the "close-interval -operator" ; otherwise, `\{' and `\}' do. - - Specifically, supposing that `{' and `}' represent the open-interval -and close-interval operators; then: - -`{COUNT}' - matches exactly COUNT occurrences of the preceding regular - expression. - -`{MIN,}' - matches MIN or more occurrences of the preceding regular - expression. - -`{MIN, MAX}' - matches at least MIN but no more than MAX occurrences of the - preceding regular expression. - - The interval expression (but not necessarily the regular expression -that contains it) is invalid if: - - * MIN is greater than MAX, or - - * any of COUNT, MIN, or MAX are outside the range zero to - `RE_DUP_MAX' (which symbol `regex.h' defines). - - If the interval expression is invalid and the syntax bit -`RE_NO_BK_BRACES' is set, then Regex considers all the characters in -the would-be interval to be ordinary. If that bit isn't set, then the -regular expression is invalid. 
- - If the interval expression is valid but there is no preceding regular -expression on which to operate, then if the syntax bit -`RE_CONTEXT_INVALID_OPS' is set, the regular expression is invalid. If -that bit isn't set, then Regex considers all the characters--other than -backslashes, which it ignores--in the would-be interval to be ordinary. - - -File: regex.info, Node: Alternation Operator, Next: List Operators, Prev: Repetition Operators, Up: Common Operators - -The Alternation Operator (`|' or `\|') -====================================== - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_NO_BK_VBAR' -is set, then `|' represents this operator; otherwise, `\|' does. - - Alternatives match one of a choice of regular expressions: if you put -the character(s) representing the alternation operator between any two -regular expressions A and B, the result matches the union of the -strings that A and B match. For example, supposing that `|' is the -alternation operator, then `foo|bar|quux' would match any of `foo', -`bar' or `quux'. - - The alternation operator operates on the *largest* possible -surrounding regular expressions. (Put another way, it has the lowest -precedence of any regular expression operator.) Thus, the only way you -can delimit its arguments is to use grouping. For example, if `(' and -`)' are the open and close-group operators, then `fo(o|b)ar' would -match either `fooar' or `fobar'. (`foo|bar' would match `foo' or -`bar'.) - - The matcher usually tries all combinations of alternatives so as to -match the longest possible string. For example, when matching -`(fooq|foo)*(qbarquux|bar)' against `fooqbarquux', it cannot take, say, -the first ("depth-first") combination it could match, since then it -would be content to match just `fooqbar'. 
- - -File: regex.info, Node: List Operators, Next: Grouping Operators, Prev: Alternation Operator, Up: Common Operators - -List Operators (`[' ... `]' and `[^' ... `]') -============================================= - - "Lists", also called "bracket expressions", are a set of one or more -items. An "item" is a character, a character class expression, or a -range expression. The syntax bits affect which kinds of items you can -put in a list. We explain the last two items in subsections below. -Empty lists are invalid. - - A "matching list" matches a single character represented by one of -the list items. You form a matching list by enclosing one or more items -within an "open-matching-list operator" (represented by `[') and a -"close-list operator" (represented by `]'). - - For example, `[ab]' matches either `a' or `b'. `[ad]*' matches the -empty string and any string composed of just `a's and `d's in any -order. Regex considers invalid a regular expression with a `[' but no -matching `]'. - - "Nonmatching lists" are similar to matching lists except that they -match a single character *not* represented by one of the list items. -You use an "open-nonmatching-list operator" (represented by `[^'(1)) -instead of an open-matching-list operator to start a nonmatching list. - - For example, `[^ab]' matches any character except `a' or `b'. - - If the `posix_newline' field in the pattern buffer (*note GNU Pattern -Buffers::. is set, then nonmatching lists do not match a newline. - - Most characters lose any special meaning inside a list. The special -characters inside a list follow. - -`]' - ends the list if it's not the first list item. So, if you want to - make the `]' character a list item, you must put it first. - -`\' - quotes the next character if the syntax bit - `RE_BACKSLASH_ESCAPE_IN_LISTS' is set. - -`[:' - represents the open-character-class operator (*note Character - Class Operators::.) 
if the syntax bit `RE_CHAR_CLASSES' is set and - what follows is a valid character class expression. - -`:]' - represents the close-character-class operator if the syntax bit - `RE_CHAR_CLASSES' is set and what precedes it is an - open-character-class operator followed by a valid character class - name. - -`-' - represents the range operator (*note Range Operator::.) if it's - not first or last in a list or the ending point of a range. - -All other characters are ordinary. For example, `[.*]' matches `.' and -`*'. - -* Menu: - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - - ---------- Footnotes ---------- - - (1) Regex therefore doesn't consider the `^' to be the first -character in the list. If you put a `^' character first in (what you -think is) a matching list, you'll turn it into a nonmatching list. - - -File: regex.info, Node: Character Class Operators, Next: Range Operator, Up: List Operators - -Character Class Operators (`[:' ... `:]') ------------------------------------------ - - If the syntax bit `RE_CHAR_CLASSES' is set, then Regex -recognizes character class expressions inside lists. A "character -class expression" matches one character from a given class. You form a -character class expression by putting a character class name between an -"open-character-class operator" (represented by `[:') and a -"close-character-class operator" (represented by `:]').
The character -class names and their meanings are: - -`alnum' - letters and digits - -`alpha' - letters - -`blank' - system-dependent; for GNU, a space or tab - -`cntrl' - control characters (in the ASCII encoding, code 0177 and codes - less than 040) - -`digit' - digits - -`graph' - same as `print' except omits space - -`lower' - lowercase letters - -`print' - printable characters (in the ASCII encoding, space through tilde--codes - 040 through 0176) - -`punct' - neither control nor alphanumeric characters - -`space' - space, tab, carriage return, newline, vertical tab, and form feed - -`upper' - uppercase letters - -`xdigit' - hexadecimal digits: `0'-`9', `a'-`f', `A'-`F' - -These correspond to the definitions in the C library's `<ctype.h>' -facility. For example, `[:alpha:]' corresponds to the standard -facility `isalpha'. Regex recognizes character class expressions only -inside of lists; so `[[:alpha:]]' matches any letter, but `[:alpha:]' -outside of a bracket expression and not followed by a repetition -operator matches just itself. - - -File: regex.info, Node: Range Operator, Prev: Character Class Operators, Up: List Operators - -The Range Operator (`-') ------------------------- - - Regex recognizes "range expressions" inside a list. They represent -those characters that fall between two elements in the current -collating sequence. You form a range expression by putting a "range -operator" between two characters.(1) `-' represents the range operator. -For example, `a-f' within a list represents all the characters from `a' -through `f' inclusively. - - If the syntax bit `RE_NO_EMPTY_RANGES' is set, then if the range's -ending point collates less than its starting point, the range (and the -regular expression containing it) is invalid. For example, the regular -expression `[z-a]' would be invalid. If this bit isn't set, then Regex -considers such a range to be empty.
- - Since `-' represents the range operator, if you want to make a `-' -character itself a list item, you must do one of the following: - - * Put the `-' either first or last in the list. - - * Include a range whose starting point collates strictly lower than - `-' and whose ending point collates equal or higher. Unless a - range is the first item in a list, a `-' can't be its starting - point, but *can* be its ending point. That is because Regex - considers `-' to be the range operator unless it is preceded by - another `-'. For example, in the ASCII encoding, `)', `*', `+', - `,', `-', `.', and `/' are contiguous characters in the collating - sequence. You might think that `[)-+--/]' has two ranges: `)-+' - and `--/'. Rather, it has the ranges `)-+' and `+--', plus the - character `/', so it matches, e.g., `,', not `.'. - - * Put a range whose starting point is `-' first in the list. - - For example, `[-a-z]' matches a lowercase letter or a hyphen (in -English, in ASCII). - - ---------- Footnotes ---------- - - (1) You can't use a character class for the starting or ending point -of a range, since a character class is not a single character. - - -File: regex.info, Node: Grouping Operators, Next: Back-reference Operator, Prev: List Operators, Up: Common Operators - -Grouping Operators (`(' ... `)' or `\(' ... `\)') -================================================= - - A "group", also known as a "subexpression", consists of an -"open-group operator", any number of other operators, and a -"close-group operator". Regex treats this sequence as a unit, just as -mathematics and programming languages treat a parenthesized expression -as a unit. - - Therefore, using "groups", you can: - - * delimit the argument(s) to an alternation operator (*note - Alternation Operator::.) or a repetition operator (*note - Repetition Operators::.). - - * keep track of the indices of the substring that matched a given - group. *Note Using Registers::, for a precise explanation. 
This - lets you: - - * use the back-reference operator (*note Back-reference - Operator::.). - - * use registers (*note Using Registers::.). - - If the syntax bit `RE_NO_BK_PARENS' is set, then `(' represents the -open-group operator and `)' represents the close-group operator; -otherwise, `\(' and `\)' do. - - If the syntax bit `RE_UNMATCHED_RIGHT_PAREN_ORD' is set and a -close-group operator has no matching open-group operator, then Regex -considers it to match `)'. - - -File: regex.info, Node: Back-reference Operator, Next: Anchoring Operators, Prev: Grouping Operators, Up: Common Operators - -The Back-reference Operator ("\"DIGIT) -====================================== - - If the syntax bit `RE_NO_BK_REF' isn't set, then Regex recognizes -back references. A back reference matches a specified preceding group. -The back reference operator is represented by `\DIGIT' anywhere after -the end of a regular expression's DIGIT-th group (*note Grouping -Operators::.). - - DIGIT must be between `1' and `9'. The matcher assigns numbers 1 -through 9 to the first nine groups it encounters. By using one of `\1' -through `\9' after the corresponding group's close-group operator, you -can match a substring identical to the one that the group does. - - Back references match according to the following (in all examples -below, `(' represents the open-group, `)' the close-group, `{' the -open-interval and `}' the close-interval operator): - - * If the group matches a substring, the back reference matches an - identical substring. For example, `(a)\1' matches `aa' and - `(bana)na\1bo\1' matches `bananabanabobana'. Likewise, `(.*)\1' - matches any (newline-free if the syntax bit `RE_DOT_NEWLINE' isn't - set) string that is composed of two identical halves; the `(.*)' - matches the first half and the `\1' matches the second half. 
- - * If the group matches more than once (as it might if followed by, - e.g., a repetition operator), then the back reference matches the - substring the group *last* matched. For example, `((a*)b)*\1\2' - matches `aabababa'; first group 1 (the outer one) matches `aab' - and group 2 (the inner one) matches `aa'. Then group 1 matches - `ab' and group 2 matches `a'. So, `\1' matches `ab' and `\2' - matches `a'. - - * If the group doesn't participate in a match, i.e., it is part of an - alternative not taken or a repetition operator allows zero - repetitions of it, then the back reference makes the whole match - fail. For example, `(one()|two())-and-(three\2|four\3)' matches - `one-and-three' and `two-and-four', but not `one-and-four' or - `two-and-three'. For example, if the pattern matches `one-and-', - then its group 2 matches the empty string and its group 3 doesn't - participate in the match. So, if it then matches `four', then - when it tries to back reference group 3--which it will attempt to - do because `\3' follows the `four'--the match will fail because - group 3 didn't participate in the match. - - You can use a back reference as an argument to a repetition operator. -For example, `(a(b))\2*' matches `a' followed by one or more `b's. -Similarly, `(a(b))\2{3}' matches `abbbb'. - - If there is no preceding DIGIT-th subexpression, the regular -expression is invalid. - - -File: regex.info, Node: Anchoring Operators, Prev: Back-reference Operator, Up: Common Operators - -Anchoring Operators -=================== - - These operators can constrain a pattern to match only at the -beginning or end of the entire string or at the beginning or end of a -line.
- -* Menu: - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - - -File: regex.info, Node: Match-beginning-of-line Operator, Next: Match-end-of-line Operator, Up: Anchoring Operators - -The Match-beginning-of-line Operator (`^') ------------------------------------------- - - This operator can match the empty string either at the beginning of -the string or after a newline character. Thus, it is said to "anchor" -the pattern to the beginning of a line. - - In the cases following, `^' represents this operator. (Otherwise, -`^' is ordinary.) - - * It (the `^') is first in the pattern, as in `^foo'. - - * The syntax bit `RE_CONTEXT_INDEP_ANCHORS' is set, and it is outside - a bracket expression. - - * It follows an open-group or alternation operator, as in `a\(^b\)' - and `a\|^b'. *Note Grouping Operators::, and *Note Alternation - Operator::. - - These rules imply that some valid patterns containing `^' cannot be -matched; for example, `foo^bar' if `RE_CONTEXT_INDEP_ANCHORS' is set. - - If the `not_bol' field is set in the pattern buffer (*note GNU -Pattern Buffers::.), then `^' fails to match at the beginning of the -string. *Note POSIX Matching::, for when you might find this useful. - - If the `newline_anchor' field isn't set in the pattern buffer, then `^' -fails to match after a newline. This is useful when you do not regard -the string to be matched as broken into lines. - - -File: regex.info, Node: Match-end-of-line Operator, Prev: Match-beginning-of-line Operator, Up: Anchoring Operators - -The Match-end-of-line Operator (`$') ------------------------------------- - - This operator can match the empty string either at the end of the -string or before a newline character in the string. Thus, it is said -to "anchor" the pattern to the end of a line. - - It is always represented by `$'. For example, `foo$' usually -matches, e.g., `foo' and, e.g., the first three characters of -`foo\nbar'.
- - Its interaction with the syntax bits and pattern buffer fields is -exactly the dual of `^''s; see the previous section. (That is, -"beginning" becomes "end", "next" becomes "previous", and "after" -becomes "before".) - - -File: regex.info, Node: GNU Operators, Next: GNU Emacs Operators, Prev: Common Operators, Up: Top - -GNU Operators -************* - - Following are operators that GNU defines (and POSIX doesn't). - -* Menu: - -* Word Operators:: -* Buffer Operators:: - - -File: regex.info, Node: Word Operators, Next: Buffer Operators, Up: GNU Operators - -Word Operators -============== - - The operators in this section require Regex to recognize parts of -words. Regex uses a syntax table to determine whether or not a -character is part of a word, i.e., whether or not it is -"word-constituent". - -* Menu: - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - - -File: regex.info, Node: Non-Emacs Syntax Tables, Next: Match-word-boundary Operator, Up: Word Operators - -Non-Emacs Syntax Tables ------------------------ - - A "syntax table" is an array indexed by the characters in your -character set. In the ASCII encoding, therefore, a syntax table has -256 elements. Regex always uses a `char *' variable `re_syntax_table' -as its syntax table. In some cases, it initializes this variable and -in others it expects you to initialize it. - - * If Regex is compiled with the preprocessor symbols `emacs' and - `SYNTAX_TABLE' both undefined, then Regex allocates - `re_syntax_table' and initializes an element I either to `Sword' - (which it defines) if I is a letter, number, or `_', or to zero if - it's not. 
- - * If Regex is compiled with `emacs' undefined but `SYNTAX_TABLE' - defined, then Regex expects you to define a `char *' variable - `re_syntax_table' to be a valid syntax table. - - * *Note Emacs Syntax Tables::, for what happens when Regex is - compiled with the preprocessor symbol `emacs' defined. - - -File: regex.info, Node: Match-word-boundary Operator, Next: Match-within-word Operator, Prev: Non-Emacs Syntax Tables, Up: Word Operators - -The Match-word-boundary Operator (`\b') ---------------------------------------- - - This operator (represented by `\b') matches the empty string at -either the beginning or the end of a word. For example, `\brat\b' -matches the separate word `rat'. - - -File: regex.info, Node: Match-within-word Operator, Next: Match-beginning-of-word Operator, Prev: Match-word-boundary Operator, Up: Word Operators - -The Match-within-word Operator (`\B') -------------------------------------- - - This operator (represented by `\B') matches the empty string within a -word. For example, `c\Brat\Be' matches `crate', but `dirty \Brat' -doesn't match `dirty rat'. - - -File: regex.info, Node: Match-beginning-of-word Operator, Next: Match-end-of-word Operator, Prev: Match-within-word Operator, Up: Word Operators - -The Match-beginning-of-word Operator (`\<') -------------------------------------------- - - This operator (represented by `\<') matches the empty string at the -beginning of a word. - - -File: regex.info, Node: Match-end-of-word Operator, Next: Match-word-constituent Operator, Prev: Match-beginning-of-word Operator, Up: Word Operators - -The Match-end-of-word Operator (`\>') -------------------------------------- - - This operator (represented by `\>') matches the empty string at the -end of a word. 
- - -File: regex.info, Node: Match-word-constituent Operator, Next: Match-non-word-constituent Operator, Prev: Match-end-of-word Operator, Up: Word Operators - -The Match-word-constituent Operator (`\w') ------------------------------------------- - - This operator (represented by `\w') matches any word-constituent -character. - - -File: regex.info, Node: Match-non-word-constituent Operator, Prev: Match-word-constituent Operator, Up: Word Operators - -The Match-non-word-constituent Operator (`\W') ----------------------------------------------- - - This operator (represented by `\W') matches any character that is not -word-constituent. - - -File: regex.info, Node: Buffer Operators, Prev: Word Operators, Up: GNU Operators - -Buffer Operators -================ - - Following are operators which work on buffers. In Emacs, a "buffer" -is, naturally, an Emacs buffer. For other programs, Regex considers the -entire string to be matched as the buffer. - -* Menu: - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - - -File: regex.info, Node: Match-beginning-of-buffer Operator, Next: Match-end-of-buffer Operator, Up: Buffer Operators - -The Match-beginning-of-buffer Operator (`\`') ---------------------------------------------- - - This operator (represented by `\`') matches the empty string at the -beginning of the buffer. - - -File: regex.info, Node: Match-end-of-buffer Operator, Prev: Match-beginning-of-buffer Operator, Up: Buffer Operators - -The Match-end-of-buffer Operator (`\'') ---------------------------------------- - - This operator (represented by `\'') matches the empty string at the -end of the buffer. - - -File: regex.info, Node: GNU Emacs Operators, Next: What Gets Matched?, Prev: GNU Operators, Up: Top - -GNU Emacs Operators -******************* - - Following are operators that GNU defines (and POSIX doesn't) that you -can use only when Regex is compiled with the preprocessor symbol -`emacs' defined. 
- -* Menu: - -* Syntactic Class Operators:: - - -File: regex.info, Node: Syntactic Class Operators, Up: GNU Emacs Operators - -Syntactic Class Operators -========================= - - The operators in this section require Regex to recognize the syntactic -classes of characters. Regex uses a syntax table to determine this. - -* Menu: - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - - -File: regex.info, Node: Emacs Syntax Tables, Next: Match-syntactic-class Operator, Up: Syntactic Class Operators - -Emacs Syntax Tables -------------------- - - A "syntax table" is an array indexed by the characters in your -character set. In the ASCII encoding, therefore, a syntax table has -256 elements. - - If Regex is compiled with the preprocessor symbol `emacs' defined, -then Regex expects you to define and initialize the variable -`re_syntax_table' to be an Emacs syntax table. Emacs' syntax tables -are more complicated than Regex's own (*note Non-Emacs Syntax -Tables::.). *Note Syntax: (emacs)Syntax, for a description of Emacs' -syntax tables. - - -File: regex.info, Node: Match-syntactic-class Operator, Next: Match-not-syntactic-class Operator, Prev: Emacs Syntax Tables, Up: Syntactic Class Operators - -The Match-syntactic-class Operator (`\s'CLASS) ----------------------------------------------- - - This operator matches any character whose syntactic class is -represented by a specified character. `\sCLASS' represents this -operator where CLASS is the character representing the syntactic class -you want. For example, `w' represents the syntactic class of -word-constituent characters, so `\sw' matches any word-constituent -character. 
- - -File: regex.info, Node: Match-not-syntactic-class Operator, Prev: Match-syntactic-class Operator, Up: Syntactic Class Operators - -The Match-not-syntactic-class Operator (`\S'CLASS) --------------------------------------------------- - - This operator is similar to the match-syntactic-class operator except -that it matches any character whose syntactic class is *not* -represented by the specified character. `\SCLASS' represents this -operator. For example, `w' represents the syntactic class of -word-constituent characters, so `\Sw' matches any character that is not -word-constituent. - - -File: regex.info, Node: What Gets Matched?, Next: Programming with Regex, Prev: GNU Emacs Operators, Up: Top - -What Gets Matched? -****************** - - Regex usually matches strings according to the "leftmost longest" -rule; that is, it chooses the longest of the leftmost matches. This -does not mean that for a regular expression containing subexpressions -that it simply chooses the longest match for each subexpression, left to -right; the overall match must also be the longest possible one. - - For example, `(ac*)(c*d[ac]*)\1' matches `acdacaaa', not `acdac', as -it would if it were to choose the longest match for the first -subexpression. - - -File: regex.info, Node: Programming with Regex, Next: Copying, Prev: What Gets Matched?, Up: Top - -Programming with Regex -********************** - - Here we describe how you use the Regex data structures and functions -in C programs. Regex has three interfaces: one designed for GNU, one -compatible with POSIX and one compatible with Berkeley UNIX. - -* Menu: - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - - -File: regex.info, Node: GNU Regex Functions, Next: POSIX Regex Functions, Up: Programming with Regex - -GNU Regex Functions -=================== - - If you're writing code that doesn't need to be compatible with either -POSIX or Berkeley UNIX, you can use these functions. 
They provide more -options than the other interfaces. - -* Menu: - -* GNU Pattern Buffers:: The re_pattern_buffer type. -* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - - -File: regex.info, Node: GNU Pattern Buffers, Next: GNU Regular Expression Compiling, Up: GNU Regex Functions - -GNU Pattern Buffers -------------------- - - To compile, match, or search for a given regular expression, you must -supply a pattern buffer. A "pattern buffer" holds one compiled regular -expression.(1) - - You can have several different pattern buffers simultaneously, each -holding a compiled pattern for a different regular expression. - - `regex.h' defines the pattern buffer `struct' as follows: - - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. 
- Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ - #define REGS_UNALLOCATED 0 - #define REGS_REALLOCATE 1 - #define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - - ---------- Footnotes ---------- - - (1) Regular expressions are also referred to as "patterns," hence -the name "pattern buffer." - - -File: regex.info, Node: GNU Regular Expression Compiling, Next: GNU Matching, Prev: GNU Pattern Buffers, Up: GNU Regex Functions - -GNU Regular Expression Compiling --------------------------------- - - In GNU, you can both match and search for a given regular expression. -To do either, you must first compile it in a pattern buffer (*note GNU -Pattern Buffers::.). - - Regular expressions match according to the syntax with which they were -compiled; with GNU, you indicate what syntax you want by setting the -variable `re_syntax_options' (declared in `regex.h' and defined in -`regex.c') before calling the compiling function, `re_compile_pattern' -(see below). *Note Syntax Bits::, and *Note Predefined Syntaxes::. 
- - You can change the value of `re_syntax_options' at any time. -Usually, however, you set its value once and then never change it. - - `re_compile_pattern' takes a pattern buffer as an argument. You must -initialize the following fields: - -`translate initialization' -`translate' - Initialize this to point to a translate table if you want one, or - to zero if you don't. We explain translate tables in *Note GNU - Translate Tables::. - -`fastmap' - Initialize this to nonzero if you want a fastmap, or to zero if you - don't. - -`buffer' -`allocated' - If you want `re_compile_pattern' to allocate memory for the - compiled pattern, set both of these to zero. If you have an - existing block of memory (allocated with `malloc') you want Regex - to use, set `buffer' to its address and `allocated' to its size (in - bytes). - - `re_compile_pattern' uses `realloc' to extend the space for the - compiled pattern as necessary. - - To compile a pattern buffer, use: - - char * - re_compile_pattern (const char *REGEX, const int REGEX_SIZE, - struct re_pattern_buffer *PATTERN_BUFFER) - -REGEX is the regular expression's address, REGEX_SIZE is its length, -and PATTERN_BUFFER is the pattern buffer's address. - - If `re_compile_pattern' successfully compiles the regular expression, -it returns zero and sets `*PATTERN_BUFFER' to the compiled pattern. It -sets the pattern buffer's fields as follows: - -`buffer' - to the compiled pattern. - -`used' - to the number of bytes the compiled pattern in `buffer' occupies. - -`syntax' - to the current value of `re_syntax_options'. - -`re_nsub' - to the number of subexpressions in REGEX. - -`fastmap_accurate' - to zero on the theory that the pattern you're compiling is - different than the one previously compiled into `buffer'; in that - case (since you can't make a fastmap without a compiled pattern), - `fastmap' would either contain an incompatible fastmap, or nothing - at all. 
- - If `re_compile_pattern' can't compile REGEX, it returns an error -string corresponding to one of the errors listed in *Note POSIX Regular -Expression Compiling::. - - -File: regex.info, Node: GNU Matching, Next: GNU Searching, Prev: GNU Regular Expression Compiling, Up: GNU Regex Functions - -GNU Matching ------------- - - Matching the GNU way means trying to match as much of a string as -possible starting at a position within it you specify. Once you've -compiled a pattern into a pattern buffer (*note GNU Regular Expression -Compiling::.), you can ask the matcher to match that pattern against a -string using: - - int - re_match (struct re_pattern_buffer *PATTERN_BUFFER, - const char *STRING, const int SIZE, - const int START, struct re_registers *REGS) - -PATTERN_BUFFER is the address of a pattern buffer containing a compiled -pattern. STRING is the string you want to match; it can contain -newline and null characters. SIZE is the length of that string. START -is the string index at which you want to begin matching; the first -character of STRING is at index zero. *Note Using Registers::, for an -explanation of REGS; you can safely pass zero. - - `re_match' matches the regular expression in PATTERN_BUFFER against -the string STRING according to the syntax in PATTERN_BUFFER's `syntax' -field. (*Note GNU Regular Expression Compiling::, for how to set it.) -The function returns -1 if the compiled pattern does not match any part -of STRING and -2 if an internal error happens; otherwise, it returns -how many (possibly zero) characters of STRING the pattern matched. - - An example: suppose PATTERN_BUFFER points to a pattern buffer -containing the compiled pattern for `a*', and STRING points to `aaaaab' -(whereupon SIZE should be 6). Then if START is 2, `re_match' returns 3, -i.e., `a*' would have matched the last three `a's in STRING. If START -is 0, `re_match' returns 5, i.e., `a*' would have matched all the `a's -in STRING.
If START is either 5 or 6, it returns zero. - - If START is not between zero and SIZE, then `re_match' returns -1. - - -File: regex.info, Node: GNU Searching, Next: Matching/Searching with Split Data, Prev: GNU Matching, Up: GNU Regex Functions - -GNU Searching -------------- - - "Searching" means trying to match starting at successive positions -within a string. The function `re_search' does this. - - Before calling `re_search', you must compile your regular expression. -*Note GNU Regular Expression Compiling::. - - Here is the function declaration: - - int - re_search (struct re_pattern_buffer *PATTERN_BUFFER, - const char *STRING, const int SIZE, - const int START, const int RANGE, - struct re_registers *REGS) - -whose arguments are the same as those to `re_match' (*note GNU -Matching::.) except that the two arguments START and RANGE replace -`re_match''s argument START. - - If RANGE is positive, then `re_search' attempts a match starting -first at index START, then at START + 1 if that fails, and so on, up to -START + RANGE; if RANGE is negative, then it attempts a match starting -first at index START, then at START -1 if that fails, and so on. - - If START is not between zero and SIZE, then `re_search' returns -1. -When RANGE is positive, `re_search' adjusts RANGE so that START + RANGE -- 1 is between zero and SIZE, if necessary; that way it won't search -outside of STRING. Similarly, when RANGE is negative, `re_search' -adjusts RANGE so that START + RANGE + 1 is between zero and SIZE, if -necessary. - - If the `fastmap' field of PATTERN_BUFFER is zero, `re_search' matches -starting at consecutive positions; otherwise, it uses `fastmap' to make -the search more efficient. *Note Searching with Fastmaps::. - - If no match is found, `re_search' returns -1. If a match is found, -it returns the index where the match began. If an internal error -happens, it returns -2. 
- - -File: regex.info, Node: Matching/Searching with Split Data, Next: Searching with Fastmaps, Prev: GNU Searching, Up: GNU Regex Functions - -Matching and Searching with Split Data --------------------------------------- - - Using the functions `re_match_2' and `re_search_2', you can match or -search in data that is divided into two strings. - - The function: - - int - re_match_2 (struct re_pattern_buffer *BUFFER, - const char *STRING1, const int SIZE1, - const char *STRING2, const int SIZE2, - const int START, - struct re_registers *REGS, - const int STOP) - -is similar to `re_match' (*note GNU Matching::.) except that you pass -*two* data strings and sizes, and an index STOP beyond which you don't -want the matcher to try matching. As with `re_match', if it succeeds, -`re_match_2' returns how many characters of STRING it matched. Regard -STRING1 and STRING2 as concatenated when you set the arguments START and -STOP and use the contents of REGS; `re_match_2' never returns a value -larger than SIZE1 + SIZE2. - - The function: - - int - re_search_2 (struct re_pattern_buffer *BUFFER, - const char *STRING1, const int SIZE1, - const char *STRING2, const int SIZE2, - const int START, const int RANGE, - struct re_registers *REGS, - const int STOP) - -is similarly related to `re_search'. - - -File: regex.info, Node: Searching with Fastmaps, Next: GNU Translate Tables, Prev: Matching/Searching with Split Data, Up: GNU Regex Functions - -Searching with Fastmaps ------------------------ - - If you're searching through a long string, you should use a fastmap. -Without one, the searcher tries to match at consecutive positions in the -string. Generally, most of the characters in the string could not start -a match. It takes much longer to try matching at a given position in -the string than it does to check in a table whether or not the -character at that position could start a match. A "fastmap" is such a -table. 
- - More specifically, a fastmap is an array indexed by the characters in -your character set. Under the ASCII encoding, therefore, a fastmap has -256 elements. If you want the searcher to use a fastmap with a given -pattern buffer, you must allocate the array and assign the array's -address to the pattern buffer's `fastmap' field. You either can -compile the fastmap yourself or have `re_search' do it for you; when -`fastmap' is nonzero, it automatically compiles a fastmap the first -time you search using a particular compiled pattern. - - To compile a fastmap yourself, use: - - int - re_compile_fastmap (struct re_pattern_buffer *PATTERN_BUFFER) - -PATTERN_BUFFER is the address of a pattern buffer. If the character C -could start a match for the pattern, `re_compile_fastmap' makes -`PATTERN_BUFFER->fastmap[C]' nonzero. It returns 0 if it can compile a -fastmap and -2 if there is an internal error. For example, if `|' is -the alternation operator and PATTERN_BUFFER holds the compiled pattern -for `a|b', then `re_compile_fastmap' sets `fastmap['a']' and -`fastmap['b']' (and no others). - - `re_search' uses a fastmap as it moves along in the string: it checks -the string's characters until it finds one that's in the fastmap. Then -it tries matching at that character. If the match fails, it repeats -the process. So, by using a fastmap, `re_search' doesn't waste time -trying to match at positions in the string that couldn't start a match. - - If you don't want `re_search' to use a fastmap, store zero in the -`fastmap' field of the pattern buffer before calling `re_search'. - - Once you've initialized a pattern buffer's `fastmap' field, you need -never do so again--even if you compile a new pattern in it--provided -the way the field is set still reflects whether or not you want a -fastmap. `re_search' will still either do nothing if `fastmap' is null -or, if it isn't, compile a new fastmap for the new pattern. 
- - -File: regex.info, Node: GNU Translate Tables, Next: Using Registers, Prev: Searching with Fastmaps, Up: GNU Regex Functions - -GNU Translate Tables --------------------- - - If you set the `translate' field of a pattern buffer to a translate -table, then the GNU Regex functions to which you've passed that pattern -buffer use it to apply a simple transformation to all the regular -expression and string characters at which they look. - - A "translate table" is an array indexed by the characters in your -character set. Under the ASCII encoding, therefore, a translate table -has 256 elements. The array's elements are also characters in your -character set. When the Regex functions see a character C, they use -`translate[C]' in its place, with one exception: the character after a -`\' is not translated. (This ensures that the operators, e.g., `\B' -and `\b', are always distinguishable.) - - For example, a table that maps all lowercase letters to the -corresponding uppercase ones would cause the matcher to ignore -differences in case.(1) Such a table would map all characters except -lowercase letters to themselves, and lowercase letters to the -corresponding uppercase ones. Under the ASCII encoding, here's how you -could initialize such a table (we'll call it `case_fold'): - - for (i = 0; i < 256; i++) - case_fold[i] = i; - for (i = 'a'; i <= 'z'; i++) - case_fold[i] = i - ('a' - 'A'); - - You tell Regex to use a translate table on a given pattern buffer by -assigning that table's address to the `translate' field of that buffer. -If you don't want Regex to do any translation, put zero into this -field. You'll get weird results if you change the table's contents -anytime between compiling the pattern buffer, compiling its fastmap, and -matching or searching with the pattern buffer. - - ---------- Footnotes ---------- - - (1) A table that maps all uppercase letters to the corresponding -lowercase ones would work just as well for this purpose.
- - -File: regex.info, Node: Using Registers, Next: Freeing GNU Pattern Buffers, Prev: GNU Translate Tables, Up: GNU Regex Functions - -Using Registers ---------------- - - A group in a regular expression can match a (possibly empty) -substring of the string that the regular expression as a whole matched. -The matcher remembers the beginning and end of the substring matched by -each group. - - To find out what they matched, pass a nonzero REGS argument to a GNU -matching or searching function (*note GNU Matching::. and *Note GNU -Searching::), i.e., the address of a structure of this type, as defined -in `regex.h': - - struct re_registers - { - unsigned num_regs; - regoff_t *start; - regoff_t *end; - }; - - Except for (possibly) the NUM_REGS'th element (see below), the Ith -element of the `start' and `end' arrays records information about the -Ith group in the pattern. (They're declared as C pointers, but this is -only because not all C compilers accept zero-length arrays; -conceptually, it is simplest to think of them as arrays.) - - The `start' and `end' arrays are allocated in various ways, depending -on the value of the `regs_allocated' field in the pattern buffer passed -to the matcher. - - The simplest and perhaps most useful is to let the matcher -(re)allocate enough space to record information for all the groups in -the regular expression. If `regs_allocated' is `REGS_UNALLOCATED', the -matcher allocates 1 + RE_NSUB elements (RE_NSUB is another field in the pattern buffer; -*note GNU Pattern Buffers::.), sets the extra element to -1, and -sets `regs_allocated' to `REGS_REALLOCATE'. Then on subsequent calls -with the same pattern buffer and REGS arguments, the matcher -reallocates more space if necessary. - - It would perhaps be more logical to make the `regs_allocated' field -part of the `re_registers' structure, instead of part of the pattern -buffer. But in that case the caller would be forced to initialize the -structure before passing it.
Much existing code doesn't do this -initialization, and it's arguably better to avoid it anyway. - - `re_compile_pattern' sets `regs_allocated' to `REGS_UNALLOCATED', so -if you use the GNU regular expression functions, you get this behavior -by default. - - xx document re_set_registers - - POSIX, on the other hand, requires a different interface: the caller -is supposed to pass in a fixed-length array which the matcher fills. -Therefore, if `regs_allocated' is `REGS_FIXED' the matcher simply fills -that array. - - The following examples illustrate the information recorded in the -`re_registers' structure. (In all of them, `(' represents the -open-group and `)' the close-group operator. The first character in -the string STRING is at index 0.) - - * If the regular expression has an I-th group not contained within - another group that matches a substring of STRING, then the - function sets `REGS->start[I]' to the index in STRING where the - substring matched by the I-th group begins, and `REGS->end[I]' to - the index just beyond that substring's end. The function sets - `REGS->start[0]' and `REGS->end[0]' to analogous information about - the entire pattern. - - For example, when you match `((a)(b))' against `ab', you get: - - * 0 in `REGS->start[0]' and 2 in `REGS->end[0]' - - * 0 in `REGS->start[1]' and 2 in `REGS->end[1]' - - * 0 in `REGS->start[2]' and 1 in `REGS->end[2]' - - * 1 in `REGS->start[3]' and 2 in `REGS->end[3]' - - * If a group matches more than once (as it might if followed by, - e.g., a repetition operator), then the function reports the - information about what the group *last* matched. 
- - For example, when you match the pattern `(a)*' against the string - `aa', you get: - - * 0 in `REGS->start[0]' and 2 in `REGS->end[0]' - - * 1 in `REGS->start[1]' and 2 in `REGS->end[1]' - - * If the I-th group does not participate in a successful match, - e.g., it is an alternative not taken or a repetition operator - allows zero repetitions of it, then the function sets - `REGS->start[I]' and `REGS->end[I]' to -1. - - For example, when you match the pattern `(a)*b' against the string - `b', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * -1 in `REGS->start[1]' and -1 in `REGS->end[1]' - - * If the I-th group matches a zero-length string, then the function - sets `REGS->start[I]' and `REGS->end[I]' to the index just beyond - that zero-length string. - - For example, when you match the pattern `(a*)b' against the string - `b', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * 0 in `REGS->start[1]' and 0 in `REGS->end[1]' - - * If an I-th group contains a J-th group in turn not contained - within any other group within group I and the function reports a - match of the I-th group, then it records in `REGS->start[J]' and - `REGS->end[J]' the last match (if it matched) of the J-th group. 
- - For example, when you match the pattern `((a*)b)*' against the - string `abb', group 2 last matches the empty string, so you get - what it previously matched: - - * 0 in `REGS->start[0]' and 3 in `REGS->end[0]' - - * 2 in `REGS->start[1]' and 3 in `REGS->end[1]' - - * 2 in `REGS->start[2]' and 2 in `REGS->end[2]' - - When you match the pattern `((a)*b)*' against the string `abb', - group 2 doesn't participate in the last match, so you get: - - * 0 in `REGS->start[0]' and 3 in `REGS->end[0]' - - * 2 in `REGS->start[1]' and 3 in `REGS->end[1]' - - * 0 in `REGS->start[2]' and 1 in `REGS->end[2]' - - * If an I-th group contains a J-th group in turn not contained - within any other group within group I and the function sets - `REGS->start[I]' and `REGS->end[I]' to -1, then it also sets - `REGS->start[J]' and `REGS->end[J]' to -1. - - For example, when you match the pattern `((a)*b)*c' against the - string `c', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * -1 in `REGS->start[1]' and -1 in `REGS->end[1]' - - * -1 in `REGS->start[2]' and -1 in `REGS->end[2]' - - -File: regex.info, Node: Freeing GNU Pattern Buffers, Prev: Using Registers, Up: GNU Regex Functions - -Freeing GNU Pattern Buffers ---------------------------- - - To free any allocated fields of a pattern buffer, you can use the -POSIX function described in *Note Freeing POSIX Pattern Buffers::, -since the type `regex_t'--the type for POSIX pattern buffers--is -equivalent to the type `re_pattern_buffer'. After freeing a pattern -buffer, you need to again compile a regular expression in it (*note GNU -Regular Expression Compiling::.) before passing it to a matching or -searching function. - - -File: regex.info, Node: POSIX Regex Functions, Next: BSD Regex Functions, Prev: GNU Regex Functions, Up: Programming with Regex - -POSIX Regex Functions -===================== - - If you're writing code that has to be POSIX compatible, you'll need -to use these functions. 
Their interfaces are as specified by POSIX, -draft 1003.2/D11.2. - -* Menu: - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () - - -File: regex.info, Node: POSIX Pattern Buffers, Next: POSIX Regular Expression Compiling, Up: POSIX Regex Functions - -POSIX Pattern Buffers ---------------------- - - To compile or match a given regular expression the POSIX way, you -must supply a pattern buffer exactly the way you do for GNU (*note GNU -Pattern Buffers::.). POSIX pattern buffers have type `regex_t', which -is equivalent to the GNU pattern buffer type `re_pattern_buffer'. - - -File: regex.info, Node: POSIX Regular Expression Compiling, Next: POSIX Matching, Prev: POSIX Pattern Buffers, Up: POSIX Regex Functions - -POSIX Regular Expression Compiling ----------------------------------- - - With POSIX, you can only search for a given regular expression; you -can't match it. To do this, you must first compile it in a pattern -buffer, using `regcomp'. - - To compile a pattern buffer, use: - - int - regcomp (regex_t *PREG, const char *REGEX, int CFLAGS) - -PREG is the initialized pattern buffer's address, REGEX is the regular -expression's address, and CFLAGS is the compilation flags, which Regex -considers as a collection of bits. Here are the valid bits, as defined -in `regex.h': - -`REG_EXTENDED' - says to use POSIX Extended Regular Expression syntax; if this isn't - set, then says to use POSIX Basic Regular Expression syntax. - `regcomp' sets PREG's `syntax' field accordingly. - -`REG_ICASE' - says to ignore case; `regcomp' sets PREG's `translate' field to a - translate table which ignores case, replacing anything you've put - there before. - -`REG_NOSUB' - says to set PREG's `no_sub' field; *note POSIX Matching::., for - what this means. 
- -`REG_NEWLINE' - says that a: - - * match-any-character operator (*note Match-any-character - Operator::.) doesn't match a newline. - - * nonmatching list not containing a newline (*note List - Operators::.) matches a newline. - - * match-beginning-of-line operator (*note - Match-beginning-of-line Operator::.) matches the empty string - immediately after a newline, regardless of how `REG_NOTBOL' - is set (*note POSIX Matching::., for an explanation of - `REG_NOTBOL'). - - * match-end-of-line operator (*note Match-end-of-line - Operator::.) matches the empty string immediately before a - newline, regardless of how `REG_NOTEOL' is set (*note POSIX - Matching::., for an explanation of `REG_NOTEOL'). - - If `regcomp' successfully compiles the regular expression, it returns -zero and sets `*PREG' to the compiled pattern. Except for -`syntax' (which it sets as explained above), it also sets the same -fields the same way as does the GNU compiling function (*note GNU -Regular Expression Compiling::.). - - If `regcomp' can't compile the regular expression, it returns one of -the error codes listed here. (Except when noted differently, the -syntax in all examples below is basic regular expression syntax.) - -`REG_BADRPT' - For example, the consecutive repetition operators `**' in `a**' - are invalid. As another example, if the syntax is extended - regular expression syntax, then the repetition operator `*' with - nothing on which to operate in `*' is invalid. - -`REG_BADBR' - For example, the COUNT `-1' in `a\{-1' is invalid. - -`REG_EBRACE' - For example, `a\{1' is missing a close-interval operator. - -`REG_EBRACK' - For example, `[a' is missing a close-list operator. - -`REG_ERANGE' - For example, the range ending point `a' that collates lower than - does its starting point `z' in `[z-a]' is invalid. Also, the - range with the character class `[:alpha:]' as its starting point in - `[[:alpha:]-|]'.
- -`REG_ECTYPE' - For example, the character class name `foo' in `[[:foo:]]' is - invalid. - -`REG_EPAREN' - For example, `a\)' is missing an open-group operator and `\(a' is - missing a close-group operator. - -`REG_ESUBREG' - For example, the back reference `\2' that refers to a nonexistent - subexpression in `\(a\)\2' is invalid. - -`REG_EEND' - Returned when a regular expression causes no other more specific - error. - -`REG_EESCAPE' - For example, the trailing backslash `\' in `a\' is invalid, as is - the one in `\'. - -`REG_BADPAT' - For example, in the extended regular expression syntax, the empty - group `()' in `a()b' is invalid. - -`REG_ESIZE' - Returned when a regular expression needs a pattern buffer larger - than 65536 bytes. - -`REG_ESPACE' - Returned when a regular expression causes Regex to run out of - memory. - - -File: regex.info, Node: POSIX Matching, Next: Reporting Errors, Prev: POSIX Regular Expression Compiling, Up: POSIX Regex Functions - -POSIX Matching --------------- - - Matching the POSIX way means trying to match a null-terminated string -starting at its first character. Once you've compiled a pattern into a -pattern buffer (*note POSIX Regular Expression Compiling::.), you can -ask the matcher to match that pattern against a string using: - - int - regexec (const regex_t *PREG, const char *STRING, - size_t NMATCH, regmatch_t PMATCH[], int EFLAGS) - -PREG is the address of a pattern buffer for a compiled pattern. STRING -is the string you want to match. - - *Note Using Byte Offsets::, for an explanation of PMATCH. If you -pass zero for NMATCH or you compiled PREG with the compilation flag -`REG_NOSUB' set, then `regexec' will ignore PMATCH; otherwise, you must -allocate it to have at least NMATCH elements. `regexec' will record -NMATCH byte offsets in PMATCH, and set to -1 any unused elements up to -`PMATCH[NMATCH - 1]'. - - EFLAGS specifies "execution flags"--namely, the two bits `REG_NOTBOL' -and `REG_NOTEOL' (defined in `regex.h').
If you set `REG_NOTBOL', then -the match-beginning-of-line operator (*note Match-beginning-of-line -Operator::.) always fails to match. This lets you match against pieces -of a line, as you would need to if, say, searching for repeated -instances of a given pattern in a line; it would work correctly for -patterns both with and without match-beginning-of-line operators. -`REG_NOTEOL' works analogously for the match-end-of-line operator -(*note Match-end-of-line Operator::.); it exists for symmetry. - - `regexec' tries to find a match for PREG in STRING according to the -syntax in PREG's `syntax' field. (*Note POSIX Regular Expression -Compiling::, for how to set it.) The function returns zero if the -compiled pattern matches STRING and `REG_NOMATCH' (defined in -`regex.h') if it doesn't. - - -File: regex.info, Node: Reporting Errors, Next: Using Byte Offsets, Prev: POSIX Matching, Up: POSIX Regex Functions - -Reporting Errors ----------------- - - If either `regcomp' or `regexec' fails, it returns a nonzero error -code, the possibilities for which are defined in `regex.h'. *Note -POSIX Regular Expression Compiling::, and *Note POSIX Matching::, for -what these codes mean. To get an error string corresponding to these -codes, you can use: - - size_t - regerror (int ERRCODE, - const regex_t *PREG, - char *ERRBUF, - size_t ERRBUF_SIZE) - -ERRCODE is an error code, PREG is the address of the pattern buffer -which provoked the error, ERRBUF is the error buffer, and ERRBUF_SIZE -is ERRBUF's size. - - `regerror' returns the size in bytes of the error string -corresponding to ERRCODE (including its terminating null). If ERRBUF -and ERRBUF_SIZE are nonzero, it also returns in ERRBUF the first -ERRBUF_SIZE - 1 characters of the error string, followed by a null. -ERRBUF_SIZE must be a nonnegative number less than or equal to the size -in bytes of ERRBUF.
- - You can call `regerror' with a null ERRBUF and a zero ERRBUF_SIZE to -determine how large ERRBUF need be to accommodate `regerror''s error -string. - - -File: regex.info, Node: Using Byte Offsets, Next: Freeing POSIX Pattern Buffers, Prev: Reporting Errors, Up: POSIX Regex Functions - -Using Byte Offsets ------------------- - - In POSIX, variables of type `regmatch_t' hold information analogous to, -but not identical to, GNU's registers (*note Using Registers::.). -To get information about registers in POSIX, pass to `regexec' a -nonzero PMATCH of type `regmatch_t', i.e., the address of a structure -of this type, defined in `regex.h': - - typedef struct - { - regoff_t rm_so; - regoff_t rm_eo; - } regmatch_t; - - When reading in *Note Using Registers::, about how the matching -function stores the information into the registers, substitute PMATCH -for REGS, `PMATCH[I].rm_so' for `REGS->start[I]' and -`PMATCH[I].rm_eo' for `REGS->end[I]'. - - -File: regex.info, Node: Freeing POSIX Pattern Buffers, Prev: Using Byte Offsets, Up: POSIX Regex Functions - -Freeing POSIX Pattern Buffers ------------------------------ - - To free any allocated fields of a pattern buffer, use: - - void - regfree (regex_t *PREG) - -PREG is the pattern buffer whose allocated fields you want freed. -`regfree' also sets PREG's `allocated' and `used' fields to zero. -After freeing a pattern buffer, you need to again compile a regular -expression in it (*note POSIX Regular Expression Compiling::.) before -passing it to the matching function (*note POSIX Matching::.). - - -File: regex.info, Node: BSD Regex Functions, Prev: POSIX Regex Functions, Up: Programming with Regex - -BSD Regex Functions -=================== - - If you're writing code that has to be Berkeley UNIX compatible, -you'll need to use these functions whose interfaces are the same as -those in Berkeley UNIX.
- -* Menu: - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () - - -File: regex.info, Node: BSD Regular Expression Compiling, Next: BSD Searching, Up: BSD Regex Functions - -BSD Regular Expression Compiling --------------------------------- - - With Berkeley UNIX, you can only search for a given regular -expression; you can't match one. To search for it, you must first -compile it. Before you compile it, you must indicate the regular -expression syntax you want it compiled according to by setting the -variable `re_syntax_options' (declared in `regex.h') to some syntax -(*note Regular Expression Syntax::.). - - To compile a regular expression use: - - char * - re_comp (char *REGEX) - -REGEX is the address of a null-terminated regular expression. -`re_comp' uses an internal pattern buffer, so you can use only the most -recently compiled pattern buffer. This means that if you want to use a -given regular expression that you've already compiled--but it isn't the -latest one you've compiled--you'll have to recompile it. If you call -`re_comp' with the null string (*not* the empty string) as the -argument, it doesn't change the contents of the pattern buffer. - - If `re_comp' successfully compiles the regular expression, it returns -zero. If it can't compile the regular expression, it returns an error -string. `re_comp''s error messages are identical to those of -`re_compile_pattern' (*note GNU Regular Expression Compiling::.). - - -File: regex.info, Node: BSD Searching, Prev: BSD Regular Expression Compiling, Up: BSD Regex Functions - -BSD Searching -------------- - - Searching the Berkeley UNIX way means searching in a string starting -at its first character and trying successive positions within it to -find a match.
Once you've compiled a pattern using `re_comp' (*note -BSD Regular Expression Compiling::.), you can ask Regex to search for -that pattern in a string using: - - int - re_exec (char *STRING) - -STRING is the address of the null-terminated string in which you want -to search. - - `re_exec' returns either 1 for success or 0 for failure. It -automatically uses a GNU fastmap (*note Searching with Fastmaps::.). - - -File: regex.info, Node: Copying, Next: Index, Prev: Programming with Regex, Up: Top - -GNU GENERAL PUBLIC LICENSE -************************** - - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -Preamble -======== - - The licenses for most software are designed to take away your freedom -to share and change it. By contrast, the GNU General Public License is -intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it in -new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. 
-These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 1. This License applies to any program or other work which contains a - notice placed by the copyright holder saying it may be distributed - under the terms of this General Public License. The "Program", - below, refers to any such program or work, and a "work based on - the Program" means either the Program or any derivative work under - copyright law: that is to say, a work containing the Program or a - portion of it, either verbatim or with modifications and/or - translated into another language. 
(Hereinafter, translation is - included without limitation in the term "modification".) Each - licensee is addressed as "you". - - Activities other than copying, distribution and modification are - not covered by this License; they are outside its scope. The act - of running the Program is not restricted, and the output from the - Program is covered only if its contents constitute a work based on - the Program (independent of having been made by running the - Program). Whether that is true depends on what the Program does. - - 2. You may copy and distribute verbatim copies of the Program's - source code as you receive it, in any medium, provided that you - conspicuously and appropriately publish on each copy an appropriate - copyright notice and disclaimer of warranty; keep intact all the - notices that refer to this License and to the absence of any - warranty; and give any other recipients of the Program a copy of - this License along with the Program. - - You may charge a fee for the physical act of transferring a copy, - and you may at your option offer warranty protection in exchange - for a fee. - - 3. You may modify your copy or copies of the Program or any portion - of it, thus forming a work based on the Program, and copy and - distribute such modifications or work under the terms of Section 1 - above, provided that you also meet all of these conditions: - - a. You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b. You must cause any work that you distribute or publish, that - in whole or in part contains or is derived from the Program - or any part thereof, to be licensed as a whole at no charge - to all third parties under the terms of this License. - - c. 
If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display - an announcement including an appropriate copyright notice and - a notice that there is no warranty (or else, saying that you - provide a warranty) and that users may redistribute the - program under these conditions, and telling the user how to - view a copy of this License. (Exception: if the Program - itself is interactive but does not normally print such an - announcement, your work based on the Program is not required - to print an announcement.) - - These requirements apply to the modified work as a whole. If - identifiable sections of that work are not derived from the - Program, and can be reasonably considered independent and separate - works in themselves, then this License, and its terms, do not - apply to those sections when you distribute them as separate - works. But when you distribute the same sections as part of a - whole which is a work based on the Program, the distribution of - the whole must be on the terms of this License, whose permissions - for other licensees extend to the entire whole, and thus to each - and every part regardless of who wrote it. - - Thus, it is not the intent of this section to claim rights or - contest your rights to work written entirely by you; rather, the - intent is to exercise the right to control the distribution of - derivative or collective works based on the Program. - - In addition, mere aggregation of another work not based on the - Program with the Program (or with a work based on the Program) on - a volume of a storage or distribution medium does not bring the - other work under the scope of this License. - - 4. 
You may copy and distribute the Program (or a work based on it, - under Section 2) in object code or executable form under the terms - of Sections 1 and 2 above provided that you also do one of the - following: - - a. Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of - Sections 1 and 2 above on a medium customarily used for - software interchange; or, - - b. Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a - medium customarily used for software interchange; or, - - c. Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with - such an offer, in accord with Subsection b above.) - - The source code for a work means the preferred form of the work for - making modifications to it. For an executable work, complete - source code means all the source code for all modules it contains, - plus any associated interface definition files, plus the scripts - used to control compilation and installation of the executable. - However, as a special exception, the source code distributed need - not include anything that is normally distributed (in either - source or binary form) with the major components (compiler, - kernel, and so on) of the operating system on which the executable - runs, unless that component itself accompanies the executable. 
- - If distribution of executable or object code is made by offering - access to copy from a designated place, then offering equivalent - access to copy the source code from the same place counts as - distribution of the source code, even though third parties are not - compelled to copy the source along with the object code. - - 5. You may not copy, modify, sublicense, or distribute the Program - except as expressly provided under this License. Any attempt - otherwise to copy, modify, sublicense or distribute the Program is - void, and will automatically terminate your rights under this - License. However, parties who have received copies, or rights, - from you under this License will not have their licenses - terminated so long as such parties remain in full compliance. - - 6. You are not required to accept this License, since you have not - signed it. However, nothing else grants you permission to modify - or distribute the Program or its derivative works. These actions - are prohibited by law if you do not accept this License. - Therefore, by modifying or distributing the Program (or any work - based on the Program), you indicate your acceptance of this - License to do so, and all its terms and conditions for copying, - distributing or modifying the Program or works based on it. - - 7. Each time you redistribute the Program (or any work based on the - Program), the recipient automatically receives a license from the - original licensor to copy, distribute or modify the Program - subject to these terms and conditions. You may not impose any - further restrictions on the recipients' exercise of the rights - granted herein. You are not responsible for enforcing compliance - by third parties to this License. - - 8. 
If, as a consequence of a court judgment or allegation of patent - infringement or for any other reason (not limited to patent - issues), conditions are imposed on you (whether by court order, - agreement or otherwise) that contradict the conditions of this - License, they do not excuse you from the conditions of this - License. If you cannot distribute so as to satisfy simultaneously - your obligations under this License and any other pertinent - obligations, then as a consequence you may not distribute the - Program at all. For example, if a patent license would not permit - royalty-free redistribution of the Program by all those who - receive copies directly or indirectly through you, then the only - way you could satisfy both it and this License would be to refrain - entirely from distribution of the Program. - - If any portion of this section is held invalid or unenforceable - under any particular circumstance, the balance of the section is - intended to apply and the section as a whole is intended to apply - in other circumstances. - - It is not the purpose of this section to induce you to infringe any - patents or other property right claims or to contest validity of - any such claims; this section has the sole purpose of protecting - the integrity of the free software distribution system, which is - implemented by public license practices. Many people have made - generous contributions to the wide range of software distributed - through that system in reliance on consistent application of that - system; it is up to the author/donor to decide if he or she is - willing to distribute software through any other system and a - licensee cannot impose that choice. - - This section is intended to make thoroughly clear what is believed - to be a consequence of the rest of this License. - - 9. 
If the distribution and/or use of the Program is restricted in - certain countries either by patents or by copyrighted interfaces, - the original copyright holder who places the Program under this - License may add an explicit geographical distribution limitation - excluding those countries, so that distribution is permitted only - in or among countries not thus excluded. In such case, this - License incorporates the limitation as if written in the body of - this License. - - 10. The Free Software Foundation may publish revised and/or new - versions of the General Public License from time to time. Such - new versions will be similar in spirit to the present version, but - may differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the - Program specifies a version number of this License which applies - to it and "any later version", you have the option of following - the terms and conditions either of that version or of any later - version published by the Free Software Foundation. If the Program - does not specify a version number of this License, you may choose - any version ever published by the Free Software Foundation. - - 11. If you wish to incorporate parts of the Program into other free - programs whose distribution conditions are different, write to the - author to ask for permission. For software which is copyrighted - by the Free Software Foundation, write to the Free Software - Foundation; we sometimes make exceptions for this. Our decision - will be guided by the two goals of preserving the free status of - all derivatives of our free software and of promoting the sharing - and reuse of software generally. - - NO WARRANTY - - 12. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO - WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE - LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT - HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT - WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT - NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE - QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE - PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY - SERVICING, REPAIR OR CORRECTION. - - 13. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN - WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY - MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE - LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, - INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR - INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF - DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU - OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY - OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - -Appendix: How to Apply These Terms to Your New Programs -======================================================= - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these -terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES. 
- Copyright (C) 19YY NAME OF AUTHOR - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Also add information on how to contact you by electronic and paper -mail. - - If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - - The hypothetical commands `show w' and `show c' should show the -appropriate parts of the General Public License. Of course, the -commands you use may be called something other than `show w' and `show -c'; they could even be mouse-clicks or menu items--whatever suits your -program. - - You should also get your employer (if you work as a programmer) or -your school, if any, to sign a "copyright disclaimer" for the program, -if necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - SIGNATURE OF TY COON, 1 April 1989 - Ty Coon, President of Vice - - This General Public License does not permit incorporating your -program into proprietary programs. 
If your program is a subroutine -library, you may consider it more useful to permit linking proprietary -applications with the library. If this is what you want to do, use the -GNU Library General Public License instead of this License. - - -File: regex.info, Node: Index, Prev: Copying, Up: Top - -Index -***** - -* Menu: - -* $: Match-end-of-line Operator. -* (: Grouping Operators. -* ): Grouping Operators. -* *: Match-zero-or-more Operator. -* +: Match-one-or-more Operator. -* -: List Operators. -* .: Match-any-character Operator. -* :] in regex: Character Class Operators. -* ?: Match-zero-or-one Operator. -* {: Interval Operators. -* }: Interval Operators. -* [: in regex: Character Class Operators. -* [^: List Operators. -* [: List Operators. -* \': Match-end-of-buffer Operator. -* \<: Match-beginning-of-word Operator. -* \>: Match-end-of-word Operator. -* \{: Interval Operators. -* \}: Interval Operators. -* \b: Match-word-boundary Operator. -* \B: Match-within-word Operator. -* \s: Match-syntactic-class Operator. -* \S: Match-not-syntactic-class Operator. -* \w: Match-word-constituent Operator. -* \W: Match-non-word-constituent Operator. -* \`: Match-beginning-of-buffer Operator. -* \: List Operators. -* ]: List Operators. -* ^: List Operators. -* allocated initialization: GNU Regular Expression Compiling. -* alternation operator: Alternation Operator. -* alternation operator and ^: Match-beginning-of-line Operator. -* anchoring: Anchoring Operators. -* anchors: Match-end-of-line Operator. -* anchors: Match-beginning-of-line Operator. -* Awk: Predefined Syntaxes. -* back references: Back-reference Operator. -* backtracking: Match-zero-or-more Operator. -* backtracking: Alternation Operator. -* beginning-of-line operator: Match-beginning-of-line Operator. -* bracket expression: List Operators. -* buffer field, set by re_compile_pattern: GNU Regular Expression Compiling. -* buffer initialization: GNU Regular Expression Compiling. 
-* character classes: Character Class Operators. -* Egrep: Predefined Syntaxes. -* Emacs: Predefined Syntaxes. -* end in struct re_registers: Using Registers. -* end-of-line operator: Match-end-of-line Operator. -* fastmap initialization: GNU Regular Expression Compiling. -* fastmaps: Searching with Fastmaps. -* fastmap_accurate field, set by re_compile_pattern: GNU Regular Expression Compiling. -* Grep: Predefined Syntaxes. -* grouping: Grouping Operators. -* ignoring case: POSIX Regular Expression Compiling. -* interval expression: Interval Operators. -* matching list: List Operators. -* matching newline: List Operators. -* matching with GNU functions: GNU Matching. -* newline_anchor field in pattern buffer: Match-beginning-of-line Operator. -* nonmatching list: List Operators. -* not_bol field in pattern buffer: Match-beginning-of-line Operator. -* num_regs in struct re_registers: Using Registers. -* open-group operator and ^: Match-beginning-of-line Operator. -* or operator: Alternation Operator. -* parenthesizing: Grouping Operators. -* pattern buffer initialization: GNU Regular Expression Compiling. -* pattern buffer, definition of: GNU Pattern Buffers. -* POSIX Awk: Predefined Syntaxes. -* range argument to re_search: GNU Searching. -* regex.c: Overview. -* regex.h: Overview. -* regexp anchoring: Anchoring Operators. -* regmatch_t: Using Byte Offsets. -* regs_allocated: Using Registers. -* REGS_FIXED: Using Registers. -* REGS_REALLOCATE: Using Registers. -* REGS_UNALLOCATED: Using Registers. -* regular expressions, syntax of: Regular Expression Syntax. -* REG_EXTENDED: POSIX Regular Expression Compiling. -* REG_ICASE: POSIX Regular Expression Compiling. -* REG_NEWLINE: POSIX Regular Expression Compiling. -* REG_NOSUB: POSIX Regular Expression Compiling. -* RE_BACKSLASH_ESCAPE_IN_LIST: Syntax Bits. -* RE_BK_PLUS_QM: Syntax Bits. -* RE_CHAR_CLASSES: Syntax Bits. -* RE_CONTEXT_INDEP_ANCHORS: Syntax Bits. 
-* RE_CONTEXT_INDEP_ANCHORS (and ^): Match-beginning-of-line Operator. -* RE_CONTEXT_INDEP_OPS: Syntax Bits. -* RE_CONTEXT_INVALID_OPS: Syntax Bits. -* RE_DOT_NEWLINE: Syntax Bits. -* RE_DOT_NOT_NULL: Syntax Bits. -* RE_INTERVALS: Syntax Bits. -* RE_LIMITED_OPS: Syntax Bits. -* RE_NEWLINE_ALT: Syntax Bits. -* RE_NO_BK_BRACES: Syntax Bits. -* RE_NO_BK_PARENS: Syntax Bits. -* RE_NO_BK_REFS: Syntax Bits. -* RE_NO_BK_VBAR: Syntax Bits. -* RE_NO_EMPTY_RANGES: Syntax Bits. -* re_nsub field, set by re_compile_pattern: GNU Regular Expression Compiling. -* re_pattern_buffer definition: GNU Pattern Buffers. -* re_registers: Using Registers. -* re_syntax_options initialization: GNU Regular Expression Compiling. -* RE_UNMATCHED_RIGHT_PAREN_ORD: Syntax Bits. -* searching with GNU functions: GNU Searching. -* start argument to re_search: GNU Searching. -* start in struct re_registers: Using Registers. -* struct re_pattern_buffer definition: GNU Pattern Buffers. -* subexpressions: Grouping Operators. -* syntax field, set by re_compile_pattern: GNU Regular Expression Compiling. -* syntax bits: Syntax Bits. -* syntax initialization: GNU Regular Expression Compiling. -* syntax of regular expressions: Regular Expression Syntax. -* translate initialization: GNU Regular Expression Compiling. -* used field, set by re_compile_pattern: GNU Regular Expression Compiling. -* word boundaries, matching: Match-word-boundary Operator. -* \: The Backslash Character. -* \(: Grouping Operators. -* \): Grouping Operators. -* \|: Alternation Operator. -* ^: Match-beginning-of-line Operator. -* |: Alternation Operator. - - - -Tag Table: -Node: Top1064 -Node: Overview4562 -Node: Regular Expression Syntax6746 -Node: Syntax Bits7916 -Node: Predefined Syntaxes14018 -Node: Collating Elements vs. 
Characters17872 -Node: The Backslash Character18835 -Node: Common Operators21992 -Node: Match-self Operator23445 -Node: Match-any-character Operator23941 -Node: Concatenation Operator24520 -Node: Repetition Operators25017 -Node: Match-zero-or-more Operator25436 -Node: Match-one-or-more Operator27483 -Node: Match-zero-or-one Operator28341 -Node: Interval Operators29196 -Node: Alternation Operator30991 -Node: List Operators32489 -Node: Character Class Operators35272 -Node: Range Operator36901 -Node: Grouping Operators38930 -Node: Back-reference Operator40251 -Node: Anchoring Operators43073 -Node: Match-beginning-of-line Operator43447 -Node: Match-end-of-line Operator44779 -Node: GNU Operators45518 -Node: Word Operators45767 -Node: Non-Emacs Syntax Tables46391 -Node: Match-word-boundary Operator47465 -Node: Match-within-word Operator47858 -Node: Match-beginning-of-word Operator48255 -Node: Match-end-of-word Operator48588 -Node: Match-word-constituent Operator48908 -Node: Match-non-word-constituent Operator49234 -Node: Buffer Operators49545 -Node: Match-beginning-of-buffer Operator49952 -Node: Match-end-of-buffer Operator50264 -Node: GNU Emacs Operators50558 -Node: Syntactic Class Operators50901 -Node: Emacs Syntax Tables51307 -Node: Match-syntactic-class Operator51963 -Node: Match-not-syntactic-class Operator52560 -Node: What Gets Matched?53150 -Node: Programming with Regex53799 -Node: GNU Regex Functions54237 -Node: GNU Pattern Buffers55078 -Node: GNU Regular Expression Compiling58303 -Node: GNU Matching61181 -Node: GNU Searching63101 -Node: Matching/Searching with Split Data64913 -Node: Searching with Fastmaps66369 -Node: GNU Translate Tables68921 -Node: Using Registers70892 -Node: Freeing GNU Pattern Buffers77000 -Node: POSIX Regex Functions77593 -Node: POSIX Pattern Buffers78266 -Node: POSIX Regular Expression Compiling78709 -Node: POSIX Matching82836 -Node: Reporting Errors84791 -Node: Using Byte Offsets86048 -Node: Freeing POSIX Pattern Buffers86861 -Node: BSD 
Regex Functions87467 -Node: BSD Regular Expression Compiling87886 -Node: BSD Searching89258 -Node: Copying89960 -Node: Index109122 - -End Tag Table diff --git a/gnu/libregex/doc/regex.texi b/gnu/libregex/doc/regex.texi deleted file mode 100644 index d93953ece20c..000000000000 --- a/gnu/libregex/doc/regex.texi +++ /dev/null @@ -1,3138 +0,0 @@ -\input texinfo -@c %**start of header -@setfilename regex.info -@settitle Regex -@c %**end of header - -@c \\{fill-paragraph} works better (for me, anyway) if the text in the -@c source file isn't indented. -@paragraphindent 2 - -@c Define a new index for our magic constants. -@defcodeindex cn - -@c Put everything in one index (arbitrarily chosen to be the concept index). -@syncodeindex cn cp -@syncodeindex ky cp -@syncodeindex pg cp -@syncodeindex tp cp -@syncodeindex vr cp - -@c Here is what we use in the Info `dir' file: -@c * Regex: (regex). Regular expression library. - - -@ifinfo -This file documents the GNU regular expression library. - -Copyright (C) 1992, 1993 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries a copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). -@end ignore - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled ``GNU General Public License'' is included exactly as -in the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this one. 
- -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that the section entitled ``GNU General Public License'' may be -included in a translation approved by the Free Software Foundation -instead of in the original English. -@end ifinfo - - -@titlepage - -@title Regex -@subtitle edition 0.12a -@subtitle 19 September 1992 -@author Kathryn A. Hargreaves -@author Karl Berry - -@page - -@vskip 0pt plus 1filll -Copyright @copyright{} 1992 Free Software Foundation. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled ``GNU General Public License'' is included exactly as -in the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this -one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that the section entitled ``GNU General Public License'' may be -included in a translation approved by the Free Software Foundation -instead of in the original English. - -@end titlepage - - -@ifinfo -@node Top, Overview, (dir), (dir) -@top Regular Expression Library - -This manual documents how to program with the GNU regular expression -library. This is edition 0.12a of the manual, 19 September 1992. - -The first part of this master menu lists the major nodes in this Info -document, including the index. The rest of the menu lists all the -lower level nodes in the document. 
- -@menu -* Overview:: -* Regular Expression Syntax:: -* Common Operators:: -* GNU Operators:: -* GNU Emacs Operators:: -* What Gets Matched?:: -* Programming with Regex:: -* Copying:: Copying and sharing Regex. -* Index:: General index. - --- The Detailed Node Listing --- - -Regular Expression Syntax - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - -Common Operators - -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? @{@} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - -Repetition Operators - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: @{@} - -List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]}) - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - -Anchoring Operators - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - -GNU Operators - -* Word Operators:: -* Buffer Operators:: - -Word Operators - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - -Buffer Operators - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - -GNU Emacs Operators - -* Syntactic Class Operators:: - -Syntactic Class Operators - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - -Programming with Regex - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - -GNU Regex Functions - -* GNU Pattern Buffers:: The re_pattern_buffer type. 
-* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - -POSIX Regex Functions - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () - -BSD Regex Functions - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () -@end menu -@end ifinfo -@node Overview, Regular Expression Syntax, Top, Top -@chapter Overview - -A @dfn{regular expression} (or @dfn{regexp}, or @dfn{pattern}) is a text -string that describes some (mathematical) set of strings. A regexp -@var{r} @dfn{matches} a string @var{s} if @var{s} is in the set of -strings described by @var{r}. - -Using the Regex library, you can: - -@itemize @bullet - -@item -see if a string matches a specified pattern as a whole, and - -@item -search within a string for a substring matching a specified pattern. - -@end itemize - -Some regular expressions match only one string, i.e., the set they -describe has only one member. For example, the regular expression -@samp{foo} matches the string @samp{foo} and no others. Other regular -expressions match more than one string, i.e., the set they describe has -more than one member. For example, the regular expression @samp{f*} -matches the set of strings made up of any number (including zero) of -@samp{f}s. 
As you can see, some characters in regular expressions match -themselves (such as @samp{f}) and some don't (such as @samp{*}); the -ones that don't match themselves instead let you specify patterns that -describe many different strings. - -To either match or search for a regular expression with the Regex -library functions, you must first compile it with a Regex pattern -compiling function. A @dfn{compiled pattern} is a regular expression -converted to the internal format used by the library functions. Once -you've compiled a pattern, you can use it for matching or searching any -number of times. - -The Regex library consists of two source files: @file{regex.h} and -@file{regex.c}. -@pindex regex.h -@pindex regex.c -Regex provides three groups of functions with which you can operate on -regular expressions. One group---the @sc{gnu} group---is more powerful -but not completely compatible with the other two, namely the @sc{posix} -and Berkeley @sc{unix} groups; its interface was designed specifically -for @sc{gnu}. The other groups have the same interfaces as do the -regular expression functions in @sc{posix} and Berkeley -@sc{unix}. - -We wrote this chapter with programmers in mind, not users of -programs---such as Emacs---that use Regex. We describe the Regex -library in its entirety, not how to write regular expressions that a -particular program understands. - - -@node Regular Expression Syntax, Common Operators, Overview, Top -@chapter Regular Expression Syntax - -@cindex regular expressions, syntax of -@cindex syntax of regular expressions - -@dfn{Characters} are things you can type. @dfn{Operators} are things in -a regular expression that match one or more characters. You compose -regular expressions from operators, which in turn you specify using one -or more characters. - -Most characters represent what we call the match-self operator, i.e., -they match themselves; we call these characters @dfn{ordinary}. 
Other -characters represent either all or parts of fancier operators; e.g., -@samp{.} represents what we call the match-any-character operator -(which, no surprise, matches (almost) any character); we call these -characters @dfn{special}. Two different things determine what -characters represent what operators: - -@enumerate -@item -the regular expression syntax your program has told the Regex library to -recognize, and - -@item -the context of the character in the regular expression. -@end enumerate - -In the following sections, we describe these things in more detail. - -@menu -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: -@end menu - - -@node Syntax Bits, Predefined Syntaxes, , Regular Expression Syntax -@section Syntax Bits - -@cindex syntax bits - -In any particular syntax for regular expressions, some characters are -always special, others are sometimes special, and others are never -special. The particular syntax that Regex recognizes for a given -regular expression depends on the value in the @code{syntax} field of -the pattern buffer of that regular expression. - -You get a pattern buffer by compiling a regular expression. @xref{GNU -Pattern Buffers}, and @ref{POSIX Pattern Buffers}, for more information -on pattern buffers. @xref{GNU Regular Expression Compiling}, @ref{POSIX -Regular Expression Compiling}, and @ref{BSD Regular Expression -Compiling}, for more information on compiling. - -Regex considers the value of the @code{syntax} field to be a collection -of bits; we refer to these bits as @dfn{syntax bits}. In most cases, -they affect what characters represent what operators. We describe the -meanings of the operators to which we refer in @ref{Common Operators}, -@ref{GNU Operators}, and @ref{GNU Emacs Operators}. 
- -For reference, here is the complete list of syntax bits, in alphabetical -order: - -@table @code - -@cnindex RE_BACKSLASH_ESCAPE_IN_LISTS -@item RE_BACKSLASH_ESCAPE_IN_LISTS -If this bit is set, then @samp{\} inside a list (@pxref{List Operators}) -quotes (makes ordinary, if it's special) the following character; if -this bit isn't set, then @samp{\} is an ordinary character inside lists. -(@xref{The Backslash Character}, for what `\' does outside of lists.) - -@cnindex RE_BK_PLUS_QM -@item RE_BK_PLUS_QM -If this bit is set, then @samp{\+} represents the match-one-or-more -operator and @samp{\?} represents the match-zero-or-one operator; if -this bit isn't set, then @samp{+} represents the match-one-or-more -operator and @samp{?} represents the match-zero-or-one operator. This -bit is irrelevant if @code{RE_LIMITED_OPS} is set. - -@cnindex RE_CHAR_CLASSES -@item RE_CHAR_CLASSES -If this bit is set, then you can use character classes in lists; if this -bit isn't set, then you can't. - -@cnindex RE_CONTEXT_INDEP_ANCHORS -@item RE_CONTEXT_INDEP_ANCHORS -If this bit is set, then @samp{^} and @samp{$} are special anywhere outside -a list; if this bit isn't set, then these characters are special only in -certain contexts. @xref{Match-beginning-of-line Operator}, and -@ref{Match-end-of-line Operator}. - -@cnindex RE_CONTEXT_INDEP_OPS -@item RE_CONTEXT_INDEP_OPS -If this bit is set, then certain characters are special anywhere outside -a list; if this bit isn't set, then those characters are special only in -some contexts and are ordinary elsewhere. Specifically, if this bit -isn't set then @samp{*}, and (if the syntax bit @code{RE_LIMITED_OPS} -isn't set) @samp{+} and @samp{?} (or @samp{\+} and @samp{\?}, depending -on the syntax bit @code{RE_BK_PLUS_QM}) represent repetition operators -only if they're not first in a regular expression or just after an -open-group or alternation operator.
The same holds for @samp{@{} (or -@samp{\@{}, depending on the syntax bit @code{RE_NO_BK_BRACES}) if -it is the beginning of a valid interval and the syntax bit -@code{RE_INTERVALS} is set. - -@cnindex RE_CONTEXT_INVALID_OPS -@item RE_CONTEXT_INVALID_OPS -If this bit is set, then repetition and alternation operators can't be -in certain positions within a regular expression. Specifically, the -regular expression is invalid if it has: - -@itemize @bullet - -@item -a repetition operator first in the regular expression or just after a -match-beginning-of-line, open-group, or alternation operator; or - -@item -an alternation operator first or last in the regular expression, just -before a match-end-of-line operator, or just after an alternation or -open-group operator. - -@end itemize - -If this bit isn't set, then you can put the characters representing the -repetition and alternation operators anywhere in a regular expression. -Whether or not they will in fact be operators in certain positions -depends on other syntax bits. - -@cnindex RE_DOT_NEWLINE -@item RE_DOT_NEWLINE -If this bit is set, then the match-any-character operator matches -a newline; if this bit isn't set, then it doesn't. - -@cnindex RE_DOT_NOT_NULL -@item RE_DOT_NOT_NULL -If this bit is set, then the match-any-character operator doesn't match -a null character; if this bit isn't set, then it does. - -@cnindex RE_INTERVALS -@item RE_INTERVALS -If this bit is set, then Regex recognizes interval operators; if this bit -isn't set, then it doesn't. - -@cnindex RE_LIMITED_OPS -@item RE_LIMITED_OPS -If this bit is set, then Regex doesn't recognize the match-one-or-more, -match-zero-or-one or alternation operators; if this bit isn't set, then -it does. - -@cnindex RE_NEWLINE_ALT -@item RE_NEWLINE_ALT -If this bit is set, then newline represents the alternation operator; if -this bit isn't set, then newline is ordinary.
- -@cnindex RE_NO_BK_BRACES -@item RE_NO_BK_BRACES -If this bit is set, then @samp{@{} represents the open-interval operator -and @samp{@}} represents the close-interval operator; if this bit isn't -set, then @samp{\@{} represents the open-interval operator and -@samp{\@}} represents the close-interval operator. This bit is relevant -only if @code{RE_INTERVALS} is set. - -@cnindex RE_NO_BK_PARENS -@item RE_NO_BK_PARENS -If this bit is set, then @samp{(} represents the open-group operator and -@samp{)} represents the close-group operator; if this bit isn't set, then -@samp{\(} represents the open-group operator and @samp{\)} represents -the close-group operator. - -@cnindex RE_NO_BK_REFS -@item RE_NO_BK_REFS -If this bit is set, then Regex doesn't recognize @samp{\}@var{digit} as -the back reference operator; if this bit isn't set, then it does. - -@cnindex RE_NO_BK_VBAR -@item RE_NO_BK_VBAR -If this bit is set, then @samp{|} represents the alternation operator; -if this bit isn't set, then @samp{\|} represents the alternation -operator. This bit is irrelevant if @code{RE_LIMITED_OPS} is set. - -@cnindex RE_NO_EMPTY_RANGES -@item RE_NO_EMPTY_RANGES -If this bit is set, then a regular expression with a range whose ending -point collates lower than its starting point is invalid; if this bit -isn't set, then Regex considers such a range to be empty. - -@cnindex RE_UNMATCHED_RIGHT_PAREN_ORD -@item RE_UNMATCHED_RIGHT_PAREN_ORD -If this bit is set and the regular expression has no matching open-group -operator, then Regex considers what would otherwise be a close-group -operator (based on how @code{RE_NO_BK_PARENS} is set) to match @samp{)}. - -@end table - - -@node Predefined Syntaxes, Collating Elements vs. 
Characters, Syntax Bits, Regular Expression Syntax -@section Predefined Syntaxes - -If you're programming with Regex, you can set a pattern buffer's -(@pxref{GNU Pattern Buffers}, and @ref{POSIX Pattern Buffers}) -@code{syntax} field either to an arbitrary combination of syntax bits -(@pxref{Syntax Bits}) or else to the configurations defined by Regex. -These configurations define the syntaxes used by certain -programs---@sc{gnu} Emacs, -@cindex Emacs -@sc{posix} Awk, -@cindex POSIX Awk -traditional Awk, -@cindex Awk -Grep, -@cindex Grep -@cindex Egrep -Egrep---in addition to syntaxes for @sc{posix} basic and extended -regular expressions. - -The predefined syntaxes---taken directly from @file{regex.h}---are: - -@example -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \?
\+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -@end example - -@node Collating Elements vs. Characters, The Backslash Character, Predefined Syntaxes, Regular Expression Syntax -@section Collating Elements vs.@: Characters - -@sc{posix} generalizes the notion of a character to that of a -collating element. It defines a @dfn{collating element} to be ``a -sequence of one or more bytes defined in the current collating sequence -as a unit of collation.'' - -This generalizes the notion of a character in -two ways. First, a single character can map into two or more collating -elements. For example, the German -@tex -`\ss' -@end tex -@ifinfo -``es-zet'' -@end ifinfo -collates as the collating element @samp{s} followed by another collating -element @samp{s}. Second, two or more characters can map into one -collating element. For example, the Spanish @samp{ll} collates after -@samp{l} and before @samp{m}. - -Since @sc{posix}'s ``collating element'' preserves the essential idea of -a ``character,'' we use the latter, more familiar, term in this document. - -@node The Backslash Character, , Collating Elements vs. 
Characters, Regular Expression Syntax -@section The Backslash Character - -@cindex \ -The @samp{\} character has one of four different meanings, depending on -the context in which you use it and what syntax bits are set -(@pxref{Syntax Bits}). It can: 1) stand for itself, 2) quote the next -character, 3) introduce an operator, or 4) do nothing. - -@enumerate -@item -It stands for itself inside a list -(@pxref{List Operators}) if the syntax bit -@code{RE_BACKSLASH_ESCAPE_IN_LISTS} is not set. For example, @samp{[\]} -would match @samp{\}. - -@item -It quotes (makes ordinary, if it's special) the next character when you -use it either: - -@itemize @bullet -@item -outside a list,@footnote{Sometimes -you don't have to explicitly quote special characters to make -them ordinary. For instance, most characters lose any special meaning -inside a list (@pxref{List Operators}). In addition, if the syntax bits -@code{RE_CONTEXT_INVALID_OPS} and @code{RE_CONTEXT_INDEP_OPS} -aren't set, then (for historical reasons) the matcher considers special -characters ordinary if they are in contexts where the operations they -represent make no sense; for example, then the match-zero-or-more -operator (represented by @samp{*}) matches itself in the regular -expression @samp{*foo} because there is no preceding expression on which -it can operate. It is poor practice, however, to depend on this -behavior; if you want a special character to be ordinary outside a list, -it's better to always quote it, regardless.} or - -@item -inside a list and the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is set. - -@end itemize - -@item -It introduces an operator when followed by certain ordinary -characters---sometimes only when certain syntax bits are set. See the -cases @code{RE_BK_PLUS_QM}, @code{RE_NO_BK_BRACES}, @code{RE_NO_BK_VBAR}, -@code{RE_NO_BK_PARENS}, @code{RE_NO_BK_REFS} in @ref{Syntax Bits}. 
Also: - -@itemize @bullet -@item -@samp{\b} represents the match-word-boundary operator -(@pxref{Match-word-boundary Operator}). - -@item -@samp{\B} represents the match-within-word operator -(@pxref{Match-within-word Operator}). - -@item -@samp{\<} represents the match-beginning-of-word operator @* -(@pxref{Match-beginning-of-word Operator}). - -@item -@samp{\>} represents the match-end-of-word operator -(@pxref{Match-end-of-word Operator}). - -@item -@samp{\w} represents the match-word-constituent operator -(@pxref{Match-word-constituent Operator}). - -@item -@samp{\W} represents the match-non-word-constituent operator -(@pxref{Match-non-word-constituent Operator}). - -@item -@samp{\`} represents the match-beginning-of-buffer -operator and @samp{\'} represents the match-end-of-buffer operator -(@pxref{Buffer Operators}). - -@item -If Regex was compiled with the C preprocessor symbol @code{emacs} -defined, then @samp{\s@var{class}} represents the match-syntactic-class -operator and @samp{\S@var{class}} represents the -match-not-syntactic-class operator (@pxref{Syntactic Class Operators}). - -@end itemize - -@item -In all other cases, Regex ignores @samp{\}. For example, -@samp{\n} matches @samp{n}. - -@end enumerate - -@node Common Operators, GNU Operators, Regular Expression Syntax, Top -@chapter Common Operators - -You compose regular expressions from operators. In the following -sections, we describe the regular expression operators specified by -@sc{posix}; @sc{gnu} also uses these. Most operators have more than one -representation as characters. @xref{Regular Expression Syntax}, for -what characters represent what operators under what circumstances. - -For most operators that can be represented in two ways, one -representation is a single character and the other is that character -preceded by @samp{\}. For example, either @samp{(} or @samp{\(} -represents the open-group operator. 
Which one does depends on the -setting of a syntax bit, in this case @code{RE_NO_BK_PARENS}. Why is -this so? Historical reasons dictate some of the varying -representations, while @sc{posix} dictates others. - -Finally, almost all characters lose any special meaning inside a list -(@pxref{List Operators}). - -@menu -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? @{@} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ -@end menu - -@node Match-self Operator, Match-any-character Operator, , Common Operators -@section The Match-self Operator (@var{ordinary character}) - -This operator matches the character itself. All ordinary characters -(@pxref{Regular Expression Syntax}) represent this operator. For -example, @samp{f} is always an ordinary character, so the regular -expression @samp{f} matches only the string @samp{f}. In -particular, it does @emph{not} match the string @samp{ff}. - -@node Match-any-character Operator, Concatenation Operator, Match-self Operator, Common Operators -@section The Match-any-character Operator (@code{.}) - -@cindex @samp{.} - -This operator matches any single printing or nonprinting character -except it won't match a: - -@table @asis -@item newline -if the syntax bit @code{RE_DOT_NEWLINE} isn't set. - -@item null -if the syntax bit @code{RE_DOT_NOT_NULL} is set. - -@end table - -The @samp{.} (period) character represents this operator. For example, -@samp{a.b} matches any three-character string beginning with @samp{a} -and ending with @samp{b}. - -@node Concatenation Operator, Repetition Operators, Match-any-character Operator, Common Operators -@section The Concatenation Operator - -This operator concatenates two regular expressions @var{a} and @var{b}. 
-No character represents this operator; you simply put @var{b} after -@var{a}. The result is a regular expression that will match a string if -@var{a} matches its first part and @var{b} matches the rest. For -example, @samp{xy} (two match-self operators) matches @samp{xy}. - -@node Repetition Operators, Alternation Operator, Concatenation Operator, Common Operators -@section Repetition Operators - -Repetition operators repeat the preceding regular expression a specified -number of times. - -@menu -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: @{@} -@end menu - -@node Match-zero-or-more Operator, Match-one-or-more Operator, , Repetition Operators -@subsection The Match-zero-or-more Operator (@code{*}) - -@cindex @samp{*} - -This operator repeats the smallest possible preceding regular expression -as many times as necessary (including zero) to match the pattern. -@samp{*} represents this operator. For example, @samp{o*} -matches any string made up of zero or more @samp{o}s. Since this -operator operates on the smallest preceding regular expression, -@samp{fo*} has a repeating @samp{o}, not a repeating @samp{fo}. So, -@samp{fo*} matches @samp{f}, @samp{fo}, @samp{foo}, and so on. - -Since the match-zero-or-more operator is a suffix operator, it may be -useless as such when no regular expression precedes it. This is the -case when it: - -@itemize @bullet -@item -is first in a regular expression, or - -@item -follows a match-beginning-of-line, open-group, or alternation -operator. - -@end itemize - -@noindent -Three different things can happen in these cases: - -@enumerate -@item -If the syntax bit @code{RE_CONTEXT_INVALID_OPS} is set, then the -regular expression is invalid. - -@item -If @code{RE_CONTEXT_INVALID_OPS} isn't set, but -@code{RE_CONTEXT_INDEP_OPS} is, then @samp{*} represents the -match-zero-or-more operator (which then operates on the empty string). 
- -@item -Otherwise, @samp{*} is ordinary. - -@end enumerate - -@cindex backtracking -The matcher processes a match-zero-or-more operator by first matching as -many repetitions of the smallest preceding regular expression as it can. -Then it continues to match the rest of the pattern. - -If it can't match the rest of the pattern, it backtracks (as many times -as necessary), each time discarding one of the matches until it can -either match the entire pattern or be certain that it cannot get a -match. For example, when matching @samp{ca*ar} against @samp{caaar}, -the matcher first matches all three @samp{a}s of the string with the -@samp{a*} of the regular expression. However, it cannot then match the -final @samp{ar} of the regular expression against the final @samp{r} of -the string. So it backtracks, discarding the match of the last @samp{a} -in the string. It can then match the remaining @samp{ar}. - - -@node Match-one-or-more Operator, Match-zero-or-one Operator, Match-zero-or-more Operator, Repetition Operators -@subsection The Match-one-or-more Operator (@code{+} or @code{\+}) - -@cindex @samp{+} - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't recognize -this operator. Otherwise, if the syntax bit @code{RE_BK_PLUS_QM} isn't -set, then @samp{+} represents this operator; if it is, then @samp{\+} -does. - -This operator is similar to the match-zero-or-more operator except that -it repeats the preceding regular expression at least once; -@pxref{Match-zero-or-more Operator}, for what it operates on, how some -syntax bits affect it, and how Regex backtracks to match it. - -For example, supposing that @samp{+} represents the match-one-or-more -operator; then @samp{ca+r} matches, e.g., @samp{car} and -@samp{caaaar}, but not @samp{cr}. 
- -@node Match-zero-or-one Operator, Interval Operators, Match-one-or-more Operator, Repetition Operators -@subsection The Match-zero-or-one Operator (@code{?} or @code{\?}) -@cindex @samp{?} - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit -@code{RE_BK_PLUS_QM} isn't set, then @samp{?} represents this operator; -if it is, then @samp{\?} does. - -This operator is similar to the match-zero-or-more operator except that -it repeats the preceding regular expression once or not at all; -@pxref{Match-zero-or-more Operator}, to see what it operates on, how -some syntax bits affect it, and how Regex backtracks to match it. - -For example, supposing that @samp{?} represents the match-zero-or-one -operator; then @samp{ca?r} matches both @samp{car} and @samp{cr}, but -nothing else. - -@node Interval Operators, , Match-zero-or-one Operator, Repetition Operators -@subsection Interval Operators (@code{@{} @dots{} @code{@}} or @code{\@{} @dots{} @code{\@}}) - -@cindex interval expression -@cindex @samp{@{} -@cindex @samp{@}} -@cindex @samp{\@{} -@cindex @samp{\@}} - -If the syntax bit @code{RE_INTERVALS} is set, then Regex recognizes -@dfn{interval expressions}. They repeat the smallest possible preceding -regular expression a specified number of times. - -If the syntax bit @code{RE_NO_BK_BRACES} is set, @samp{@{} represents -the @dfn{open-interval operator} and @samp{@}} represents the -@dfn{close-interval operator} ; otherwise, @samp{\@{} and @samp{\@}} do. - -Specifically, supposing that @samp{@{} and @samp{@}} represent the -open-interval and close-interval operators; then: - -@table @code -@item @{@var{count}@} -matches exactly @var{count} occurrences of the preceding regular -expression. - -@item @{@var{min,}@} -matches @var{min} or more occurrences of the preceding regular -expression. 
- -@item @{@var{min, max}@} -matches at least @var{min} but no more than @var{max} occurrences of -the preceding regular expression. - -@end table - -The interval expression (but not necessarily the regular expression that -contains it) is invalid if: - -@itemize @bullet -@item -@var{min} is greater than @var{max}, or - -@item -any of @var{count}, @var{min}, or @var{max} are outside the range -zero to @code{RE_DUP_MAX} (which symbol @file{regex.h} -defines). - -@end itemize - -If the interval expression is invalid and the syntax bit -@code{RE_NO_BK_BRACES} is set, then Regex considers all the -characters in the would-be interval to be ordinary. If that bit -isn't set, then the regular expression is invalid. - -If the interval expression is valid but there is no preceding regular -expression on which to operate, then if the syntax bit -@code{RE_CONTEXT_INVALID_OPS} is set, the regular expression is invalid. -If that bit isn't set, then Regex considers all the characters---other -than backslashes, which it ignores---in the would-be interval to be -ordinary. - - -@node Alternation Operator, List Operators, Repetition Operators, Common Operators -@section The Alternation Operator (@code{|} or @code{\|}) - -@kindex | -@kindex \| -@cindex alternation operator -@cindex or operator - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit -@code{RE_NO_BK_VBAR} is set, then @samp{|} represents this operator; -otherwise, @samp{\|} does. - -Alternatives match one of a choice of regular expressions: -if you put the character(s) representing the alternation operator between -any two regular expressions @var{a} and @var{b}, the result matches -the union of the strings that @var{a} and @var{b} match. For -example, supposing that @samp{|} is the alternation operator, then -@samp{foo|bar|quux} would match any of @samp{foo}, @samp{bar} or -@samp{quux}. 
- -@ignore -@c Nobody needs to disallow empty alternatives any more. -If the syntax bit @code{RE_NO_EMPTY_ALTS} is set, then if either of the regular -expressions @var{a} or @var{b} is empty, the -regular expression is invalid. More precisely, if this syntax bit is -set, then the alternation operator can't: - -@itemize @bullet -@item -be first or last in a regular expression; - -@item -follow either another alternation operator or an open-group operator -(@pxref{Grouping Operators}); or - -@item -precede a close-group operator. - -@end itemize - -@noindent -For example, supposing @samp{(} and @samp{)} represent the open and -close-group operators, then @samp{|foo}, @samp{foo|}, @samp{foo||bar}, -@samp{foo(|bar)}, and @samp{(foo|)bar} would all be invalid. -@end ignore - -The alternation operator operates on the @emph{largest} possible -surrounding regular expressions. (Put another way, it has the lowest -precedence of any regular expression operator.) -Thus, the only way you can -delimit its arguments is to use grouping. For example, if @samp{(} and -@samp{)} are the open and close-group operators, then @samp{fo(o|b)ar} -would match either @samp{fooar} or @samp{fobar}. (@samp{foo|bar} would -match @samp{foo} or @samp{bar}.) - -@cindex backtracking -The matcher usually tries all combinations of alternatives so as to -match the longest possible string. For example, when matching -@samp{(fooq|foo)*(qbarquux|bar)} against @samp{fooqbarquux}, it cannot -take, say, the first (``depth-first'') combination it could match, since -then it would be content to match just @samp{fooqbar}. 
- -@comment xx something about leftmost-longest - - -@node List Operators, Grouping Operators, Alternation Operator, Common Operators -@section List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]}) - -@cindex matching list -@cindex @samp{[} -@cindex @samp{]} -@cindex @samp{^} -@cindex @samp{-} -@cindex @samp{\} -@cindex @samp{[^} -@cindex nonmatching list -@cindex matching newline -@cindex bracket expression - -@dfn{Lists}, also called @dfn{bracket expressions}, are a set of one or -more items. An @dfn{item} is a character, -@ignore -(These get added when they get implemented.) -a collating symbol, an equivalence class expression, -@end ignore -a character class expression, or a range expression. The syntax bits -affect which kinds of items you can put in a list. We explain the last -two items in subsections below. Empty lists are invalid. - -A @dfn{matching list} matches a single character represented by one of -the list items. You form a matching list by enclosing one or more items -within an @dfn{open-matching-list operator} (represented by @samp{[}) -and a @dfn{close-list operator} (represented by @samp{]}). - -For example, @samp{[ab]} matches either @samp{a} or @samp{b}. -@samp{[ad]*} matches the empty string and any string composed of just -@samp{a}s and @samp{d}s in any order. Regex considers invalid a regular -expression with a @samp{[} but no matching -@samp{]}. - -@dfn{Nonmatching lists} are similar to matching lists except that they -match a single character @emph{not} represented by one of the list -items. You use an @dfn{open-nonmatching-list operator} (represented by -@samp{[^}@footnote{Regex therefore doesn't consider the @samp{^} to be -the first character in the list. If you put a @samp{^} character first -in (what you think is) a matching list, you'll turn it into a -nonmatching list.}) instead of an open-matching-list operator to start a -nonmatching list. 
- -For example, @samp{[^ab]} matches any character except @samp{a} or -@samp{b}. - -If the @code{posix_newline} field in the pattern buffer (@pxref{GNU -Pattern Buffers}) is set, then nonmatching lists do not match a newline. - -Most characters lose any special meaning inside a list. The special -characters inside a list follow. - -@table @samp -@item ] -ends the list if it's not the first list item. So, if you want to make -the @samp{]} character a list item, you must put it first. - -@item \ -quotes the next character if the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is -set. - -@ignore -Put these in if they get implemented. - -@item [. -represents the open-collating-symbol operator (@pxref{Collating Symbol -Operators}). - -@item .] -represents the close-collating-symbol operator. - -@item [= -represents the open-equivalence-class operator (@pxref{Equivalence Class -Operators}). - -@item =] -represents the close-equivalence-class operator. - -@end ignore - -@item [: -represents the open-character-class operator (@pxref{Character Class -Operators}) if the syntax bit @code{RE_CHAR_CLASSES} is set and what -follows is a valid character class expression. - -@item :] -represents the close-character-class operator if the syntax bit -@code{RE_CHAR_CLASSES} is set and what precedes it is an -open-character-class operator followed by a valid character class name. - -@item - -represents the range operator (@pxref{Range Operator}) if it's -not first or last in a list or the ending point of a range. - -@end table - -@noindent -All other characters are ordinary. For example, @samp{[.*]} matches -@samp{.} and @samp{*}. - -@menu -* Character Class Operators:: [:class:] -* Range Operator:: start-end -@end menu - -@ignore -(If collating symbols and equivalence class expressions get implemented, -then add this.) 
- -node Collating Symbol Operators -subsubsection Collating Symbol Operators (@code{[.} @dots{} @code{.]}) - -If the syntax bit @code{XX} is set, then you can represent -collating symbols inside lists. You form a @dfn{collating symbol} by -putting a collating element between an @dfn{open-collating-symbol -operator} and an @dfn{close-collating-symbol operator}. @samp{[.} -represents the open-collating-symbol operator and @samp{.]} represents -the close-collating-symbol operator. For example, if @samp{ll} is a -collating element, then @samp{[[.ll.]]} would match @samp{ll}. - -node Equivalence Class Operators -subsubsection Equivalence Class Operators (@code{[=} @dots{} @code{=]}) -@cindex equivalence class expression in regex -@cindex @samp{[=} in regex -@cindex @samp{=]} in regex - -If the syntax bit @code{XX} is set, then Regex recognizes equivalence class -expressions inside lists. A @dfn{equivalence class expression} is a set -of collating elements which all belong to the same equivalence class. -You form an equivalence class expression by putting a collating -element between an @dfn{open-equivalence-class operator} and a -@dfn{close-equivalence-class operator}. @samp{[=} represents the -open-equivalence-class operator and @samp{=]} represents the -close-equivalence-class operator. For example, if @samp{a} and @samp{A} -were an equivalence class, then both @samp{[[=a=]]} and @samp{[[=A=]]} -would match both @samp{a} and @samp{A}. If the collating element in an -equivalence class expression isn't part of an equivalence class, then -the matcher considers the equivalence class expression to be a collating -symbol. 
- -@end ignore - -@node Character Class Operators, Range Operator, , List Operators -@subsection Character Class Operators (@code{[:} @dots{} @code{:]}) - -@cindex character classes -@cindex @samp{[:} in regex -@cindex @samp{:]} in regex - -If the syntax bit @code{RE_CHAR_CLASSES} is set, then Regex -recognizes character class expressions inside lists. A @dfn{character -class expression} matches one character from a given class. You form a -character class expression by putting a character class name between an -@dfn{open-character-class operator} (represented by @samp{[:}) and a -@dfn{close-character-class operator} (represented by @samp{:]}). The -character class names and their meanings are: - -@table @code - -@item alnum -letters and digits - -@item alpha -letters - -@item blank -system-dependent; for @sc{gnu}, a space or tab - -@item cntrl -control characters (in the @sc{ascii} encoding, code 0177 and codes -less than 040) - -@item digit -digits - -@item graph -same as @code{print} except omits space - -@item lower -lowercase letters - -@item print -printable characters (in the @sc{ascii} encoding, space through -tilde---codes 040 through 0176) - -@item punct -printable characters that are neither space nor alphanumeric - -@item space -space, horizontal tab, carriage return, newline, vertical tab, and form feed - -@item upper -uppercase letters - -@item xdigit -hexadecimal digits: @code{0}--@code{9}, @code{a}--@code{f}, @code{A}--@code{F} - -@end table - -@noindent -These correspond to the definitions in the C library's @file{<ctype.h>} -facility. For example, @samp{[:alpha:]} corresponds to the standard -facility @code{isalpha}. Regex recognizes character class expressions -only inside of lists; so @samp{[[:alpha:]]} matches any letter, but -@samp{[:alpha:]} outside of a bracket expression and not followed by a -repetition operator matches just itself. 
- -@node Range Operator, , Character Class Operators, List Operators -@subsection The Range Operator (@code{-}) - -Regex recognizes @dfn{range expressions} inside a list. They represent -those characters -that fall between two elements in the current collating sequence. You -form a range expression by putting a @dfn{range operator} between two -@ignore -(If these get implemented, then substitute this for ``characters.'') -of any of the following: characters, collating elements, collating symbols, -and equivalence class expressions. The starting point of the range and -the ending point of the range don't have to be the same kind of item, -e.g., the starting point could be a collating element and the ending -point could be an equivalence class expression. If a range's ending -point is an equivalence class, then all the collating elements in that -class will be in the range. -@end ignore -characters.@footnote{You can't use a character class for the starting -or ending point of a range, since a character class is not a single -character.} @samp{-} represents the range operator. For example, -@samp{a-f} within a list represents all the characters from @samp{a} -through @samp{f} -inclusively. - -If the syntax bit @code{RE_NO_EMPTY_RANGES} is set, then if the range's -ending point collates less than its starting point, the range (and the -regular expression containing it) is invalid. For example, the regular -expression @samp{[z-a]} would be invalid. If this bit isn't set, then -Regex considers such a range to be empty. - -Since @samp{-} represents the range operator, if you want to make a -@samp{-} character itself -a list item, you must do one of the following: - -@itemize @bullet -@item -Put the @samp{-} either first or last in the list. - -@item -Include a range whose starting point collates strictly lower than -@samp{-} and whose ending point collates equal or higher. 
Unless a -range is the first item in a list, a @samp{-} can't be its starting -point, but @emph{can} be its ending point. That is because Regex -considers @samp{-} to be the range operator unless it is preceded by -another @samp{-}. For example, in the @sc{ascii} encoding, @samp{)}, -@samp{*}, @samp{+}, @samp{,}, @samp{-}, @samp{.}, and @samp{/} are -contiguous characters in the collating sequence. You might think that -@samp{[)-+--/]} has two ranges: @samp{)-+} and @samp{--/}. Rather, it -has the ranges @samp{)-+} and @samp{+--}, plus the character @samp{/}, so -it matches, e.g., @samp{,}, not @samp{.}. - -@item -Put a range whose starting point is @samp{-} first in the list. - -@end itemize - -For example, @samp{[-a-z]} matches a lowercase letter or a hyphen (in -English, in @sc{ascii}). - - -@node Grouping Operators, Back-reference Operator, List Operators, Common Operators -@section Grouping Operators (@code{(} @dots{} @code{)} or @code{\(} @dots{} @code{\)}) - -@kindex ( -@kindex ) -@kindex \( -@kindex \) -@cindex grouping -@cindex subexpressions -@cindex parenthesizing - -A @dfn{group}, also known as a @dfn{subexpression}, consists of an -@dfn{open-group operator}, any number of other operators, and a -@dfn{close-group operator}. Regex treats this sequence as a unit, just -as mathematics and programming languages treat a parenthesized -expression as a unit. - -Therefore, using @dfn{groups}, you can: - -@itemize @bullet -@item -delimit the argument(s) to an alternation operator (@pxref{Alternation -Operator}) or a repetition operator (@pxref{Repetition -Operators}). - -@item -keep track of the indices of the substring that matched a given group. -@xref{Using Registers}, for a precise explanation. -This lets you: - -@itemize @bullet -@item -use the back-reference operator (@pxref{Back-reference Operator}). - -@item -use registers (@pxref{Using Registers}). 
- -@end itemize - -@end itemize - -If the syntax bit @code{RE_NO_BK_PARENS} is set, then @samp{(} represents -the open-group operator and @samp{)} represents the -close-group operator; otherwise, @samp{\(} and @samp{\)} do. - -If the syntax bit @code{RE_UNMATCHED_RIGHT_PAREN_ORD} is set and a -close-group operator has no matching open-group operator, then Regex -considers it to match @samp{)}. - - -@node Back-reference Operator, Anchoring Operators, Grouping Operators, Common Operators -@section The Back-reference Operator (@code{\}@var{digit}) - -@cindex back references - -If the syntax bit @code{RE_NO_BK_REFS} isn't set, then Regex recognizes -back references. A back reference matches a specified preceding group. -The back reference operator is represented by @samp{\@var{digit}} -anywhere after the end of a regular expression's @w{@var{digit}-th} -group (@pxref{Grouping Operators}). - -@var{digit} must be between @samp{1} and @samp{9}. The matcher assigns -numbers 1 through 9 to the first nine groups it encounters. By using -one of @samp{\1} through @samp{\9} after the corresponding group's -close-group operator, you can match a substring identical to the -one that the group matched. - -Back references match according to the following (in all examples below, -@samp{(} represents the open-group, @samp{)} the close-group, @samp{@{} -the open-interval and @samp{@}} the close-interval operator): - -@itemize @bullet -@item -If the group matches a substring, the back reference matches an -identical substring. For example, @samp{(a)\1} matches @samp{aa} and -@samp{(bana)na\1bo\1} matches @samp{bananabanabobana}. Likewise, -@samp{(.*)\1} matches any (newline-free if the syntax bit -@code{RE_DOT_NEWLINE} isn't set) string that is composed of two -identical halves; the @samp{(.*)} matches the first half and the -@samp{\1} matches the second half. 
- -@item -If the group matches more than once (as it might if followed -by, e.g., a repetition operator), then the back reference matches the -substring the group @emph{last} matched. For example, -@samp{((a*)b)*\1\2} matches @samp{aabababa}; first @w{group 1} (the -outer one) matches @samp{aab} and @w{group 2} (the inner one) matches -@samp{aa}. Then @w{group 1} matches @samp{ab} and @w{group 2} matches -@samp{a}. So, @samp{\1} matches @samp{ab} and @samp{\2} matches -@samp{a}. - -@item -If the group doesn't participate in a match, i.e., it is part of an -alternative not taken or a repetition operator allows zero repetitions -of it, then the back reference makes the whole match fail. For example, -@samp{(one()|two())-and-(three\2|four\3)} matches @samp{one-and-three} -and @samp{two-and-four}, but not @samp{one-and-four} or -@samp{two-and-three}. To see why: if the pattern matches -@samp{one-and-}, then its @w{group 2} matches the empty string and its -@w{group 3} doesn't participate in the match. So, if it then matches -@samp{four}, then when it tries to back reference @w{group 3}---which it -will attempt to do because @samp{\3} follows the @samp{four}---the match -will fail because @w{group 3} didn't participate in the match. - -@end itemize - -You can use a back reference as an argument to a repetition operator. For -example, @samp{(a(b))\2*} matches @samp{a} followed by one or more -@samp{b}s. Similarly, @samp{(a(b))\2@{3@}} matches @samp{abbbb}. - -If there is no preceding @w{@var{digit}-th} subexpression, the regular -expression is invalid. - - -@node Anchoring Operators, , Back-reference Operator, Common Operators -@section Anchoring Operators - -@cindex anchoring -@cindex regexp anchoring - -These operators can constrain a pattern to match only at the beginning or -end of the entire string or at the beginning or end of a line. 
- -@menu -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ -@end menu - - -@node Match-beginning-of-line Operator, Match-end-of-line Operator, , Anchoring Operators -@subsection The Match-beginning-of-line Operator (@code{^}) - -@kindex ^ -@cindex beginning-of-line operator -@cindex anchors - -This operator can match the empty string either at the beginning of the -string or after a newline character. Thus, it is said to @dfn{anchor} -the pattern to the beginning of a line. - -In the cases following, @samp{^} represents this operator. (Otherwise, -@samp{^} is ordinary.) - -@itemize @bullet - -@item -It (the @samp{^}) is first in the pattern, as in @samp{^foo}. - -@cnindex RE_CONTEXT_INDEP_ANCHORS @r{(and @samp{^})} -@item -The syntax bit @code{RE_CONTEXT_INDEP_ANCHORS} is set, and it is outside -a bracket expression. - -@cindex open-group operator and @samp{^} -@cindex alternation operator and @samp{^} -@item -It follows an open-group or alternation operator, as in @samp{a\(^b\)} -and @samp{a\|^b}. @xref{Grouping Operators}, and @ref{Alternation -Operator}. - -@end itemize - -These rules imply that some valid patterns containing @samp{^} cannot be -matched; for example, @samp{foo^bar} if @code{RE_CONTEXT_INDEP_ANCHORS} -is set. - -@vindex not_bol @r{field in pattern buffer} -If the @code{not_bol} field is set in the pattern buffer (@pxref{GNU -Pattern Buffers}), then @samp{^} fails to match at the beginning of the -string. @xref{POSIX Matching}, for when you might find this useful. - -@vindex newline_anchor @r{field in pattern buffer} -If the @code{newline_anchor} field is not set in the pattern buffer, then -@samp{^} fails to match after a newline. Leaving it unset is useful when you do not -regard the string to be matched as broken into lines. 
- - -@node Match-end-of-line Operator, , Match-beginning-of-line Operator, Anchoring Operators -@subsection The Match-end-of-line Operator (@code{$}) - -@kindex $ -@cindex end-of-line operator -@cindex anchors - -This operator can match the empty string either at the end of -the string or before a newline character in the string. Thus, it is -said to @dfn{anchor} the pattern to the end of a line. - -It is always represented by @samp{$}. For example, @samp{foo$} usually -matches, e.g., @samp{foo} and, e.g., the first three characters of -@samp{foo\nbar}. - -Its interaction with the syntax bits and pattern buffer fields is -exactly the dual of @samp{^}'s; see the previous section. (That is, -``beginning'' becomes ``end'', ``next'' becomes ``previous'', and -``after'' becomes ``before''.) - - -@node GNU Operators, GNU Emacs Operators, Common Operators, Top -@chapter GNU Operators - -Following are operators that @sc{gnu} defines (and @sc{posix} doesn't). - -@menu -* Word Operators:: -* Buffer Operators:: -@end menu - -@node Word Operators, Buffer Operators, , GNU Operators -@section Word Operators - -The operators in this section require Regex to recognize parts of words. -Regex uses a syntax table to determine whether or not a character is -part of a word, i.e., whether or not it is @dfn{word-constituent}. - -@menu -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W -@end menu - -@node Non-Emacs Syntax Tables, Match-word-boundary Operator, , Word Operators -@subsection Non-Emacs Syntax Tables - -A @dfn{syntax table} is an array indexed by the characters in your -character set. In the @sc{ascii} encoding, therefore, a syntax table -has 256 elements. Regex always uses a @code{char *} variable -@code{re_syntax_table} as its syntax table. 
In some cases, it -initializes this variable and in others it expects you to initialize it. - -@itemize @bullet -@item -If Regex is compiled with the preprocessor symbols @code{emacs} and -@code{SYNTAX_TABLE} both undefined, then Regex allocates -@code{re_syntax_table} and initializes an element @var{i} either to -@code{Sword} (which it defines) if @var{i} is a letter, number, or -@samp{_}, or to zero if it's not. - -@item -If Regex is compiled with @code{emacs} undefined but @code{SYNTAX_TABLE} -defined, then Regex expects you to define a @code{char *} variable -@code{re_syntax_table} to be a valid syntax table. - -@item -@xref{Emacs Syntax Tables}, for what happens when Regex is compiled with -the preprocessor symbol @code{emacs} defined. - -@end itemize - -@node Match-word-boundary Operator, Match-within-word Operator, Non-Emacs Syntax Tables, Word Operators -@subsection The Match-word-boundary Operator (@code{\b}) - -@cindex @samp{\b} -@cindex word boundaries, matching - -This operator (represented by @samp{\b}) matches the empty string at -either the beginning or the end of a word. For example, @samp{\brat\b} -matches the separate word @samp{rat}. - -@node Match-within-word Operator, Match-beginning-of-word Operator, Match-word-boundary Operator, Word Operators -@subsection The Match-within-word Operator (@code{\B}) - -@cindex @samp{\B} - -This operator (represented by @samp{\B}) matches the empty string within -a word. For example, @samp{c\Brat\Be} matches @samp{crate}, but -@samp{dirty \Brat} doesn't match @samp{dirty rat}. - -@node Match-beginning-of-word Operator, Match-end-of-word Operator, Match-within-word Operator, Word Operators -@subsection The Match-beginning-of-word Operator (@code{\<}) - -@cindex @samp{\<} - -This operator (represented by @samp{\<}) matches the empty string at the -beginning of a word. 
- -@node Match-end-of-word Operator, Match-word-constituent Operator, Match-beginning-of-word Operator, Word Operators -@subsection The Match-end-of-word Operator (@code{\>}) - -@cindex @samp{\>} - -This operator (represented by @samp{\>}) matches the empty string at the -end of a word. - -@node Match-word-constituent Operator, Match-non-word-constituent Operator, Match-end-of-word Operator, Word Operators -@subsection The Match-word-constituent Operator (@code{\w}) - -@cindex @samp{\w} - -This operator (represented by @samp{\w}) matches any word-constituent -character. - -@node Match-non-word-constituent Operator, , Match-word-constituent Operator, Word Operators -@subsection The Match-non-word-constituent Operator (@code{\W}) - -@cindex @samp{\W} - -This operator (represented by @samp{\W}) matches any character that is -not word-constituent. - - -@node Buffer Operators, , Word Operators, GNU Operators -@section Buffer Operators - -Following are operators which work on buffers. In Emacs, a @dfn{buffer} -is, naturally, an Emacs buffer. For other programs, Regex considers the -entire string to be matched as the buffer. - -@menu -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' -@end menu - - -@node Match-beginning-of-buffer Operator, Match-end-of-buffer Operator, , Buffer Operators -@subsection The Match-beginning-of-buffer Operator (@code{\`}) - -@cindex @samp{\`} - -This operator (represented by @samp{\`}) matches the empty string at the -beginning of the buffer. - -@node Match-end-of-buffer Operator, , Match-beginning-of-buffer Operator, Buffer Operators -@subsection The Match-end-of-buffer Operator (@code{\'}) - -@cindex @samp{\'} - -This operator (represented by @samp{\'}) matches the empty string at the -end of the buffer. 
- - -@node GNU Emacs Operators, What Gets Matched?, GNU Operators, Top -@chapter GNU Emacs Operators - -Following are operators that @sc{gnu} defines (and @sc{posix} doesn't) -that you can use only when Regex is compiled with the preprocessor -symbol @code{emacs} defined. - -@menu -* Syntactic Class Operators:: -@end menu - - -@node Syntactic Class Operators, , , GNU Emacs Operators -@section Syntactic Class Operators - -The operators in this section require Regex to recognize the syntactic -classes of characters. Regex uses a syntax table to determine this. - -@menu -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS -@end menu - -@node Emacs Syntax Tables, Match-syntactic-class Operator, , Syntactic Class Operators -@subsection Emacs Syntax Tables - -A @dfn{syntax table} is an array indexed by the characters in your -character set. In the @sc{ascii} encoding, therefore, a syntax table -has 256 elements. - -If Regex is compiled with the preprocessor symbol @code{emacs} defined, -then Regex expects you to define and initialize the variable -@code{re_syntax_table} to be an Emacs syntax table. Emacs' syntax -tables are more complicated than Regex's own (@pxref{Non-Emacs Syntax -Tables}). @xref{Syntax, , Syntax, emacs, The GNU Emacs User's Manual}, -for a description of Emacs' syntax tables. - -@node Match-syntactic-class Operator, Match-not-syntactic-class Operator, Emacs Syntax Tables, Syntactic Class Operators -@subsection The Match-syntactic-class Operator (@code{\s}@var{class}) - -@cindex @samp{\s} - -This operator matches any character whose syntactic class is represented -by a specified character. @samp{\s@var{class}} represents this operator -where @var{class} is the character representing the syntactic class you -want. For example, @samp{w} represents the syntactic -class of word-constituent characters, so @samp{\sw} matches any -word-constituent character. 
- -@node Match-not-syntactic-class Operator, , Match-syntactic-class Operator, Syntactic Class Operators -@subsection The Match-not-syntactic-class Operator (@code{\S}@var{class}) - -@cindex @samp{\S} - -This operator is similar to the match-syntactic-class operator except -that it matches any character whose syntactic class is @emph{not} -represented by the specified character. @samp{\S@var{class}} represents -this operator. For example, @samp{w} represents the syntactic class of -word-constituent characters, so @samp{\Sw} matches any character that is -not word-constituent. - - -@node What Gets Matched?, Programming with Regex, GNU Emacs Operators, Top -@chapter What Gets Matched? - -Regex usually matches strings according to the ``leftmost longest'' -rule; that is, it chooses the longest of the leftmost matches. This -does not mean that for a regular expression containing subexpressions -that it simply chooses the longest match for each subexpression, left to -right; the overall match must also be the longest possible one. - -For example, @samp{(ac*)(c*d[ac]*)\1} matches @samp{acdacaaa}, not -@samp{acdac}, as it would if it were to choose the longest match for the -first subexpression. - - -@node Programming with Regex, Copying, What Gets Matched?, Top -@chapter Programming with Regex - -Here we describe how you use the Regex data structures and functions in -C programs. Regex has three interfaces: one designed for @sc{gnu}, one -compatible with @sc{posix} and one compatible with Berkeley @sc{unix}. - -@menu -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: -@end menu - - -@node GNU Regex Functions, POSIX Regex Functions, , Programming with Regex -@section GNU Regex Functions - -If you're writing code that doesn't need to be compatible with either -@sc{posix} or Berkeley @sc{unix}, you can use these functions. They -provide more options than the other interfaces. - -@menu -* GNU Pattern Buffers:: The re_pattern_buffer type. 
-* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () -@end menu - - -@node GNU Pattern Buffers, GNU Regular Expression Compiling, , GNU Regex Functions -@subsection GNU Pattern Buffers - -@cindex pattern buffer, definition of -@tindex re_pattern_buffer @r{definition} -@tindex struct re_pattern_buffer @r{definition} - -To compile, match, or search for a given regular expression, you must -supply a pattern buffer. A @dfn{pattern buffer} holds one compiled -regular expression.@footnote{Regular expressions are also referred to as -``patterns,'' hence the name ``pattern buffer.''} - -You can have several different pattern buffers simultaneously, each -holding a compiled pattern for a different regular expression. - -@file{regex.h} defines the pattern buffer @code{struct} as follows: - -@example - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. 
*/ - char *translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - -@end example - - -@node GNU Regular Expression Compiling, GNU Matching, GNU Pattern Buffers, GNU Regex Functions -@subsection GNU Regular Expression Compiling - -In @sc{gnu}, you can both match and search for a given regular -expression. To do either, you must first compile it in a pattern buffer -(@pxref{GNU Pattern Buffers}). - -@cindex syntax initialization -@vindex re_syntax_options @r{initialization} -Regular expressions match according to the syntax with which they were -compiled; with @sc{gnu}, you indicate what syntax you want by setting -the variable @code{re_syntax_options} (declared in @file{regex.h} and -defined in @file{regex.c}) before calling the compiling function, -@code{re_compile_pattern} (see below). 
@xref{Syntax Bits}, and -@ref{Predefined Syntaxes}. - -You can change the value of @code{re_syntax_options} at any time. -Usually, however, you set its value once and then never change it. - -@cindex pattern buffer initialization -@code{re_compile_pattern} takes a pattern buffer as an argument. You -must initialize the following fields: - -@table @code - -@item translate @r{initialization} - -@item translate -@vindex translate @r{initialization} -Initialize this to point to a translate table if you want one, or to -zero if you don't. We explain translate tables in @ref{GNU Translate -Tables}. - -@item fastmap -@vindex fastmap @r{initialization} -Initialize this to nonzero if you want a fastmap, or to zero if you -don't. - -@item buffer -@itemx allocated -@vindex buffer @r{initialization} -@vindex allocated @r{initialization} -@findex malloc -If you want @code{re_compile_pattern} to allocate memory for the -compiled pattern, set both of these to zero. If you have an existing -block of memory (allocated with @code{malloc}) you want Regex to use, -set @code{buffer} to its address and @code{allocated} to its size (in -bytes). - -@code{re_compile_pattern} uses @code{realloc} to extend the space for -the compiled pattern as necessary. - -@end table - -To compile a pattern buffer, use: - -@findex re_compile_pattern -@example -char * -re_compile_pattern (const char *@var{regex}, const int @var{regex_size}, - struct re_pattern_buffer *@var{pattern_buffer}) -@end example - -@noindent -@var{regex} is the regular expression's address, @var{regex_size} is its -length, and @var{pattern_buffer} is the pattern buffer's address. - -If @code{re_compile_pattern} successfully compiles the regular -expression, it returns zero and sets @code{*@var{pattern_buffer}} to the -compiled pattern. It sets the pattern buffer's fields as follows: - -@table @code -@item buffer -@vindex buffer @r{field, set by @code{re_compile_pattern}} -to the compiled pattern. 
- -@item used -@vindex used @r{field, set by @code{re_compile_pattern}} -to the number of bytes the compiled pattern in @code{buffer} occupies. - -@item syntax -@vindex syntax @r{field, set by @code{re_compile_pattern}} -to the current value of @code{re_syntax_options}. - -@item re_nsub -@vindex re_nsub @r{field, set by @code{re_compile_pattern}} -to the number of subexpressions in @var{regex}. - -@item fastmap_accurate -@vindex fastmap_accurate @r{field, set by @code{re_compile_pattern}} -to zero on the theory that the pattern you're compiling is different -than the one previously compiled into @code{buffer}; in that case (since -you can't make a fastmap without a compiled pattern), -@code{fastmap} would either contain an incompatible fastmap, or nothing -at all. - -@c xx what else? -@end table - -If @code{re_compile_pattern} can't compile @var{regex}, it returns an -error string corresponding to one of the errors listed in @ref{POSIX -Regular Expression Compiling}. - - -@node GNU Matching, GNU Searching, GNU Regular Expression Compiling, GNU Regex Functions -@subsection GNU Matching - -@cindex matching with GNU functions - -Matching the @sc{gnu} way means trying to match as much of a string as -possible starting at a position within it you specify. Once you've compiled -a pattern into a pattern buffer (@pxref{GNU Regular Expression -Compiling}), you can ask the matcher to match that pattern against a -string using: - -@findex re_match -@example -int -re_match (struct re_pattern_buffer *@var{pattern_buffer}, - const char *@var{string}, const int @var{size}, - const int @var{start}, struct re_registers *@var{regs}) -@end example - -@noindent -@var{pattern_buffer} is the address of a pattern buffer containing a -compiled pattern. @var{string} is the string you want to match; it can -contain newline and null characters. @var{size} is the length of that -string. 
@var{start} is the string index at which you want to -begin matching; the first character of @var{string} is at index zero. -@xref{Using Registers}, for an explanation of @var{regs}; you can safely -pass zero. - -@code{re_match} matches the regular expression in @var{pattern_buffer} -against the string @var{string} according to the syntax in -@var{pattern_buffer}'s @code{syntax} field. (@xref{GNU Regular -Expression Compiling}, for how to set it.) The function returns -@math{-1} if the compiled pattern does not match any part of -@var{string} and @math{-2} if an internal error happens; otherwise, it -returns how many (possibly zero) characters of @var{string} the pattern -matched. - -An example: suppose @var{pattern_buffer} points to a pattern buffer -containing the compiled pattern for @samp{a*}, and @var{string} points -to @samp{aaaaab} (whereupon @var{size} should be 6). Then if @var{start} -is 2, @code{re_match} returns 3, i.e., @samp{a*} would have matched the -last three @samp{a}s in @var{string}. If @var{start} is 0, -@code{re_match} returns 5, i.e., @samp{a*} would have matched all the -@samp{a}s in @var{string}. If @var{start} is either 5 or 6, it returns -zero. - -If @var{start} is not between zero and @var{size}, then -@code{re_match} returns @math{-1}. - - -@node GNU Searching, Matching/Searching with Split Data, GNU Matching, GNU Regex Functions -@subsection GNU Searching - -@cindex searching with GNU functions - -@dfn{Searching} means trying to match starting at successive positions -within a string. The function @code{re_search} does this. - -Before calling @code{re_search}, you must compile your regular -expression. @xref{GNU Regular Expression Compiling}.
- -Here is the function declaration: - -@findex re_search -@example -int -re_search (struct re_pattern_buffer *@var{pattern_buffer}, - const char *@var{string}, const int @var{size}, - const int @var{start}, const int @var{range}, - struct re_registers *@var{regs}) -@end example - -@noindent -@vindex start @r{argument to @code{re_search}} -@vindex range @r{argument to @code{re_search}} -whose arguments are the same as those to @code{re_match} (@pxref{GNU -Matching}) except that the two arguments @var{start} and @var{range} -replace @code{re_match}'s argument @var{start}. - -If @var{range} is positive, then @code{re_search} attempts a match -starting first at index @var{start}, then at @math{@var{start} + 1} if -that fails, and so on, up to @math{@var{start} + @var{range}}; if -@var{range} is negative, then it attempts a match starting first at -index @var{start}, then at @math{@var{start} -1} if that fails, and so -on. - -If @var{start} is not between zero and @var{size}, then @code{re_search} -returns @math{-1}. When @var{range} is positive, @code{re_search} -adjusts @var{range} so that @math{@var{start} + @var{range} - 1} is -between zero and @var{size}, if necessary; that way it won't search -outside of @var{string}. Similarly, when @var{range} is negative, -@code{re_search} adjusts @var{range} so that @math{@var{start} + -@var{range} + 1} is between zero and @var{size}, if necessary. - -If the @code{fastmap} field of @var{pattern_buffer} is zero, -@code{re_search} matches starting at consecutive positions; otherwise, -it uses @code{fastmap} to make the search more efficient. -@xref{Searching with Fastmaps}. - -If no match is found, @code{re_search} returns @math{-1}. If -a match is found, it returns the index where the match began. If an -internal error happens, it returns @math{-2}. 
- - -@node Matching/Searching with Split Data, Searching with Fastmaps, GNU Searching, GNU Regex Functions -@subsection Matching and Searching with Split Data - -Using the functions @code{re_match_2} and @code{re_search_2}, you can -match or search in data that is divided into two strings. - -The function: - -@findex re_match_2 -@example -int -re_match_2 (struct re_pattern_buffer *@var{buffer}, - const char *@var{string1}, const int @var{size1}, - const char *@var{string2}, const int @var{size2}, - const int @var{start}, - struct re_registers *@var{regs}, - const int @var{stop}) -@end example - -@noindent -is similar to @code{re_match} (@pxref{GNU Matching}) except that you -pass @emph{two} data strings and sizes, and an index @var{stop} beyond -which you don't want the matcher to try matching. As with -@code{re_match}, if it succeeds, @code{re_match_2} returns how many -characters of @var{string} it matched. Regard @var{string1} and -@var{string2} as concatenated when you set the arguments @var{start} and -@var{stop} and use the contents of @var{regs}; @code{re_match_2} never -returns a value larger than @math{@var{size1} + @var{size2}}. - -The function: - -@findex re_search_2 -@example -int -re_search_2 (struct re_pattern_buffer *@var{buffer}, - const char *@var{string1}, const int @var{size1}, - const char *@var{string2}, const int @var{size2}, - const int @var{start}, const int @var{range}, - struct re_registers *@var{regs}, - const int @var{stop}) -@end example - -@noindent -is similarly related to @code{re_search}. - - -@node Searching with Fastmaps, GNU Translate Tables, Matching/Searching with Split Data, GNU Regex Functions -@subsection Searching with Fastmaps - -@cindex fastmaps -If you're searching through a long string, you should use a fastmap. -Without one, the searcher tries to match at consecutive positions in the -string. Generally, most of the characters in the string could not start -a match. 
It takes much longer to try matching at a given position in the -string than it does to check in a table whether or not the character at -that position could start a match. A @dfn{fastmap} is such a table. - -More specifically, a fastmap is an array indexed by the characters in -your character set. Under the @sc{ascii} encoding, therefore, a fastmap -has 256 elements. If you want the searcher to use a fastmap with a -given pattern buffer, you must allocate the array and assign the array's -address to the pattern buffer's @code{fastmap} field. You either can -compile the fastmap yourself or have @code{re_search} do it for you; -when @code{fastmap} is nonzero, it automatically compiles a fastmap the -first time you search using a particular compiled pattern. - -To compile a fastmap yourself, use: - -@findex re_compile_fastmap -@example -int -re_compile_fastmap (struct re_pattern_buffer *@var{pattern_buffer}) -@end example - -@noindent -@var{pattern_buffer} is the address of a pattern buffer. If the -character @var{c} could start a match for the pattern, -@code{re_compile_fastmap} makes -@code{@var{pattern_buffer}->fastmap[@var{c}]} nonzero. It returns -@math{0} if it can compile a fastmap and @math{-2} if there is an -internal error. For example, if @samp{|} is the alternation operator -and @var{pattern_buffer} holds the compiled pattern for @samp{a|b}, then -@code{re_compile_fastmap} sets @code{fastmap['a']} and -@code{fastmap['b']} (and no others). - -@code{re_search} uses a fastmap as it moves along in the string: it -checks the string's characters until it finds one that's in the fastmap. -Then it tries matching at that character. If the match fails, it -repeats the process. So, by using a fastmap, @code{re_search} doesn't -waste time trying to match at positions in the string that couldn't -start a match. - -If you don't want @code{re_search} to use a fastmap, -store zero in the @code{fastmap} field of the pattern buffer before -calling @code{re_search}. 
- -Once you've initialized a pattern buffer's @code{fastmap} field, you -need never do so again---even if you compile a new pattern in -it---provided the way the field is set still reflects whether or not you -want a fastmap. @code{re_search} will still either do nothing if -@code{fastmap} is null or, if it isn't, compile a new fastmap for the -new pattern. - -@node GNU Translate Tables, Using Registers, Searching with Fastmaps, GNU Regex Functions -@subsection GNU Translate Tables - -If you set the @code{translate} field of a pattern buffer to a translate -table, then the @sc{gnu} Regex functions to which you've passed that -pattern buffer use it to apply a simple transformation -to all the regular expression and string characters at which they look. - -A @dfn{translate table} is an array indexed by the characters in your -character set. Under the @sc{ascii} encoding, therefore, a translate -table has 256 elements. The array's elements are also characters in -your character set. When the Regex functions see a character @var{c}, -they use @code{translate[@var{c}]} in its place, with one exception: the -character after a @samp{\} is not translated. (This ensures that the -operators, e.g., @samp{\B} and @samp{\b}, are always distinguishable.) - -For example, a table that maps all lowercase letters to the -corresponding uppercase ones would cause the matcher to ignore -differences in case.@footnote{A table that maps all uppercase letters to -the corresponding lowercase ones would work just as well for this -purpose.} Such a table would map all characters except lowercase letters -to themselves, and lowercase letters to the corresponding uppercase -ones.
Under the @sc{ascii} encoding, here's how you could initialize -such a table (we'll call it @code{case_fold}): - -@example -for (i = 0; i < 256; i++) - case_fold[i] = i; -for (i = 'a'; i <= 'z'; i++) - case_fold[i] = i - ('a' - 'A'); -@end example - -You tell Regex to use a translate table on a given pattern buffer by -assigning that table's address to the @code{translate} field of that -buffer. If you don't want Regex to do any translation, put zero into -this field. You'll get weird results if you change the table's contents -anytime between compiling the pattern buffer, compiling its fastmap, and -matching or searching with the pattern buffer. - -@node Using Registers, Freeing GNU Pattern Buffers, GNU Translate Tables, GNU Regex Functions -@subsection Using Registers - -A group in a regular expression can match a (possibly empty) substring -of the string that the regular expression as a whole matched. The matcher -remembers the beginning and end of the substring matched by -each group. - -To find out what they matched, pass a nonzero @var{regs} argument to a -@sc{gnu} matching or searching function (@pxref{GNU Matching} and -@ref{GNU Searching}), i.e., the address of a structure of this type, as -defined in @file{regex.h}: - -@c We don't bother to include this directly from regex.h, -@c since it changes so rarely. -@example -@tindex re_registers -@vindex num_regs @r{in @code{struct re_registers}} -@vindex start @r{in @code{struct re_registers}} -@vindex end @r{in @code{struct re_registers}} -struct re_registers -@{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -@}; -@end example - -Except for (possibly) the @var{num_regs}'th element (see below), the -@var{i}th element of the @code{start} and @code{end} arrays records -information about the @var{i}th group in the pattern. (They're declared -as C pointers, but this is only because not all C compilers accept -zero-length arrays; conceptually, it is simplest to think of them as -arrays.)
- -The @code{start} and @code{end} arrays are allocated in various ways, -depending on the value of the @code{regs_allocated} -@vindex regs_allocated -field in the pattern buffer passed to the matcher. - -The simplest and perhaps most useful way is to let the matcher (re)allocate -enough space to record information for all the groups in the regular -expression. If @code{regs_allocated} is @code{REGS_UNALLOCATED}, -@vindex REGS_UNALLOCATED -the matcher allocates @math{1 + @var{re_nsub}} elements (@code{re_nsub} is another field in the -pattern buffer; @pxref{GNU Pattern Buffers}). It sets the extra element -to @math{-1}, and sets @code{regs_allocated} to @code{REGS_REALLOCATE}. -@vindex REGS_REALLOCATE -Then on subsequent calls with the same pattern buffer and @var{regs} -arguments, the matcher reallocates more space if necessary. - -It would perhaps be more logical to make the @code{regs_allocated} field -part of the @code{re_registers} structure, instead of part of the -pattern buffer. But in that case the caller would be forced to -initialize the structure before passing it. Much existing code doesn't -do this initialization, and it's arguably better to avoid it anyway. - -@code{re_compile_pattern} sets @code{regs_allocated} to -@code{REGS_UNALLOCATED}, -so if you use the GNU regular expression -functions, you get this behavior by default. - -@c xx document re_set_registers - -@sc{posix}, on the other hand, requires a different interface: the -caller is supposed to pass in a fixed-length array which the matcher -fills. Therefore, if @code{regs_allocated} is @code{REGS_FIXED} -@vindex REGS_FIXED -the matcher simply fills that array. - -The following examples illustrate the information recorded in the -@code{re_registers} structure. (In all of them, @samp{(} represents the -open-group and @samp{)} the close-group operator. The first character -in the string @var{string} is at index 0.) - -@c xx i'm not sure this is all true anymore.
- -@itemize @bullet - -@item -If the regular expression has an @w{@var{i}-th} -group not contained within another group that matches a -substring of @var{string}, then the function sets -@code{@w{@var{regs}->}start[@var{i}]} to the index in @var{string} where -the substring matched by the @w{@var{i}-th} group begins, and -@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that -substring's end. The function sets @code{@w{@var{regs}->}start[0]} and -@code{@w{@var{regs}->}end[0]} to analogous information about the entire -pattern. - -For example, when you match @samp{((a)(b))} against @samp{ab}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]} - -@item -0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]} - -@item -1 in @code{@w{@var{regs}->}start[3]} and 2 in @code{@w{@var{regs}->}end[3]} -@end itemize - -@item -If a group matches more than once (as it might if followed by, -e.g., a repetition operator), then the function reports the information -about what the group @emph{last} matched. - -For example, when you match the pattern @samp{(a)*} against the string -@samp{aa}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]} - -@item -1 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]} -@end itemize - -@item -If the @w{@var{i}-th} group does not participate in a -successful match, e.g., it is an alternative not taken or a -repetition operator allows zero repetitions of it, then the function -sets @code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}. 
- -For example, when you match the pattern @samp{(a)*b} against -the string @samp{b}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]} -@end itemize - -@item -If the @w{@var{i}-th} group matches a zero-length string, then the -function sets @code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that -zero-length string. - -For example, when you match the pattern @samp{(a*)b} against the string -@samp{b}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]} -@end itemize - -@ignore -The function sets @code{@w{@var{regs}->}start[0]} and -@code{@w{@var{regs}->}end[0]} to analogous information about the entire -pattern. - -For example, when you match the pattern @samp{(a*)} against the empty -string, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 0 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]} -@end itemize -@end ignore - -@item -If an @w{@var{i}-th} group contains a @w{@var{j}-th} group -in turn not contained within any other group within group @var{i} and -the function reports a match of the @w{@var{i}-th} group, then it -records in @code{@w{@var{regs}->}start[@var{j}]} and -@code{@w{@var{regs}->}end[@var{j}]} the last match (if it matched) of -the @w{@var{j}-th} group. 
- -For example, when you match the pattern @samp{((a*)b)*} against the -string @samp{abb}, @w{group 2} last matches the empty string, so you -get what it previously matched: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]} - -@item -2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]} - -@item -2 in @code{@w{@var{regs}->}start[2]} and 2 in @code{@w{@var{regs}->}end[2]} -@end itemize - -When you match the pattern @samp{((a)*b)*} against the string -@samp{abb}, @w{group 2} doesn't participate in the last match, so you -get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]} - -@item -2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]} - -@item -0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]} -@end itemize - -@item -If an @w{@var{i}-th} group contains a @w{@var{j}-th} group -in turn not contained within any other group within group @var{i} -and the function sets -@code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}, then it also sets -@code{@w{@var{regs}->}start[@var{j}]} and -@code{@w{@var{regs}->}end[@var{j}]} to @math{-1}. 
- -For example, when you match the pattern @samp{((a)*b)*c} against the -string @samp{c}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[2]} and @math{-1} in @code{@w{@var{regs}->}end[2]} -@end itemize - -@end itemize - -@node Freeing GNU Pattern Buffers, , Using Registers, GNU Regex Functions -@subsection Freeing GNU Pattern Buffers - -To free any allocated fields of a pattern buffer, you can use the -@sc{posix} function described in @ref{Freeing POSIX Pattern Buffers}, -since the type @code{regex_t}---the type for @sc{posix} pattern -buffers---is equivalent to the type @code{re_pattern_buffer}. After -freeing a pattern buffer, you need to again compile a regular expression -in it (@pxref{GNU Regular Expression Compiling}) before passing it to -a matching or searching function. - - -@node POSIX Regex Functions, BSD Regex Functions, GNU Regex Functions, Programming with Regex -@section POSIX Regex Functions - -If you're writing code that has to be @sc{posix} compatible, you'll need -to use these functions. Their interfaces are as specified by @sc{posix}, -draft 1003.2/D11.2. - -@menu -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () -@end menu - - -@node POSIX Pattern Buffers, POSIX Regular Expression Compiling, , POSIX Regex Functions -@subsection POSIX Pattern Buffers - -To compile or match a given regular expression the @sc{posix} way, you -must supply a pattern buffer exactly the way you do for @sc{gnu} -(@pxref{GNU Pattern Buffers}). 
@sc{posix} pattern buffers have type -@code{regex_t}, which is equivalent to the @sc{gnu} pattern buffer -type @code{re_pattern_buffer}. - - -@node POSIX Regular Expression Compiling, POSIX Matching, POSIX Pattern Buffers, POSIX Regex Functions -@subsection POSIX Regular Expression Compiling - -With @sc{posix}, you can only search for a given regular expression; you -can't match it. To do this, you must first compile it in a -pattern buffer, using @code{regcomp}. - -@ignore -Before calling @code{regcomp}, you must initialize this pattern buffer -as you do for @sc{gnu} (@pxref{GNU Regular Expression Compiling}). See -below, however, for how to choose a syntax with which to compile. -@end ignore - -To compile a pattern buffer, use: - -@findex regcomp -@example -int -regcomp (regex_t *@var{preg}, const char *@var{regex}, int @var{cflags}) -@end example - -@noindent -@var{preg} is the initialized pattern buffer's address, @var{regex} is -the regular expression's address, and @var{cflags} is the compilation -flags, which Regex considers as a collection of bits. Here are the -valid bits, as defined in @file{regex.h}: - -@table @code - -@item REG_EXTENDED -@vindex REG_EXTENDED -says to use @sc{posix} Extended Regular Expression syntax; if this isn't -set, then says to use @sc{posix} Basic Regular Expression syntax. -@code{regcomp} sets @var{preg}'s @code{syntax} field accordingly. - -@item REG_ICASE -@vindex REG_ICASE -@cindex ignoring case -says to ignore case; @code{regcomp} sets @var{preg}'s @code{translate} -field to a translate table which ignores case, replacing anything you've -put there before. - -@item REG_NOSUB -@vindex REG_NOSUB -says to set @var{preg}'s @code{no_sub} field; @pxref{POSIX Matching}, -for what this means. - -@item REG_NEWLINE -@vindex REG_NEWLINE -says that a: - -@itemize @bullet - -@item -match-any-character operator (@pxref{Match-any-character -Operator}) doesn't match a newline. 
- -@item -nonmatching list not containing a newline (@pxref{List -Operators}) doesn't match a newline. - -@item -match-beginning-of-line operator (@pxref{Match-beginning-of-line -Operator}) matches the empty string immediately after a newline, -regardless of how @code{REG_NOTBOL} is set (@pxref{POSIX Matching}, for -an explanation of @code{REG_NOTBOL}). - -@item -match-end-of-line operator (@pxref{Match-end-of-line -Operator}) matches the empty string immediately before a newline, -regardless of how @code{REG_NOTEOL} is set (@pxref{POSIX Matching}, -for an explanation of @code{REG_NOTEOL}). - -@end itemize - -@end table - -If @code{regcomp} successfully compiles the regular expression, it -returns zero and sets @code{*@var{preg}} to the compiled -pattern. Except for @code{syntax} (which it sets as explained above), it -also sets the same fields the same way as does the @sc{gnu} compiling -function (@pxref{GNU Regular Expression Compiling}). - -If @code{regcomp} can't compile the regular expression, it returns one -of the error codes listed here. (Except when noted differently, the -syntax in all examples below is basic regular expression syntax.) - -@table @code - -@comment repetitions -@item REG_BADRPT -For example, the consecutive repetition operators @samp{**} in -@samp{a**} are invalid. As another example, if the syntax is extended -regular expression syntax, then the repetition operator @samp{*} with -nothing on which to operate in @samp{*} is invalid. - -@item REG_BADBR -For example, the @var{count} @samp{-1} in @samp{a\@{-1} is invalid. - -@item REG_EBRACE -For example, @samp{a\@{1} is missing a close-interval operator. - -@comment lists -@item REG_EBRACK -For example, @samp{[a} is missing a close-list operator. - -@item REG_ERANGE -For example, the range ending point @samp{z} that collates lower than -does its starting point @samp{a} in @samp{[z-a]} is invalid.
Also, the -range with the character class @samp{[:alpha:]} as its starting point in -@samp{[[:alpha:]-|]}. - -@item REG_ECTYPE -For example, the character class name @samp{foo} in @samp{[[:foo:]} is -invalid. - -@comment groups -@item REG_EPAREN -For example, @samp{a\)} is missing an open-group operator and @samp{\(a} -is missing a close-group operator. - -@item REG_ESUBREG -For example, the back reference @samp{\2} that refers to a nonexistent -subexpression in @samp{\(a\)\2} is invalid. - -@comment unfinished business - -@item REG_EEND -Returned when a regular expression causes no other more specific error. - -@item REG_EESCAPE -For example, the trailing backslash @samp{\} in @samp{a\} is invalid, as is the -one in @samp{\}. - -@comment kitchen sink -@item REG_BADPAT -For example, in the extended regular expression syntax, the empty group -@samp{()} in @samp{a()b} is invalid. - -@comment internal -@item REG_ESIZE -Returned when a regular expression needs a pattern buffer larger than -65536 bytes. - -@item REG_ESPACE -Returned when a regular expression makes Regex to run out of memory. - -@end table - - -@node POSIX Matching, Reporting Errors, POSIX Regular Expression Compiling, POSIX Regex Functions -@subsection POSIX Matching - -Matching the @sc{posix} way means trying to match a null-terminated -string starting at its first character. Once you've compiled a pattern -into a pattern buffer (@pxref{POSIX Regular Expression Compiling}), you -can ask the matcher to match that pattern against a string using: - -@findex regexec -@example -int -regexec (const regex_t *@var{preg}, const char *@var{string}, - size_t @var{nmatch}, regmatch_t @var{pmatch}[], int @var{eflags}) -@end example - -@noindent -@var{preg} is the address of a pattern buffer for a compiled pattern. -@var{string} is the string you want to match. - -@xref{Using Byte Offsets}, for an explanation of @var{pmatch}. 
If you -pass zero for @var{nmatch} or you compiled @var{preg} with the -compilation flag @code{REG_NOSUB} set, then @code{regexec} will ignore -@var{pmatch}; otherwise, you must allocate it to have at least -@var{nmatch} elements. @code{regexec} will record @var{nmatch} byte -offsets in @var{pmatch}, and set to @math{-1} any unused elements up to -@math{@var{pmatch}@code{[@var{nmatch}]} - 1}. - -@var{eflags} specifies @dfn{execution flags}---namely, the two bits -@code{REG_NOTBOL} and @code{REG_NOTEOL} (defined in @file{regex.h}). If -you set @code{REG_NOTBOL}, then the match-beginning-of-line operator -(@pxref{Match-beginning-of-line Operator}) always fails to match. -This lets you match against pieces of a line, as you would need to if, -say, searching for repeated instances of a given pattern in a line; it -would work correctly for patterns both with and without -match-beginning-of-line operators. @code{REG_NOTEOL} works analogously -for the match-end-of-line operator (@pxref{Match-end-of-line -Operator}); it exists for symmetry. - -@code{regexec} tries to find a match for @var{preg} in @var{string} -according to the syntax in @var{preg}'s @code{syntax} field. -(@xref{POSIX Regular Expression Compiling}, for how to set it.) The -function returns zero if the compiled pattern matches @var{string} and -@code{REG_NOMATCH} (defined in @file{regex.h}) if it doesn't. - -@node Reporting Errors, Using Byte Offsets, POSIX Matching, POSIX Regex Functions -@subsection Reporting Errors - -If either @code{regcomp} or @code{regexec} fail, they return a nonzero -error code, the possibilities for which are defined in @file{regex.h}. -@xref{POSIX Regular Expression Compiling}, and @ref{POSIX Matching}, for -what these codes mean. 
To get an error string corresponding to these -codes, you can use: - -@findex regerror -@example -size_t -regerror (int @var{errcode}, - const regex_t *@var{preg}, - char *@var{errbuf}, - size_t @var{errbuf_size}) -@end example - -@noindent -@var{errcode} is an error code, @var{preg} is the address of the pattern -buffer which provoked the error, @var{errbuf} is the error buffer, and -@var{errbuf_size} is @var{errbuf}'s size. - -@code{regerror} returns the size in bytes of the error string -corresponding to @var{errcode} (including its terminating null). If -@var{errbuf} and @var{errbuf_size} are nonzero, it also returns in -@var{errbuf} the first @math{@var{errbuf_size} - 1} characters of the -error string, followed by a null. -@var{errbuf_size} must be a nonnegative number less than or equal to the -size in bytes of @var{errbuf}. - -You can call @code{regerror} with a null @var{errbuf} and a zero -@var{errbuf_size} to determine how large @var{errbuf} need be to -accommodate @code{regerror}'s error string. - -@node Using Byte Offsets, Freeing POSIX Pattern Buffers, Reporting Errors, POSIX Regex Functions -@subsection Using Byte Offsets - -In @sc{posix}, variables of type @code{regmatch_t} hold information -analogous to, but not identical to, @sc{gnu}'s registers (@pxref{Using -Registers}). To get information about registers in @sc{posix}, pass to -@code{regexec} a nonzero @var{pmatch} of type @code{regmatch_t}, i.e., -the address of a structure of this type, defined in -@file{regex.h}: - -@tindex regmatch_t -@example -typedef struct -@{ - regoff_t rm_so; - regoff_t rm_eo; -@} regmatch_t; -@end example - -When reading in @ref{Using Registers}, about how the matching function -stores the information into the registers, substitute @var{pmatch} for -@var{regs}, @code{@w{@var{pmatch}[@var{i}].}rm_so} for -@code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{pmatch}[@var{i}].}rm_eo} for -@code{@w{@var{regs}->}end[@var{i}]}.
- -@node Freeing POSIX Pattern Buffers, , Using Byte Offsets, POSIX Regex Functions -@subsection Freeing POSIX Pattern Buffers - -To free any allocated fields of a pattern buffer, use: - -@findex regfree -@example -void -regfree (regex_t *@var{preg}) -@end example - -@noindent -@var{preg} is the pattern buffer whose allocated fields you want freed. -@code{regfree} also sets @var{preg}'s @code{allocated} and @code{used} -fields to zero. After freeing a pattern buffer, you need to again -compile a regular expression in it (@pxref{POSIX Regular Expression -Compiling}) before passing it to the matching function (@pxref{POSIX -Matching}). - - -@node BSD Regex Functions, , POSIX Regex Functions, Programming with Regex -@section BSD Regex Functions - -If you're writing code that has to be Berkeley @sc{unix} compatible, -you'll need to use these functions whose interfaces are the same as those -in Berkeley @sc{unix}. - -@menu -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () -@end menu - -@node BSD Regular Expression Compiling, BSD Searching, , BSD Regex Functions -@subsection BSD Regular Expression Compiling - -With Berkeley @sc{unix}, you can only search for a given regular -expression; you can't match one. To search for it, you must first -compile it. Before you compile it, you must indicate the regular -expression syntax you want it compiled according to by setting the -variable @code{re_syntax_options} (declared in @file{regex.h}) to some -syntax (@pxref{Regular Expression Syntax}). - -To compile a regular expression, use: - -@findex re_comp -@example -char * -re_comp (char *@var{regex}) -@end example - -@noindent -@var{regex} is the address of a null-terminated regular expression. -@code{re_comp} uses an internal pattern buffer, so you can use only the -most recently compiled pattern buffer.
This means that if you want to -use a given regular expression that you've already compiled---but it -isn't the latest one you've compiled---you'll have to recompile it. If -you call @code{re_comp} with the null string (@emph{not} the empty -string) as the argument, it doesn't change the contents of the pattern -buffer. - -If @code{re_comp} successfully compiles the regular expression, it -returns zero. If it can't compile the regular expression, it returns -an error string. @code{re_comp}'s error messages are identical to those -of @code{re_compile_pattern} (@pxref{GNU Regular Expression -Compiling}). - -@node BSD Searching, , BSD Regular Expression Compiling, BSD Regex Functions -@subsection BSD Searching - -Searching the Berkeley @sc{unix} way means searching in a string -starting at its first character and trying successive positions within -it to find a match. Once you've compiled a pattern using @code{re_comp} -(@pxref{BSD Regular Expression Compiling}), you can ask Regex -to search for that pattern in a string using: - -@findex re_exec -@example -int -re_exec (char *@var{string}) -@end example - -@noindent -@var{string} is the address of the null-terminated string in which you -want to search. - -@code{re_exec} returns either 1 for success or 0 for failure. It -automatically uses a @sc{gnu} fastmap (@pxref{Searching with Fastmaps}). - - -@node Copying, Index, Programming with Regex, Top -@appendix GNU GENERAL PUBLIC LICENSE -@center Version 2, June 1991 - -@display -Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc. -675 Mass Ave, Cambridge, MA 02139, USA - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -@end display - -@unnumberedsec Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. 
By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software---to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. 
If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - -@iftex -@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end iftex -@ifinfo -@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end ifinfo - -@enumerate -@item -This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The ``Program'', below, -refers to any such program or work, and a ``work based on the Program'' -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term ``modification''.) Each licensee is addressed as ``you''. - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. 
- -@item -You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - -@item -You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - -@enumerate a -@item -You must cause the modified files to carry prominent notices -stating that you changed the files and the date of any change. - -@item -You must cause any work that you distribute or publish, that in -whole or in part contains or is derived from the Program or any -part thereof, to be licensed as a whole at no charge to all third -parties under the terms of this License. - -@item -If the modified program normally reads commands interactively -when run, you must cause it, when started running for such -interactive use in the most ordinary way, to print or display an -announcement including an appropriate copyright notice and a -notice that there is no warranty (or else, saying that you provide -a warranty) and that users may redistribute the program under -these conditions, and telling the user how to view a copy of this -License. (Exception: if the Program itself is interactive but -does not normally print such an announcement, your work based on -the Program is not required to print an announcement.) -@end enumerate - -These requirements apply to the modified work as a whole. 
If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - -@item -You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - -@enumerate a -@item -Accompany it with the complete corresponding machine-readable -source code, which must be distributed under the terms of Sections -1 and 2 above on a medium customarily used for software interchange; or, - -@item -Accompany it with a written offer, valid for at least three -years, to give any third party, for a charge no more than your -cost of physically performing source distribution, a complete -machine-readable copy of the corresponding source code, to be -distributed under the terms of Sections 1 and 2 above on a medium -customarily used for software interchange; or, - -@item -Accompany it with the information you received as to the offer -to distribute corresponding source code. 
(This alternative is -allowed only for noncommercial distribution and only if you -received the program in object code or executable form with such -an offer, in accord with Subsection b above.) -@end enumerate - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - -@item -You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - -@item -You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. 
Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - -@item -Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - -@item -If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. 
- -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - -@item -If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - -@item -The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and ``any -later version'', you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. 
- -@item -If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - -@iftex -@heading NO WARRANTY -@end iftex -@ifinfo -@center NO WARRANTY -@end ifinfo - -@item -BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - -@item -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. 
-@end enumerate - -@iftex -@heading END OF TERMS AND CONDITIONS -@end iftex -@ifinfo -@center END OF TERMS AND CONDITIONS -@end ifinfo - -@page -@unnumberedsec Appendix: How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the ``copyright'' line and a pointer to where the full notice is found. - -@smallexample -@var{one line to give the program's name and a brief idea of what it does.} -Copyright (C) 19@var{yy} @var{name of author} - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -@end smallexample - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - -@smallexample -Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author} -Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 
-This is free software, and you are welcome to redistribute it -under certain conditions; type `show c' for details. -@end smallexample - -The hypothetical commands @samp{show w} and @samp{show c} should show -the appropriate parts of the General Public License. Of course, the -commands you use may be called something other than @samp{show w} and -@samp{show c}; they could even be mouse-clicks or menu items---whatever -suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a ``copyright disclaimer'' for the program, if -necessary. Here is a sample; alter the names: - -@example -Yoyodyne, Inc., hereby disclaims all copyright interest in the program -`Gnomovision' (which makes passes at compilers) written by James Hacker. - -@var{signature of Ty Coon}, 1 April 1989 -Ty Coon, President of Vice -@end example - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. - - -@node Index, , Copying, Top -@unnumbered Index - -@printindex cp - -@contents - -@bye diff --git a/gnu/libregex/doc/texinfo.tex b/gnu/libregex/doc/texinfo.tex deleted file mode 100644 index d10917e237db..000000000000 --- a/gnu/libregex/doc/texinfo.tex +++ /dev/null @@ -1,3941 +0,0 @@ -%% TeX macros to handle texinfo files - -% Copyright (C) 1985, 86, 88, 90, 91, 92, 1993 Free Software Foundation, Inc. - -%This texinfo.tex file is free software; you can redistribute it and/or -%modify it under the terms of the GNU General Public License as -%published by the Free Software Foundation; either version 2, or (at -%your option) any later version. 
- -%This texinfo.tex file is distributed in the hope that it will be -%useful, but WITHOUT ANY WARRANTY; without even the implied warranty -%of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -%General Public License for more details. - -%You should have received a copy of the GNU General Public License -%along with this texinfo.tex file; see the file COPYING. If not, write -%to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, -%USA. - - -%In other words, you are welcome to use, share and improve this program. -%You are forbidden to forbid anyone else to use, share and improve -%what you give them. Help stamp out software-hoarding! - -\def\texinfoversion{2.104} -\message{Loading texinfo package [Version \texinfoversion]:} -\message{} - -% Print the version number if in a .fmt file. -\everyjob{\message{[Texinfo version \texinfoversion]}\message{}} - -% Save some parts of plain tex whose names we will redefine. - -\let\ptexlbrace=\{ -\let\ptexrbrace=\} -\let\ptexdots=\dots -\let\ptexdot=\. -\let\ptexstar=\* -\let\ptexend=\end -\let\ptexbullet=\bullet -\let\ptexb=\b -\let\ptexc=\c -\let\ptexi=\i -\let\ptext=\t -\let\ptexl=\l -\let\ptexL=\L - -\def\tie{\penalty 10000\ } % Save plain tex definition of ~. - -\message{Basics,} -\chardef\other=12 - -% If this character appears in an error message or help string, it -% starts a new line in the output. -\newlinechar = `^^J - -% Ignore a token. -% -\def\gobble#1{} - -\hyphenation{ap-pen-dix} -\hyphenation{mini-buf-fer mini-buf-fers} -\hyphenation{eshell} - -% Margin to add to right of even pages, to left of odd pages. -\newdimen \bindingoffset \bindingoffset=0pt -\newdimen \normaloffset \normaloffset=\hoffset -\newdimen\pagewidth \newdimen\pageheight -\pagewidth=\hsize \pageheight=\vsize - -% Sometimes it is convenient to have everything in the transcript file -% and nothing on the terminal. We don't just call \tracingall here, -% since that produces some useless output on the terminal. 
-% -\def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}% -\def\loggingall{\tracingcommands2 \tracingstats2 - \tracingpages1 \tracingoutput1 \tracinglostchars1 - \tracingmacros2 \tracingparagraphs1 \tracingrestores1 - \showboxbreadth\maxdimen\showboxdepth\maxdimen -}% - -%---------------------Begin change----------------------- -% -%%%% For @cropmarks command. -% Dimensions to add cropmarks at corners Added by P. A. MacKay, 12 Nov. 1986 -% -\newdimen\cornerlong \newdimen\cornerthick -\newdimen \topandbottommargin -\newdimen \outerhsize \newdimen \outervsize -\cornerlong=1pc\cornerthick=.3pt % These set size of cropmarks -\outerhsize=7in -%\outervsize=9.5in -% Alternative @smallbook page size is 9.25in -\outervsize=9.25in -\topandbottommargin=.75in -% -%---------------------End change----------------------- - -% \onepageout takes a vbox as an argument. Note that \pagecontents -% does insertions itself, but you have to call it yourself. -\chardef\PAGE=255 \output={\onepageout{\pagecontents\PAGE}} -\def\onepageout#1{\hoffset=\normaloffset -\ifodd\pageno \advance\hoffset by \bindingoffset -\else \advance\hoffset by -\bindingoffset\fi -{\escapechar=`\\\relax % makes sure backslash is used in output files. -\shipout\vbox{{\let\hsize=\pagewidth \makeheadline} \pagebody{#1}% -{\let\hsize=\pagewidth \makefootline}}}% -\advancepageno \ifnum\outputpenalty>-20000 \else\dosupereject\fi} - -%%%% For @cropmarks command %%%% - -% Here is a modification of the main output routine for Near East Publications -% This provides right-angle cropmarks at all four corners. -% The contents of the page are centerlined into the cropmarks, -% and any desired binding offset is added as an \hskip on either -% site of the centerlined box. (P. A. MacKay, 12 November, 1986) -% -\def\croppageout#1{\hoffset=0pt % make sure this doesn't mess things up -{\escapechar=`\\\relax % makes sure backslash is used in output files. 
- \shipout - \vbox to \outervsize{\hsize=\outerhsize - \vbox{\line{\ewtop\hfill\ewtop}} - \nointerlineskip - \line{\vbox{\moveleft\cornerthick\nstop} - \hfill - \vbox{\moveright\cornerthick\nstop}} - \vskip \topandbottommargin - \centerline{\ifodd\pageno\hskip\bindingoffset\fi - \vbox{ - {\let\hsize=\pagewidth \makeheadline} - \pagebody{#1} - {\let\hsize=\pagewidth \makefootline}} - \ifodd\pageno\else\hskip\bindingoffset\fi} - \vskip \topandbottommargin plus1fill minus1fill - \boxmaxdepth\cornerthick - \line{\vbox{\moveleft\cornerthick\nsbot} - \hfill - \vbox{\moveright\cornerthick\nsbot}} - \nointerlineskip - \vbox{\line{\ewbot\hfill\ewbot}} - }} - \advancepageno - \ifnum\outputpenalty>-20000 \else\dosupereject\fi} -% -% Do @cropmarks to get crop marks -\def\cropmarks{\let\onepageout=\croppageout } - -\def\pagebody#1{\vbox to\pageheight{\boxmaxdepth=\maxdepth #1}} -{\catcode`\@ =11 -\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi -\dimen@=\dp#1 \unvbox#1 -\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi -\ifr@ggedbottom \kern-\dimen@ \vfil \fi} -} - -% -% Here are the rules for the cropmarks. Note that they are -% offset so that the space between them is truly \outerhsize or \outervsize -% (P. A. MacKay, 12 November, 1986) -% -\def\ewtop{\vrule height\cornerthick depth0pt width\cornerlong} -\def\nstop{\vbox - {\hrule height\cornerthick depth\cornerlong width\cornerthick}} -\def\ewbot{\vrule height0pt depth\cornerthick width\cornerlong} -\def\nsbot{\vbox - {\hrule height\cornerlong depth\cornerthick width\cornerthick}} - -% Parse an argument, then pass it to #1. The argument is the rest of -% the input line (except we remove a trailing comment). #1 should be a -% macro which expects an ordinary undelimited TeX argument. -% -\def\parsearg#1{% - \let\next = #1% - \begingroup - \obeylines - \futurelet\temp\parseargx -} - -% If the next token is an obeyed space (from an @example environment or -% the like), remove it and recurse. 
Otherwise, we're done. -\def\parseargx{% - % \obeyedspace is defined far below, after the definition of \sepspaces. - \ifx\obeyedspace\temp - \expandafter\parseargdiscardspace - \else - \expandafter\parseargline - \fi -} - -% Remove a single space (as the delimiter token to the macro call). -{\obeyspaces % - \gdef\parseargdiscardspace {\futurelet\temp\parseargx}} - -{\obeylines % - \gdef\parseargline#1^^M{% - \endgroup % End of the group started in \parsearg. - % - % First remove any @c comment, then any @comment. - % Result of each macro is put in \toks0. - \argremovec #1\c\relax % - \expandafter\argremovecomment \the\toks0 \comment\relax % - % - % Call the caller's macro, saved as \next in \parsearg. - \expandafter\next\expandafter{\the\toks0}% - }% -} - -% Since all \c{,omment} does is throw away the argument, we can let TeX -% do that for us. The \relax here is matched by the \relax in the call -% in \parseargline; it could be more or less anything, its purpose is -% just to delimit the argument to the \c. -\def\argremovec#1\c#2\relax{\toks0 = {#1}} -\def\argremovecomment#1\comment#2\relax{\toks0 = {#1}} - -% \argremovec{,omment} might leave us with trailing spaces, though; e.g., -% @end itemize @c foo -% will have two active spaces as part of the argument with the -% `itemize'. Here we remove all active spaces from #1, and assign the -% result to \toks0. -% -% This loses if there are any *other* active characters besides spaces -% in the argument -- _ ^ +, for example -- since they get expanded. -% Fortunately, Texinfo does not define any such commands. (If it ever -% does, the catcode of the characters in questionwill have to be changed -% here.) But this means we cannot call \removeactivespaces as part of -% \argremovec{,omment}, since @c uses \parsearg, and thus the argument -% that \parsearg gets might well have any character at all in it. 
-% -\def\removeactivespaces#1{% - \begingroup - \ignoreactivespaces - \edef\temp{#1}% - \global\toks0 = \expandafter{\temp}% - \endgroup -} - -% Change the active space to expand to nothing. -% -\begingroup - \obeyspaces - \gdef\ignoreactivespaces{\obeyspaces\let =\empty} -\endgroup - - -\def\flushcr{\ifx\par\lisppar \def\next##1{}\else \let\next=\relax \fi \next} - -%% These are used to keep @begin/@end levels from running away -%% Call \inENV within environments (after a \begingroup) -\newif\ifENV \ENVfalse \def\inENV{\ifENV\relax\else\ENVtrue\fi} -\def\ENVcheck{% -\ifENV\errmessage{Still within an environment. Type Return to continue.} -\endgroup\fi} % This is not perfect, but it should reduce lossage - -% @begin foo is the same as @foo, for now. -\newhelp\EMsimple{Type <Return> to continue.} - -\outer\def\begin{\parsearg\beginxxx} - -\def\beginxxx #1{% -\expandafter\ifx\csname #1\endcsname\relax -{\errhelp=\EMsimple \errmessage{Undefined command @begin #1}}\else -\csname #1\endcsname\fi} - -% @end foo executes the definition of \Efoo. -% -\def\end{\parsearg\endxxx} -\def\endxxx #1{% - \removeactivespaces{#1}% - \edef\endthing{\the\toks0}% - % - \expandafter\ifx\csname E\endthing\endcsname\relax - \expandafter\ifx\csname \endthing\endcsname\relax - % There's no \foo, i.e., no ``environment'' foo. - \errhelp = \EMsimple - \errmessage{Undefined command `@end \endthing'}% - \else - \unmatchedenderror\endthing - \fi - \else - % Everything's ok; the right environment has been started. - \csname E\endthing\endcsname - \fi -} - -% There is an environment #1, but it hasn't been started. Give an error. -% -\def\unmatchedenderror#1{% - \errhelp = \EMsimple - \errmessage{This `@end #1' doesn't have a matching `@#1'}% -} - -% Define the control sequence \E#1 to give an unmatched @end error. -% -\def\defineunmatchedend#1{% - \expandafter\def\csname E#1\endcsname{\unmatchedenderror{#1}}% -} - - -% Single-spacing is done by various environments. 
- -\newskip\singlespaceskip \singlespaceskip = \baselineskip -\def\singlespace{% -{\advance \baselineskip by -\singlespaceskip -\kern \baselineskip}% -\baselineskip=\singlespaceskip -} - -%% Simple single-character @ commands - -% @@ prints an @ -% Kludge this until the fonts are right (grr). -\def\@{{\tt \char '100}} - -% This is turned off because it was never documented -% and you can use @w{...} around a quote to suppress ligatures. -%% Define @` and @' to be the same as ` and ' -%% but suppressing ligatures. -%\def\`{{`}} -%\def\'{{'}} - -% Used to generate quoted braces. - -\def\mylbrace {{\tt \char '173}} -\def\myrbrace {{\tt \char '175}} -\let\{=\mylbrace -\let\}=\myrbrace - -% @: forces normal size whitespace following. -\def\:{\spacefactor=1000 } - -% @* forces a line break. -\def\*{\hfil\break\hbox{}\ignorespaces} - -% @. is an end-of-sentence period. -\def\.{.\spacefactor=3000 } - -% @w prevents a word break. Without the \leavevmode, @w at the -% beginning of a paragraph, when TeX is still in vertical mode, would -% produce a whole line of output instead of starting the paragraph. -\def\w#1{\leavevmode\hbox{#1}} - -% @group ... @end group forces ... to be all on one page, by enclosing -% it in a TeX vbox. We use \vtop instead of \vbox to construct the box -% to keep its height that of a normal line. According to the rules for -% \topskip (p.114 of the TeXbook), the glue inserted is -% max (\topskip - \ht (first item), 0). If that height is large, -% therefore, no glue is inserted, and the space between the headline and -% the text is small, which looks bad. -% -\def\group{\begingroup - \ifnum\catcode13=\active \else - \errhelp = \groupinvalidhelp - \errmessage{@group invalid in context where filling is enabled}% - \fi - % - % The \vtop we start below produces a box with normal height and large - % depth; thus, TeX puts \baselineskip glue before it, and (when the - % next line of text is done) \lineskip glue after it. (See p.82 of - % the TeXbook.) 
But the next line of text also gets us \parskip glue. - % Final result: space below is slightly more than space above. - \def\Egroup{% - \egroup % End the \vtop. - \endgroup % End the \group. - }% - % - \vtop\bgroup - % We have to put a strut on the last line in case the @group is in - % the midst of an example, rather than completely enclosing it. - % Otherwise, the interline space between the last line of the group - % and the first line afterwards is too small. But we can't put the - % strut in \Egroup, since there it would be on a line by itself. - % Hence this just inserts a strut at the beginning of each line. - \everypar = {\strut}% - % - % We do @comment here in case we are called inside an environment, - % such as @example, where each end-of-line in the input causes an - % end-of-line in the output. We don't want the end-of-line after - % the `@group' to put extra space in the output. Since @group - % should appear on a line by itself (according to the Texinfo - % manual), we don't worry about eating any user text. - \comment -} -% -% TeX puts in an \escapechar (i.e., `@') at the beginning of the help -% message, so this ends up printing `@group can only ...'. -% -\newhelp\groupinvalidhelp{% -group can only be used in environments such as @example,^^J% -where each line of input produces a line of output.} - -% @need space-in-mils -% forces a page break if there is not space-in-mils remaining. - -\newdimen\mil \mil=0.001in - -\def\need{\parsearg\needx} - -% Old definition--didn't work. -%\def\needx #1{\par % -%% This method tries to make TeX break the page naturally -%% if the depth of the box does not fit. -%{\baselineskip=0pt% -%\vtop to #1\mil{\vfil}\kern -#1\mil\penalty 10000 -%\prevdepth=-1000pt -%}} - -\def\needx#1{% - % Go into vertical mode, so we don't make a big box in the middle of a - % paragraph. - \par - % - % Don't add any leading before our big empty box, but allow a page - % break, since the best break might be right here. 
- \allowbreak - \nointerlineskip - \vtop to #1\mil{\vfil}% - % - % TeX does not even consider page breaks if a penalty added to the - % main vertical list is 10000 or more. But in order to see if the - % empty box we just added fits on the page, we must make it consider - % page breaks. On the other hand, we don't want to actually break the - % page after the empty box. So we use a penalty of 9999. - % - % There is an extremely small chance that TeX will actually break the - % page at this \penalty, if there are no other feasible breakpoints in - % sight. (If the user is using lots of big @group commands, which - % almost-but-not-quite fill up a page, TeX will have a hard time doing - % good page breaking, for example.) However, I could not construct an - % example where a page broke at this \penalty; if it happens in a real - % document, then we can reconsider our strategy. - \penalty9999 - % - % Back up by the size of the box, whether we did a page break or not. - \kern -#1\mil - % - % Do not allow a page break right after this kern. - \nobreak -} - -% @br forces paragraph break - -\let\br = \par - -% @dots{} output some dots - -\def\dots{$\ldots$} - -% @page forces the start of a new page - -\def\page{\par\vfill\supereject} - -% @exdent text.... -% outputs text on separate line in roman font, starting at standard page margin - -% This records the amount of indent in the innermost environment. -% That's how much \exdent should take out. -\newskip\exdentamount - -% This defn is used inside fill environments such as @defun. -\def\exdent{\parsearg\exdentyyy} -\def\exdentyyy #1{{\hfil\break\hbox{\kern -\exdentamount{\rm#1}}\hfil\break}} - -% This defn is used inside nofill environments such as @example. -\def\nofillexdent{\parsearg\nofillexdentyyy} -\def\nofillexdentyyy #1{{\advance \leftskip by -\exdentamount -\leftline{\hskip\leftskip{\rm#1}}}} - -%\hbox{{\rm#1}}\hfil\break}} - -% @include file insert text of that file as input. 
- -\def\include{\parsearg\includezzz} -%Use \input\thisfile to avoid blank after \input, which may be an active -%char (in which case the blank would become the \input argument). -%The grouping keeps the value of \thisfile correct even when @include -%is nested. -\def\includezzz #1{\begingroup -\def\thisfile{#1}\input\thisfile -\endgroup} - -\def\thisfile{} - -% @center line outputs that line, centered - -\def\center{\parsearg\centerzzz} -\def\centerzzz #1{{\advance\hsize by -\leftskip -\advance\hsize by -\rightskip -\centerline{#1}}} - -% @sp n outputs n lines of vertical space - -\def\sp{\parsearg\spxxx} -\def\spxxx #1{\par \vskip #1\baselineskip} - -% @comment ...line which is ignored... -% @c is the same as @comment -% @ignore ... @end ignore is another way to write a comment - -\def\comment{\catcode 64=\other \catcode 123=\other \catcode 125=\other% -\parsearg \commentxxx} - -\def\commentxxx #1{\catcode 64=0 \catcode 123=1 \catcode 125=2 } - -\let\c=\comment - -% Prevent errors for section commands. -% Used in @ignore and in failing conditionals. -\def\ignoresections{% -\let\chapter=\relax -\let\unnumbered=\relax -\let\top=\relax -\let\unnumberedsec=\relax -\let\unnumberedsection=\relax -\let\unnumberedsubsec=\relax -\let\unnumberedsubsection=\relax -\let\unnumberedsubsubsec=\relax -\let\unnumberedsubsubsection=\relax -\let\section=\relax -\let\subsec=\relax -\let\subsubsec=\relax -\let\subsection=\relax -\let\subsubsection=\relax -\let\appendix=\relax -\let\appendixsec=\relax -\let\appendixsection=\relax -\let\appendixsubsec=\relax -\let\appendixsubsection=\relax -\let\appendixsubsubsec=\relax -\let\appendixsubsubsection=\relax -\let\contents=\relax -\let\smallbook=\relax -\let\titlepage=\relax -} - -% Used in nested conditionals, where we have to parse the Texinfo source -% and so want to turn off most commands, in case they are used -% incorrectly. 
-% -\def\ignoremorecommands{% - \let\defcv = \relax - \let\deffn = \relax - \let\deffnx = \relax - \let\defindex = \relax - \let\defivar = \relax - \let\defmac = \relax - \let\defmethod = \relax - \let\defop = \relax - \let\defopt = \relax - \let\defspec = \relax - \let\deftp = \relax - \let\deftypefn = \relax - \let\deftypefun = \relax - \let\deftypevar = \relax - \let\deftypevr = \relax - \let\defun = \relax - \let\defvar = \relax - \let\defvr = \relax - \let\ref = \relax - \let\xref = \relax - \let\printindex = \relax - \let\pxref = \relax - \let\settitle = \relax - \let\include = \relax -} - -% Ignore @ignore ... @end ignore. -% -\def\ignore{\doignore{ignore}} - -% Also ignore @ifinfo, @menu, and @direntry text. -% -\def\ifinfo{\doignore{ifinfo}} -\def\menu{\doignore{menu}} -\def\direntry{\doignore{direntry}} - -% Ignore text until a line `@end #1'. -% -\def\doignore#1{\begingroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define a command to swallow text until we reach `@end #1'. - \long\def\doignoretext##1\end #1{\enddoignore}% - % - % Make sure that spaces turn into tokens that match what \doignoretext wants. - \catcode32 = 10 - % - % And now expand that command. - \doignoretext -} - -% What we do to finish off ignored text. -% -\def\enddoignore{\endgroup\ignorespaces}% - -\newif\ifwarnedobs\warnedobsfalse -\def\obstexwarn{% - \ifwarnedobs\relax\else - % We need to warn folks that they may have trouble with TeX 3.0. - % This uses \immediate\write16 rather than \message to get newlines. 
- \immediate\write16{} - \immediate\write16{***WARNING*** for users of Unix TeX 3.0!} - \immediate\write16{This manual trips a bug in TeX version 3.0 (tex hangs).} - \immediate\write16{If you are running another version of TeX, relax.} - \immediate\write16{If you are running Unix TeX 3.0, kill this TeX process.} - \immediate\write16{ Then upgrade your TeX installation if you can.} - \immediate\write16{If you are stuck with version 3.0, run the} - \immediate\write16{ script ``tex3patch'' from the Texinfo distribution} - \immediate\write16{ to use a workaround.} - \immediate\write16{} - \warnedobstrue - \fi -} - -% **In TeX 3.0, setting text in \nullfont hangs tex. For a -% workaround (which requires the file ``dummy.tfm'' to be installed), -% uncomment the following line: -%%%%%\font\nullfont=dummy\let\obstexwarn=\relax - -% Ignore text, except that we keep track of conditional commands for -% purposes of nesting, up to an `@end #1' command. -% -\def\nestedignore#1{% - \obstexwarn - % We must actually expand the ignored text to look for the @end - % command, so that nested ignore constructs work. Thus, we put the - % text into a \vbox and then do nothing with the result. To minimize - % the change of memory overflow, we follow the approach outlined on - % page 401 of the TeXbook: make the current font be a dummy font. - % - \setbox0 = \vbox\bgroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define `@end #1' to end the box, which will in turn undefine the - % @end command again. - \expandafter\def\csname E#1\endcsname{\egroup\ignorespaces}% - % - % We are going to be parsing Texinfo commands. Most cause no - % trouble when they are used incorrectly, but some commands do - % complicated argument parsing or otherwise get confused, so we - % undefine them. - % - % We can't do anything about stray @-signs, unfortunately; - % they'll produce `undefined control sequence' errors. 
- \ignoremorecommands - % - % Set the current font to be \nullfont, a TeX primitive, and define - % all the font commands to also use \nullfont. We don't use - % dummy.tfm, as suggested in the TeXbook, because not all sites - % might have that installed. Therefore, math mode will still - % produce output, but that should be an extremely small amount of - % stuff compared to the main input. - % - \nullfont - \let\tenrm = \nullfont \let\tenit = \nullfont \let\tensl = \nullfont - \let\tenbf = \nullfont \let\tentt = \nullfont \let\smallcaps = \nullfont - \let\tensf = \nullfont - % - % Don't complain when characters are missing from the fonts. - \tracinglostchars = 0 - % - % Don't bother to do space factor calculations. - \frenchspacing - % - % Don't report underfull hboxes. - \hbadness = 10000 - % - % Do minimal line-breaking. - \pretolerance = 10000 - % - % Do not execute instructions in @tex - \def\tex{\doignore{tex}} -} - -% @set VAR sets the variable VAR to an empty value. -% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE. -% -% Since we want to separate VAR from REST-OF-LINE (which might be -% empty), we can't just use \parsearg; we have to insert a space of our -% own to delimit the rest of the line, and then take it out again if we -% didn't need it. -% -\def\set{\parsearg\setxxx} -\def\setxxx#1{\setyyy#1 \endsetyyy} -\def\setyyy#1 #2\endsetyyy{% - \def\temp{#2}% - \ifx\temp\empty \global\expandafter\let\csname SET#1\endcsname = \empty - \else \setzzz{#1}#2\endsetzzz % Remove the trailing space \setxxx inserted. - \fi -} -\def\setzzz#1#2 \endsetzzz{\expandafter\xdef\csname SET#1\endcsname{#2}} - -% @clear VAR clears (i.e., unsets) the variable VAR. -% -\def\clear{\parsearg\clearxxx} -\def\clearxxx#1{\global\expandafter\let\csname SET#1\endcsname=\relax} - -% @value{foo} gets the text saved in variable foo. 
-% -\def\value#1{\expandafter - \ifx\csname SET#1\endcsname\relax - {\{No value for ``#1''\}} - \else \csname SET#1\endcsname \fi} - -% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined -% with @set. -% -\def\ifset{\parsearg\ifsetxxx} -\def\ifsetxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifsetfail - \else - \expandafter\ifsetsucceed - \fi -} -\def\ifsetsucceed{\conditionalsucceed{ifset}} -\def\ifsetfail{\nestedignore{ifset}} -\defineunmatchedend{ifset} - -% @ifclear VAR ... @end ifclear reads the `...' iff VAR has never been -% defined with @set, or has been undefined with @clear. -% -\def\ifclear{\parsearg\ifclearxxx} -\def\ifclearxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifclearsucceed - \else - \expandafter\ifclearfail - \fi -} -\def\ifclearsucceed{\conditionalsucceed{ifclear}} -\def\ifclearfail{\nestedignore{ifclear}} -\defineunmatchedend{ifclear} - -% @iftex always succeeds; we read the text following, through @end -% iftex). But `@end iftex' should be valid only after an @iftex. -% -\def\iftex{\conditionalsucceed{iftex}} -\defineunmatchedend{iftex} - -% We can't just want to start a group at @iftex (for example) and end it -% at @end iftex, since then @set commands inside the conditional have no -% effect (they'd get reverted at the end of the group). So we must -% define \Eiftex to redefine itself to be its previous value. (We can't -% just define it to fail again with an ``unmatched end'' error, since -% the @ifset might be nested.) -% -\def\conditionalsucceed#1{% - \edef\temp{% - % Remember the current value of \E#1. - \let\nece{prevE#1} = \nece{E#1}% - % - % At the `@end #1', redefine \E#1 to be its previous value. - \def\nece{E#1}{\let\nece{E#1} = \nece{prevE#1}}% - }% - \temp -} - -% We need to expand lots of \csname's, but we don't want to expand the -% control sequences after we've constructed them. 
-% -\def\nece#1{\expandafter\noexpand\csname#1\endcsname} - -% @asis just yields its argument. Used with @table, for example. -% -\def\asis#1{#1} - -% @math means output in math mode. -% We don't use $'s directly in the definition of \math because control -% sequences like \math are expanded when the toc file is written. Then, -% we read the toc file back, the $'s will be normal characters (as they -% should be, according to the definition of Texinfo). So we must use a -% control sequence to switch into and out of math mode. -% -% This isn't quite enough for @math to work properly in indices, but it -% seems unlikely it will ever be needed there. -% -\let\implicitmath = $ -\def\math#1{\implicitmath #1\implicitmath} - -% @bullet and @minus need the same treatment as @math, just above. -\def\bullet{\implicitmath\ptexbullet\implicitmath} -\def\minus{\implicitmath-\implicitmath} - -\def\node{\ENVcheck\parsearg\nodezzz} -\def\nodezzz#1{\nodexxx [#1,]} -\def\nodexxx[#1,#2]{\gdef\lastnode{#1}} -\let\nwnode=\node -\let\lastnode=\relax - -\def\donoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\setref{\lastnode}\fi -\let\lastnode=\relax} - -\def\unnumbnoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\unnumbsetref{\lastnode}\fi -\let\lastnode=\relax} - -\def\appendixnoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\appendixsetref{\lastnode}\fi -\let\lastnode=\relax} - -\let\refill=\relax - -% @setfilename is done at the beginning of every texinfo file. -% So open here the files we need to have open while reading the input. -% This makes it possible to make a .fmt file for texinfo. -\def\setfilename{% - \readauxfile - \opencontents - \openindices - \fixbackslash % Turn off hack to swallow `\input texinfo'. - \global\let\setfilename=\comment % Ignore extra @setfilename cmds. - \comment % Ignore the actual filename. 
-} - -\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend} - -\def\inforef #1{\inforefzzz #1,,,,**} -\def\inforefzzz #1,#2,#3,#4**{See Info file \file{\ignorespaces #3{}}, - node \samp{\ignorespaces#1{}}} - -\message{fonts,} - -% Font-change commands. - -% Texinfo supports the sans serif font style, which plain TeX does not. -% So we set up a \sf analogous to plain's \rm, etc. -\newfam\sffam -\def\sf{\fam=\sffam \tensf} -\let\li = \sf % Sometimes we call it \li, not \sf. - -%% Try out Computer Modern fonts at \magstephalf -\let\mainmagstep=\magstephalf - -\ifx\bigger\relax -\let\mainmagstep=\magstep1 -\font\textrm=cmr12 -\font\texttt=cmtt12 -\else -\font\textrm=cmr10 scaled \mainmagstep -\font\texttt=cmtt10 scaled \mainmagstep -\fi -% Instead of cmb10, you many want to use cmbx10. -% cmbx10 is a prettier font on its own, but cmb10 -% looks better when embedded in a line with cmr10. -\font\textbf=cmb10 scaled \mainmagstep -\font\textit=cmti10 scaled \mainmagstep -\font\textsl=cmsl10 scaled \mainmagstep -\font\textsf=cmss10 scaled \mainmagstep -\font\textsc=cmcsc10 scaled \mainmagstep -\font\texti=cmmi10 scaled \mainmagstep -\font\textsy=cmsy10 scaled \mainmagstep - -% A few fonts for @defun, etc. -\font\defbf=cmbx10 scaled \magstep1 %was 1314 -\font\deftt=cmtt10 scaled \magstep1 -\def\df{\let\tentt=\deftt \let\tenbf = \defbf \bf} - -% Fonts for indices and small examples. -% We actually use the slanted font rather than the italic, -% because texinfo normally uses the slanted fonts for that. -% Do not make many font distinctions in general in the index, since they -% aren't very useful. 
-\font\ninett=cmtt9 -\font\indrm=cmr9 -\font\indit=cmsl9 -\let\indsl=\indit -\let\indtt=\ninett -\let\indsf=\indrm -\let\indbf=\indrm -\let\indsc=\indrm -\font\indi=cmmi9 -\font\indsy=cmsy9 - -% Fonts for headings -\font\chaprm=cmbx12 scaled \magstep2 -\font\chapit=cmti12 scaled \magstep2 -\font\chapsl=cmsl12 scaled \magstep2 -\font\chaptt=cmtt12 scaled \magstep2 -\font\chapsf=cmss12 scaled \magstep2 -\let\chapbf=\chaprm -\font\chapsc=cmcsc10 scaled\magstep3 -\font\chapi=cmmi12 scaled \magstep2 -\font\chapsy=cmsy10 scaled \magstep3 - -\font\secrm=cmbx12 scaled \magstep1 -\font\secit=cmti12 scaled \magstep1 -\font\secsl=cmsl12 scaled \magstep1 -\font\sectt=cmtt12 scaled \magstep1 -\font\secsf=cmss12 scaled \magstep1 -\font\secbf=cmbx12 scaled \magstep1 -\font\secsc=cmcsc10 scaled\magstep2 -\font\seci=cmmi12 scaled \magstep1 -\font\secsy=cmsy10 scaled \magstep2 - -% \font\ssecrm=cmbx10 scaled \magstep1 % This size an font looked bad. -% \font\ssecit=cmti10 scaled \magstep1 % The letters were too crowded. -% \font\ssecsl=cmsl10 scaled \magstep1 -% \font\ssectt=cmtt10 scaled \magstep1 -% \font\ssecsf=cmss10 scaled \magstep1 - -%\font\ssecrm=cmb10 scaled 1315 % Note the use of cmb rather than cmbx. -%\font\ssecit=cmti10 scaled 1315 % Also, the size is a little larger than -%\font\ssecsl=cmsl10 scaled 1315 % being scaled magstep1. -%\font\ssectt=cmtt10 scaled 1315 -%\font\ssecsf=cmss10 scaled 1315 - -%\let\ssecbf=\ssecrm - -\font\ssecrm=cmbx12 scaled \magstephalf -\font\ssecit=cmti12 scaled \magstephalf -\font\ssecsl=cmsl12 scaled \magstephalf -\font\ssectt=cmtt12 scaled \magstephalf -\font\ssecsf=cmss12 scaled \magstephalf -\font\ssecbf=cmbx12 scaled \magstephalf -\font\ssecsc=cmcsc10 scaled \magstep1 -\font\sseci=cmmi12 scaled \magstephalf -\font\ssecsy=cmsy10 scaled \magstep1 -% The smallcaps and symbol fonts should actually be scaled \magstep1.5, -% but that is not a standard magnification. 
- -% Fonts for title page: -\font\titlerm = cmbx12 scaled \magstep3 -\let\authorrm = \secrm - -% In order for the font changes to affect most math symbols and letters, -% we have to define the \textfont of the standard families. Since -% texinfo doesn't allow for producing subscripts and superscripts, we -% don't bother to reset \scriptfont and \scriptscriptfont (which would -% also require loading a lot more fonts). -% -\def\resetmathfonts{% - \textfont0 = \tenrm \textfont1 = \teni \textfont2 = \tensy - \textfont\itfam = \tenit \textfont\slfam = \tensl \textfont\bffam = \tenbf - \textfont\ttfam = \tentt \textfont\sffam = \tensf -} - - -% The font-changing commands redefine the meanings of \tenSTYLE, instead -% of just \STYLE. We do this so that font changes will continue to work -% in math mode, where it is the current \fam that is relevant in most -% cases, not the current. Plain TeX does, for example, -% \def\bf{\fam=\bffam \tenbf} By redefining \tenbf, we obviate the need -% to redefine \bf itself. 
-\def\textfonts{% - \let\tenrm=\textrm \let\tenit=\textit \let\tensl=\textsl - \let\tenbf=\textbf \let\tentt=\texttt \let\smallcaps=\textsc - \let\tensf=\textsf \let\teni=\texti \let\tensy=\textsy - \resetmathfonts} -\def\chapfonts{% - \let\tenrm=\chaprm \let\tenit=\chapit \let\tensl=\chapsl - \let\tenbf=\chapbf \let\tentt=\chaptt \let\smallcaps=\chapsc - \let\tensf=\chapsf \let\teni=\chapi \let\tensy=\chapsy - \resetmathfonts} -\def\secfonts{% - \let\tenrm=\secrm \let\tenit=\secit \let\tensl=\secsl - \let\tenbf=\secbf \let\tentt=\sectt \let\smallcaps=\secsc - \let\tensf=\secsf \let\teni=\seci \let\tensy=\secsy - \resetmathfonts} -\def\subsecfonts{% - \let\tenrm=\ssecrm \let\tenit=\ssecit \let\tensl=\ssecsl - \let\tenbf=\ssecbf \let\tentt=\ssectt \let\smallcaps=\ssecsc - \let\tensf=\ssecsf \let\teni=\sseci \let\tensy=\ssecsy - \resetmathfonts} -\def\indexfonts{% - \let\tenrm=\indrm \let\tenit=\indit \let\tensl=\indsl - \let\tenbf=\indbf \let\tentt=\indtt \let\smallcaps=\indsc - \let\tensf=\indsf \let\teni=\indi \let\tensy=\indsy - \resetmathfonts} - -% Set up the default fonts, so we can use them for creating boxes. -% -\textfonts - -% Count depth in font-changes, for error checks -\newcount\fontdepth \fontdepth=0 - -% Fonts for short table of contents. -\font\shortcontrm=cmr12 -\font\shortcontbf=cmbx12 -\font\shortcontsl=cmsl12 - -%% Add scribe-like font environments, plus @l for inline lisp (usually sans -%% serif) and @ii for TeX italic - -% \smartitalic{ARG} outputs arg in italics, followed by an italic correction -% unless the following character is such as not to need one. -\def\smartitalicx{\ifx\next,\else\ifx\next-\else\ifx\next.\else\/\fi\fi\fi} -\def\smartitalic#1{{\sl #1}\futurelet\next\smartitalicx} - -\let\i=\smartitalic -\let\var=\smartitalic -\let\dfn=\smartitalic -\let\emph=\smartitalic -\let\cite=\smartitalic - -\def\b#1{{\bf #1}} -\let\strong=\b - -% We can't just use \exhyphenpenalty, because that only has effect at -% the end of a paragraph. 
Restore normal hyphenation at the end of the -% group within which \nohyphenation is presumably called. -% -\def\nohyphenation{\hyphenchar\font = -1 \aftergroup\restorehyphenation} -\def\restorehyphenation{\hyphenchar\font = `- } - -\def\t#1{% - {\tt \nohyphenation \rawbackslash \frenchspacing #1}% - \null -} -\let\ttfont = \t -%\def\samp #1{`{\tt \rawbackslash \frenchspacing #1}'\null} -\def\samp #1{`\tclose{#1}'\null} -\def\key #1{{\tt \nohyphenation \uppercase{#1}}\null} -\def\ctrl #1{{\tt \rawbackslash \hat}#1} - -\let\file=\samp - -% @code is a modification of @t, -% which makes spaces the same size as normal in the surrounding text. -\def\tclose#1{% - {% - % Change normal interword space to be same as for the current font. - \spaceskip = \fontdimen2\font - % - % Switch to typewriter. - \tt - % - % But `\ ' produces the large typewriter interword space. - \def\ {{\spaceskip = 0pt{} }}% - % - % Turn off hyphenation. - \nohyphenation - % - \rawbackslash - \frenchspacing - #1% - }% - \null -} -\let\code=\tclose -%\let\exp=\tclose %Was temporary - -% @kbd is like @code, except that if the argument is just one @key command, -% then @kbd has no effect. - -\def\xkey{\key} -\def\kbdfoo#1#2#3\par{\def\one{#1}\def\three{#3}\def\threex{??}% -\ifx\one\xkey\ifx\threex\three \key{#2}% -\else\tclose{\look}\fi -\else\tclose{\look}\fi} - -% Typeset a dimension, e.g., `in' or `pt'. The only reason for the -% argument is to make the input look right: @dmn{pt} instead of -% @dmn{}pt. -% -\def\dmn#1{\thinspace #1} - -\def\kbd#1{\def\look{#1}\expandafter\kbdfoo\look??\par} - -\def\l#1{{\li #1}\null} % - -\def\r#1{{\rm #1}} % roman font -% Use of \lowercase was suggested. -\def\sc#1{{\smallcaps#1}} % smallcaps font -\def\ii#1{{\it #1}} % italic font - -\message{page headings,} - -\newskip\titlepagetopglue \titlepagetopglue = 1.5in -\newskip\titlepagebottomglue \titlepagebottomglue = 2pc - -% First the title page. Must do @settitle before @titlepage. 
-\def\titlefont#1{{\titlerm #1}} - -\newif\ifseenauthor -\newif\iffinishedtitlepage - -\def\shorttitlepage{\parsearg\shorttitlepagezzz} -\def\shorttitlepagezzz #1{\begingroup\hbox{}\vskip 1.5in \chaprm \centerline{#1}% - \endgroup\page\hbox{}\page} - -\def\titlepage{\begingroup \parindent=0pt \textfonts - \let\subtitlerm=\tenrm -% I deinstalled the following change because \cmr12 is undefined. -% This change was not in the ChangeLog anyway. --rms. -% \let\subtitlerm=\cmr12 - \def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines}% - % - \def\authorfont{\authorrm \normalbaselineskip = 16pt \normalbaselines}% - % - % Leave some space at the very top of the page. - \vglue\titlepagetopglue - % - % Now you can print the title using @title. - \def\title{\parsearg\titlezzz}% - \def\titlezzz##1{\leftline{\titlefont{##1}} - % print a rule at the page bottom also. - \finishedtitlepagefalse - \vskip4pt \hrule height 4pt \vskip4pt}% - % No rule at page bottom unless we print one at the top with @title. - \finishedtitlepagetrue - % - % Now you can put text using @subtitle. - \def\subtitle{\parsearg\subtitlezzz}% - \def\subtitlezzz##1{{\subtitlefont \rightline{##1}}}% - % - % @author should come last, but may come many times. - \def\author{\parsearg\authorzzz}% - \def\authorzzz##1{\ifseenauthor\else\vskip 0pt plus 1filll\seenauthortrue\fi - {\authorfont \leftline{##1}}}% - % - % Most title ``pages'' are actually two pages long, with space - % at the top of the second. We don't want the ragged left on the second. - \let\oldpage = \page - \def\page{% - \iffinishedtitlepage\else - \finishtitlepage - \fi - \oldpage - \let\page = \oldpage - \hbox{}}% -% \def\page{\oldpage \hbox{}} -} - -\def\Etitlepage{% - \iffinishedtitlepage\else - \finishtitlepage - \fi - % It is important to do the page break before ending the group, - % because the headline and footline are only empty inside the group. 
- % If we use the new definition of \page, we always get a blank page - % after the title page, which we certainly don't want. - \oldpage - \endgroup - \HEADINGSon -} - -\def\finishtitlepage{% - \vskip4pt \hrule height 2pt - \vskip\titlepagebottomglue - \finishedtitlepagetrue -} - -%%% Set up page headings and footings. - -\let\thispage=\folio - -\newtoks \evenheadline % Token sequence for heading line of even pages -\newtoks \oddheadline % Token sequence for heading line of odd pages -\newtoks \evenfootline % Token sequence for footing line of even pages -\newtoks \oddfootline % Token sequence for footing line of odd pages - -% Now make Tex use those variables -\headline={{\textfonts\rm \ifodd\pageno \the\oddheadline - \else \the\evenheadline \fi}} -\footline={{\textfonts\rm \ifodd\pageno \the\oddfootline - \else \the\evenfootline \fi}\HEADINGShook} -\let\HEADINGShook=\relax - -% Commands to set those variables. -% For example, this is what @headings on does -% @evenheading @thistitle|@thispage|@thischapter -% @oddheading @thischapter|@thispage|@thistitle -% @evenfooting @thisfile|| -% @oddfooting ||@thisfile - -\def\evenheading{\parsearg\evenheadingxxx} -\def\oddheading{\parsearg\oddheadingxxx} -\def\everyheading{\parsearg\everyheadingxxx} - -\def\evenfooting{\parsearg\evenfootingxxx} -\def\oddfooting{\parsearg\oddfootingxxx} -\def\everyfooting{\parsearg\everyfootingxxx} - -{\catcode`\@=0 % - -\gdef\evenheadingxxx #1{\evenheadingyyy #1@|@|@|@|\finish} -\gdef\evenheadingyyy #1@|#2@|#3@|#4\finish{% -\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\oddheadingxxx #1{\oddheadingyyy #1@|@|@|@|\finish} -\gdef\oddheadingyyy #1@|#2@|#3@|#4\finish{% -\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\everyheadingxxx #1{\everyheadingyyy #1@|@|@|@|\finish} -\gdef\everyheadingyyy #1@|#2@|#3@|#4\finish{% -\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}} -\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - 
-\gdef\evenfootingxxx #1{\evenfootingyyy #1@|@|@|@|\finish} -\gdef\evenfootingyyy #1@|#2@|#3@|#4\finish{% -\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\oddfootingxxx #1{\oddfootingyyy #1@|@|@|@|\finish} -\gdef\oddfootingyyy #1@|#2@|#3@|#4\finish{% -\global\oddfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\everyfootingxxx #1{\everyfootingyyy #1@|@|@|@|\finish} -\gdef\everyfootingyyy #1@|#2@|#3@|#4\finish{% -\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}} -\global\oddfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} -% -}% unbind the catcode of @. - -% @headings double turns headings on for double-sided printing. -% @headings single turns headings on for single-sided printing. -% @headings off turns them off. -% @headings on same as @headings double, retained for compatibility. -% @headings after turns on double-sided headings after this page. -% @headings doubleafter turns on double-sided headings after this page. -% @headings singleafter turns on single-sided headings after this page. -% By default, they are off. - -\def\headings #1 {\csname HEADINGS#1\endcsname} - -\def\HEADINGSoff{ -\global\evenheadline={\hfil} \global\evenfootline={\hfil} -\global\oddheadline={\hfil} \global\oddfootline={\hfil}} -\HEADINGSoff -% When we turn headings on, set the page number to 1. -% For double-sided printing, put current file name in lower left corner, -% chapter name on inside top of right hand pages, document -% title on inside top of left hand pages, and page numbers on outside top -% edge of all pages. -\def\HEADINGSdouble{ -%\pagealignmacro -\global\pageno=1 -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\folio\hfil\thistitle}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} -% For single-sided printing, chapter title goes across top left of page, -% page number on top right. 
-\def\HEADINGSsingle{ -%\pagealignmacro -\global\pageno=1 -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\thischapter\hfil\folio}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} -\def\HEADINGSon{\HEADINGSdouble} - -\def\HEADINGSafter{\let\HEADINGShook=\HEADINGSdoublex} -\let\HEADINGSdoubleafter=\HEADINGSafter -\def\HEADINGSdoublex{% -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\folio\hfil\thistitle}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} - -\def\HEADINGSsingleafter{\let\HEADINGShook=\HEADINGSsinglex} -\def\HEADINGSsinglex{% -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\thischapter\hfil\folio}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -} - -% Subroutines used in generating headings -% Produces Day Month Year style of output. -\def\today{\number\day\space -\ifcase\month\or -January\or February\or March\or April\or May\or June\or -July\or August\or September\or October\or November\or December\fi -\space\number\year} - -% Use this if you want the Month Day, Year style of output. -%\def\today{\ifcase\month\or -%January\or February\or March\or April\or May\or June\or -%July\or August\or September\or October\or November\or December\fi -%\space\number\day, \number\year} - -% @settitle line... specifies the title of the document, for headings -% It generates no output of its own - -\def\thistitle{No Title} -\def\settitle{\parsearg\settitlezzz} -\def\settitlezzz #1{\gdef\thistitle{#1}} - -\message{tables,} - -% @tabs -- simple alignment - -% These don't work. For one thing, \+ is defined as outer. -% So these macros cannot even be defined. - -%\def\tabs{\parsearg\tabszzz} -%\def\tabszzz #1{\settabs\+#1\cr} -%\def\tabline{\parsearg\tablinezzz} -%\def\tablinezzz #1{\+#1\cr} -%\def\&{&} - -% Tables -- @table, @ftable, @vtable, @item(x), @kitem(x), @xitem(x). 
- -% default indentation of table text -\newdimen\tableindent \tableindent=.8in -% default indentation of @itemize and @enumerate text -\newdimen\itemindent \itemindent=.3in -% margin between end of table item and start of table text. -\newdimen\itemmargin \itemmargin=.1in - -% used internally for \tableindent (or \itemindent) minus \itemmargin -\newdimen\itemmax - -% Note @table, @ftable, and @vtable define @item, @itemx, etc., with -% these defs. -% They also define \itemindex -% to index the item name in whatever manner is desired (perhaps none). - -\def\internalBitem{\smallbreak \parsearg\itemzzz} -\def\internalBitemx{\par \parsearg\itemzzz} - -% \internalBxitem and \internalBxitemx take a quoted subtopic and save it -% in \xitemsubtopic, which \xitemzzz uses as the subtopic of the index entry. -\def\internalBxitem "#1"{\def\xitemsubtopic{#1} \smallbreak \parsearg\xitemzzz} -\def\internalBxitemx "#1"{\def\xitemsubtopic{#1} \par \parsearg\xitemzzz} - -\def\internalBkitem{\smallbreak \parsearg\kitemzzz} -\def\internalBkitemx{\par \parsearg\kitemzzz} - -\def\kitemzzz #1{\dosubind {kw}{\code{#1}}{for {\bf \lastfunction}}% - \itemzzz {#1}} - -\def\xitemzzz #1{\dosubind {kw}{\code{#1}}{for {\bf \xitemsubtopic}}% - \itemzzz {#1}} - -\def\itemzzz #1{\begingroup % - \advance\hsize by -\rightskip - \advance\hsize by -\tableindent - \setbox0=\hbox{\itemfont{#1}}% - \itemindex{#1}% - \nobreak % This prevents a break before @itemx. - % - % Be sure we are not still in the middle of a paragraph. - \parskip=0in - \par - % - % If the item text does not fit in the space we have, put it on a line - % by itself, and do not allow a page break either before or after that - % line. We do not start a paragraph here because then if the next - % command is, e.g., @kindex, the whatsit would get put into the - % horizontal list on a line by itself, resulting in extra blank space. - \ifdim \wd0>\itemmax - \setbox0=\hbox{\hskip \leftskip \hskip -\tableindent \unhbox0}\box0 - \nobreak - \else - % The item text fits into the space. Start a paragraph, so that the - % following text (if any) will end up on the same line. 
Since that - % text will be indented by \tableindent, we make the item text be in - % a zero-width box. - \noindent - \rlap{\hskip -\tableindent\box0}% - \fi - \endgroup -} - -\def\item{\errmessage{@item while not in a table}} -\def\itemx{\errmessage{@itemx while not in a table}} -\def\kitem{\errmessage{@kitem while not in a table}} -\def\kitemx{\errmessage{@kitemx while not in a table}} -\def\xitem{\errmessage{@xitem while not in a table}} -\def\xitemx{\errmessage{@xitemx while not in a table}} - -%% Contains a kludge to get @end[description] to work -\def\description{\tablez{\dontindex}{1}{}{}{}{}} - -\def\table{\begingroup\inENV\obeylines\obeyspaces\tablex} -{\obeylines\obeyspaces% -\gdef\tablex #1^^M{% -\tabley\dontindex#1 \endtabley}} - -\def\ftable{\begingroup\inENV\obeylines\obeyspaces\ftablex} -{\obeylines\obeyspaces% -\gdef\ftablex #1^^M{% -\tabley\fnitemindex#1 \endtabley -\def\Eftable{\endgraf\endgroup\afterenvbreak}% -\let\Etable=\relax}} - -\def\vtable{\begingroup\inENV\obeylines\obeyspaces\vtablex} -{\obeylines\obeyspaces% -\gdef\vtablex #1^^M{% -\tabley\vritemindex#1 \endtabley -\def\Evtable{\endgraf\endgroup\afterenvbreak}% -\let\Etable=\relax}} - -\def\dontindex #1{} -\def\fnitemindex #1{\doind {fn}{\code{#1}}}% -\def\vritemindex #1{\doind {vr}{\code{#1}}}% - -{\obeyspaces % -\gdef\tabley#1#2 #3 #4 #5 #6 #7\endtabley{\endgroup% -\tablez{#1}{#2}{#3}{#4}{#5}{#6}}} - -\def\tablez #1#2#3#4#5#6{% -\aboveenvbreak % -\begingroup % -\def\Edescription{\Etable}% Neccessary kludge. 
-\let\itemindex=#1% -\ifnum 0#3>0 \advance \leftskip by #3\mil \fi % -\ifnum 0#4>0 \tableindent=#4\mil \fi % -\ifnum 0#5>0 \advance \rightskip by #5\mil \fi % -\def\itemfont{#2}% -\itemmax=\tableindent % -\advance \itemmax by -\itemmargin % -\advance \leftskip by \tableindent % -\exdentamount=\tableindent -\parindent = 0pt -\parskip = \smallskipamount -\ifdim \parskip=0pt \parskip=2pt \fi% -\def\Etable{\endgraf\endgroup\afterenvbreak}% -\let\item = \internalBitem % -\let\itemx = \internalBitemx % -\let\kitem = \internalBkitem % -\let\kitemx = \internalBkitemx % -\let\xitem = \internalBxitem % -\let\xitemx = \internalBxitemx % -} - -% This is the counter used by @enumerate, which is really @itemize - -\newcount \itemno - -\def\itemize{\parsearg\itemizezzz} - -\def\itemizezzz #1{% - \begingroup % ended by the @end itemize - \itemizey {#1}{\Eitemize} -} - -\def\itemizey #1#2{% -\aboveenvbreak % -\itemmax=\itemindent % -\advance \itemmax by -\itemmargin % -\advance \leftskip by \itemindent % -\exdentamount=\itemindent -\parindent = 0pt % -\parskip = \smallskipamount % -\ifdim \parskip=0pt \parskip=2pt \fi% -\def#2{\endgraf\endgroup\afterenvbreak}% -\def\itemcontents{#1}% -\let\item=\itemizeitem} - -% Set sfcode to normal for the chars that usually have another value. -% These are `.?!:;,' -\def\frenchspacing{\sfcode46=1000 \sfcode63=1000 \sfcode33=1000 - \sfcode58=1000 \sfcode59=1000 \sfcode44=1000 } - -% \splitoff TOKENS\endmark defines \first to be the first token in -% TOKENS, and \rest to be the remainder. -% -\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}% - -% Allow an optional argument of an uppercase letter, lowercase letter, -% or number, to specify the first label in the enumerated list. No -% argument is the same as `1'. 
-% -\def\enumerate{\parsearg\enumeratezzz} -\def\enumeratezzz #1{\enumeratey #1 \endenumeratey} -\def\enumeratey #1 #2\endenumeratey{% - \begingroup % ended by the @end enumerate - % - % If we were given no argument, pretend we were given `1'. - \def\thearg{#1}% - \ifx\thearg\empty \def\thearg{1}\fi - % - % Detect if the argument is a single token. If so, it might be a - % letter. Otherwise, the only valid thing it can be is a number. - % (We will always have one token, because of the test we just made. - % This is a good thing, since \splitoff doesn't work given nothing at - % all -- the first parameter is undelimited.) - \expandafter\splitoff\thearg\endmark - \ifx\rest\empty - % Only one token in the argument. It could still be anything. - % A ``lowercase letter'' is one whose \lccode is nonzero. - % An ``uppercase letter'' is one whose \lccode is both nonzero, and - % not equal to itself. - % Otherwise, we assume it's a number. - % - % We need the \relax at the end of the \ifnum lines to stop TeX from - % continuing to look for a <number>. - % - \ifnum\lccode\expandafter`\thearg=0\relax - \numericenumerate % a number (we hope) - \else - % It's a letter. - \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax - \lowercaseenumerate % lowercase letter - \else - \uppercaseenumerate % uppercase letter - \fi - \fi - \else - % Multiple tokens in the argument. We hope it's a number. - \numericenumerate - \fi -} - -% An @enumerate whose labels are integers. The starting integer is -% given in \thearg. -% -\def\numericenumerate{% - \itemno = \thearg - \startenumeration{\the\itemno}% -} - -% The starting (lowercase) letter is in \thearg. -\def\lowercaseenumerate{% - \itemno = \expandafter`\thearg - \startenumeration{% - % Be sure we're not beyond the end of the alphabet. - \ifnum\itemno=0 - \errmessage{No more lowercase letters in @enumerate; get a bigger - alphabet}% - \fi - \char\lccode\itemno - }% -} - -% The starting (uppercase) letter is in \thearg. 
-\def\uppercaseenumerate{% - \itemno = \expandafter`\thearg - \startenumeration{% - % Be sure we're not beyond the end of the alphabet. - \ifnum\itemno=0 - \errmessage{No more uppercase letters in @enumerate; get a bigger - alphabet} - \fi - \char\uccode\itemno - }% -} - -% Call itemizey, adding a period to the first argument and supplying the -% common last two arguments. Also subtract one from the initial value in -% \itemno, since @item increments \itemno. -% -\def\startenumeration#1{% - \advance\itemno by -1 - \itemizey{#1.}\Eenumerate\flushcr -} - -% @alphaenumerate and @capsenumerate are abbreviations for giving an arg -% to @enumerate. -% -\def\alphaenumerate{\enumerate{a}} -\def\capsenumerate{\enumerate{A}} -\def\Ealphaenumerate{\Eenumerate} -\def\Ecapsenumerate{\Eenumerate} - -% Definition of @item while inside @itemize. - -\def\itemizeitem{% -\advance\itemno by 1 -{\let\par=\endgraf \smallbreak}% -\ifhmode \errmessage{\in hmode at itemizeitem}\fi -{\parskip=0in \hskip 0pt -\hbox to 0pt{\hss \itemcontents\hskip \itemmargin}% -\vadjust{\penalty 1200}}% -\flushcr} - -\message{indexing,} -% Index generation facilities - -% Define \newwrite to be identical to plain tex's \newwrite -% except not \outer, so it can be used within \newindex. -{\catcode`\@=11 -\gdef\newwrite{\alloc@7\write\chardef\sixt@@n}} - -% \newindex {foo} defines an index named foo. -% It automatically defines \fooindex such that -% \fooindex ...rest of line... puts an entry in the index foo. -% It also defines \fooindfile to be the number of the output channel for -% the file that accumulates this index. The file's extension is foo. -% The name of an index should be no more than 2 characters long -% for the sake of vms. 
- -\def\newindex #1{ -\expandafter\newwrite \csname#1indfile\endcsname% Define number for output file -\openout \csname#1indfile\endcsname \jobname.#1 % Open the file -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\doindex {#1}} -} - -% @defindex foo == \newindex{foo} - -\def\defindex{\parsearg\newindex} - -% Define @defcodeindex, like @defindex except put all entries in @code. - -\def\newcodeindex #1{ -\expandafter\newwrite \csname#1indfile\endcsname% Define number for output file -\openout \csname#1indfile\endcsname \jobname.#1 % Open the file -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\docodeindex {#1}} -} - -\def\defcodeindex{\parsearg\newcodeindex} - -% @synindex foo bar makes index foo feed into index bar. -% Do this instead of @defindex foo if you don't want it as a separate index. -\def\synindex #1 #2 {% -\expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname -\expandafter\let\csname#1indfile\endcsname=\synindexfoo -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\doindex {#2}}% -} - -% @syncodeindex foo bar similar, but put all entries made for index foo -% inside @code. -\def\syncodeindex #1 #2 {% -\expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname -\expandafter\let\csname#1indfile\endcsname=\synindexfoo -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\docodeindex {#2}}% -} - -% Define \doindex, the driver for all \fooindex macros. -% Argument #1 is generated by the calling \fooindex macro, -% and it is "foo", the name of the index. - -% \doindex just uses \parsearg; it calls \doind for the actual work. -% This is because \doind is more useful to call from other macros. - -% There is also \dosubind {index}{topic}{subtopic} -% which makes an entry in a two-level index such as the operation index. 
- -\def\doindex#1{\edef\indexname{#1}\parsearg\singleindexer} -\def\singleindexer #1{\doind{\indexname}{#1}} - -% like the previous two, but they put @code around the argument. -\def\docodeindex#1{\edef\indexname{#1}\parsearg\singlecodeindexer} -\def\singlecodeindexer #1{\doind{\indexname}{\code{#1}}} - -% \indexdummies makes each markup command expand to a literal backslash -% (\realbackslash) followed by its own name, so that it is written out -% as a command rather than expanded. -\def\indexdummies{% -\def\_{{\realbackslash _}}% -\def\w{\realbackslash w }% -\def\bf{\realbackslash bf }% -\def\rm{\realbackslash rm }% -\def\sl{\realbackslash sl }% -\def\sf{\realbackslash sf}% -\def\tt{\realbackslash tt}% -\def\gtr{\realbackslash gtr}% -\def\less{\realbackslash less}% -\def\hat{\realbackslash hat}% -\def\char{\realbackslash char}% -\def\TeX{\realbackslash TeX}% -\def\dots{\realbackslash dots }% -\def\copyright{\realbackslash copyright }% -\def\tclose##1{\realbackslash tclose {##1}}% -\def\code##1{\realbackslash code {##1}}% -\def\samp##1{\realbackslash samp {##1}}% -\def\t##1{\realbackslash t {##1}}% -\def\r##1{\realbackslash r {##1}}% -\def\i##1{\realbackslash i {##1}}% -\def\b##1{\realbackslash b {##1}}% -\def\cite##1{\realbackslash cite {##1}}% -\def\key##1{\realbackslash key {##1}}% -\def\file##1{\realbackslash file {##1}}% -\def\var##1{\realbackslash var {##1}}% -\def\kbd##1{\realbackslash kbd {##1}}% -\def\dfn##1{\realbackslash dfn {##1}}% -\def\emph##1{\realbackslash emph {##1}}% -} - -% \indexnofonts no-ops all font-change commands. -% This is used when outputting the strings to sort the index by. -\def\indexdummyfont#1{#1} -\def\indexdummytex{TeX} -\def\indexdummydots{...} - -\def\indexnofonts{% -\let\w=\indexdummyfont -\let\t=\indexdummyfont -\let\r=\indexdummyfont -\let\i=\indexdummyfont -\let\b=\indexdummyfont -\let\emph=\indexdummyfont -\let\strong=\indexdummyfont -\let\cite=\indexdummyfont -\let\sc=\indexdummyfont -%Don't no-op \tt, since it isn't a user-level command -% and is used in the definitions of the active chars like <, >, |... 
-%\let\tt=\indexdummyfont -\let\tclose=\indexdummyfont -\let\code=\indexdummyfont -\let\file=\indexdummyfont -\let\samp=\indexdummyfont -\let\kbd=\indexdummyfont -\let\key=\indexdummyfont -\let\var=\indexdummyfont -\let\TeX=\indexdummytex -\let\dots=\indexdummydots -} - -% To define \realbackslash, we must make \ not be an escape. -% We must first make another character (@) an escape -% so we do not become unable to do a definition. - -{\catcode`\@=0 \catcode`\\=\other -@gdef@realbackslash{\}} - -\let\indexbackslash=0 %overridden during \printindex. - -\def\doind #1#2{% -{\count10=\lastpenalty % -{\indexdummies % Must do this here, since \bf, etc expand at this stage -\escapechar=`\\% -{\let\folio=0% Expand all macros now EXCEPT \folio -\def\rawbackslashxx{\indexbackslash}% \indexbackslash isn't defined now -% so it will be output as is; and it will print as backslash in the indx. -% -% Now process the index-string once, with all font commands turned off, -% to get the string to sort the index by. -{\indexnofonts -\xdef\temp1{#2}% -}% -% Now produce the complete index entry. We process the index-string again, -% this time with font commands expanded, to get what to print in the index. -\edef\temp{% -\write \csname#1indfile\endcsname{% -\realbackslash entry {\temp1}{\folio}{#2}}}% -\temp }% -}\penalty\count10}} - -\def\dosubind #1#2#3{% -{\count10=\lastpenalty % -{\indexdummies % Must do this here, since \bf, etc expand at this stage -\escapechar=`\\% -{\let\folio=0% -\def\rawbackslashxx{\indexbackslash}% -% -% Now process the index-string once, with all font commands turned off, -% to get the string to sort the index by. -{\indexnofonts -\xdef\temp1{#2 #3}% -}% -% Now produce the complete index entry. We process the index-string again, -% this time with font commands expanded, to get what to print in the index. 
-\edef\temp{% -\write \csname#1indfile\endcsname{% -\realbackslash entry {\temp1}{\folio}{#2}{#3}}}% -\temp }% -}\penalty\count10}} - -% The index entry written in the file actually looks like -% \entry {sortstring}{page}{topic} -% or -% \entry {sortstring}{page}{topic}{subtopic} -% The texindex program reads in these files and writes files -% containing these kinds of lines: -% \initial {c} -% before the first topic whose initial is c -% \entry {topic}{pagelist} -% for a topic that is used without subtopics -% \primary {topic} -% for the beginning of a topic that is used with subtopics -% \secondary {subtopic}{pagelist} -% for each subtopic. - -% Define the user-accessible indexing commands -% @findex, @vindex, @kindex, @cindex. - -\def\findex {\fnindex} -\def\kindex {\kyindex} -\def\cindex {\cpindex} -\def\vindex {\vrindex} -\def\tindex {\tpindex} -\def\pindex {\pgindex} - -\def\cindexsub {\begingroup\obeylines\cindexsub} -{\obeylines % -\gdef\cindexsub "#1" #2^^M{\endgroup % -\dosubind{cp}{#2}{#1}}} - -% Define the macros used in formatting output of the sorted index material. - -% This is what you call to cause a particular index to get printed. -% Write -% @unnumbered Function Index -% @printindex fn - -\def\printindex{\parsearg\doprintindex} - -\def\doprintindex#1{% - \tex - \dobreak \chapheadingskip {10000} - \catcode`\%=\other\catcode`\&=\other\catcode`\#=\other - \catcode`\$=\other\catcode`\_=\other - \catcode`\~=\other - % - % The following don't help, since the chars were translated - % when the raw index was written, and their fonts were discarded - % due to \indexnofonts. - %\catcode`\"=\active - %\catcode`\^=\active - %\catcode`\_=\active - %\catcode`\|=\active - %\catcode`\<=\active - %\catcode`\>=\active - % % - \def\indexbackslash{\rawbackslashxx} - \indexfonts\rm \tolerance=9500 \advance\baselineskip -1pt - \begindoublecolumns - % - % See if the index file exists and is nonempty. 
- \openin 1 \jobname.#1s - \ifeof 1 - % \enddoublecolumns gets confused if there is no text in the index, - % and it loses the chapter title and the aux file entries for the - % index. The easiest way to prevent this problem is to make sure - % there is some text. - (Index is nonexistent) - \else - % - % If the index file exists but is empty, then \openin leaves \ifeof - % false. We have to make TeX try to read something from the file, so - % it can discover if there is anything in it. - \read 1 to \temp - \ifeof 1 - (Index is empty) - \else - \input \jobname.#1s - \fi - \fi - \closein 1 - \enddoublecolumns - \Etex -} - -% These macros are used by the sorted index file itself. -% Change them to control the appearance of the index. - -% Same as \bigskipamount except no shrink. -% \balancecolumns gets confused if there is any shrink. -\newskip\initialskipamount \initialskipamount 12pt plus4pt - -\def\initial #1{% -{\let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt -\ifdim\lastskip<\initialskipamount -\removelastskip \penalty-200 \vskip \initialskipamount\fi -\line{\secbf#1\hfill}\kern 2pt\penalty10000}} - -% This typesets a paragraph consisting of #1, dot leaders, and then #2 -% flush to the right margin. It is used for index and table of contents -% entries. The paragraph is indented by \leftskip. -% -\def\entry #1#2{\begingroup - % - % Start a new paragraph if necessary, so our assignments below can't - % affect previous text. - \par - % - % Do not fill out the last line with white space. - \parfillskip = 0in - % - % No extra space above this paragraph. - \parskip = 0in - % - % Do not prefer a separate line ending with a hyphen to fewer lines. - \finalhyphendemerits = 0 - % - % \hangindent is only relevant when the entry text and page number - % don't both fit on one line. In that case, bob suggests starting the - % dots pretty far over on the line. Unfortunately, a large - % indentation looks wrong when the entry text itself is broken across - % lines. 
So we use a small indentation and put up with long leaders. - % - % \hangafter is reset to 1 (which is the value we want) at the start - % of each paragraph, so we need not do anything with that. - \hangindent=2em - % - % When the entry text needs to be broken, just fill out the first line - % with blank space. - \rightskip = 0pt plus1fil - % - % Start a ``paragraph'' for the index entry so the line breaking - % parameters we've set above will have an effect. - \noindent - % - % Insert the text of the index entry. TeX will do line-breaking on it. - #1% - % - % If we must, put the page number on a line of its own, and fill out - % this line with blank space. (The \hfil is overwhelmed with the - % fill leaders glue in \indexdotfill if the page number does fit.) - \hfil\penalty50 - \null\nobreak\indexdotfill % Have leaders before the page number. - % - % The `\ ' here is removed by the implicit \unskip that TeX does as - % part of (the primitive) \par. Without it, a spurious underfull - % \hbox ensues. - \ #2% The page number ends the paragraph. - \par -\endgroup} - -% Like \dotfill except takes at least 1 em. -\def\indexdotfill{\cleaders - \hbox{$\mathsurround=0pt \mkern1.5mu . \mkern1.5mu$}\hskip 1em plus 1fill} - -\def\primary #1{\line{#1\hfil}} - -\newskip\secondaryindent \secondaryindent=0.5cm - -\def\secondary #1#2{ -{\parfillskip=0in \parskip=0in -\hangindent =1in \hangafter=1 -\noindent\hskip\secondaryindent\hbox{#1}\indexdotfill #2\par -}} - -%% Define two-column mode, which is used in indexes. -%% Adapted from the TeXbook, page 416. -\catcode `\@=11 - -\newbox\partialpage - -\newdimen\doublecolumnhsize - -\def\begindoublecolumns{\begingroup - % Grab any single-column material above us. - \output = {\global\setbox\partialpage - =\vbox{\unvbox255\kern -\topskip \kern \baselineskip}}% - \eject - % - % Now switch to the double-column output routine. - \output={\doublecolumnout}% - % - % Change the page size parameters. 
 We could do this once outside this - % routine, in each of @smallbook, @afourpaper, and the default 8.5x11 - % format, but then we repeat the same computation. Repeating a couple - % of assignments once per index is clearly meaningless for the - % execution time, so we may as well do it in one place. - % - % First we halve the line length, less a little for the gutter between - % the columns. We compute the gutter based on the line length, so it - % changes automatically with the paper format. The magic constant - % below is chosen so that the gutter has the same value (well, +- < - % 1pt) as it did when we hard-coded it. - % - % We put the result in a separate register, \doublecolumnhsize, so we - % can restore it in \pagesofar, after \hsize itself has (potentially) - % been clobbered. - % - \doublecolumnhsize = \hsize - \advance\doublecolumnhsize by -.04154\hsize - \divide\doublecolumnhsize by 2 - \hsize = \doublecolumnhsize - % - % Double the \vsize as well. (We don't need a separate register here, - % since nobody clobbers \vsize.) 
- \vsize = 2\vsize - \doublecolumnpagegoal -} - -\def\enddoublecolumns{\eject \endgroup \pagegoal=\vsize \unvbox\partialpage} - -\def\doublecolumnsplit{\splittopskip=\topskip \splitmaxdepth=\maxdepth - \global\dimen@=\pageheight \global\advance\dimen@ by-\ht\partialpage - \global\setbox1=\vsplit255 to\dimen@ \global\setbox0=\vbox{\unvbox1} - \global\setbox3=\vsplit255 to\dimen@ \global\setbox2=\vbox{\unvbox3} - \ifdim\ht0>\dimen@ \setbox255=\vbox{\unvbox0\unvbox2} \global\setbox255=\copy5 \fi - \ifdim\ht2>\dimen@ \setbox255=\vbox{\unvbox0\unvbox2} \global\setbox255=\copy5 \fi -} -\def\doublecolumnpagegoal{% - \dimen@=\vsize \advance\dimen@ by-2\ht\partialpage \global\pagegoal=\dimen@ -} -\def\pagesofar{\unvbox\partialpage % - \hsize=\doublecolumnhsize % have to restore this since output routine - \wd0=\hsize \wd2=\hsize \hbox to\pagewidth{\box0\hfil\box2}} -\def\doublecolumnout{% - \setbox5=\copy255 - {\vbadness=10000 \doublecolumnsplit} - \ifvbox255 - \setbox0=\vtop to\dimen@{\unvbox0} - \setbox2=\vtop to\dimen@{\unvbox2} - \onepageout\pagesofar \unvbox255 \penalty\outputpenalty - \else - \setbox0=\vbox{\unvbox5} - \ifvbox0 - \dimen@=\ht0 \advance\dimen@ by\topskip \advance\dimen@ by-\baselineskip - \divide\dimen@ by2 \splittopskip=\topskip \splitmaxdepth=\maxdepth - {\vbadness=10000 - \loop \global\setbox5=\copy0 - \setbox1=\vsplit5 to\dimen@ - \setbox3=\vsplit5 to\dimen@ - \ifvbox5 \global\advance\dimen@ by1pt \repeat - \setbox0=\vbox to\dimen@{\unvbox1} - \setbox2=\vbox to\dimen@{\unvbox3} - \global\setbox\partialpage=\vbox{\pagesofar} - \doublecolumnpagegoal - } - \fi - \fi -} - -\catcode `\@=\other -\message{sectioning,} -% Define chapters, sections, etc. - -\newcount \chapno -\newcount \secno \secno=0 -\newcount \subsecno \subsecno=0 -\newcount \subsubsecno \subsubsecno=0 - -% This counter is funny since it counts through charcodes of letters A, B, ... 
-\newcount \appendixno \appendixno = `\@ -\def\appendixletter{\char\the\appendixno} - -\newwrite \contentsfile -% This is called from \setfilename. -\def\opencontents{\openout \contentsfile = \jobname.toc} - -% Each @chapter defines this as the name of the chapter. -% page headings and footings can use it. @section does likewise - -\def\thischapter{} \def\thissection{} -% \seccheck{NAME}: complain if the sectioning command @NAME is used after the -% table of contents has been generated (\startcontents makes \pageno negative -% to request roman-numbered pages). The test must be numeric (\ifnum): a plain -% \if would compare the two tokens \pageno and < and therefore never be true. -\def\seccheck#1{\ifnum \pageno<0 % -\errmessage{@#1 not allowed after generating table of contents}\fi -% -} - -\def\chapternofonts{% -\let\rawbackslash=\relax% -\let\frenchspacing=\relax% -\def\result{\realbackslash result} -\def\equiv{\realbackslash equiv} -\def\expansion{\realbackslash expansion} -\def\print{\realbackslash print} -\def\TeX{\realbackslash TeX} -\def\dots{\realbackslash dots} -\def\copyright{\realbackslash copyright} -\def\tt{\realbackslash tt} -\def\bf{\realbackslash bf } -\def\w{\realbackslash w} -\def\less{\realbackslash less} -\def\gtr{\realbackslash gtr} -\def\hat{\realbackslash hat} -\def\char{\realbackslash char} -\def\tclose##1{\realbackslash tclose {##1}} -\def\code##1{\realbackslash code {##1}} -\def\samp##1{\realbackslash samp {##1}} -\def\r##1{\realbackslash r {##1}} -\def\b##1{\realbackslash b {##1}} -\def\key##1{\realbackslash key {##1}} -\def\file##1{\realbackslash file {##1}} -\def\kbd##1{\realbackslash kbd {##1}} -% These are redefined because @smartitalic wouldn't work inside xdef. -\def\i##1{\realbackslash i {##1}} -\def\cite##1{\realbackslash cite {##1}} -\def\var##1{\realbackslash var {##1}} -\def\emph##1{\realbackslash emph {##1}} -\def\dfn##1{\realbackslash dfn {##1}} -} - -\newcount\absseclevel % used to calculate proper heading level -\newcount\secbase\secbase=0 % @raise/lowersections modify this count - -% @raisesections: treat @section as chapter, @subsection as section, etc. -\def\raisesections{\global\advance\secbase by -1} -\let\up=\raisesections % original BFox name - -% @lowersections: treat @chapter as section, @section as subsection, etc. 
-\def\lowersections{\global\advance\secbase by 1} -\let\down=\lowersections % original BFox name - -% Choose a numbered-heading macro -% #1 is heading level if unmodified by @raisesections or @lowersections -% #2 is text for heading -\def\numhead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \chapterzzz{#2} -\or - \seczzz{#2} -\or - \numberedsubseczzz{#2} -\or - \numberedsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \chapterzzz{#2} - \else - \numberedsubsubseczzz{#2} - \fi -\fi -} - -% like \numhead, but chooses appendix heading levels -\def\apphead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \appendixzzz{#2} -\or - \appendixsectionzzz{#2} -\or - \appendixsubseczzz{#2} -\or - \appendixsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \appendixzzz{#2} - \else - \appendixsubsubseczzz{#2} - \fi -\fi -} - -% like \numhead, but chooses numberless heading levels -\def\unnmhead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \unnumberedzzz{#2} -\or - \unnumberedseczzz{#2} -\or - \unnumberedsubseczzz{#2} -\or - \unnumberedsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \unnumberedzzz{#2} - \else - \unnumberedsubsubseczzz{#2} - \fi -\fi -} - - -\def\thischaptername{No Chapter Title} -\outer\def\chapter{\parsearg\chapteryyy} -\def\chapteryyy #1{\numhead0{#1}} % normally numhead0 calls chapterzzz -\def\chapterzzz #1{\seccheck{chapter}% -\secno=0 \subsecno=0 \subsubsecno=0 -\global\advance \chapno by 1 \message{Chapter \the\chapno}% -\chapmacro {#1}{\the\chapno}% -\gdef\thissection{#1}% -\gdef\thischaptername{#1}% -% We don't substitute the actual chapter name into \thischapter -% because we don't want its macros evaluated now. 
-\xdef\thischapter{Chapter \the\chapno: \noexpand\thischaptername}% -{\chapternofonts% -\edef\temp{{\realbackslash chapentry {#1}{\the\chapno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\global\let\section = \numberedsec -\global\let\subsection = \numberedsubsec -\global\let\subsubsection = \numberedsubsubsec -}} - -\outer\def\appendix{\parsearg\appendixyyy} -\def\appendixyyy #1{\apphead0{#1}} % normally apphead0 calls appendixzzz -\def\appendixzzz #1{\seccheck{appendix}% -\secno=0 \subsecno=0 \subsubsecno=0 -\global\advance \appendixno by 1 \message{Appendix \appendixletter}% -\chapmacro {#1}{Appendix \appendixletter}% -\gdef\thissection{#1}% -\gdef\thischaptername{#1}% -\xdef\thischapter{Appendix \appendixletter: \noexpand\thischaptername}% -{\chapternofonts% -\edef\temp{{\realbackslash chapentry - {#1}{Appendix \appendixletter}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\global\let\section = \appendixsec -\global\let\subsection = \appendixsubsec -\global\let\subsubsection = \appendixsubsubsec -}} - -\outer\def\top{\parsearg\unnumberedyyy} -\outer\def\unnumbered{\parsearg\unnumberedyyy} -\def\unnumberedyyy #1{\unnmhead0{#1}} % normally unnmhead0 calls unnumberedzzz -\def\unnumberedzzz #1{\seccheck{unnumbered}% -\secno=0 \subsecno=0 \subsubsecno=0 -% -% This used to be simply \message{#1}, but TeX fully expands the -% argument to \message. Therefore, if #1 contained @-commands, TeX -% expanded them. For example, in `@unnumbered The @cite{Book}', TeX -% expanded @cite (which turns out to cause errors because \cite is meant -% to be executed, not expanded). -% -% Anyway, we don't want the fully-expanded definition of @cite to appear -% as a result of the \message, we just want `@cite' itself. We use -% \the<toks register> to achieve this: TeX expands \the<toks> only once, -% simply yielding the contents of the <toks register>. 
-\toks0 = {#1}\message{(\the\toks0)}% -% -\unnumbchapmacro {#1}% -\gdef\thischapter{#1}\gdef\thissection{#1}% -{\chapternofonts% -\edef\temp{{\realbackslash unnumbchapentry {#1}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\global\let\section = \unnumberedsec -\global\let\subsection = \unnumberedsubsec -\global\let\subsubsection = \unnumberedsubsubsec -}} - -\outer\def\numberedsec{\parsearg\secyyy} -\def\secyyy #1{\numhead1{#1}} % normally calls seczzz -\def\seczzz #1{\seccheck{section}% -\subsecno=0 \subsubsecno=0 \global\advance \secno by 1 % -\gdef\thissection{#1}\secheading {#1}{\the\chapno}{\the\secno}% -{\chapternofonts% -\edef\temp{{\realbackslash secentry % -{#1}{\the\chapno}{\the\secno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} - -% @appendixsection is the long spelling of @appendixsec; both read their -% argument with \parsearg and hand it to \appendixsecyyy. -\outer\def\appendixsection{\parsearg\appendixsecyyy} -% Historical misspelling, kept so that input which uses it still works. -\outer\def\appenixsection{\parsearg\appendixsecyyy} -\outer\def\appendixsec{\parsearg\appendixsecyyy} -\def\appendixsecyyy #1{\apphead1{#1}} % normally calls appendixsectionzzz -\def\appendixsectionzzz #1{\seccheck{appendixsection}% -\subsecno=0 \subsubsecno=0 \global\advance \secno by 1 % -\gdef\thissection{#1}\secheading {#1}{\appendixletter}{\the\secno}% -{\chapternofonts% -\edef\temp{{\realbackslash secentry % -{#1}{\appendixletter}{\the\secno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} - -\outer\def\unnumberedsec{\parsearg\unnumberedsecyyy} -\def\unnumberedsecyyy #1{\unnmhead1{#1}} % normally calls unnumberedseczzz -\def\unnumberedseczzz #1{\seccheck{unnumberedsec}% -\plainsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% -\edef\temp{{\realbackslash unnumbsecentry{#1}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} - -\outer\def\numberedsubsec{\parsearg\numberedsubsecyyy} -\def\numberedsubsecyyy #1{\numhead2{#1}} % normally calls numberedsubseczzz -\def\numberedsubseczzz 
#1{\seccheck{subsection}% -\gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 % -\subsecheading {#1}{\the\chapno}{\the\secno}{\the\subsecno}% -{\chapternofonts% -\edef\temp{{\realbackslash subsecentry % -{#1}{\the\chapno}{\the\secno}{\the\subsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} - -\outer\def\appendixsubsec{\parsearg\appendixsubsecyyy} -\def\appendixsubsecyyy #1{\apphead2{#1}} % normally calls appendixsubseczzz -\def\appendixsubseczzz #1{\seccheck{appendixsubsec}% -\gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 % -\subsecheading {#1}{\appendixletter}{\the\secno}{\the\subsecno}% -{\chapternofonts% -\edef\temp{{\realbackslash subsecentry % -{#1}{\appendixletter}{\the\secno}{\the\subsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} - -\outer\def\unnumberedsubsec{\parsearg\unnumberedsubsecyyy} -\def\unnumberedsubsecyyy #1{\unnmhead2{#1}} %normally calls unnumberedsubseczzz -\def\unnumberedsubseczzz #1{\seccheck{unnumberedsubsec}% -\plainsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% -\edef\temp{{\realbackslash unnumbsubsecentry{#1}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} - -\outer\def\numberedsubsubsec{\parsearg\numberedsubsubsecyyy} -\def\numberedsubsubsecyyy #1{\numhead3{#1}} % normally numberedsubsubseczzz -\def\numberedsubsubseczzz #1{\seccheck{subsubsection}% -\gdef\thissection{#1}\global\advance \subsubsecno by 1 % -\subsubsecheading {#1} - {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno}% -{\chapternofonts% -\edef\temp{{\realbackslash subsubsecentry % - {#1} - {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno} - {\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} - -\outer\def\appendixsubsubsec{\parsearg\appendixsubsubsecyyy} 
-\def\appendixsubsubsecyyy #1{\apphead3{#1}} % normally appendixsubsubseczzz -\def\appendixsubsubseczzz #1{\seccheck{appendixsubsubsec}% -\gdef\thissection{#1}\global\advance \subsubsecno by 1 % -\subsubsecheading {#1} - {\appendixletter}{\the\secno}{\the\subsecno}{\the\subsubsecno}% -{\chapternofonts% -\edef\temp{{\realbackslash subsubsecentry{#1}% - {\appendixletter} - {\the\secno}{\the\subsecno}{\the\subsubsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} - -\outer\def\unnumberedsubsubsec{\parsearg\unnumberedsubsubsecyyy} -\def\unnumberedsubsubsecyyy #1{\unnmhead3{#1}} %normally unnumberedsubsubseczzz -\def\unnumberedsubsubseczzz #1{\seccheck{unnumberedsubsubsec}% -\plainsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% -\edef\temp{{\realbackslash unnumbsubsubsecentry{#1}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} - -% These are variants which are not "outer", so they can appear in @ifinfo. -% Actually, they should now be obsolete; ordinary section commands should work. -% Each one reads its argument with \parsearg and passes it straight to the -% corresponding ...zzz worker, bypassing the \numhead/\apphead/\unnmhead -% level adjustment. The targets must be the worker names that are actually -% defined: \appendixsectionzzz, \seczzz, \numberedsubseczzz and -% \numberedsubsubseczzz (not \appendixseczzz, \sectionzzz, etc.). -\def\infotop{\parsearg\unnumberedzzz} -\def\infounnumbered{\parsearg\unnumberedzzz} -\def\infounnumberedsec{\parsearg\unnumberedseczzz} -\def\infounnumberedsubsec{\parsearg\unnumberedsubseczzz} -\def\infounnumberedsubsubsec{\parsearg\unnumberedsubsubseczzz} - -\def\infoappendix{\parsearg\appendixzzz} -\def\infoappendixsec{\parsearg\appendixsectionzzz} -\def\infoappendixsubsec{\parsearg\appendixsubseczzz} -\def\infoappendixsubsubsec{\parsearg\appendixsubsubseczzz} - -\def\infochapter{\parsearg\chapterzzz} -\def\infosection{\parsearg\seczzz} -\def\infosubsection{\parsearg\numberedsubseczzz} -\def\infosubsubsection{\parsearg\numberedsubsubseczzz} - -% These macros control what the section commands do, according -% to what kind of chapter we are in (ordinary, appendix, or unnumbered). -% Define them by default for a numbered chapter. 
-\global\let\section = \numberedsec -\global\let\subsection = \numberedsubsec -\global\let\subsubsection = \numberedsubsubsec - -% Define @majorheading, @heading and @subheading - -% NOTE on use of \vbox for chapter headings, section headings, and -% such: -% 1) We use \vbox rather than the earlier \line to permit -% overlong headings to fold. -% 2) \hyphenpenalty is set to 10000 because hyphenation in a -% heading is obnoxious; this forbids it. -% 3) Likewise, headings look best if no \parindent is used, and -% if justification is not attempted. Hence \raggedright. - - -\def\majorheading{\parsearg\majorheadingzzz} -\def\majorheadingzzz #1{% -{\advance\chapheadingskip by 10pt \chapbreak }% -{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 200} - -\def\chapheading{\parsearg\chapheadingzzz} -\def\chapheadingzzz #1{\chapbreak % -{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 200} - -\def\heading{\parsearg\secheadingi} - -\def\subheading{\parsearg\subsecheadingi} - -\def\subsubheading{\parsearg\subsubsecheadingi} - -% These macros generate a chapter, section, etc. heading only -% (including whitespace, linebreaking, etc. around it), -% given all the information in convenient, parsed form. 
- -%%% Args are the skip and penalty (usually negative) -\def\dobreak#1#2{\par\ifdim\lastskip<#1\removelastskip\penalty#2\vskip#1\fi} - -\def\setchapterstyle #1 {\csname CHAPF#1\endcsname} - -%%% Define plain chapter starts, and page on/off switching for it -% Parameter controlling skip before chapter headings (if needed) - -\newskip \chapheadingskip \chapheadingskip = 30pt plus 8pt minus 4pt - -\def\chapbreak{\dobreak \chapheadingskip {-4000}} -\def\chappager{\par\vfill\supereject} -\def\chapoddpage{\chappager \ifodd\pageno \else \hbox to 0pt{} \chappager\fi} - -\def\setchapternewpage #1 {\csname CHAPPAG#1\endcsname} - -\def\CHAPPAGoff{ -\global\let\pchapsepmacro=\chapbreak -\global\let\pagealignmacro=\chappager} - -\def\CHAPPAGon{ -\global\let\pchapsepmacro=\chappager -\global\let\pagealignmacro=\chappager -\global\def\HEADINGSon{\HEADINGSsingle}} - -\def\CHAPPAGodd{ -\global\let\pchapsepmacro=\chapoddpage -\global\let\pagealignmacro=\chapoddpage -\global\def\HEADINGSon{\HEADINGSdouble}} - -\CHAPPAGon - -\def\CHAPFplain{ -\global\let\chapmacro=\chfplain -\global\let\unnumbchapmacro=\unnchfplain} - -\def\chfplain #1#2{% - \pchapsepmacro - {% - \chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #2\enspace #1}% - }% - \bigskip - \penalty5000 -} - -\def\unnchfplain #1{% -\pchapsepmacro % -{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 10000 % -} -\CHAPFplain % The default - -\def\unnchfopen #1{% -\chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 10000 % -} - -\def\chfopen #1#2{\chapoddpage {\chapfonts -\vbox to 3in{\vfil \hbox to\hsize{\hfil #2} \hbox to\hsize{\hfil #1} \vfil}}% -\par\penalty 5000 % -} - -\def\CHAPFopen{ -\global\let\chapmacro=\chfopen -\global\let\unnumbchapmacro=\unnchfopen} - -% Parameter controlling skip before section headings. 
- -\newskip \subsecheadingskip \subsecheadingskip = 17pt plus 8pt minus 4pt -\def\subsecheadingbreak{\dobreak \subsecheadingskip {-500}} - -\newskip \secheadingskip \secheadingskip = 21pt plus 8pt minus 4pt -\def\secheadingbreak{\dobreak \secheadingskip {-1000}} - -% @paragraphindent is defined for the Info formatting commands only. -\let\paragraphindent=\comment - -% Section fonts are the base font at magstep2, which produces -% a size a bit more than 14 points in the default situation. - -\def\secheading #1#2#3{\secheadingi {#2.#3\enspace #1}} -\def\plainsecheading #1{\secheadingi {#1}} -\def\secheadingi #1{{\advance \secheadingskip by \parskip % -\secheadingbreak}% -{\secfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}% -\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000 } - - -% Subsection fonts are the base font at magstep1, -% which produces a size of 12 points. - -\def\subsecheading #1#2#3#4{\subsecheadingi {#2.#3.#4\enspace #1}} -\def\subsecheadingi #1{{\advance \subsecheadingskip by \parskip % -\subsecheadingbreak}% -{\subsecfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}% -\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000 } - -\def\subsubsecfonts{\subsecfonts} % Maybe this should change: - % Perhaps make sssec fonts scaled - % magstep half -\def\subsubsecheading #1#2#3#4#5{\subsubsecheadingi {#2.#3.#4.#5\enspace #1}} -\def\subsubsecheadingi #1{{\advance \subsecheadingskip by \parskip % -\subsecheadingbreak}% -{\subsubsecfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}% -\ifdim \parskip<10pt \kern 10pt\kern -\parskip\fi \penalty 10000} - - -\message{toc printing,} - -% Finish up the main text and prepare to read what we've written -% to \contentsfile. 
- -\newskip\contentsrightmargin \contentsrightmargin=1in -\def\startcontents#1{% - \pagealignmacro - \immediate\closeout \contentsfile - \ifnum \pageno>0 - \pageno = -1 % Request roman numbered pages. - \fi - % Don't need to put `Contents' or `Short Contents' in the headline. - % It is abundantly clear what they are. - \unnumbchapmacro{#1}\def\thischapter{}% - \begingroup % Set up to handle contents files properly. - \catcode`\\=0 \catcode`\{=1 \catcode`\}=2 \catcode`\@=11 - \raggedbottom % Worry more about breakpoints than the bottom. - \advance\hsize by -\contentsrightmargin % Don't use the full line length. -} - - -% Normal (long) toc. -\outer\def\contents{% - \startcontents{Table of Contents}% - \input \jobname.toc - \endgroup - \vfill \eject -} - -% And just the chapters. -\outer\def\summarycontents{% - \startcontents{Short Contents}% - % - \let\chapentry = \shortchapentry - \let\unnumbchapentry = \shortunnumberedentry - % We want a true roman here for the page numbers. - \secfonts - \let\rm=\shortcontrm \let\bf=\shortcontbf \let\sl=\shortcontsl - \rm - \advance\baselineskip by 1pt % Open it up a little. - \def\secentry ##1##2##3##4{} - \def\unnumbsecentry ##1##2{} - \def\subsecentry ##1##2##3##4##5{} - \def\unnumbsubsecentry ##1##2{} - \def\subsubsecentry ##1##2##3##4##5##6{} - \def\unnumbsubsubsecentry ##1##2{} - \input \jobname.toc - \endgroup - \vfill \eject -} -\let\shortcontents = \summarycontents - -% These macros generate individual entries in the table of contents. -% The first argument is the chapter or section name. -% The last argument is the page number. -% The arguments in between are the chapter number, section number, ... - -% Chapter-level things, for both the long and short contents. 
-\def\chapentry#1#2#3{\dochapentry{#2\labelspace#1}{#3}} - -% See comments in \dochapentry re vbox and related settings -\def\shortchapentry#1#2#3{% - \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno{#3}}% -} - -% Typeset the label for a chapter or appendix for the short contents. -% The arg is, e.g. `Appendix A' for an appendix, or `3' for a chapter. -% We could simplify the code here by writing out an \appendixentry -% command in the toc file for appendices, instead of using \chapentry -% for both, but it doesn't seem worth it. -\setbox0 = \hbox{\shortcontrm Appendix } -\newdimen\shortappendixwidth \shortappendixwidth = \wd0 - -\def\shortchaplabel#1{% - % We typeset #1 in a box of constant width, regardless of the text of - % #1, so the chapter titles will come out aligned. - \setbox0 = \hbox{#1}% - \dimen0 = \ifdim\wd0 > \shortappendixwidth \shortappendixwidth \else 0pt \fi - % - % This space should be plenty, since a single number is .5em, and the - % widest letter (M) is 1em, at least in the Computer Modern fonts. - % (This space doesn't include the extra space that gets added after - % the label; that gets put in in \shortchapentry above.) - \advance\dimen0 by 1.1em - \hbox to \dimen0{#1\hfil}% -} - -\def\unnumbchapentry#1#2{\dochapentry{#1}{#2}} -\def\shortunnumberedentry#1#2{\tocentry{#1}{\doshortpageno{#2}}} - -% Sections. -\def\secentry#1#2#3#4{\dosecentry{#2.#3\labelspace#1}{#4}} -\def\unnumbsecentry#1#2{\dosecentry{#1}{#2}} - -% Subsections. -\def\subsecentry#1#2#3#4#5{\dosubsecentry{#2.#3.#4\labelspace#1}{#5}} -\def\unnumbsubsecentry#1#2{\dosubsecentry{#1}{#2}} - -% And subsubsections. -\def\subsubsecentry#1#2#3#4#5#6{% - \dosubsubsecentry{#2.#3.#4.#5\labelspace#1}{#6}} -\def\unnumbsubsubsecentry#1#2{\dosubsubsecentry{#1}{#2}} - - -% This parameter controls the indentation of the various levels. -\newdimen\tocindent \tocindent = 3pc - -% Now for the actual typesetting. In all these, #1 is the text and #2 is the -% page number. 
-% -% If the toc has to be broken over pages, we would want to be at chapters -% if at all possible; hence the \penalty. -\def\dochapentry#1#2{% - \penalty-300 \vskip\baselineskip - \begingroup - \chapentryfonts - \tocentry{#1}{\dopageno{#2}}% - \endgroup - \nobreak\vskip .25\baselineskip -} - -\def\dosecentry#1#2{\begingroup - \secentryfonts \leftskip=\tocindent - \tocentry{#1}{\dopageno{#2}}% -\endgroup} - -\def\dosubsecentry#1#2{\begingroup - \subsecentryfonts \leftskip=2\tocindent - \tocentry{#1}{\dopageno{#2}}% -\endgroup} - -\def\dosubsubsecentry#1#2{\begingroup - \subsubsecentryfonts \leftskip=3\tocindent - \tocentry{#1}{\dopageno{#2}}% -\endgroup} - -% Final typesetting of a toc entry; we use the same \entry macro as for -% the index entries, but we want to suppress hyphenation here. (We -% can't do that in the \entry macro, since index entries might consist -% of hyphenated-identifiers-that-do-not-fit-on-a-line-and-nothing-else.) -% -\def\tocentry#1#2{\begingroup - \hyphenpenalty = 10000 - \entry{#1}{#2}% -\endgroup} - -% Space between chapter (or whatever) number and the title. -\def\labelspace{\hskip1em \relax} - -\def\dopageno#1{{\rm #1}} -\def\doshortpageno#1{{\rm #1}} - -\def\chapentryfonts{\secfonts \rm} -\def\secentryfonts{\textfonts} -\let\subsecentryfonts = \textfonts -\let\subsubsecentryfonts = \textfonts - - -\message{environments,} - -% Since these characters are used in examples, it should be an even number of -% \tt widths. Each \tt character is 1en, so two makes it 1em. -% Furthermore, these definitions must come after we define our fonts. 
-\newbox\dblarrowbox \newbox\longdblarrowbox -\newbox\pushcharbox \newbox\bullbox -\newbox\equivbox \newbox\errorbox - -\let\ptexequiv = \equiv - -%{\tentt -%\global\setbox\dblarrowbox = \hbox to 1em{\hfil$\Rightarrow$\hfil} -%\global\setbox\longdblarrowbox = \hbox to 1em{\hfil$\mapsto$\hfil} -%\global\setbox\pushcharbox = \hbox to 1em{\hfil$\dashv$\hfil} -%\global\setbox\equivbox = \hbox to 1em{\hfil$\ptexequiv$\hfil} -% Adapted from the manmac format (p.420 of TeXbook) -%\global\setbox\bullbox = \hbox to 1em{\kern.15em\vrule height .75ex width .85ex -% depth .1ex\hfil} -%} - -\def\point{$\star$} - -\def\result{\leavevmode\raise.15ex\hbox to 1em{\hfil$\Rightarrow$\hfil}} -\def\expansion{\leavevmode\raise.1ex\hbox to 1em{\hfil$\mapsto$\hfil}} -\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}} - -\def\equiv{\leavevmode\lower.1ex\hbox to 1em{\hfil$\ptexequiv$\hfil}} - -% Adapted from the TeXbook's \boxit. -{\tentt \global\dimen0 = 3em}% Width of the box. -\dimen2 = .55pt % Thickness of rules -% The text. (`r' is open on the right, `e' somewhat less so on the left.) -\setbox0 = \hbox{\kern-.75pt \tensf error\kern-1.5pt} - -\global\setbox\errorbox=\hbox to \dimen0{\hfil - \hsize = \dimen0 \advance\hsize by -5.8pt % Space to left+right. - \advance\hsize by -2\dimen2 % Rules. - \vbox{ - \hrule height\dimen2 - \hbox{\vrule width\dimen2 \kern3pt % Space to left of text. - \vtop{\kern2.4pt \box0 \kern2.4pt}% Space above/below. - \kern3pt\vrule width\dimen2}% Space to right. - \hrule height\dimen2} - \hfil} - -% The @error{} command. -\def\error{\leavevmode\lower.7ex\copy\errorbox} - -% @tex ... @end tex escapes into raw Tex temporarily. -% One exception: @ is still an escape character, so that @end tex works. -% But \@ or @@ will get a plain tex @ character. 
- -\def\tex{\begingroup -\catcode `\\=0 \catcode `\{=1 \catcode `\}=2 -\catcode `\$=3 \catcode `\&=4 \catcode `\#=6 -\catcode `\^=7 \catcode `\_=8 \catcode `\~=13 \let~=\tie -\catcode `\%=14 -\catcode 43=12 -\catcode`\"=12 -\catcode`\==12 -\catcode`\|=12 -\catcode`\<=12 -\catcode`\>=12 -\escapechar=`\\ -% -\let\{=\ptexlbrace -\let\}=\ptexrbrace -\let\.=\ptexdot -\let\*=\ptexstar -\let\dots=\ptexdots -\def\@{@}% -\let\bullet=\ptexbullet -\let\b=\ptexb \let\c=\ptexc \let\i=\ptexi \let\t=\ptext \let\l=\ptexl -\let\L=\ptexL -% -\let\Etex=\endgroup} - -% Define @lisp ... @endlisp. -% @lisp does a \begingroup so it can rebind things, -% including the definition of @endlisp (which normally is erroneous). - -% Amount to narrow the margins by for @lisp. -\newskip\lispnarrowing \lispnarrowing=0.4in - -% This is the definition that ^M gets inside @lisp -% phr: changed space to \null, to avoid overfull hbox problems. -{\obeyspaces% -\gdef\lisppar{\null\endgraf}} - -% Make each space character in the input produce a normal interword -% space in the output. Don't allow a line break at this space, as this -% is used only in environments like @example, where each line of input -% should produce a line of output anyway. -% -{\obeyspaces % -\gdef\sepspaces{\obeyspaces\let =\tie}} - -% Define \obeyedspace to be our active space, whatever it is. This is -% for use in \parsearg. -{\sepspaces % -\global\let\obeyedspace= } - -% This space is always present above and below environments. -\newskip\envskipamount \envskipamount = 0pt - -% Make spacing and below environment symmetrical. -\def\aboveenvbreak{{\advance\envskipamount by \parskip -\endgraf \ifdim\lastskip<\envskipamount -\removelastskip \penalty-50 \vskip\envskipamount \fi}} - -\let\afterenvbreak = \aboveenvbreak - -% \nonarrowing is a flag. If "set", @lisp etc don't narrow margins. 
-\let\nonarrowing=\relax - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \cartouche: draw rectangle w/rounded corners around argument -\font\circle=lcircle10 -\newdimen\circthick -\newdimen\cartouter\newdimen\cartinner -\newskip\normbskip\newskip\normpskip\newskip\normlskip -\circthick=\fontdimen8\circle -% -\def\ctl{{\circle\char'013\hskip -6pt}}% 6pt from pl file: 1/2charwidth -\def\ctr{{\hskip 6pt\circle\char'010}} -\def\cbl{{\circle\char'012\hskip -6pt}} -\def\cbr{{\hskip 6pt\circle\char'011}} -\def\carttop{\hbox to \cartouter{\hskip\lskip - \ctl\leaders\hrule height\circthick\hfil\ctr - \hskip\rskip}} -\def\cartbot{\hbox to \cartouter{\hskip\lskip - \cbl\leaders\hrule height\circthick\hfil\cbr - \hskip\rskip}} -% -\newskip\lskip\newskip\rskip - -\long\def\cartouche{% -\begingroup - \lskip=\leftskip \rskip=\rightskip - \leftskip=0pt\rightskip=0pt %we want these *outside*. - \cartinner=\hsize \advance\cartinner by-\lskip - \advance\cartinner by-\rskip - \cartouter=\hsize - \advance\cartouter by 18pt % allow for 3pt kerns on either -% side, and for 6pt waste from -% each corner char - \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip - % Flag to tell @lisp, etc., not to narrow margin. - \let\nonarrowing=\comment - \vbox\bgroup - \baselineskip=0pt\parskip=0pt\lineskip=0pt - \carttop - \hbox\bgroup - \hskip\lskip - \vrule\kern3pt - \vbox\bgroup - \hsize=\cartinner - \kern3pt - \begingroup - \baselineskip=\normbskip - \lineskip=\normlskip - \parskip=\normpskip - \vskip -\parskip -\def\Ecartouche{% - \endgroup - \kern3pt - \egroup - \kern3pt\vrule - \hskip\rskip - \egroup - \cartbot - \egroup -\endgroup -}} - - -% This macro is called at the beginning of all the @example variants, -% inside a group. -\def\nonfillstart{% - \aboveenvbreak - \inENV % This group ends at the end of the body - \hfuzz = 12pt % Don't be fussy - \sepspaces % Make spaces be word-separators rather than space tokens. 
- \singlespace % single space lines - \let\par = \lisppar % don't ignore blank lines - \obeylines % each line of input is a line of output - \parskip = 0pt - \parindent = 0pt - \emergencystretch = 0pt % don't try to avoid overfull boxes - % @cartouche defines \nonarrowing to inhibit narrowing - % at next level down. - \ifx\nonarrowing\relax - \advance \leftskip by \lispnarrowing - \exdentamount=\lispnarrowing - \let\exdent=\nofillexdent - \let\nonarrowing=\relax - \fi -} - -\def\Elisp{\endgroup\afterenvbreak}% - -\def\lisp{\begingroup - \nonfillstart - \def\Elisp{\endgroup\afterenvbreak}% - \tt - \rawbackslash % output the \ character from the current font - \gobble -} - -% Define the \E... control sequence only if we are inside the -% environment, so the error checking in \end will work. -% -% We must call \lisp last in the definition, since it reads the -% return following the @example (or whatever) command. -% -\def\example{\begingroup \def\Eexample{\Elisp\endgroup}\lisp} -\def\smallexample{\begingroup \def\Esmallexample{\Elisp\endgroup}\lisp} - -% Macro for 9 pt. examples, necessary to print with 5" lines. From -% Pavel@xerox. This is not used for @smallexamples unless the -% @smallbook command is given. -% -\def\smalllispx{\begingroup - \nonfillstart - \def\Esmalllisp{\endgroup\afterenvbreak}% - % - % Smaller interline space and fonts for small examples. - \baselineskip 10pt - \indexfonts \tt - \rawbackslash % output the \ character from the current font - \gobble -} - -% This is @display; same as @lisp except use roman font. -% -\def\display{\begingroup - \nonfillstart - \def\Edisplay{\endgroup\afterenvbreak}% - \gobble -} - -% This is @format; same as @display except don't narrow margins. -% -\def\format{\begingroup - \let\nonarrowing = t - \nonfillstart - \def\Eformat{\endgroup\afterenvbreak} - \gobble -} - -% @flushleft (same as @format) and @flushright. 
-% -\def\flushleft{\begingroup - \let\nonarrowing = t - \nonfillstart - \def\Eflushleft{\endgroup\afterenvbreak}% - \gobble -} -\def\flushright{\begingroup - \let\nonarrowing = t - \nonfillstart - \def\Eflushright{\endgroup\afterenvbreak}% - \advance\leftskip by 0pt plus 1fill - \gobble} - -% @quotation does normal linebreaking and narrows the margins. -% -\def\quotation{% -\begingroup\inENV %This group ends at the end of the @quotation body -{\parskip=0pt % because we will skip by \parskip too, later -\aboveenvbreak}% -\singlespace -\parindent=0pt -\def\Equotation{\par\endgroup\afterenvbreak}% -% @cartouche defines \nonarrowing to inhibit narrowing -% at next level down. -\ifx\nonarrowing\relax -\advance \leftskip by \lispnarrowing -\advance \rightskip by \lispnarrowing -\exdentamount=\lispnarrowing -\let\nonarrowing=\relax -\fi} - -\message{defuns,} -% Define formatter for defuns -% First, allow user to change definition object font (\df) internally -\def\setdeffont #1 {\csname DEF#1\endcsname} - -\newskip\defbodyindent \defbodyindent=.4in -\newskip\defargsindent \defargsindent=50pt -\newskip\deftypemargin \deftypemargin=12pt -\newskip\deflastargmargin \deflastargmargin=18pt - -\newcount\parencount -% define \functionparens, which makes ( and ) and & do special things. -% \functionparens affects the group it is contained in. -\def\activeparens{% -\catcode`\(=\active \catcode`\)=\active \catcode`\&=\active -\catcode`\[=\active \catcode`\]=\active} - -% Make control sequences which act like normal parenthesis chars. -\let\lparen = ( \let\rparen = ) - -{\activeparens % Now, smart parens don't turn on until &foo (see \amprm) - -% Be sure that we always have a definition for `(', etc. For example, -% if the fn name has parens in it, \boldbrax will not be in effect yet, -% so TeX would otherwise complain about undefined control sequence. 
-\global\let(=\lparen \global\let)=\rparen -\global\let[=\lbrack \global\let]=\rbrack - -\gdef\functionparens{\boldbrax\let&=\amprm\parencount=0 } -\gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb} - -% Definitions of (, ) and & used in args for functions. -% This is the definition of ( outside of all parentheses. -\gdef\oprm#1 {{\rm\char`\(}#1 \bf \let(=\opnested % -\global\advance\parencount by 1 } -% -% This is the definition of ( when already inside a level of parens. -\gdef\opnested{\char`\(\global\advance\parencount by 1 } -% -\gdef\clrm{% Print a paren in roman if it is taking us back to depth of 0. -% also in that case restore the outer-level definition of (. -\ifnum \parencount=1 {\rm \char `\)}\sl \let(=\oprm \else \char `\) \fi -\global\advance \parencount by -1 } -% If we encounter &foo, then turn on ()-hacking afterwards -% #1 is the word following the & (delimited by the next space); it is printed -% in roman after a literal ampersand, then ( and ) are switched to the -% depth-tracking definitions \oprm and \clrm. -\gdef\amprm#1 {{\rm\&#1}\let(=\oprm \let)=\clrm\ } -% -\gdef\normalparens{\boldbrax\let&=\ampnr} -} % End of definition inside \activeparens -%% These parens (in \boldbrax) actually are a little bolder than the -%% contained text. This is especially needed for [ and ] -\def\opnr{{\sf\char`\(}} \def\clnr{{\sf\char`\)}} \def\ampnr{\&} -\def\lbrb{{\bf\char`\[}} \def\rbrb{{\bf\char`\]}} - -% First, defname, which formats the header line itself. -% #1 should be the function name. -% #2 should be the type of definition, such as "Function". - -\def\defname #1#2{% -% Get the values of \leftskip and \rightskip as they were -% outside the @def... 
-\dimen2=\leftskip -\advance\dimen2 by -\defbodyindent -\dimen3=\rightskip -\advance\dimen3 by -\defbodyindent -\noindent % -\setbox0=\hbox{\hskip \deflastargmargin{\rm #2}\hskip \deftypemargin}% -\dimen0=\hsize \advance \dimen0 by -\wd0 % compute size for first line -\dimen1=\hsize \advance \dimen1 by -\defargsindent %size for continuations -\parshape 2 0in \dimen0 \defargsindent \dimen1 % -% Now output arg 2 ("Function" or some such) -% ending at \deftypemargin from the right margin, -% but stuck inside a box of width 0 so it does not interfere with linebreaking -{% Adjust \hsize to exclude the ambient margins, -% so that \rightline will obey them. -\advance \hsize by -\dimen2 \advance \hsize by -\dimen3 -\rlap{\rightline{{\rm #2}\hskip \deftypemargin}}}% -% Make all lines underfull and no complaints: -\tolerance=10000 \hbadness=10000 -\advance\leftskip by -\defbodyindent -\exdentamount=\defbodyindent -{\df #1}\enskip % Generate function name -} - -% Actually process the body of a definition -% #1 should be the terminating control sequence, such as \Edefun. -% #2 should be the "another name" control sequence, such as \defunx. -% #3 should be the control sequence that actually processes the header, -% such as \defunheader. - -\def\defparsebody #1#2#3{\begingroup\inENV% Environment for definitionbody -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2{\begingroup\obeylines\activeparens\spacesplit#3}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup % -\catcode 61=\active % -\obeylines\activeparens\spacesplit#3} - -\def\defmethparsebody #1#2#3#4 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. 
-\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 {\begingroup\obeylines\activeparens\spacesplit{#3{##1}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\activeparens\spacesplit{#3{#4}}} - -\def\defopparsebody #1#2#3#4#5 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 ##2 {\def#4{##1}% -\begingroup\obeylines\activeparens\spacesplit{#3{##2}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\activeparens\spacesplit{#3{#5}}} - -% These parsing functions are similar to the preceding ones -% except that they do not make parens into active characters. -% These are used for "variables" since they have no arguments. - -\def\defvarparsebody #1#2#3{\begingroup\inENV% Environment for definitionbody -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2{\begingroup\obeylines\spacesplit#3}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup % -\catcode 61=\active % -\obeylines\spacesplit#3} - -\def\defvrparsebody #1#2#3#4 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 {\begingroup\obeylines\spacesplit{#3{##1}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\spacesplit{#3{#4}}} - -% This seems to work right in all cases. -\let\deftpparsebody=\defvrparsebody -% This fails to work. 
When given `@deftp {Data Type} foo_t', -% it thinks the type name is just `f'. -%%% This is the same as all the others except for the last line. We need -%%% to parse the arguments differently for @deftp, since the ``attributes'' -%%% there are optional. -%%% -%%\def\deftpparsebody #1#2#3#4 {\begingroup\inENV % -%%\medbreak % -%%% Define the end token that this defining construct specifies -%%% so that it will exit this group. -%%\def#1{\endgraf\endgroup\medbreak}% -%%\def#2##1 {\begingroup\obeylines\spacesplit{#3{##1}}}% -%%\parindent=0in -%%\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -%%\exdentamount=\defbodyindent -%%\begingroup\obeylines\parsetpheaderline{#3{#4}}} - -%%{\obeylines % -%% % Parse the type name and any attributes (field names, etc.). -%% % #1 is the beginning of the macro call that will produce the output, -%% % i.e., \deftpheader{CLASS}; this is passed from \deftpparsebody. -%% % #2 is the type name, e.g., `struct termios'. -%% % #3 is the (possibly empty) attribute list. -%% % -%% \gdef\parsetpheaderline#1#2#3^^M{% -%% \endgroup % Started in \deftpparsebody. -%% % -%% % If the attribute list is in fact empty, there will be no space after -%% % #2; so we can't put a space in our TeX parameter list. But if it -%% % isn't empty, then #3 will begin with an unwanted space. -%% \def\theargs{\ignorespaces #3}% -%% % -%% % Call the macro to produce the output. -%% #1{#2}\theargs % -%% }% -%%} - -\def\defopvarparsebody #1#2#3#4#5 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 ##2 {\def#4{##1}% -\begingroup\obeylines\spacesplit{#3{##2}}}% -\parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\spacesplit{#3{#5}}} - -% Split up #2 at the first space token. 
-% call #1 with two arguments: -% the first is all of #2 before the space token, -% the second is all of #2 after that space token. -% If #2 contains no space token, all of it is passed as the first arg -% and the second is passed as empty. - -{\obeylines -\gdef\spacesplit#1#2^^M{\endgroup\spacesplitfoo{#1}#2 \relax\spacesplitfoo}% -\long\gdef\spacesplitfoo#1#2 #3#4\spacesplitfoo{% -\ifx\relax #3% -#1{#2}{}\else #1{#2}{#3#4}\fi}} - -% So much for the things common to all kinds of definitions. - -% Define @defun. - -% First, define the processing that is wanted for arguments of \defun -% Use this to expand the args and terminate the paragraph they make up - -\def\defunargs #1{\functionparens \sl -% Expand, preventing hyphenation at `-' chars. -% Note that groups don't affect changes in \hyphenchar. -\hyphenchar\tensl=0 -#1% -\hyphenchar\tensl=45 -\ifnum\parencount=0 \else \errmessage{unbalanced parens in @def arguments}\fi% -\interlinepenalty=10000 -\advance\rightskip by 0pt plus 1fil -\endgraf\penalty 10000\vskip -\parskip\penalty 10000% -} - -\def\deftypefunargs #1{% -% Expand, preventing hyphenation at `-' chars. -% Note that groups don't affect changes in \hyphenchar. -\functionparens -\code{#1}% -\interlinepenalty=10000 -\advance\rightskip by 0pt plus 1fil -\endgraf\penalty 10000\vskip -\parskip\penalty 10000% -} - -% Do complete processing of one @defun or @defunx line already parsed. 
- -% @deffn Command forward-char nchars - -\def\deffn{\defmethparsebody\Edeffn\deffnx\deffnheader} - -\def\deffnheader #1#2#3{\doind {fn}{\code{#2}}% -\begingroup\defname {#2}{#1}\defunargs{#3}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defun == @deffn Function - -\def\defun{\defparsebody\Edefun\defunx\defunheader} - -\def\defunheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Function}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @deftypefun int foobar (int @var{foo}, float @var{bar}) - -\def\deftypefun{\defparsebody\Edeftypefun\deftypefunx\deftypefunheader} - -% #1 is the data type. #2 is the name and args. -\def\deftypefunheader #1#2{\deftypefunheaderx{#1}#2 \relax} -% #1 is the data type, #2 the name, #3 the args. -\def\deftypefunheaderx #1#2 #3\relax{% -\doind {fn}{\code{#2}}% Make entry in function index -\begingroup\defname {\code{#1} #2}{Function}% -\deftypefunargs {#3}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @deftypefn {Library Function} int foobar (int @var{foo}, float @var{bar}) - -\def\deftypefn{\defmethparsebody\Edeftypefn\deftypefnx\deftypefnheader} - -% #1 is the classification. #2 is the data type. #3 is the name and args. -\def\deftypefnheader #1#2#3{\deftypefnheaderx{#1}{#2}#3 \relax} -% #1 is the classification, #2 the data type, #3 the name, #4 the args. 
-\def\deftypefnheaderx #1#2#3 #4\relax{% -\doind {fn}{\code{#3}}% Make entry in function index -\begingroup\defname {\code{#2} #3}{#1}% -\deftypefunargs {#4}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defmac == @deffn Macro - -\def\defmac{\defparsebody\Edefmac\defmacx\defmacheader} - -\def\defmacheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Macro}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defspec == @deffn Special Form - -\def\defspec{\defparsebody\Edefspec\defspecx\defspecheader} - -\def\defspecheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Special Form}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% This definition is run if you use @defunx -% anywhere other than immediately after a @defun or @defunx. -% Each name below is the "another name" control sequence that the -% matching @def... command hands to its parsebody macro, which redefines -% it locally; these global definitions only report misuse. - -\def\deffnx #1 {\errmessage{@deffnx in invalid context}} -\def\defunx #1 {\errmessage{@defunx in invalid context}} -\def\defmacx #1 {\errmessage{@defmacx in invalid context}} -\def\defspecx #1 {\errmessage{@defspecx in invalid context}} -\def\deftypefnx #1 {\errmessage{@deftypefnx in invalid context}} -\def\deftypefunx #1 {\errmessage{@deftypefunx in invalid context}} -% \deftypeunx is the former, misspelled name of \deftypefunx; it is kept -% so that anything referring to the old name behaves as before. -\def\deftypeunx #1 {\errmessage{@deftypeunx in invalid context}} - -% @defmethod, and so on - -% @defop {Funny Method} foo-class frobnicate argument - -\def\defop #1 {\def\defoptype{#1}% -\defopparsebody\Edefop\defopx\defopheader\defoptype} - -\def\defopheader #1#2#3{% -\dosubind {fn}{\code{#2}}{on #1}% Make entry in function index -\begingroup\defname {#2}{\defoptype{} on #1}% -\defunargs {#3}\endgroup % -} - -% @defmethod == @defop Method - -\def\defmethod{\defmethparsebody\Edefmethod\defmethodx\defmethodheader} - -\def\defmethodheader #1#2#3{% -\dosubind {fn}{\code{#2}}{on #1}% entry in function index -\begingroup\defname {#2}{Method on #1}% -\defunargs {#3}\endgroup % -} - -% @defcv {Class Option} foo-class foo-flag - -\def\defcv #1
{\def\defcvtype{#1}% -\defopvarparsebody\Edefcv\defcvx\defcvarheader\defcvtype} - -\def\defcvarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{of #1}% Make entry in var index -\begingroup\defname {#2}{\defcvtype{} of #1}% -\defvarargs {#3}\endgroup % -} - -% @defivar == @defcv {Instance Variable} - -\def\defivar{\defvrparsebody\Edefivar\defivarx\defivarheader} - -\def\defivarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{of #1}% Make entry in var index -\begingroup\defname {#2}{Instance Variable of #1}% -\defvarargs {#3}\endgroup % -} - -% These definitions are run if you use @defmethodx, etc., -% anywhere other than immediately after a @defmethod, etc. - -\def\defopx #1 {\errmessage{@defopx in invalid context}} -\def\defmethodx #1 {\errmessage{@defmethodx in invalid context}} -\def\defcvx #1 {\errmessage{@defcvx in invalid context}} -\def\defivarx #1 {\errmessage{@defivarx in invalid context}} - -% Now @defvar - -% First, define the processing that is wanted for arguments of @defvar. -% This is actually simple: just print them in roman.
-% This must expand the args and terminate the paragraph they make up -\def\defvarargs #1{\normalparens #1% -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000} - -% @defvr Counter foo-count - -\def\defvr{\defvrparsebody\Edefvr\defvrx\defvrheader} - -\def\defvrheader #1#2#3{\doind {vr}{\code{#2}}% -\begingroup\defname {#2}{#1}\defvarargs{#3}\endgroup} - -% @defvar == @defvr Variable - -\def\defvar{\defvarparsebody\Edefvar\defvarx\defvarheader} - -\def\defvarheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{Variable}% -\defvarargs {#2}\endgroup % -} - -% @defopt == @defvr {User Option} - -\def\defopt{\defvarparsebody\Edefopt\defoptx\defoptheader} - -\def\defoptheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{User Option}% -\defvarargs {#2}\endgroup % -} - -% @deftypevar int foobar - -\def\deftypevar{\defvarparsebody\Edeftypevar\deftypevarx\deftypevarheader} - -% #1 is the data type. #2 is the name. -\def\deftypevarheader #1#2{% -\doind {vr}{\code{#2}}% Make entry in variables index -\begingroup\defname {\code{#1} #2}{Variable}% -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000 -\endgroup} - -% @deftypevr {Global Flag} int enable - -\def\deftypevr{\defvrparsebody\Edeftypevr\deftypevrx\deftypevrheader} - -\def\deftypevrheader #1#2#3{\doind {vr}{\code{#3}}% -\begingroup\defname {\code{#2} #3}{#1} -\interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000 -\endgroup} - -% This definition is run if you use @defvarx -% anywhere other than immediately after a @defvar or @defvarx. 
- -\def\defvrx #1 {\errmessage{@defvrx in invalid context}} -\def\defvarx #1 {\errmessage{@defvarx in invalid context}} -\def\defoptx #1 {\errmessage{@defoptx in invalid context}} -\def\deftypevarx #1 {\errmessage{@deftypevarx in invalid context}} -\def\deftypevrx #1 {\errmessage{@deftypevrx in invalid context}} - -% Now define @deftp -% Args are printed in bold, a slight difference from @defvar. - -\def\deftpargs #1{\bf \defvarargs{#1}} - -% @deftp Class window height width ... - -\def\deftp{\deftpparsebody\Edeftp\deftpx\deftpheader} - -\def\deftpheader #1#2#3{\doind {tp}{\code{#2}}% -\begingroup\defname {#2}{#1}\deftpargs{#3}\endgroup} - -% This definition is run if you use @deftpx, etc -% anywhere other than immediately after a @deftp, etc. - -\def\deftpx #1 {\errmessage{@deftpx in invalid context}} - -\message{cross reference,} -% Define cross-reference macros -\newwrite \auxfile - -\newif\ifhavexrefs % True if xref values are known. -\newif\ifwarnedxrefs % True if we warned once that they aren't known. - -% \setref{foo} defines a cross-reference point named foo. - -\def\setref#1{% -%\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Ysectionnumberandtype}} - -\def\unnumbsetref#1{% -%\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Ynothing}} - -\def\appendixsetref#1{% -%\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Yappendixletterandtype}} - -% \xref, \pxref, and \ref generate cross-references to specified points. -% For \xrefX, #1 is the node name, #2 the name of the Info -% cross-reference, #3 the printed node name, #4 the name of the Info -% file, #5 the name of the printed manual. All but the node name can be -% omitted. 
-% -% \pxref prefixes the reference with `see ', \xref with `See '; \ref adds no prefix. -\def\pxref#1{see \xrefX[#1,,,,,,,]} -\def\xref#1{See \xrefX[#1,,,,,,,]} -\def\ref#1{\xrefX[#1,,,,,,,]} -\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup% -\def\printedmanual{\ignorespaces #5}% -\def\printednodename{\ignorespaces #3}% -% -\setbox1=\hbox{\printedmanual}% -\setbox0=\hbox{\printednodename}% -\ifdim \wd0=0pt% -% No printed node name was given (box 0 is empty): fall back to the node name #1. -\def\printednodename{\ignorespaces #1}% -%%% Uncomment the following line to make the actual chapter or section title -%%% appear inside the square brackets. -%\def\printednodename{#1-title}% -\fi% -% -% -% If we use \unhbox0 and \unhbox1 to print the node names, TeX does -% not insert empty discretionaries after hyphens, which means that it -% will not find a line break at a hyphen in a node name. Since some -% manuals are best written with fairly long node names, containing -% hyphens, this is a loss. Therefore, we simply give the text of -% the node name again, so it is as if TeX is seeing it for the first -% time. -% A nonempty box 1 means a printed manual name (#5) was given: cite that manual. -% Otherwise print the section reference, the node name in brackets, and the page. -\ifdim \wd1>0pt -section ``\printednodename'' in \cite{\printedmanual}% -\else% -\turnoffactive% -\refx{#1-snt}{} [\printednodename], page\tie\refx{#1-pg}{}% -\fi -\endgroup} - -% \dosetq is the interface for calls from other macros - -% Use \turnoffactive so that punctuation chars such as underscore -% work in node names.
-\def\dosetq #1#2{{\let\folio=0 \turnoffactive% -\edef\next{\write\auxfile{\internalsetq {#1}{#2}}}% -\next}} - -% \internalsetq {foo}{page} expands into -% CHARACTERS 'xrdef {foo}{...expansion of \Ypage...} -% When the aux file is read, ' is the escape character - -\def\internalsetq #1#2{'xrdef {#1}{\csname #2\endcsname}} - -% Things to be expanded by \internalsetq - -\def\Ypagenumber{\folio} - -\def\Ytitle{\thischapter} - -\def\Ynothing{} - -\def\Ysectionnumberandtype{% -\ifnum\secno=0 Chapter\xreftie\the\chapno % -\else \ifnum \subsecno=0 Section\xreftie\the\chapno.\the\secno % -\else \ifnum \subsubsecno=0 % -Section\xreftie\the\chapno.\the\secno.\the\subsecno % -\else % -Section\xreftie\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno % -\fi \fi \fi } - -\def\Yappendixletterandtype{% -\ifnum\secno=0 Appendix\xreftie'char\the\appendixno{}% -\else \ifnum \subsecno=0 Section\xreftie'char\the\appendixno.\the\secno % -\else \ifnum \subsubsecno=0 % -Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno % -\else % -Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno % -\fi \fi \fi } - -\gdef\xreftie{'tie} - -% Use TeX 3.0's \inputlineno to get the line number, for better error -% messages, but if we're using an old version of TeX, don't do anything. -% -\ifx\inputlineno\thisisundefined - \let\linenumber = \empty % Non-3.0. -\else - \def\linenumber{\the\inputlineno:\space} -\fi - -% Define \refx{NAME}{SUFFIX} to reference a cross-reference string named NAME. -% SUFFIX is output afterward whether or not NAME is defined. - -\def\refx#1#2{% - \expandafter\ifx\csname X#1\endcsname\relax - % If not defined, say something at least. - $\langle$un\-de\-fined$\rangle$% - \ifhavexrefs - \message{\linenumber Undefined cross reference `#1'.}% - \else - \ifwarnedxrefs\else - \global\warnedxrefstrue - \message{Cross reference values unknown; you must run TeX again.}% - \fi - \fi - \else - % It's defined, so just use it.
- \csname X#1\endcsname - \fi - #2% Output the suffix in any case. -} - -% Read the last existing aux file, if any. No error if none exists. - -% This is the macro invoked by entries in the aux file. -\def\xrdef #1#2{ -{\catcode`\'=\other\expandafter \gdef \csname X#1\endcsname {#2}}} - -% \readauxfile inputs \jobname.aux when it exists (setting \havexrefstrue), -% then opens a fresh aux file for writing. While reading, control and -% special characters are given catcode \other, ' is the escape character -% and \ is an ordinary character. -\def\readauxfile{% -\begingroup -\catcode `\^^@=\other -\catcode `\^^A=\other -\catcode `\^^B=\other -\catcode `\^^C=\other -\catcode `\^^D=\other -\catcode `\^^E=\other -\catcode `\^^F=\other -\catcode `\^^G=\other -\catcode `\^^H=\other -\catcode `\^^K=\other -\catcode `\^^L=\other -\catcode `\^^N=\other -\catcode `\^^O=\other -\catcode `\^^P=\other -\catcode `\^^Q=\other -\catcode `\^^R=\other -\catcode `\^^S=\other -\catcode `\^^T=\other -\catcode `\^^U=\other -\catcode `\^^V=\other -\catcode `\^^W=\other -\catcode `\^^X=\other -\catcode `\^^Y=\other -\catcode 26=\other -\catcode `\^^[=\other -\catcode `\^^\=\other -\catcode `\^^]=\other -\catcode `\^^^=\other -\catcode `\^^_=\other -\catcode `\@=\other -\catcode `\^=\other -\catcode `\~=\other -\catcode `\[=\other -\catcode `\]=\other -\catcode`\"=\other -\catcode`\_=\other -\catcode`\|=\other -\catcode`\<=\other -\catcode`\>=\other -\catcode `\$=\other -\catcode `\#=\other -\catcode `\&=\other -% `\+ does not work, so use 43. -\catcode 43=\other -% the aux file uses ' as the escape. -% Turn off \ as an escape so we do not lose on -% entries which were dumped with control sequences in their names. -% For example, 'xrdef {$\leq $-fun}{page ...} made by @defun ^^ -% Reference to such entries still does not work the way one would wish, -% but at least they do not bomb out when the aux file is read in. -\catcode `\{=1 \catcode `\}=2 -\catcode `\%=\other -\catcode `\'=0 -\catcode `\\=\other -\openin 1 \jobname.aux -\ifeof 1 \else \closein 1 \input \jobname.aux \global\havexrefstrue -\global\warnedobstrue -\fi -% Open the new aux file. Tex will close it automatically at exit. -\openout \auxfile=\jobname.aux -\endgroup} - - -% Footnotes.
- -\newcount \footnoteno - -% The trailing space in the following definition for supereject is -% vital for proper filling; pages come out unaligned when you do a -% pagealignmacro call if that space before the closing brace is -% removed. -\def\supereject{\par\penalty -20000\footnoteno =0 } - -% @footnotestyle is meaningful for info output only.. -\let\footnotestyle=\comment - -\let\ptexfootnote=\footnote - -{\catcode `\@=11 -% -% Auto-number footnotes. Otherwise like plain. -\gdef\footnote{% - \global\advance\footnoteno by \@ne - \edef\thisfootno{$^{\the\footnoteno}$}% - % - % In case the footnote comes at the end of a sentence, preserve the - % extra spacing after we do the footnote number. - \let\@sf\empty - \ifhmode\edef\@sf{\spacefactor\the\spacefactor}\/\fi - % - % Remove inadvertent blank space before typesetting the footnote number. - \unskip - \thisfootno\@sf - \footnotezzz -}% - -% Don't bother with the trickery in plain.tex to not require the -% footnote text as a parameter. Our footnotes don't need to be so general. -% -\long\gdef\footnotezzz#1{\insert\footins{% - % We want to typeset this text as a normal paragraph, even if the - % footnote reference occurs in (for example) a display environment. - % So reset some parameters. - \interlinepenalty\interfootnotelinepenalty - \splittopskip\ht\strutbox % top baseline for broken footnotes - \splitmaxdepth\dp\strutbox - \floatingpenalty\@MM - \leftskip\z@skip - \rightskip\z@skip - \spaceskip\z@skip - \xspaceskip\z@skip - \parindent\defaultparindent - % - % Hang the footnote text off the number. - \hang - \textindent{\thisfootno}% - % - % Don't crash into the line above the footnote text. Since this - % expands into a box, it must come within the paragraph, lest it - % provide a place where TeX can split the footnote. - \footstrut - #1\strut}% -} - -}%end \catcode `\@=11 - -% Set the baselineskip to #1, and the lineskip and strut size -% correspondingly. 
There is no deep meaning behind these magic numbers -% used as factors; they just match (closely enough) what Knuth defined. -% -\def\lineskipfactor{.1} -\def\strutheightpercent{.71} -\def\strutdepthpercent{.29} -% -\def\setleading#1{% - \baselineskip = #1\relax - \normalbaselineskip = \baselineskip - \lineskip = \lineskipfactor\baselineskip - \setbox\strutbox =\hbox{% - \vrule width0pt height\strutheightpercent\baselineskip - depth \strutdepthpercent \baselineskip - }% -} - -% @| inserts a changebar to the left of the current line. It should -% surround any changed text. This approach does *not* work if the -% change spans more than two lines of output. To handle that, we would -% have to adopt a much more difficult approach (putting marks into the main -% vertical list for the beginning and end of each change). -% -\def\|{% - % \vadjust can only be used in horizontal mode. - \leavevmode - % - % Append this vertical mode material after the current line in the output. - \vadjust{% - % We want to insert a rule with the height and depth of the current - % leading; that is exactly what \strutbox is supposed to record. - \vskip-\baselineskip - % - % \vadjust-items are inserted at the left edge of the type. So - % the \llap here moves out into the left-hand margin. - \llap{% - % - % For a thicker or thinner bar, change the `1pt'. - \vrule height\baselineskip width1pt - % - % This is the space between the bar and the text. - \hskip 12pt - }% - }% -} - -% For a final copy, take out the rectangles -% that mark overfull boxes (in case you have decided -% that the text looks ok even though it passes the margin). -% -\def\finalout{\overfullrule=0pt} - - -% End of control word definitions. - -\message{and turning on texinfo input format.} - -% \openindices creates the standard indices: cp with \newindex, and -% fn, vr, tp, ky and pg with \newcodeindex. -\def\openindices{% - \newindex{cp}% - \newcodeindex{fn}% - \newcodeindex{vr}% - \newcodeindex{tp}% - \newcodeindex{ky}% - \newcodeindex{pg}% -} - -% Set some numeric style parameters, for 8.5 x 11 format.
- -%\hsize = 6.5in -\newdimen\defaultparindent \defaultparindent = 15pt -\parindent = \defaultparindent -\parskip 18pt plus 1pt -\setleading{15pt} -\advance\topskip by 1.2cm - -% Prevent underfull vbox error messages. -\vbadness=10000 - -% Following George Bush, just get rid of widows and orphans. -\widowpenalty=10000 -\clubpenalty=10000 - -% Use TeX 3.0's \emergencystretch to help line breaking, but if we're -% using an old version of TeX, don't do anything. We want the amount of -% stretch added to depend on the line length, hence the dependence on -% \hsize. This makes it come to about 9pt for the 8.5x11 format. -% -\ifx\emergencystretch\thisisundefined - % Allow us to assign to \emergencystretch anyway. - \def\emergencystretch{\dimen0}% -\else - \emergencystretch = \hsize - \divide\emergencystretch by 45 -\fi - -% Use @smallbook to reset parameters for 7x9.5 format (or else 7x9.25) -\def\smallbook{ - -% These values for secheadingskip and subsecheadingskip are -% experiments. RJC 7 Aug 1992 -\global\secheadingskip = 17pt plus 6pt minus 3pt -\global\subsecheadingskip = 14pt plus 6pt minus 3pt - -\global\lispnarrowing = 0.3in -\setleading{12pt} -\advance\topskip by -1cm -\global\parskip 3pt plus 1pt -\global\hsize = 5in -\global\vsize=7.5in -\global\tolerance=700 -\global\hfuzz=1pt -\global\contentsrightmargin=0pt - -\global\pagewidth=\hsize -\global\pageheight=\vsize - -\global\let\smalllisp=\smalllispx -\global\let\smallexample=\smalllispx -\global\def\Esmallexample{\Esmalllisp} -} - -% Use @afourpaper to print on European A4 paper. 
-\def\afourpaper{ -\global\tolerance=700 -\global\hfuzz=1pt -\setleading{12pt} -\global\parskip 15pt plus 1pt - -\global\vsize= 53\baselineskip -\advance\vsize by \topskip -%\global\hsize= 5.85in % A4 wide 10pt -\global\hsize= 6.5in -\global\outerhsize=\hsize -\global\advance\outerhsize by 0.5in -\global\outervsize=\vsize -\global\advance\outervsize by 0.6in - -\global\pagewidth=\hsize -\global\pageheight=\vsize -} - -% Define macros to output various characters with catcode for normal text. -\catcode`\"=\other -\catcode`\~=\other -\catcode`\^=\other -\catcode`\_=\other -\catcode`\|=\other -\catcode`\<=\other -\catcode`\>=\other -\catcode`\+=\other -\def\normaldoublequote{"} -\def\normaltilde{~} -\def\normalcaret{^} -\def\normalunderscore{_} -\def\normalverticalbar{|} -\def\normalless{<} -\def\normalgreater{>} -\def\normalplus{+} - -% This macro is used to make a character print one way in ttfont -% where it can probably just be output, and another way in other fonts, -% where something hairier probably needs to be done. -% -% #1 is what to print if we are indeed using \tt; #2 is what to print -% otherwise. Since all the Computer Modern typewriter fonts have zero -% interword stretch (and shrink), and it is reasonable to expect all -% typewriter fonts to have this, we can check that font parameter. -% -\def\ifusingtt#1#2{\ifdim \fontdimen3\the\font=0pt #1\else #2\fi} - -% Turn off all special characters except @ -% (and those which the user can use as if they were ordinary). -% Most of these we simply print from the \tt font, but for some, we can -% use math or other variants that look better in normal text. - -\catcode`\"=\active -\def\activedoublequote{{\tt \char '042}} -\let"=\activedoublequote -\catcode`\~=\active -\def~{{\tt \char '176}} -\chardef\hat=`\^ -\catcode`\^=\active -\def^{{\tt \hat}} - -\catcode`\_=\active -\def_{\ifusingtt\normalunderscore\_} -% Subroutine for the previous macro. 
-\def\_{\lvvmode \kern.06em \vbox{\hrule width.3em height.1ex}} - -% \lvvmode is equivalent in function to \leavevmode. -% Using \leavevmode runs into trouble when written out to -% an index file due to the expansion of \leavevmode into ``\unhbox -% \voidb@x'' ---which looks to TeX like ``\unhbox \voidb\x'' due to our -% magic tricks with @. -\def\lvvmode{\vbox to 0pt{}} - -\catcode`\|=\active -\def|{{\tt \char '174}} -\chardef \less=`\< -\catcode`\<=\active -\def<{{\tt \less}} -\chardef \gtr=`\> -\catcode`\>=\active -\def>{{\tt \gtr}} -\catcode`\+=\active -\def+{{\tt \char 43}} -%\catcode 27=\active -%\def^^[{$\diamondsuit$} - -% Used sometimes to turn off (effectively) the active characters -% even after parsing them. -\def\turnoffactive{\let"=\normaldoublequote -\let~=\normaltilde -\let^=\normalcaret -\let_=\normalunderscore -\let|=\normalverticalbar -\let<=\normalless -\let>=\normalgreater -\let+=\normalplus} - -% Set up an active definition for =, but don't enable it most of the time. -{\catcode`\==\active -\global\def={{\tt \char 61}}} - -\catcode`\@=0 - -% \rawbackslashxx output one backslash character in current font -\global\chardef\rawbackslashxx=`\\ -%{\catcode`\\=\other -%@gdef@rawbackslashxx{\}} - -% \rawbackslash redefines \ as input to do \rawbackslashxx. -{\catcode`\\=\active -@gdef@rawbackslash{@let\=@rawbackslashxx }} - -% \normalbackslash outputs one backslash in fixed width font. -\def\normalbackslash{{\tt\rawbackslashxx}} - -% Say @foo, not \foo, in error messages. -\escapechar=`\@ - -% \catcode 17=0 % Define control-q -\catcode`\\=\active - -% If a .fmt file is being used, we don't want the `\input texinfo' to show up. -% That is what \eatinput is for; after that, the `\' should revert to printing -% a backslash. -% -@gdef@eatinput input texinfo{@fixbackslash} -@global@let\ = @eatinput - -% On the other hand, perhaps the file did not have a `\input texinfo'. Then -% the first `\{ in the file would cause an error. 
This macro tries to fix -% that, assuming it is called before the first `\' could plausibly occur. -% -@gdef@fixbackslash{@ifx\@eatinput @let\ = @normalbackslash @fi} - -%% These look ok in all fonts, so just make them not special. The @rm below -%% makes sure that the current font starts out as the newly loaded cmr10 -@catcode`@$=@other @catcode`@%=@other @catcode`@&=@other @catcode`@#=@other - -@textfonts -@rm - -@c Local variables: -@c page-delimiter: "^\\\\message" -@c End: diff --git a/gnu/libregex/doc/xregex.texi b/gnu/libregex/doc/xregex.texi deleted file mode 100644 index 9292b356ef75..000000000000 --- a/gnu/libregex/doc/xregex.texi +++ /dev/null @@ -1,3021 +0,0 @@ -\input texinfo -@c %**start of header -@setfilename regex.info -@settitle Regex -@c %**end of header - -@c \\{fill-paragraph} works better (for me, anyway) if the text in the -@c source file isn't indented. -@paragraphindent 2 - -@c Define a new index for our magic constants. -@defcodeindex cn - -@c Put everything in one index (arbitrarily chosen to be the concept index). -@syncodeindex cn cp -@syncodeindex ky cp -@syncodeindex pg cp -@syncodeindex tp cp -@syncodeindex vr cp - -@c Here is what we use in the Info `dir' file: -@c * Regex: (regex). Regular expression library. - - -@ifinfo -This file documents the GNU regular expression library. - -Copyright (C) 1992, 1993 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries a copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). 
-@end ignore - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled ``GNU General Public License'' is included exactly as -in the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that the section entitled ``GNU General Public License'' may be -included in a translation approved by the Free Software Foundation -instead of in the original English. -@end ifinfo - - -@titlepage - -@title Regex -@subtitle edition 0.12a -@subtitle 19 September 1992 -@author Kathryn A. Hargreaves -@author Karl Berry - -@page - -@vskip 0pt plus 1filll -Copyright @copyright{} 1992 Free Software Foundation. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled ``GNU General Public License'' is included exactly as -in the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this -one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that the section entitled ``GNU General Public License'' may be -included in a translation approved by the Free Software Foundation -instead of in the original English. - -@end titlepage - - -@ifinfo -@node Top, Overview, (dir), (dir) -@top Regular Expression Library - -This manual documents how to program with the GNU regular expression -library. 
This is edition 0.12a of the manual, 19 September 1992. - -The first part of this master menu lists the major nodes in this Info -document, including the index. The rest of the menu lists all the -lower level nodes in the document. - -@menu -* Overview:: -* Regular Expression Syntax:: -* Common Operators:: -* GNU Operators:: -* GNU Emacs Operators:: -* What Gets Matched?:: -* Programming with Regex:: -* Copying:: Copying and sharing Regex. -* Index:: General index. - --- The Detailed Node Listing --- - -Regular Expression Syntax - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - -Common Operators - -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? @{@} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - -Repetition Operators - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? 
-* Interval Operators:: @{@} - -List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]}) - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - -Anchoring Operators - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - -GNU Operators - -* Word Operators:: -* Buffer Operators:: - -Word Operators - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - -Buffer Operators - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - -GNU Emacs Operators - -* Syntactic Class Operators:: - -Syntactic Class Operators - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - -Programming with Regex - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - -GNU Regex Functions - -* GNU Pattern Buffers:: The re_pattern_buffer type. -* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - -POSIX Regex Functions - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. 
-* Freeing POSIX Pattern Buffers:: regfree () - -BSD Regex Functions - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () -@end menu -@end ifinfo -@node Overview, Regular Expression Syntax, Top, Top -@chapter Overview - -A @dfn{regular expression} (or @dfn{regexp}, or @dfn{pattern}) is a text -string that describes some (mathematical) set of strings. A regexp -@var{r} @dfn{matches} a string @var{s} if @var{s} is in the set of -strings described by @var{r}. - -Using the Regex library, you can: - -@itemize @bullet - -@item -see if a string matches a specified pattern as a whole, and - -@item -search within a string for a substring matching a specified pattern. - -@end itemize - -Some regular expressions match only one string, i.e., the set they -describe has only one member. For example, the regular expression -@samp{foo} matches the string @samp{foo} and no others. Other regular -expressions match more than one string, i.e., the set they describe has -more than one member. For example, the regular expression @samp{f*} -matches the set of strings made up of any number (including zero) of -@samp{f}s. As you can see, some characters in regular expressions match -themselves (such as @samp{f}) and some don't (such as @samp{*}); the -ones that don't match themselves instead let you specify patterns that -describe many different strings. - -To either match or search for a regular expression with the Regex -library functions, you must first compile it with a Regex pattern -compiling function. A @dfn{compiled pattern} is a regular expression -converted to the internal format used by the library functions. Once -you've compiled a pattern, you can use it for matching or searching any -number of times. - -The Regex library consists of two source files: @file{regex.h} and -@file{regex.c}. -@pindex regex.h -@pindex regex.c -Regex provides three groups of functions with which you can operate on -regular expressions. 
One group---the @sc{gnu} group---is more powerful -but not completely compatible with the other two, namely the @sc{posix} -and Berkeley @sc{unix} groups; its interface was designed specifically -for @sc{gnu}. The other groups have the same interfaces as do the -regular expression functions in @sc{posix} and Berkeley -@sc{unix}. - -We wrote this chapter with programmers in mind, not users of -programs---such as Emacs---that use Regex. We describe the Regex -library in its entirety, not how to write regular expressions that a -particular program understands. - - -@node Regular Expression Syntax, Common Operators, Overview, Top -@chapter Regular Expression Syntax - -@cindex regular expressions, syntax of -@cindex syntax of regular expressions - -@dfn{Characters} are things you can type. @dfn{Operators} are things in -a regular expression that match one or more characters. You compose -regular expressions from operators, which in turn you specify using one -or more characters. - -Most characters represent what we call the match-self operator, i.e., -they match themselves; we call these characters @dfn{ordinary}. Other -characters represent either all or parts of fancier operators; e.g., -@samp{.} represents what we call the match-any-character operator -(which, no surprise, matches (almost) any character); we call these -characters @dfn{special}. Two different things determine what -characters represent what operators: - -@enumerate -@item -the regular expression syntax your program has told the Regex library to -recognize, and - -@item -the context of the character in the regular expression. -@end enumerate - -In the following sections, we describe these things in more detail. - -@menu -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. 
Characters:: -* The Backslash Character:: -@end menu - - -@node Syntax Bits, Predefined Syntaxes, , Regular Expression Syntax -@section Syntax Bits - -@cindex syntax bits - -In any particular syntax for regular expressions, some characters are -always special, others are sometimes special, and others are never -special. The particular syntax that Regex recognizes for a given -regular expression depends on the value in the @code{syntax} field of -the pattern buffer of that regular expression. - -You get a pattern buffer by compiling a regular expression. @xref{GNU -Pattern Buffers}, and @ref{POSIX Pattern Buffers}, for more information -on pattern buffers. @xref{GNU Regular Expression Compiling}, @ref{POSIX -Regular Expression Compiling}, and @ref{BSD Regular Expression -Compiling}, for more information on compiling. - -Regex considers the value of the @code{syntax} field to be a collection -of bits; we refer to these bits as @dfn{syntax bits}. In most cases, -they affect what characters represent what operators. We describe the -meanings of the operators to which we refer in @ref{Common Operators}, -@ref{GNU Operators}, and @ref{GNU Emacs Operators}. - -For reference, here is the complete list of syntax bits, in alphabetical -order: - -@table @code - -@cnindex RE_BACKSLASH_ESCAPE_IN_LISTS -@item RE_BACKSLASH_ESCAPE_IN_LISTS -If this bit is set, then @samp{\} inside a list (@pxref{List Operators}) -quotes (makes ordinary, if it's special) the following character; if -this bit isn't set, then @samp{\} is an ordinary character inside lists. -(@xref{The Backslash Character}, for what `\' does outside of lists.) - -@cnindex RE_BK_PLUS_QM -@item RE_BK_PLUS_QM -If this bit is set, then @samp{\+} represents the match-one-or-more -operator and @samp{\?} represents the match-zero-or-one operator; if -this bit isn't set, then @samp{+} represents the match-one-or-more -operator and @samp{?} represents the match-zero-or-one operator.
This -bit is irrelevant if @code{RE_LIMITED_OPS} is set. - -@cnindex RE_CHAR_CLASSES -@item RE_CHAR_CLASSES -If this bit is set, then you can use character classes in lists; if this -bit isn't set, then you can't. - -@cnindex RE_CONTEXT_INDEP_ANCHORS -@item RE_CONTEXT_INDEP_ANCHORS -If this bit is set, then @samp{^} and @samp{$} are special anywhere outside -a list; if this bit isn't set, then these characters are special only in -certain contexts. @xref{Match-beginning-of-line Operator}, and -@ref{Match-end-of-line Operator}. - -@cnindex RE_CONTEXT_INDEP_OPS -@item RE_CONTEXT_INDEP_OPS -If this bit is set, then certain characters are special anywhere outside -a list; if this bit isn't set, then those characters are special only in -some contexts and are ordinary elsewhere. Specifically, if this bit -isn't set then @samp{*}, and (if the syntax bit @code{RE_LIMITED_OPS} -isn't set) @samp{+} and @samp{?} (or @samp{\+} and @samp{\?}, depending -on the syntax bit @code{RE_BK_PLUS_QM}) represent repetition operators -only if they're not first in a regular expression or just after an -open-group or alternation operator. The same holds for @samp{@{} (or -@samp{\@{}, depending on the syntax bit @code{RE_NO_BK_BRACES}) if -it is the beginning of a valid interval and the syntax bit -@code{RE_INTERVALS} is set. - -@cnindex RE_CONTEXT_INVALID_OPS -@item RE_CONTEXT_INVALID_OPS -If this bit is set, then repetition and alternation operators can't be -in certain positions within a regular expression. Specifically, the -regular expression is invalid if it has: - -@itemize @bullet - -@item -a repetition operator first in the regular expression or just after a -match-beginning-of-line, open-group, or alternation operator; or - -@item -an alternation operator first or last in the regular expression, just -before a match-end-of-line operator, or just after an alternation or -open-group operator. 
- -@end itemize - -If this bit isn't set, then you can put the characters representing the -repetition and alternation characters anywhere in a regular expression. -Whether or not they will in fact be operators in certain positions -depends on other syntax bits. - -@cnindex RE_DOT_NEWLINE -@item RE_DOT_NEWLINE -If this bit is set, then the match-any-character operator matches -a newline; if this bit isn't set, then it doesn't. - -@cnindex RE_DOT_NOT_NULL -@item RE_DOT_NOT_NULL -If this bit is set, then the match-any-character operator doesn't match -a null character; if this bit isn't set, then it does. - -@cnindex RE_INTERVALS -@item RE_INTERVALS -If this bit is set, then Regex recognizes interval operators; if this bit -isn't set, then it doesn't. - -@cnindex RE_LIMITED_OPS -@item RE_LIMITED_OPS -If this bit is set, then Regex doesn't recognize the match-one-or-more, -match-zero-or-one or alternation operators; if this bit isn't set, then -it does. - -@cnindex RE_NEWLINE_ALT -@item RE_NEWLINE_ALT -If this bit is set, then newline represents the alternation operator; if -this bit isn't set, then newline is ordinary. - -@cnindex RE_NO_BK_BRACES -@item RE_NO_BK_BRACES -If this bit is set, then @samp{@{} represents the open-interval operator -and @samp{@}} represents the close-interval operator; if this bit isn't -set, then @samp{\@{} represents the open-interval operator and -@samp{\@}} represents the close-interval operator. This bit is relevant -only if @code{RE_INTERVALS} is set. - -@cnindex RE_NO_BK_PARENS -@item RE_NO_BK_PARENS -If this bit is set, then @samp{(} represents the open-group operator and -@samp{)} represents the close-group operator; if this bit isn't set, then -@samp{\(} represents the open-group operator and @samp{\)} represents -the close-group operator. - -@cnindex RE_NO_BK_REFS -@item RE_NO_BK_REFS -If this bit is set, then Regex doesn't recognize @samp{\}@var{digit} as -the back reference operator; if this bit isn't set, then it does. 
- -@cnindex RE_NO_BK_VBAR -@item RE_NO_BK_VBAR -If this bit is set, then @samp{|} represents the alternation operator; -if this bit isn't set, then @samp{\|} represents the alternation -operator. This bit is irrelevant if @code{RE_LIMITED_OPS} is set. - -@cnindex RE_NO_EMPTY_RANGES -@item RE_NO_EMPTY_RANGES -If this bit is set, then a regular expression with a range whose ending -point collates lower than its starting point is invalid; if this bit -isn't set, then Regex considers such a range to be empty. - -@cnindex RE_UNMATCHED_RIGHT_PAREN_ORD -@item RE_UNMATCHED_RIGHT_PAREN_ORD -If this bit is set and the regular expression has no matching open-group -operator, then Regex considers what would otherwise be a close-group -operator (based on how @code{RE_NO_BK_PARENS} is set) to match @samp{)}. - -@end table - - -@node Predefined Syntaxes, Collating Elements vs. Characters, Syntax Bits, Regular Expression Syntax -@section Predefined Syntaxes - -If you're programming with Regex, you can set a pattern buffer's -(@pxref{GNU Pattern Buffers}, and @ref{POSIX Pattern Buffers}) -@code{syntax} field either to an arbitrary combination of syntax bits -(@pxref{Syntax Bits}) or else to the configurations defined by Regex. -These configurations define the syntaxes used by certain -programs---@sc{gnu} Emacs, -@cindex Emacs -@sc{posix} Awk, -@cindex POSIX Awk -traditional Awk, -@cindex Awk -Grep, -@cindex Grep -@cindex Egrep -Egrep---in addition to syntaxes for @sc{posix} basic and extended -regular expressions. - -The predefined syntaxes--taken directly from @file{regex.h}---are: - -@example -[[[ syntaxes ]]] -@end example - -@node Collating Elements vs. Characters, The Backslash Character, Predefined Syntaxes, Regular Expression Syntax -@section Collating Elements vs.@: Characters - -@sc{posix} generalizes the notion of a character to that of a -collating element. 
It defines a @dfn{collating element} to be ``a -sequence of one or more bytes defined in the current collating sequence -as a unit of collation.'' - -This generalizes the notion of a character in -two ways. First, a single character can map into two or more collating -elements. For example, the German -@tex -`\ss' -@end tex -@ifinfo -``es-zet'' -@end ifinfo -collates as the collating element @samp{s} followed by another collating -element @samp{s}. Second, two or more characters can map into one -collating element. For example, the Spanish @samp{ll} collates after -@samp{l} and before @samp{m}. - -Since @sc{posix}'s ``collating element'' preserves the essential idea of -a ``character,'' we use the latter, more familiar, term in this document. - -@node The Backslash Character, , Collating Elements vs. Characters, Regular Expression Syntax -@section The Backslash Character - -@cindex \ -The @samp{\} character has one of four different meanings, depending on -the context in which you use it and what syntax bits are set -(@pxref{Syntax Bits}). It can: 1) stand for itself, 2) quote the next -character, 3) introduce an operator, or 4) do nothing. - -@enumerate -@item -It stands for itself inside a list -(@pxref{List Operators}) if the syntax bit -@code{RE_BACKSLASH_ESCAPE_IN_LISTS} is not set. For example, @samp{[\]} -would match @samp{\}. - -@item -It quotes (makes ordinary, if it's special) the next character when you -use it either: - -@itemize @bullet -@item -outside a list,@footnote{Sometimes -you don't have to explicitly quote special characters to make -them ordinary. For instance, most characters lose any special meaning -inside a list (@pxref{List Operators}). 
In addition, if the syntax bits -@code{RE_CONTEXT_INVALID_OPS} and @code{RE_CONTEXT_INDEP_OPS} -aren't set, then (for historical reasons) the matcher considers special -characters ordinary if they are in contexts where the operations they -represent make no sense; for example, then the match-zero-or-more -operator (represented by @samp{*}) matches itself in the regular -expression @samp{*foo} because there is no preceding expression on which -it can operate. It is poor practice, however, to depend on this -behavior; if you want a special character to be ordinary outside a list, -it's better to always quote it, regardless.} or - -@item -inside a list and the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is set. - -@end itemize - -@item -It introduces an operator when followed by certain ordinary -characters---sometimes only when certain syntax bits are set. See the -cases @code{RE_BK_PLUS_QM}, @code{RE_NO_BK_BRACES}, @code{RE_NO_BK_VBAR}, -@code{RE_NO_BK_PARENS}, @code{RE_NO_BK_REFS} in @ref{Syntax Bits}. Also: - -@itemize @bullet -@item -@samp{\b} represents the match-word-boundary operator -(@pxref{Match-word-boundary Operator}). - -@item -@samp{\B} represents the match-within-word operator -(@pxref{Match-within-word Operator}). - -@item -@samp{\<} represents the match-beginning-of-word operator @* -(@pxref{Match-beginning-of-word Operator}). - -@item -@samp{\>} represents the match-end-of-word operator -(@pxref{Match-end-of-word Operator}). - -@item -@samp{\w} represents the match-word-constituent operator -(@pxref{Match-word-constituent Operator}). - -@item -@samp{\W} represents the match-non-word-constituent operator -(@pxref{Match-non-word-constituent Operator}). - -@item -@samp{\`} represents the match-beginning-of-buffer -operator and @samp{\'} represents the match-end-of-buffer operator -(@pxref{Buffer Operators}).
- -@item -If Regex was compiled with the C preprocessor symbol @code{emacs} -defined, then @samp{\s@var{class}} represents the match-syntactic-class -operator and @samp{\S@var{class}} represents the -match-not-syntactic-class operator (@pxref{Syntactic Class Operators}). - -@end itemize - -@item -In all other cases, Regex ignores @samp{\}. For example, -@samp{\n} matches @samp{n}. - -@end enumerate - -@node Common Operators, GNU Operators, Regular Expression Syntax, Top -@chapter Common Operators - -You compose regular expressions from operators. In the following -sections, we describe the regular expression operators specified by -@sc{posix}; @sc{gnu} also uses these. Most operators have more than one -representation as characters. @xref{Regular Expression Syntax}, for -what characters represent what operators under what circumstances. - -For most operators that can be represented in two ways, one -representation is a single character and the other is that character -preceded by @samp{\}. For example, either @samp{(} or @samp{\(} -represents the open-group operator. Which one does depends on the -setting of a syntax bit, in this case @code{RE_NO_BK_PARENS}. Why is -this so? Historical reasons dictate some of the varying -representations, while @sc{posix} dictates others. - -Finally, almost all characters lose any special meaning inside a list -(@pxref{List Operators}). - -@menu -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? @{@} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ -@end menu - -@node Match-self Operator, Match-any-character Operator, , Common Operators -@section The Match-self Operator (@var{ordinary character}) - -This operator matches the character itself. 
All ordinary characters -(@pxref{Regular Expression Syntax}) represent this operator. For -example, @samp{f} is always an ordinary character, so the regular -expression @samp{f} matches only the string @samp{f}. In -particular, it does @emph{not} match the string @samp{ff}. - -@node Match-any-character Operator, Concatenation Operator, Match-self Operator, Common Operators -@section The Match-any-character Operator (@code{.}) - -@cindex @samp{.} - -This operator matches any single printing or nonprinting character -except it won't match a: - -@table @asis -@item newline -if the syntax bit @code{RE_DOT_NEWLINE} isn't set. - -@item null -if the syntax bit @code{RE_DOT_NOT_NULL} is set. - -@end table - -The @samp{.} (period) character represents this operator. For example, -@samp{a.b} matches any three-character string beginning with @samp{a} -and ending with @samp{b}. - -@node Concatenation Operator, Repetition Operators, Match-any-character Operator, Common Operators -@section The Concatenation Operator - -This operator concatenates two regular expressions @var{a} and @var{b}. -No character represents this operator; you simply put @var{b} after -@var{a}. The result is a regular expression that will match a string if -@var{a} matches its first part and @var{b} matches the rest. For -example, @samp{xy} (two match-self operators) matches @samp{xy}. - -@node Repetition Operators, Alternation Operator, Concatenation Operator, Common Operators -@section Repetition Operators - -Repetition operators repeat the preceding regular expression a specified -number of times. - -@menu -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? 
-* Interval Operators:: @{@} -@end menu - -@node Match-zero-or-more Operator, Match-one-or-more Operator, , Repetition Operators -@subsection The Match-zero-or-more Operator (@code{*}) - -@cindex @samp{*} - -This operator repeats the smallest possible preceding regular expression -as many times as necessary (including zero) to match the pattern. -@samp{*} represents this operator. For example, @samp{o*} -matches any string made up of zero or more @samp{o}s. Since this -operator operates on the smallest preceding regular expression, -@samp{fo*} has a repeating @samp{o}, not a repeating @samp{fo}. So, -@samp{fo*} matches @samp{f}, @samp{fo}, @samp{foo}, and so on. - -Since the match-zero-or-more operator is a suffix operator, it may be -useless as such when no regular expression precedes it. This is the -case when it: - -@itemize @bullet -@item -is first in a regular expression, or - -@item -follows a match-beginning-of-line, open-group, or alternation -operator. - -@end itemize - -@noindent -Three different things can happen in these cases: - -@enumerate -@item -If the syntax bit @code{RE_CONTEXT_INVALID_OPS} is set, then the -regular expression is invalid. - -@item -If @code{RE_CONTEXT_INVALID_OPS} isn't set, but -@code{RE_CONTEXT_INDEP_OPS} is, then @samp{*} represents the -match-zero-or-more operator (which then operates on the empty string). - -@item -Otherwise, @samp{*} is ordinary. - -@end enumerate - -@cindex backtracking -The matcher processes a match-zero-or-more operator by first matching as -many repetitions of the smallest preceding regular expression as it can. -Then it continues to match the rest of the pattern. - -If it can't match the rest of the pattern, it backtracks (as many times -as necessary), each time discarding one of the matches until it can -either match the entire pattern or be certain that it cannot get a -match. 
For example, when matching @samp{ca*ar} against @samp{caaar}, -the matcher first matches all three @samp{a}s of the string with the -@samp{a*} of the regular expression. However, it cannot then match the -final @samp{ar} of the regular expression against the final @samp{r} of -the string. So it backtracks, discarding the match of the last @samp{a} -in the string. It can then match the remaining @samp{ar}. - - -@node Match-one-or-more Operator, Match-zero-or-one Operator, Match-zero-or-more Operator, Repetition Operators -@subsection The Match-one-or-more Operator (@code{+} or @code{\+}) - -@cindex @samp{+} - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't recognize -this operator. Otherwise, if the syntax bit @code{RE_BK_PLUS_QM} isn't -set, then @samp{+} represents this operator; if it is, then @samp{\+} -does. - -This operator is similar to the match-zero-or-more operator except that -it repeats the preceding regular expression at least once; -@pxref{Match-zero-or-more Operator}, for what it operates on, how some -syntax bits affect it, and how Regex backtracks to match it. - -For example, supposing that @samp{+} represents the match-one-or-more -operator; then @samp{ca+r} matches, e.g., @samp{car} and -@samp{caaaar}, but not @samp{cr}. - -@node Match-zero-or-one Operator, Interval Operators, Match-one-or-more Operator, Repetition Operators -@subsection The Match-zero-or-one Operator (@code{?} or @code{\?}) -@cindex @samp{?} - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit -@code{RE_BK_PLUS_QM} isn't set, then @samp{?} represents this operator; -if it is, then @samp{\?} does. - -This operator is similar to the match-zero-or-more operator except that -it repeats the preceding regular expression once or not at all; -@pxref{Match-zero-or-more Operator}, to see what it operates on, how -some syntax bits affect it, and how Regex backtracks to match it. 
- -For example, supposing that @samp{?} represents the match-zero-or-one -operator; then @samp{ca?r} matches both @samp{car} and @samp{cr}, but -nothing else. - -@node Interval Operators, , Match-zero-or-one Operator, Repetition Operators -@subsection Interval Operators (@code{@{} @dots{} @code{@}} or @code{\@{} @dots{} @code{\@}}) - -@cindex interval expression -@cindex @samp{@{} -@cindex @samp{@}} -@cindex @samp{\@{} -@cindex @samp{\@}} - -If the syntax bit @code{RE_INTERVALS} is set, then Regex recognizes -@dfn{interval expressions}. They repeat the smallest possible preceding -regular expression a specified number of times. - -If the syntax bit @code{RE_NO_BK_BRACES} is set, @samp{@{} represents -the @dfn{open-interval operator} and @samp{@}} represents the -@dfn{close-interval operator} ; otherwise, @samp{\@{} and @samp{\@}} do. - -Specifically, supposing that @samp{@{} and @samp{@}} represent the -open-interval and close-interval operators; then: - -@table @code -@item @{@var{count}@} -matches exactly @var{count} occurrences of the preceding regular -expression. - -@item @{@var{min,}@} -matches @var{min} or more occurrences of the preceding regular -expression. - -@item @{@var{min, max}@} -matches at least @var{min} but no more than @var{max} occurrences of -the preceding regular expression. - -@end table - -The interval expression (but not necessarily the regular expression that -contains it) is invalid if: - -@itemize @bullet -@item -@var{min} is greater than @var{max}, or - -@item -any of @var{count}, @var{min}, or @var{max} are outside the range -zero to @code{RE_DUP_MAX} (which symbol @file{regex.h} -defines). - -@end itemize - -If the interval expression is invalid and the syntax bit -@code{RE_NO_BK_BRACES} is set, then Regex considers all the -characters in the would-be interval to be ordinary. If that bit -isn't set, then the regular expression is invalid. 
- -If the interval expression is valid but there is no preceding regular -expression on which to operate, then if the syntax bit -@code{RE_CONTEXT_INVALID_OPS} is set, the regular expression is invalid. -If that bit isn't set, then Regex considers all the characters---other -than backslashes, which it ignores---in the would-be interval to be -ordinary. - - -@node Alternation Operator, List Operators, Repetition Operators, Common Operators -@section The Alternation Operator (@code{|} or @code{\|}) - -@kindex | -@kindex \| -@cindex alternation operator -@cindex or operator - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit -@code{RE_NO_BK_VBAR} is set, then @samp{|} represents this operator; -otherwise, @samp{\|} does. - -Alternatives match one of a choice of regular expressions: -if you put the character(s) representing the alternation operator between -any two regular expressions @var{a} and @var{b}, the result matches -the union of the strings that @var{a} and @var{b} match. For -example, supposing that @samp{|} is the alternation operator, then -@samp{foo|bar|quux} would match any of @samp{foo}, @samp{bar} or -@samp{quux}. - -@ignore -@c Nobody needs to disallow empty alternatives any more. -If the syntax bit @code{RE_NO_EMPTY_ALTS} is set, then if either of the regular -expressions @var{a} or @var{b} is empty, the -regular expression is invalid. More precisely, if this syntax bit is -set, then the alternation operator can't: - -@itemize @bullet -@item -be first or last in a regular expression; - -@item -follow either another alternation operator or an open-group operator -(@pxref{Grouping Operators}); or - -@item -precede a close-group operator. - -@end itemize - -@noindent -For example, supposing @samp{(} and @samp{)} represent the open and -close-group operators, then @samp{|foo}, @samp{foo|}, @samp{foo||bar}, -@samp{foo(|bar)}, and @samp{(foo|)bar} would all be invalid. 
-@end ignore - -The alternation operator operates on the @emph{largest} possible -surrounding regular expressions. (Put another way, it has the lowest -precedence of any regular expression operator.) -Thus, the only way you can -delimit its arguments is to use grouping. For example, if @samp{(} and -@samp{)} are the open and close-group operators, then @samp{fo(o|b)ar} -would match either @samp{fooar} or @samp{fobar}. (@samp{foo|bar} would -match @samp{foo} or @samp{bar}.) - -@cindex backtracking -The matcher usually tries all combinations of alternatives so as to -match the longest possible string. For example, when matching -@samp{(fooq|foo)*(qbarquux|bar)} against @samp{fooqbarquux}, it cannot -take, say, the first (``depth-first'') combination it could match, since -then it would be content to match just @samp{fooqbar}. - -@comment xx something about leftmost-longest - - -@node List Operators, Grouping Operators, Alternation Operator, Common Operators -@section List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]}) - -@cindex matching list -@cindex @samp{[} -@cindex @samp{]} -@cindex @samp{^} -@cindex @samp{-} -@cindex @samp{\} -@cindex @samp{[^} -@cindex nonmatching list -@cindex matching newline -@cindex bracket expression - -@dfn{Lists}, also called @dfn{bracket expressions}, are a set of one or -more items. An @dfn{item} is a character, -@ignore -(These get added when they get implemented.) -a collating symbol, an equivalence class expression, -@end ignore -a character class expression, or a range expression. The syntax bits -affect which kinds of items you can put in a list. We explain the last -two items in subsections below. Empty lists are invalid. - -A @dfn{matching list} matches a single character represented by one of -the list items. You form a matching list by enclosing one or more items -within an @dfn{open-matching-list operator} (represented by @samp{[}) -and a @dfn{close-list operator} (represented by @samp{]}). 
- -For example, @samp{[ab]} matches either @samp{a} or @samp{b}. -@samp{[ad]*} matches the empty string and any string composed of just -@samp{a}s and @samp{d}s in any order. Regex considers invalid a regular -expression with a @samp{[} but no matching -@samp{]}. - -@dfn{Nonmatching lists} are similar to matching lists except that they -match a single character @emph{not} represented by one of the list -items. You use an @dfn{open-nonmatching-list operator} (represented by -@samp{[^}@footnote{Regex therefore doesn't consider the @samp{^} to be -the first character in the list. If you put a @samp{^} character first -in (what you think is) a matching list, you'll turn it into a -nonmatching list.}) instead of an open-matching-list operator to start a -nonmatching list. - -For example, @samp{[^ab]} matches any character except @samp{a} or -@samp{b}. - -If the @code{posix_newline} field in the pattern buffer (@pxref{GNU -Pattern Buffers}) is set, then nonmatching lists do not match a newline. - -Most characters lose any special meaning inside a list. The special -characters inside a list follow. - -@table @samp -@item ] -ends the list if it's not the first list item. So, if you want to make -the @samp{]} character a list item, you must put it first. - -@item \ -quotes the next character if the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is -set. - -@ignore -Put these in if they get implemented. - -@item [. -represents the open-collating-symbol operator (@pxref{Collating Symbol -Operators}). - -@item .] -represents the close-collating-symbol operator. - -@item [= -represents the open-equivalence-class operator (@pxref{Equivalence Class -Operators}). - -@item =] -represents the close-equivalence-class operator. - -@end ignore - -@item [: -represents the open-character-class operator (@pxref{Character Class -Operators}) if the syntax bit @code{RE_CHAR_CLASSES} is set and what -follows is a valid character class expression.
- -@item :] -represents the close-character-class operator if the syntax bit -@code{RE_CHAR_CLASSES} is set and what precedes it is an -open-character-class operator followed by a valid character class name. - -@item - -represents the range operator (@pxref{Range Operator}) if it's -not first or last in a list or the ending point of a range. - -@end table - -@noindent -All other characters are ordinary. For example, @samp{[.*]} matches -@samp{.} and @samp{*}. - -@menu -* Character Class Operators:: [:class:] -* Range Operator:: start-end -@end menu - -@ignore -(If collating symbols and equivalence class expressions get implemented, -then add this.) - -node Collating Symbol Operators -subsubsection Collating Symbol Operators (@code{[.} @dots{} @code{.]}) - -If the syntax bit @code{XX} is set, then you can represent -collating symbols inside lists. You form a @dfn{collating symbol} by -putting a collating element between an @dfn{open-collating-symbol -operator} and an @dfn{close-collating-symbol operator}. @samp{[.} -represents the open-collating-symbol operator and @samp{.]} represents -the close-collating-symbol operator. For example, if @samp{ll} is a -collating element, then @samp{[[.ll.]]} would match @samp{ll}. - -node Equivalence Class Operators -subsubsection Equivalence Class Operators (@code{[=} @dots{} @code{=]}) -@cindex equivalence class expression in regex -@cindex @samp{[=} in regex -@cindex @samp{=]} in regex - -If the syntax bit @code{XX} is set, then Regex recognizes equivalence class -expressions inside lists. A @dfn{equivalence class expression} is a set -of collating elements which all belong to the same equivalence class. -You form an equivalence class expression by putting a collating -element between an @dfn{open-equivalence-class operator} and a -@dfn{close-equivalence-class operator}. @samp{[=} represents the -open-equivalence-class operator and @samp{=]} represents the -close-equivalence-class operator. 
For example, if @samp{a} and @samp{A} -were an equivalence class, then both @samp{[[=a=]]} and @samp{[[=A=]]} -would match both @samp{a} and @samp{A}. If the collating element in an -equivalence class expression isn't part of an equivalence class, then -the matcher considers the equivalence class expression to be a collating -symbol. - -@end ignore - -@node Character Class Operators, Range Operator, , List Operators -@subsection Character Class Operators (@code{[:} @dots{} @code{:]}) - -@cindex character classes -@cindex @samp{[:} in regex -@cindex @samp{:]} in regex - -If the syntax bit @code{RE_CHAR_CLASSES} is set, then Regex -recognizes character class expressions inside lists. A @dfn{character -class expression} matches one character from a given class. You form a -character class expression by putting a character class name between an -@dfn{open-character-class operator} (represented by @samp{[:}) and a -@dfn{close-character-class operator} (represented by @samp{:]}). The -character class names and their meanings are: - -@table @code - -@item alnum -letters and digits - -@item alpha -letters - -@item blank -system-dependent; for @sc{gnu}, a space or tab - -@item cntrl -control characters (in the @sc{ascii} encoding, code 0177 and codes -less than 040) - -@item digit -digits - -@item graph -same as @code{print} except omits space - -@item lower -lowercase letters - -@item print -printable characters (in the @sc{ascii} encoding, space -tilde---codes 040 through 0176) - -@item punct -neither control nor alphanumeric characters - -@item space -space, horizontal tab, carriage return, newline, vertical tab, and form feed - -@item upper -uppercase letters - -@item xdigit -hexadecimal digits: @code{0}--@code{9}, @code{a}--@code{f}, @code{A}--@code{F} - -@end table - -@noindent -These correspond to the definitions in the C library's @file{<ctype.h>} -facility. For example, @samp{[:alpha:]} corresponds to the standard -facility @code{isalpha}.
Regex recognizes character class expressions -only inside of lists; so @samp{[[:alpha:]]} matches any letter, but -@samp{[:alpha:]} outside of a bracket expression and not followed by a -repetition operator matches just itself. - -@node Range Operator, , Character Class Operators, List Operators -@subsection The Range Operator (@code{-}) - -Regex recognizes @dfn{range expressions} inside a list. They represent -those characters -that fall between two elements in the current collating sequence. You -form a range expression by putting a @dfn{range operator} between two -@ignore -(If these get implemented, then substitute this for ``characters.'') -of any of the following: characters, collating elements, collating symbols, -and equivalence class expressions. The starting point of the range and -the ending point of the range don't have to be the same kind of item, -e.g., the starting point could be a collating element and the ending -point could be an equivalence class expression. If a range's ending -point is an equivalence class, then all the collating elements in that -class will be in the range. -@end ignore -characters.@footnote{You can't use a character class for the starting -or ending point of a range, since a character class is not a single -character.} @samp{-} represents the range operator. For example, -@samp{a-f} within a list represents all the characters from @samp{a} -through @samp{f} -inclusively. - -If the syntax bit @code{RE_NO_EMPTY_RANGES} is set, then if the range's -ending point collates less than its starting point, the range (and the -regular expression containing it) is invalid. For example, the regular -expression @samp{[z-a]} would be invalid. If this bit isn't set, then -Regex considers such a range to be empty. - -Since @samp{-} represents the range operator, if you want to make a -@samp{-} character itself -a list item, you must do one of the following: - -@itemize @bullet -@item -Put the @samp{-} either first or last in the list. 
- -@item -Include a range whose starting point collates strictly lower than -@samp{-} and whose ending point collates equal or higher. Unless a -range is the first item in a list, a @samp{-} can't be its starting -point, but @emph{can} be its ending point. That is because Regex -considers @samp{-} to be the range operator unless it is preceded by -another @samp{-}. For example, in the @sc{ascii} encoding, @samp{)}, -@samp{*}, @samp{+}, @samp{,}, @samp{-}, @samp{.}, and @samp{/} are -contiguous characters in the collating sequence. You might think that -@samp{[)-+--/]} has two ranges: @samp{)-+} and @samp{--/}. Rather, it -has the ranges @samp{)-+} and @samp{+--}, plus the character @samp{/}, so -it matches, e.g., @samp{,}, not @samp{.}. - -@item -Put a range whose starting point is @samp{-} first in the list. - -@end itemize - -For example, @samp{[-a-z]} matches a lowercase letter or a hyphen (in -English, in @sc{ascii}). - - -@node Grouping Operators, Back-reference Operator, List Operators, Common Operators -@section Grouping Operators (@code{(} @dots{} @code{)} or @code{\(} @dots{} @code{\)}) - -@kindex ( -@kindex ) -@kindex \( -@kindex \) -@cindex grouping -@cindex subexpressions -@cindex parenthesizing - -A @dfn{group}, also known as a @dfn{subexpression}, consists of an -@dfn{open-group operator}, any number of other operators, and a -@dfn{close-group operator}. Regex treats this sequence as a unit, just -as mathematics and programming languages treat a parenthesized -expression as a unit. - -Therefore, using @dfn{groups}, you can: - -@itemize @bullet -@item -delimit the argument(s) to an alternation operator (@pxref{Alternation -Operator}) or a repetition operator (@pxref{Repetition -Operators}). - -@item -keep track of the indices of the substring that matched a given group. -@xref{Using Registers}, for a precise explanation. -This lets you: - -@itemize @bullet -@item -use the back-reference operator (@pxref{Back-reference Operator}). 
- -@item -use registers (@pxref{Using Registers}). - -@end itemize - -@end itemize - -If the syntax bit @code{RE_NO_BK_PARENS} is set, then @samp{(} represents -the open-group operator and @samp{)} represents the -close-group operator; otherwise, @samp{\(} and @samp{\)} do. - -If the syntax bit @code{RE_UNMATCHED_RIGHT_PAREN_ORD} is set and a -close-group operator has no matching open-group operator, then Regex -considers it to match @samp{)}. - - -@node Back-reference Operator, Anchoring Operators, Grouping Operators, Common Operators -@section The Back-reference Operator (@code{\}@var{digit}) - -@cindex back references - -If the syntax bit @code{RE_NO_BK_REFS} isn't set, then Regex recognizes -back references. A back reference matches a specified preceding group. -The back reference operator is represented by @samp{\@var{digit}} -anywhere after the end of a regular expression's @w{@var{digit}-th} -group (@pxref{Grouping Operators}). - -@var{digit} must be between @samp{1} and @samp{9}. The matcher assigns -numbers 1 through 9 to the first nine groups it encounters. By using -one of @samp{\1} through @samp{\9} after the corresponding group's -close-group operator, you can match a substring identical to the -one that the group does. - -Back references match according to the following (in all examples below, -@samp{(} represents the open-group, @samp{)} the close-group, @samp{@{} -the open-interval and @samp{@}} the close-interval operator): - -@itemize @bullet -@item -If the group matches a substring, the back reference matches an -identical substring. For example, @samp{(a)\1} matches @samp{aa} and -@samp{(bana)na\1bo\1} matches @samp{bananabanabobana}. Likewise, -@samp{(.*)\1} matches any (newline-free if the syntax bit -@code{RE_DOT_NEWLINE} isn't set) string that is composed of two -identical halves; the @samp{(.*)} matches the first half and the -@samp{\1} matches the second half.
- -@item -If the group matches more than once (as it might if followed -by, e.g., a repetition operator), then the back reference matches the -substring the group @emph{last} matched. For example, -@samp{((a*)b)*\1\2} matches @samp{aabababa}; first @w{group 1} (the -outer one) matches @samp{aab} and @w{group 2} (the inner one) matches -@samp{aa}. Then @w{group 1} matches @samp{ab} and @w{group 2} matches -@samp{a}. So, @samp{\1} matches @samp{ab} and @samp{\2} matches -@samp{a}. - -@item -If the group doesn't participate in a match, i.e., it is part of an -alternative not taken or a repetition operator allows zero repetitions -of it, then the back reference makes the whole match fail. For example, -@samp{(one()|two())-and-(three\2|four\3)} matches @samp{one-and-three} -and @samp{two-and-four}, but not @samp{one-and-four} or -@samp{two-and-three}. For example, if the pattern matches -@samp{one-and-}, then its @w{group 2} matches the empty string and its -@w{group 3} doesn't participate in the match. So, if it then matches -@samp{four}, then when it tries to back reference @w{group 3}---which it -will attempt to do because @samp{\3} follows the @samp{four}---the match -will fail because @w{group 3} didn't participate in the match. - -@end itemize - -You can use a back reference as an argument to a repetition operator. For -example, @samp{(a(b))\2*} matches @samp{a} followed by one or more -@samp{b}s. Similarly, @samp{(a(b))\2@{3@}} matches @samp{abbbb}. - -If there is no preceding @w{@var{digit}-th} subexpression, the regular -expression is invalid. - - -@node Anchoring Operators, , Back-reference Operator, Common Operators -@section Anchoring Operators - -@cindex anchoring -@cindex regexp anchoring - -These operators can constrain a pattern to match only at the beginning or -end of the entire string or at the beginning or end of a line.
- -@menu -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ -@end menu - - -@node Match-beginning-of-line Operator, Match-end-of-line Operator, , Anchoring Operators -@subsection The Match-beginning-of-line Operator (@code{^}) - -@kindex ^ -@cindex beginning-of-line operator -@cindex anchors - -This operator can match the empty string either at the beginning of the -string or after a newline character. Thus, it is said to @dfn{anchor} -the pattern to the beginning of a line. - -In the cases following, @samp{^} represents this operator. (Otherwise, -@samp{^} is ordinary.) - -@itemize @bullet - -@item -It (the @samp{^}) is first in the pattern, as in @samp{^foo}. - -@cnindex RE_CONTEXT_INDEP_ANCHORS @r{(and @samp{^})} -@item -The syntax bit @code{RE_CONTEXT_INDEP_ANCHORS} is set, and it is outside -a bracket expression. - -@cindex open-group operator and @samp{^} -@cindex alternation operator and @samp{^} -@item -It follows an open-group or alternation operator, as in @samp{a\(^b\)} -and @samp{a\|^b}. @xref{Grouping Operators}, and @ref{Alternation -Operator}. - -@end itemize - -These rules imply that some valid patterns containing @samp{^} cannot be -matched; for example, @samp{foo^bar} if @code{RE_CONTEXT_INDEP_ANCHORS} -is set. - -@vindex not_bol @r{field in pattern buffer} -If the @code{not_bol} field is set in the pattern buffer (@pxref{GNU -Pattern Buffers}), then @samp{^} fails to match at the beginning of the -string. @xref{POSIX Matching}, for when you might find this useful. - -@vindex newline_anchor @r{field in pattern buffer} -If the @code{newline_anchor} field isn't set in the pattern buffer, then -@samp{^} fails to match after a newline. This is useful when you do not -regard the string to be matched as broken into lines.
- - -@node Match-end-of-line Operator, , Match-beginning-of-line Operator, Anchoring Operators -@subsection The Match-end-of-line Operator (@code{$}) - -@kindex $ -@cindex end-of-line operator -@cindex anchors - -This operator can match the empty string either at the end of -the string or before a newline character in the string. Thus, it is -said to @dfn{anchor} the pattern to the end of a line. - -It is always represented by @samp{$}. For example, @samp{foo$} usually -matches, e.g., @samp{foo} and, e.g., the first three characters of -@samp{foo\nbar}. - -Its interaction with the syntax bits and pattern buffer fields is -exactly the dual of @samp{^}'s; see the previous section. (That is, -``beginning'' becomes ``end'', ``next'' becomes ``previous'', and -``after'' becomes ``before''.) - - -@node GNU Operators, GNU Emacs Operators, Common Operators, Top -@chapter GNU Operators - -Following are operators that @sc{gnu} defines (and @sc{posix} doesn't). - -@menu -* Word Operators:: -* Buffer Operators:: -@end menu - -@node Word Operators, Buffer Operators, , GNU Operators -@section Word Operators - -The operators in this section require Regex to recognize parts of words. -Regex uses a syntax table to determine whether or not a character is -part of a word, i.e., whether or not it is @dfn{word-constituent}. - -@menu -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W -@end menu - -@node Non-Emacs Syntax Tables, Match-word-boundary Operator, , Word Operators -@subsection Non-Emacs Syntax Tables - -A @dfn{syntax table} is an array indexed by the characters in your -character set. In the @sc{ascii} encoding, therefore, a syntax table -has 256 elements. Regex always uses a @code{char *} variable -@code{re_syntax_table} as its syntax table. 
In some cases, it -initializes this variable and in others it expects you to initialize it. - -@itemize @bullet -@item -If Regex is compiled with the preprocessor symbols @code{emacs} and -@code{SYNTAX_TABLE} both undefined, then Regex allocates -@code{re_syntax_table} and initializes an element @var{i} either to -@code{Sword} (which it defines) if @var{i} is a letter, number, or -@samp{_}, or to zero if it's not. - -@item -If Regex is compiled with @code{emacs} undefined but @code{SYNTAX_TABLE} -defined, then Regex expects you to define a @code{char *} variable -@code{re_syntax_table} to be a valid syntax table. - -@item -@xref{Emacs Syntax Tables}, for what happens when Regex is compiled with -the preprocessor symbol @code{emacs} defined. - -@end itemize - -@node Match-word-boundary Operator, Match-within-word Operator, Non-Emacs Syntax Tables, Word Operators -@subsection The Match-word-boundary Operator (@code{\b}) - -@cindex @samp{\b} -@cindex word boundaries, matching - -This operator (represented by @samp{\b}) matches the empty string at -either the beginning or the end of a word. For example, @samp{\brat\b} -matches the separate word @samp{rat}. - -@node Match-within-word Operator, Match-beginning-of-word Operator, Match-word-boundary Operator, Word Operators -@subsection The Match-within-word Operator (@code{\B}) - -@cindex @samp{\B} - -This operator (represented by @samp{\B}) matches the empty string within -a word. For example, @samp{c\Brat\Be} matches @samp{crate}, but -@samp{dirty \Brat} doesn't match @samp{dirty rat}. - -@node Match-beginning-of-word Operator, Match-end-of-word Operator, Match-within-word Operator, Word Operators -@subsection The Match-beginning-of-word Operator (@code{\<}) - -@cindex @samp{\<} - -This operator (represented by @samp{\<}) matches the empty string at the -beginning of a word. 
- -@node Match-end-of-word Operator, Match-word-constituent Operator, Match-beginning-of-word Operator, Word Operators -@subsection The Match-end-of-word Operator (@code{\>}) - -@cindex @samp{\>} - -This operator (represented by @samp{\>}) matches the empty string at the -end of a word. - -@node Match-word-constituent Operator, Match-non-word-constituent Operator, Match-end-of-word Operator, Word Operators -@subsection The Match-word-constituent Operator (@code{\w}) - -@cindex @samp{\w} - -This operator (represented by @samp{\w}) matches any word-constituent -character. - -@node Match-non-word-constituent Operator, , Match-word-constituent Operator, Word Operators -@subsection The Match-non-word-constituent Operator (@code{\W}) - -@cindex @samp{\W} - -This operator (represented by @samp{\W}) matches any character that is -not word-constituent. - - -@node Buffer Operators, , Word Operators, GNU Operators -@section Buffer Operators - -Following are operators which work on buffers. In Emacs, a @dfn{buffer} -is, naturally, an Emacs buffer. For other programs, Regex considers the -entire string to be matched as the buffer. - -@menu -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' -@end menu - - -@node Match-beginning-of-buffer Operator, Match-end-of-buffer Operator, , Buffer Operators -@subsection The Match-beginning-of-buffer Operator (@code{\`}) - -@cindex @samp{\`} - -This operator (represented by @samp{\`}) matches the empty string at the -beginning of the buffer. - -@node Match-end-of-buffer Operator, , Match-beginning-of-buffer Operator, Buffer Operators -@subsection The Match-end-of-buffer Operator (@code{\'}) - -@cindex @samp{\'} - -This operator (represented by @samp{\'}) matches the empty string at the -end of the buffer. 
- - -@node GNU Emacs Operators, What Gets Matched?, GNU Operators, Top -@chapter GNU Emacs Operators - -Following are operators that @sc{gnu} defines (and @sc{posix} doesn't) -that you can use only when Regex is compiled with the preprocessor -symbol @code{emacs} defined. - -@menu -* Syntactic Class Operators:: -@end menu - - -@node Syntactic Class Operators, , , GNU Emacs Operators -@section Syntactic Class Operators - -The operators in this section require Regex to recognize the syntactic -classes of characters. Regex uses a syntax table to determine this. - -@menu -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS -@end menu - -@node Emacs Syntax Tables, Match-syntactic-class Operator, , Syntactic Class Operators -@subsection Emacs Syntax Tables - -A @dfn{syntax table} is an array indexed by the characters in your -character set. In the @sc{ascii} encoding, therefore, a syntax table -has 256 elements. - -If Regex is compiled with the preprocessor symbol @code{emacs} defined, -then Regex expects you to define and initialize the variable -@code{re_syntax_table} to be an Emacs syntax table. Emacs' syntax -tables are more complicated than Regex's own (@pxref{Non-Emacs Syntax -Tables}). @xref{Syntax, , Syntax, emacs, The GNU Emacs User's Manual}, -for a description of Emacs' syntax tables. - -@node Match-syntactic-class Operator, Match-not-syntactic-class Operator, Emacs Syntax Tables, Syntactic Class Operators -@subsection The Match-syntactic-class Operator (@code{\s}@var{class}) - -@cindex @samp{\s} - -This operator matches any character whose syntactic class is represented -by a specified character. @samp{\s@var{class}} represents this operator -where @var{class} is the character representing the syntactic class you -want. For example, @samp{w} represents the syntactic -class of word-constituent characters, so @samp{\sw} matches any -word-constituent character. 
- -@node Match-not-syntactic-class Operator, , Match-syntactic-class Operator, Syntactic Class Operators -@subsection The Match-not-syntactic-class Operator (@code{\S}@var{class}) - -@cindex @samp{\S} - -This operator is similar to the match-syntactic-class operator except -that it matches any character whose syntactic class is @emph{not} -represented by the specified character. @samp{\S@var{class}} represents -this operator. For example, @samp{w} represents the syntactic class of -word-constituent characters, so @samp{\Sw} matches any character that is -not word-constituent. - - -@node What Gets Matched?, Programming with Regex, GNU Emacs Operators, Top -@chapter What Gets Matched? - -Regex usually matches strings according to the ``leftmost longest'' -rule; that is, it chooses the longest of the leftmost matches. This -does not mean that, for a regular expression containing subexpressions, -it simply chooses the longest match for each subexpression, left to -right; the overall match must also be the longest possible one. - -For example, @samp{(ac*)(c*d[ac]*)\1} matches @samp{acdacaaa}, not -@samp{acdac}, as it would if it were to choose the longest match for the -first subexpression. - - -@node Programming with Regex, Copying, What Gets Matched?, Top -@chapter Programming with Regex - -Here we describe how you use the Regex data structures and functions in -C programs. Regex has three interfaces: one designed for @sc{gnu}, one -compatible with @sc{posix} and one compatible with Berkeley @sc{unix}. - -@menu -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: -@end menu - - -@node GNU Regex Functions, POSIX Regex Functions, , Programming with Regex -@section GNU Regex Functions - -If you're writing code that doesn't need to be compatible with either -@sc{posix} or Berkeley @sc{unix}, you can use these functions. They -provide more options than the other interfaces. - -@menu -* GNU Pattern Buffers:: The re_pattern_buffer type.
-* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () -@end menu - - -@node GNU Pattern Buffers, GNU Regular Expression Compiling, , GNU Regex Functions -@subsection GNU Pattern Buffers - -@cindex pattern buffer, definition of -@tindex re_pattern_buffer @r{definition} -@tindex struct re_pattern_buffer @r{definition} - -To compile, match, or search for a given regular expression, you must -supply a pattern buffer. A @dfn{pattern buffer} holds one compiled -regular expression.@footnote{Regular expressions are also referred to as -``patterns,'' hence the name ``pattern buffer.''} - -You can have several different pattern buffers simultaneously, each -holding a compiled pattern for a different regular expression. - -@file{regex.h} defines the pattern buffer @code{struct} as follows: - -@example -[[[ pattern_buffer ]]] -@end example - - -@node GNU Regular Expression Compiling, GNU Matching, GNU Pattern Buffers, GNU Regex Functions -@subsection GNU Regular Expression Compiling - -In @sc{gnu}, you can both match and search for a given regular -expression. To do either, you must first compile it in a pattern buffer -(@pxref{GNU Pattern Buffers}). - -@cindex syntax initialization -@vindex re_syntax_options @r{initialization} -Regular expressions match according to the syntax with which they were -compiled; with @sc{gnu}, you indicate what syntax you want by setting -the variable @code{re_syntax_options} (declared in @file{regex.h} and -defined in @file{regex.c}) before calling the compiling function, -@code{re_compile_pattern} (see below). @xref{Syntax Bits}, and -@ref{Predefined Syntaxes}. 
- -You can change the value of @code{re_syntax_options} at any time. -Usually, however, you set its value once and then never change it. - -@cindex pattern buffer initialization -@code{re_compile_pattern} takes a pattern buffer as an argument. You -must initialize the following fields: - -@table @code - -@item translate -@vindex translate @r{initialization} -Initialize this to point to a translate table if you want one, or to -zero if you don't. We explain translate tables in @ref{GNU Translate -Tables}. - -@item fastmap -@vindex fastmap @r{initialization} -Initialize this to nonzero if you want a fastmap, or to zero if you -don't. - -@item buffer -@itemx allocated -@vindex buffer @r{initialization} -@vindex allocated @r{initialization} -@findex malloc -If you want @code{re_compile_pattern} to allocate memory for the -compiled pattern, set both of these to zero. If you have an existing -block of memory (allocated with @code{malloc}) you want Regex to use, -set @code{buffer} to its address and @code{allocated} to its size (in -bytes). - -@code{re_compile_pattern} uses @code{realloc} to extend the space for -the compiled pattern as necessary. - -@end table - -To compile a pattern buffer, use: - -@findex re_compile_pattern -@example -char * -re_compile_pattern (const char *@var{regex}, const int @var{regex_size}, - struct re_pattern_buffer *@var{pattern_buffer}) -@end example - -@noindent -@var{regex} is the regular expression's address, @var{regex_size} is its -length, and @var{pattern_buffer} is the pattern buffer's address. - -If @code{re_compile_pattern} successfully compiles the regular -expression, it returns zero and sets @code{*@var{pattern_buffer}} to the -compiled pattern. It sets the pattern buffer's fields as follows: - -@table @code -@item buffer -@vindex buffer @r{field, set by @code{re_compile_pattern}} -to the compiled pattern.
- -@item used -@vindex used @r{field, set by @code{re_compile_pattern}} -to the number of bytes the compiled pattern in @code{buffer} occupies. - -@item syntax -@vindex syntax @r{field, set by @code{re_compile_pattern}} -to the current value of @code{re_syntax_options}. - -@item re_nsub -@vindex re_nsub @r{field, set by @code{re_compile_pattern}} -to the number of subexpressions in @var{regex}. - -@item fastmap_accurate -@vindex fastmap_accurate @r{field, set by @code{re_compile_pattern}} -to zero on the theory that the pattern you're compiling is different -than the one previously compiled into @code{buffer}; in that case (since -you can't make a fastmap without a compiled pattern), -@code{fastmap} would either contain an incompatible fastmap, or nothing -at all. - -@c xx what else? -@end table - -If @code{re_compile_pattern} can't compile @var{regex}, it returns an -error string corresponding to one of the errors listed in @ref{POSIX -Regular Expression Compiling}. - - -@node GNU Matching, GNU Searching, GNU Regular Expression Compiling, GNU Regex Functions -@subsection GNU Matching - -@cindex matching with GNU functions - -Matching the @sc{gnu} way means trying to match as much of a string as -possible starting at a position within it you specify. Once you've compiled -a pattern into a pattern buffer (@pxref{GNU Regular Expression -Compiling}), you can ask the matcher to match that pattern against a -string using: - -@findex re_match -@example -int -re_match (struct re_pattern_buffer *@var{pattern_buffer}, - const char *@var{string}, const int @var{size}, - const int @var{start}, struct re_registers *@var{regs}) -@end example - -@noindent -@var{pattern_buffer} is the address of a pattern buffer containing a -compiled pattern. @var{string} is the string you want to match; it can -contain newline and null characters. @var{size} is the length of that -string. 
@var{start} is the string index at which you want to -begin matching; the first character of @var{string} is at index zero. -@xref{Using Registers}, for an explanation of @var{regs}; you can safely -pass zero. - -@code{re_match} matches the regular expression in @var{pattern_buffer} -against the string @var{string} according to the syntax in -@var{pattern_buffer}'s @code{syntax} field. (@xref{GNU Regular -Expression Compiling}, for how to set it.) The function returns -@math{-1} if the compiled pattern does not match any part of -@var{string} and @math{-2} if an internal error happens; otherwise, it -returns how many (possibly zero) characters of @var{string} the pattern -matched. - -An example: suppose @var{pattern_buffer} points to a pattern buffer -containing the compiled pattern for @samp{a*}, and @var{string} points -to @samp{aaaaab} (whereupon @var{size} should be 6). Then if @var{start} -is 2, @code{re_match} returns 3, i.e., @samp{a*} would have matched the -last three @samp{a}s in @var{string}. If @var{start} is 0, -@code{re_match} returns 5, i.e., @samp{a*} would have matched all the -@samp{a}s in @var{string}. If @var{start} is either 5 or 6, it returns -zero. - -If @var{start} is not between zero and @var{size}, then -@code{re_match} returns @math{-1}. - - -@node GNU Searching, Matching/Searching with Split Data, GNU Matching, GNU Regex Functions -@subsection GNU Searching - -@cindex searching with GNU functions - -@dfn{Searching} means trying to match starting at successive positions -within a string. The function @code{re_search} does this. - -Before calling @code{re_search}, you must compile your regular -expression. @xref{GNU Regular Expression Compiling}. 
- -Here is the function declaration: - -@findex re_search -@example -int -re_search (struct re_pattern_buffer *@var{pattern_buffer}, - const char *@var{string}, const int @var{size}, - const int @var{start}, const int @var{range}, - struct re_registers *@var{regs}) -@end example - -@noindent -@vindex start @r{argument to @code{re_search}} -@vindex range @r{argument to @code{re_search}} -whose arguments are the same as those to @code{re_match} (@pxref{GNU -Matching}) except that the two arguments @var{start} and @var{range} -replace @code{re_match}'s argument @var{start}. - -If @var{range} is positive, then @code{re_search} attempts a match -starting first at index @var{start}, then at @math{@var{start} + 1} if -that fails, and so on, up to @math{@var{start} + @var{range}}; if -@var{range} is negative, then it attempts a match starting first at -index @var{start}, then at @math{@var{start} -1} if that fails, and so -on. - -If @var{start} is not between zero and @var{size}, then @code{re_search} -returns @math{-1}. When @var{range} is positive, @code{re_search} -adjusts @var{range} so that @math{@var{start} + @var{range} - 1} is -between zero and @var{size}, if necessary; that way it won't search -outside of @var{string}. Similarly, when @var{range} is negative, -@code{re_search} adjusts @var{range} so that @math{@var{start} + -@var{range} + 1} is between zero and @var{size}, if necessary. - -If the @code{fastmap} field of @var{pattern_buffer} is zero, -@code{re_search} matches starting at consecutive positions; otherwise, -it uses @code{fastmap} to make the search more efficient. -@xref{Searching with Fastmaps}. - -If no match is found, @code{re_search} returns @math{-1}. If -a match is found, it returns the index where the match began. If an -internal error happens, it returns @math{-2}. 
- - -@node Matching/Searching with Split Data, Searching with Fastmaps, GNU Searching, GNU Regex Functions -@subsection Matching and Searching with Split Data - -Using the functions @code{re_match_2} and @code{re_search_2}, you can -match or search in data that is divided into two strings. - -The function: - -@findex re_match_2 -@example -int -re_match_2 (struct re_pattern_buffer *@var{buffer}, - const char *@var{string1}, const int @var{size1}, - const char *@var{string2}, const int @var{size2}, - const int @var{start}, - struct re_registers *@var{regs}, - const int @var{stop}) -@end example - -@noindent -is similar to @code{re_match} (@pxref{GNU Matching}) except that you -pass @emph{two} data strings and sizes, and an index @var{stop} beyond -which you don't want the matcher to try matching. As with -@code{re_match}, if it succeeds, @code{re_match_2} returns how many -characters of @var{string} it matched. Regard @var{string1} and -@var{string2} as concatenated when you set the arguments @var{start} and -@var{stop} and use the contents of @var{regs}; @code{re_match_2} never -returns a value larger than @math{@var{size1} + @var{size2}}. - -The function: - -@findex re_search_2 -@example -int -re_search_2 (struct re_pattern_buffer *@var{buffer}, - const char *@var{string1}, const int @var{size1}, - const char *@var{string2}, const int @var{size2}, - const int @var{start}, const int @var{range}, - struct re_registers *@var{regs}, - const int @var{stop}) -@end example - -@noindent -is similarly related to @code{re_search}. - - -@node Searching with Fastmaps, GNU Translate Tables, Matching/Searching with Split Data, GNU Regex Functions -@subsection Searching with Fastmaps - -@cindex fastmaps -If you're searching through a long string, you should use a fastmap. -Without one, the searcher tries to match at consecutive positions in the -string. Generally, most of the characters in the string could not start -a match. 
It takes much longer to try matching at a given position in the -string than it does to check in a table whether or not the character at -that position could start a match. A @dfn{fastmap} is such a table. - -More specifically, a fastmap is an array indexed by the characters in -your character set. Under the @sc{ascii} encoding, therefore, a fastmap -has 256 elements. If you want the searcher to use a fastmap with a -given pattern buffer, you must allocate the array and assign the array's -address to the pattern buffer's @code{fastmap} field. You either can -compile the fastmap yourself or have @code{re_search} do it for you; -when @code{fastmap} is nonzero, it automatically compiles a fastmap the -first time you search using a particular compiled pattern. - -To compile a fastmap yourself, use: - -@findex re_compile_fastmap -@example -int -re_compile_fastmap (struct re_pattern_buffer *@var{pattern_buffer}) -@end example - -@noindent -@var{pattern_buffer} is the address of a pattern buffer. If the -character @var{c} could start a match for the pattern, -@code{re_compile_fastmap} makes -@code{@var{pattern_buffer}->fastmap[@var{c}]} nonzero. It returns -@math{0} if it can compile a fastmap and @math{-2} if there is an -internal error. For example, if @samp{|} is the alternation operator -and @var{pattern_buffer} holds the compiled pattern for @samp{a|b}, then -@code{re_compile_fastmap} sets @code{fastmap['a']} and -@code{fastmap['b']} (and no others). - -@code{re_search} uses a fastmap as it moves along in the string: it -checks the string's characters until it finds one that's in the fastmap. -Then it tries matching at that character. If the match fails, it -repeats the process. So, by using a fastmap, @code{re_search} doesn't -waste time trying to match at positions in the string that couldn't -start a match. - -If you don't want @code{re_search} to use a fastmap, -store zero in the @code{fastmap} field of the pattern buffer before -calling @code{re_search}. 
- -Once you've initialized a pattern buffer's @code{fastmap} field, you -need never do so again---even if you compile a new pattern in -it---provided the way the field is set still reflects whether or not you -want a fastmap. @code{re_search} will still either do nothing if -@code{fastmap} is null or, if it isn't, compile a new fastmap for the -new pattern. - -@node GNU Translate Tables, Using Registers, Searching with Fastmaps, GNU Regex Functions -@subsection GNU Translate Tables - -If you set the @code{translate} field of a pattern buffer to a translate -table, then the @sc{gnu} Regex functions to which you've passed that -pattern buffer use it to apply a simple transformation -to all the regular expression and string characters at which they look. - -A @dfn{translate table} is an array indexed by the characters in your -character set. Under the @sc{ascii} encoding, therefore, a translate -table has 256 elements. The array's elements are also characters in -your character set. When the Regex functions see a character @var{c}, -they use @code{translate[@var{c}]} in its place, with one exception: the -character after a @samp{\} is not translated. (This ensures that the -operators, e.g., @samp{\B} and @samp{\b}, are always distinguishable.) - -For example, a table that maps all lowercase letters to the -corresponding uppercase ones would cause the matcher to ignore -differences in case.@footnote{A table that maps all uppercase letters to -the corresponding lowercase ones would work just as well for this -purpose.} Such a table would map all characters except lowercase letters -to themselves, and lowercase letters to the corresponding uppercase -ones. 
Under the @sc{ascii} encoding, here's how you could initialize -such a table (we'll call it @code{case_fold}): - -@example -for (i = 0; i < 256; i++) - case_fold[i] = i; -for (i = 'a'; i <= 'z'; i++) - case_fold[i] = i - ('a' - 'A'); -@end example - -You tell Regex to use a translate table on a given pattern buffer by -assigning that table's address to the @code{translate} field of that -buffer. If you don't want Regex to do any translation, put zero into -this field. You'll get weird results if you change the table's contents -anytime between compiling the pattern buffer, compiling its fastmap, and -matching or searching with the pattern buffer. - -@node Using Registers, Freeing GNU Pattern Buffers, GNU Translate Tables, GNU Regex Functions -@subsection Using Registers - -A group in a regular expression can match a (possibly empty) substring -of the string that regular expression as a whole matched. The matcher -remembers the beginning and end of the substring matched by -each group. - -To find out what they matched, pass a nonzero @var{regs} argument to a -@sc{gnu} matching or searching function (@pxref{GNU Matching} and -@ref{GNU Searching}), i.e., the address of a structure of this type, as -defined in @file{regex.h}: - -@c We don't bother to include this directly from regex.h, -@c since it changes so rarely. -@example -@tindex re_registers -@vindex num_regs @r{in @code{struct re_registers}} -@vindex start @r{in @code{struct re_registers}} -@vindex end @r{in @code{struct re_registers}} -struct re_registers -@{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -@}; -@end example - -Except for (possibly) the @var{num_regs}'th element (see below), the -@var{i}th element of the @code{start} and @code{end} arrays records -information about the @var{i}th group in the pattern. (They're declared -as C pointers, but this is only because not all C compilers accept -zero-length arrays; conceptually, it is simplest to think of them as -arrays.) 
- -The @code{start} and @code{end} arrays are allocated in various ways, -depending on the value of the @code{regs_allocated} -@vindex regs_allocated -field in the pattern buffer passed to the matcher. - -The simplest and perhaps most useful is to let the matcher (re)allocate -enough space to record information for all the groups in the regular -expression. If @code{regs_allocated} is @code{REGS_UNALLOCATED}, -@vindex REGS_UNALLOCATED -the matcher allocates @math{1 + @var{re_nsub}} elements (@code{re_nsub} is another field in the -pattern buffer; @pxref{GNU Pattern Buffers}). It sets the extra element -to @math{-1}, and sets @code{regs_allocated} to @code{REGS_REALLOCATE}. -@vindex REGS_REALLOCATE -Then on subsequent calls with the same pattern buffer and @var{regs} -arguments, the matcher reallocates more space if necessary. - -It would perhaps be more logical to make the @code{regs_allocated} field -part of the @code{re_registers} structure, instead of part of the -pattern buffer. But in that case the caller would be forced to -initialize the structure before passing it. Much existing code doesn't -do this initialization, and it's arguably better to avoid it anyway. - -@code{re_compile_pattern} sets @code{regs_allocated} to -@code{REGS_UNALLOCATED}, -so if you use the GNU regular expression -functions, you get this behavior by default. - -xx document re_set_registers - -@sc{posix}, on the other hand, requires a different interface: the -caller is supposed to pass in a fixed-length array which the matcher -fills. Therefore, if @code{regs_allocated} is @code{REGS_FIXED} -@vindex REGS_FIXED -the matcher simply fills that array. - -The following examples illustrate the information recorded in the -@code{re_registers} structure. (In all of them, @samp{(} represents the -open-group and @samp{)} the close-group operator. The first character -in the string @var{string} is at index 0.) - -@c xx i'm not sure this is all true anymore. 
- -@itemize @bullet - -@item -If the regular expression has an @w{@var{i}-th} -group not contained within another group that matches a -substring of @var{string}, then the function sets -@code{@w{@var{regs}->}start[@var{i}]} to the index in @var{string} where -the substring matched by the @w{@var{i}-th} group begins, and -@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that -substring's end. The function sets @code{@w{@var{regs}->}start[0]} and -@code{@w{@var{regs}->}end[0]} to analogous information about the entire -pattern. - -For example, when you match @samp{((a)(b))} against @samp{ab}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]} - -@item -0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]} - -@item -1 in @code{@w{@var{regs}->}start[3]} and 2 in @code{@w{@var{regs}->}end[3]} -@end itemize - -@item -If a group matches more than once (as it might if followed by, -e.g., a repetition operator), then the function reports the information -about what the group @emph{last} matched. - -For example, when you match the pattern @samp{(a)*} against the string -@samp{aa}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]} - -@item -1 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]} -@end itemize - -@item -If the @w{@var{i}-th} group does not participate in a -successful match, e.g., it is an alternative not taken or a -repetition operator allows zero repetitions of it, then the function -sets @code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}. 
- -For example, when you match the pattern @samp{(a)*b} against -the string @samp{b}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]} -@end itemize - -@item -If the @w{@var{i}-th} group matches a zero-length string, then the -function sets @code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that -zero-length string. - -For example, when you match the pattern @samp{(a*)b} against the string -@samp{b}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]} -@end itemize - -@ignore -The function sets @code{@w{@var{regs}->}start[0]} and -@code{@w{@var{regs}->}end[0]} to analogous information about the entire -pattern. - -For example, when you match the pattern @samp{(a*)} against the empty -string, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 0 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]} -@end itemize -@end ignore - -@item -If an @w{@var{i}-th} group contains a @w{@var{j}-th} group -in turn not contained within any other group within group @var{i} and -the function reports a match of the @w{@var{i}-th} group, then it -records in @code{@w{@var{regs}->}start[@var{j}]} and -@code{@w{@var{regs}->}end[@var{j}]} the last match (if it matched) of -the @w{@var{j}-th} group. 
- -For example, when you match the pattern @samp{((a*)b)*} against the -string @samp{abb}, @w{group 2} last matches the empty string, so you -get what it previously matched: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]} - -@item -2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]} - -@item -2 in @code{@w{@var{regs}->}start[2]} and 2 in @code{@w{@var{regs}->}end[2]} -@end itemize - -When you match the pattern @samp{((a)*b)*} against the string -@samp{abb}, @w{group 2} doesn't participate in the last match, so you -get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]} - -@item -2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]} - -@item -0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]} -@end itemize - -@item -If an @w{@var{i}-th} group contains a @w{@var{j}-th} group -in turn not contained within any other group within group @var{i} -and the function sets -@code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}, then it also sets -@code{@w{@var{regs}->}start[@var{j}]} and -@code{@w{@var{regs}->}end[@var{j}]} to @math{-1}. 
- -For example, when you match the pattern @samp{((a)*b)*c} against the -string @samp{c}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[2]} and @math{-1} in @code{@w{@var{regs}->}end[2]} -@end itemize - -@end itemize - -@node Freeing GNU Pattern Buffers, , Using Registers, GNU Regex Functions -@subsection Freeing GNU Pattern Buffers - -To free any allocated fields of a pattern buffer, you can use the -@sc{posix} function described in @ref{Freeing POSIX Pattern Buffers}, -since the type @code{regex_t}---the type for @sc{posix} pattern -buffers---is equivalent to the type @code{re_pattern_buffer}. After -freeing a pattern buffer, you need to again compile a regular expression -in it (@pxref{GNU Regular Expression Compiling}) before passing it to -a matching or searching function. - - -@node POSIX Regex Functions, BSD Regex Functions, GNU Regex Functions, Programming with Regex -@section POSIX Regex Functions - -If you're writing code that has to be @sc{posix} compatible, you'll need -to use these functions. Their interfaces are as specified by @sc{posix}, -draft 1003.2/D11.2. - -@menu -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () -@end menu - - -@node POSIX Pattern Buffers, POSIX Regular Expression Compiling, , POSIX Regex Functions -@subsection POSIX Pattern Buffers - -To compile or match a given regular expression the @sc{posix} way, you -must supply a pattern buffer exactly the way you do for @sc{gnu} -(@pxref{GNU Pattern Buffers}). 
@sc{posix} pattern buffers have type -@code{regex_t}, which is equivalent to the @sc{gnu} pattern buffer -type @code{re_pattern_buffer}. - - -@node POSIX Regular Expression Compiling, POSIX Matching, POSIX Pattern Buffers, POSIX Regex Functions -@subsection POSIX Regular Expression Compiling - -With @sc{posix}, you can only search for a given regular expression; you -can't match it. To do this, you must first compile it in a -pattern buffer, using @code{regcomp}. - -@ignore -Before calling @code{regcomp}, you must initialize this pattern buffer -as you do for @sc{gnu} (@pxref{GNU Regular Expression Compiling}). See -below, however, for how to choose a syntax with which to compile. -@end ignore - -To compile a pattern buffer, use: - -@findex regcomp -@example -int -regcomp (regex_t *@var{preg}, const char *@var{regex}, int @var{cflags}) -@end example - -@noindent -@var{preg} is the initialized pattern buffer's address, @var{regex} is -the regular expression's address, and @var{cflags} is the compilation -flags, which Regex considers as a collection of bits. Here are the -valid bits, as defined in @file{regex.h}: - -@table @code - -@item REG_EXTENDED -@vindex REG_EXTENDED -says to use @sc{posix} Extended Regular Expression syntax; if this isn't -set, then says to use @sc{posix} Basic Regular Expression syntax. -@code{regcomp} sets @var{preg}'s @code{syntax} field accordingly. - -@item REG_ICASE -@vindex REG_ICASE -@cindex ignoring case -says to ignore case; @code{regcomp} sets @var{preg}'s @code{translate} -field to a translate table which ignores case, replacing anything you've -put there before. - -@item REG_NOSUB -@vindex REG_NOSUB -says to set @var{preg}'s @code{no_sub} field; @pxref{POSIX Matching}, -for what this means. - -@item REG_NEWLINE -@vindex REG_NEWLINE -says that a: - -@itemize @bullet - -@item -match-any-character operator (@pxref{Match-any-character -Operator}) doesn't match a newline. 
- -@item -nonmatching list not containing a newline (@pxref{List -Operators}) matches a newline. - -@item -match-beginning-of-line operator (@pxref{Match-beginning-of-line -Operator}) matches the empty string immediately after a newline, -regardless of how @code{REG_NOTBOL} is set (@pxref{POSIX Matching}, for -an explanation of @code{REG_NOTBOL}). - -@item -match-end-of-line operator (@pxref{Match-end-of-line -Operator}) matches the empty string immediately before a newline, -regardless of how @code{REG_NOTEOL} is set (@pxref{POSIX Matching}, -for an explanation of @code{REG_NOTEOL}). - -@end itemize - -@end table - -If @code{regcomp} successfully compiles the regular expression, it -returns zero and sets @code{*@var{preg}} to the compiled -pattern. Except for @code{syntax} (which it sets as explained above), it -also sets the same fields the same way as does the @sc{gnu} compiling -function (@pxref{GNU Regular Expression Compiling}). - -If @code{regcomp} can't compile the regular expression, it returns one -of the error codes listed here. (Except when noted differently, the -syntax in all examples below is basic regular expression syntax.) - -@table @code - -@comment repetitions -@item REG_BADRPT -For example, the consecutive repetition operators @samp{**} in -@samp{a**} are invalid. As another example, if the syntax is extended -regular expression syntax, then the repetition operator @samp{*} with -nothing on which to operate in @samp{*} is invalid. - -@item REG_BADBR -For example, the @var{count} @samp{-1} in @samp{a\@{-1} is invalid. - -@item REG_EBRACE -For example, @samp{a\@{1} is missing a close-interval operator. - -@comment lists -@item REG_EBRACK -For example, @samp{[a} is missing a close-list operator. - -@item REG_ERANGE -For example, the range ending point @samp{z} that collates lower than -does its starting point @samp{a} in @samp{[z-a]} is invalid. 
Also, the -range with the character class @samp{[:alpha:]} as its starting point in -@samp{[[:alpha:]-|]} is invalid. - -@item REG_ECTYPE -For example, the character class name @samp{foo} in @samp{[[:foo:]]} is -invalid. - -@comment groups -@item REG_EPAREN -For example, @samp{a\)} is missing an open-group operator and @samp{\(a} -is missing a close-group operator. - -@item REG_ESUBREG -For example, the back reference @samp{\2} that refers to a nonexistent -subexpression in @samp{\(a\)\2} is invalid. - -@comment unfinished business - -@item REG_EEND -Returned when a regular expression causes no other more specific error. - -@item REG_EESCAPE -For example, the trailing backslash @samp{\} in @samp{a\} is invalid, as is the -one in @samp{\}. - -@comment kitchen sink -@item REG_BADPAT -For example, in the extended regular expression syntax, the empty group -@samp{()} in @samp{a()b} is invalid. - -@comment internal -@item REG_ESIZE -Returned when a regular expression needs a pattern buffer larger than -65536 bytes. - -@item REG_ESPACE -Returned when a regular expression causes Regex to run out of memory. - -@end table - - -@node POSIX Matching, Reporting Errors, POSIX Regular Expression Compiling, POSIX Regex Functions -@subsection POSIX Matching - -Matching the @sc{posix} way means trying to match a null-terminated -string starting at its first character. Once you've compiled a pattern -into a pattern buffer (@pxref{POSIX Regular Expression Compiling}), you -can ask the matcher to match that pattern against a string using: - -@findex regexec -@example -int -regexec (const regex_t *@var{preg}, const char *@var{string}, - size_t @var{nmatch}, regmatch_t @var{pmatch}[], int @var{eflags}) -@end example - -@noindent -@var{preg} is the address of a pattern buffer for a compiled pattern. -@var{string} is the string you want to match. - -@xref{Using Byte Offsets}, for an explanation of @var{pmatch}. 
If you -pass zero for @var{nmatch} or you compiled @var{preg} with the -compilation flag @code{REG_NOSUB} set, then @code{regexec} will ignore -@var{pmatch}; otherwise, you must allocate it to have at least -@var{nmatch} elements. @code{regexec} will record @var{nmatch} byte -offsets in @var{pmatch}, and set to @math{-1} any unused elements up to -@code{@var{pmatch}[@var{nmatch} - 1]}. - -@var{eflags} specifies @dfn{execution flags}---namely, the two bits -@code{REG_NOTBOL} and @code{REG_NOTEOL} (defined in @file{regex.h}). If -you set @code{REG_NOTBOL}, then the match-beginning-of-line operator -(@pxref{Match-beginning-of-line Operator}) always fails to match. -This lets you match against pieces of a line, as you would need to if, -say, searching for repeated instances of a given pattern in a line; it -would work correctly for patterns both with and without -match-beginning-of-line operators. @code{REG_NOTEOL} works analogously -for the match-end-of-line operator (@pxref{Match-end-of-line -Operator}); it exists for symmetry. - -@code{regexec} tries to find a match for @var{preg} in @var{string} -according to the syntax in @var{preg}'s @code{syntax} field. -(@xref{POSIX Regular Expression Compiling}, for how to set it.) The -function returns zero if the compiled pattern matches @var{string} and -@code{REG_NOMATCH} (defined in @file{regex.h}) if it doesn't. - -@node Reporting Errors, Using Byte Offsets, POSIX Matching, POSIX Regex Functions -@subsection Reporting Errors - -If either @code{regcomp} or @code{regexec} fails, it returns a nonzero -error code, the possibilities for which are defined in @file{regex.h}. -@xref{POSIX Regular Expression Compiling}, and @ref{POSIX Matching}, for -what these codes mean. 
To get an error string corresponding to these -codes, you can use: - -@findex regerror -@example -size_t -regerror (int @var{errcode}, - const regex_t *@var{preg}, - char *@var{errbuf}, - size_t @var{errbuf_size}) -@end example - -@noindent -@var{errcode} is an error code, @var{preg} is the address of the pattern -buffer which provoked the error, @var{errbuf} is the error buffer, and -@var{errbuf_size} is @var{errbuf}'s size. - -@code{regerror} returns the size in bytes of the error string -corresponding to @var{errcode} (including its terminating null). If -@var{errbuf} and @var{errbuf_size} are nonzero, it also returns in -@var{errbuf} the first @math{@var{errbuf_size} - 1} characters of the -error string, followed by a null. -@var{errbuf_size} must be a nonnegative number less than or equal to the -size in bytes of @var{errbuf}. - -You can call @code{regerror} with a null @var{errbuf} and a zero -@var{errbuf_size} to determine how large @var{errbuf} need be to -accommodate @code{regerror}'s error string. - -@node Using Byte Offsets, Freeing POSIX Pattern Buffers, Reporting Errors, POSIX Regex Functions -@subsection Using Byte Offsets - -In @sc{posix}, variables of type @code{regmatch_t} hold information -analogous to, but not identical to, @sc{gnu}'s registers (@pxref{Using -Registers}). To get information about registers in @sc{posix}, pass to -@code{regexec} a nonzero @var{pmatch} of type @code{regmatch_t}, i.e., -the address of a structure of this type, defined in -@file{regex.h}: - -@tindex regmatch_t -@example -typedef struct -@{ - regoff_t rm_so; - regoff_t rm_eo; -@} regmatch_t; -@end example - -When reading in @ref{Using Registers}, about how the matching function -stores the information into the registers, substitute @var{pmatch} for -@var{regs}, @code{@w{@var{pmatch}[@var{i}].}rm_so} for -@code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{pmatch}[@var{i}].}rm_eo} for -@code{@w{@var{regs}->}end[@var{i}]}. 
- -@node Freeing POSIX Pattern Buffers, , Using Byte Offsets, POSIX Regex Functions -@subsection Freeing POSIX Pattern Buffers - -To free any allocated fields of a pattern buffer, use: - -@findex regfree -@example -void -regfree (regex_t *@var{preg}) -@end example - -@noindent -@var{preg} is the pattern buffer whose allocated fields you want freed. -@code{regfree} also sets @var{preg}'s @code{allocated} and @code{used} -fields to zero. After freeing a pattern buffer, you need to again -compile a regular expression in it (@pxref{POSIX Regular Expression -Compiling}) before passing it to the matching function (@pxref{POSIX -Matching}). - - -@node BSD Regex Functions, , POSIX Regex Functions, Programming with Regex -@section BSD Regex Functions - -If you're writing code that has to be Berkeley @sc{unix} compatible, -you'll need to use these functions whose interfaces are the same as those -in Berkeley @sc{unix}. - -@menu -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () -@end menu - -@node BSD Regular Expression Compiling, BSD Searching, , BSD Regex Functions -@subsection BSD Regular Expression Compiling - -With Berkeley @sc{unix}, you can only search for a given regular -expression; you can't match one. To search for it, you must first -compile it. Before you compile it, you must indicate the regular -expression syntax you want it compiled according to by setting the -variable @code{re_syntax_options} (declared in @file{regex.h}) to some -syntax (@pxref{Regular Expression Syntax}). - -To compile a regular expression use: - -@findex re_comp -@example -char * -re_comp (char *@var{regex}) -@end example - -@noindent -@var{regex} is the address of a null-terminated regular expression. -@code{re_comp} uses an internal pattern buffer, so you can use only the -most recently compiled pattern buffer. 
This means that if you want to -use a given regular expression that you've already compiled---but it -isn't the latest one you've compiled---you'll have to recompile it. If -you call @code{re_comp} with the null string (@emph{not} the empty -string) as the argument, it doesn't change the contents of the pattern -buffer. - -If @code{re_comp} successfully compiles the regular expression, it -returns zero. If it can't compile the regular expression, it returns -an error string. @code{re_comp}'s error messages are identical to those -of @code{re_compile_pattern} (@pxref{GNU Regular Expression -Compiling}). - -@node BSD Searching, , BSD Regular Expression Compiling, BSD Regex Functions -@subsection BSD Searching - -Searching the Berkeley @sc{unix} way means searching in a string -starting at its first character and trying successive positions within -it to find a match. Once you've compiled a pattern using @code{re_comp} -(@pxref{BSD Regular Expression Compiling}), you can ask Regex -to search for that pattern in a string using: - -@findex re_exec -@example -int -re_exec (char *@var{string}) -@end example - -@noindent -@var{string} is the address of the null-terminated string in which you -want to search. - -@code{re_exec} returns either 1 for success or 0 for failure. It -automatically uses a @sc{gnu} fastmap (@pxref{Searching with Fastmaps}). - - -@node Copying, Index, Programming with Regex, Top -@appendix GNU GENERAL PUBLIC LICENSE -@center Version 2, June 1991 - -@display -Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc. -675 Mass Ave, Cambridge, MA 02139, USA - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -@end display - -@unnumberedsec Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. 
By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software---to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. 
If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - -@iftex -@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end iftex -@ifinfo -@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end ifinfo - -@enumerate -@item -This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The ``Program'', below, -refers to any such program or work, and a ``work based on the Program'' -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term ``modification''.) Each licensee is addressed as ``you''. - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. 
- -@item -You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - -@item -You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - -@enumerate a -@item -You must cause the modified files to carry prominent notices -stating that you changed the files and the date of any change. - -@item -You must cause any work that you distribute or publish, that in -whole or in part contains or is derived from the Program or any -part thereof, to be licensed as a whole at no charge to all third -parties under the terms of this License. - -@item -If the modified program normally reads commands interactively -when run, you must cause it, when started running for such -interactive use in the most ordinary way, to print or display an -announcement including an appropriate copyright notice and a -notice that there is no warranty (or else, saying that you provide -a warranty) and that users may redistribute the program under -these conditions, and telling the user how to view a copy of this -License. (Exception: if the Program itself is interactive but -does not normally print such an announcement, your work based on -the Program is not required to print an announcement.) -@end enumerate - -These requirements apply to the modified work as a whole. 
If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - -@item -You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - -@enumerate a -@item -Accompany it with the complete corresponding machine-readable -source code, which must be distributed under the terms of Sections -1 and 2 above on a medium customarily used for software interchange; or, - -@item -Accompany it with a written offer, valid for at least three -years, to give any third party, for a charge no more than your -cost of physically performing source distribution, a complete -machine-readable copy of the corresponding source code, to be -distributed under the terms of Sections 1 and 2 above on a medium -customarily used for software interchange; or, - -@item -Accompany it with the information you received as to the offer -to distribute corresponding source code. 
(This alternative is -allowed only for noncommercial distribution and only if you -received the program in object code or executable form with such -an offer, in accord with Subsection b above.) -@end enumerate - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - -@item -You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - -@item -You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. 
Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - -@item -Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - -@item -If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. 
- -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - -@item -If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - -@item -The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and ``any -later version'', you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. 
- -@item -If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - -@iftex -@heading NO WARRANTY -@end iftex -@ifinfo -@center NO WARRANTY -@end ifinfo - -@item -BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - -@item -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. 
-@end enumerate - -@iftex -@heading END OF TERMS AND CONDITIONS -@end iftex -@ifinfo -@center END OF TERMS AND CONDITIONS -@end ifinfo - -@page -@unnumberedsec Appendix: How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the ``copyright'' line and a pointer to where the full notice is found. - -@smallexample -@var{one line to give the program's name and a brief idea of what it does.} -Copyright (C) 19@var{yy} @var{name of author} - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -@end smallexample - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - -@smallexample -Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author} -Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 
-This is free software, and you are welcome to redistribute it -under certain conditions; type `show c' for details. -@end smallexample - -The hypothetical commands @samp{show w} and @samp{show c} should show -the appropriate parts of the General Public License. Of course, the -commands you use may be called something other than @samp{show w} and -@samp{show c}; they could even be mouse-clicks or menu items---whatever -suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a ``copyright disclaimer'' for the program, if -necessary. Here is a sample; alter the names: - -@example -Yoyodyne, Inc., hereby disclaims all copyright interest in the program -`Gnomovision' (which makes passes at compilers) written by James Hacker. - -@var{signature of Ty Coon}, 1 April 1989 -Ty Coon, President of Vice -@end example - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. - - -@node Index, , Copying, Top -@unnumbered Index - -@printindex cp - -@contents - -@bye diff --git a/gnu/libregex/regex.c b/gnu/libregex/regex.c deleted file mode 100644 index 8169880d7e4f..000000000000 --- a/gnu/libregex/regex.c +++ /dev/null @@ -1,4948 +0,0 @@ -/* Extended regular expression matching and search library, - version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for - internationalization features.) - - Copyright (C) 1993 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* AIX requires this to be the first thing in the file. */ -#if defined (_AIX) && !defined (REGEX_MALLOC) - #pragma alloca -#endif - -#define _GNU_SOURCE - -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -#include <sys/types.h> - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* The `emacs' switch turns on certain matching commands - that make sense only in Emacs. */ -#ifdef emacs - -#include "lisp.h" -#include "buffer.h" -#include "syntax.h" - -/* Emacs uses `NULL' as a predicate. */ -#undef NULL - -#else /* not emacs */ - -/* We used to test for `BSTRING' here, but only GCC and Emacs define - `BSTRING', as far as I know, and neither of them use this code. */ -#if HAVE_STRING_H || STDC_HEADERS -#include <string.h> -#ifndef bcmp -#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) -#endif -#ifndef bcopy -#define bcopy(s, d, n) memcpy ((d), (s), (n)) -#endif -#ifndef bzero -#define bzero(s, n) memset ((s), 0, (n)) -#endif -#else -#include <strings.h> -#endif - -#ifdef STDC_HEADERS -#include <stdlib.h> -#else -char *malloc (); -char *realloc (); -#endif - - -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -#ifndef Sword -#define Sword 1 -#endif - -#ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -#else /* not SYNTAX_TABLE */ - -/* How many characters in the character set. 
*/ -#define CHAR_SET_SIZE 256 - -static char re_syntax_table[CHAR_SET_SIZE]; - -static void -init_syntax_once () -{ - register int c; - static int done = 0; - - if (done) - return; - - bzero (re_syntax_table, sizeof re_syntax_table); - - for (c = 'a'; c <= 'z'; c++) - re_syntax_table[c] = Sword; - - for (c = 'A'; c <= 'Z'; c++) - re_syntax_table[c] = Sword; - - for (c = '0'; c <= '9'; c++) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Sword; - - done = 1; -} - -#endif /* not SYNTAX_TABLE */ - -#define SYNTAX(c) re_syntax_table[c] - -#endif /* not emacs */ - -/* Get the interface, including the syntax bits. */ -#include "regex.h" - -/* isalpha etc. are used for the character classes. */ -#include <ctype.h> - -#ifndef isascii -#define isascii(c) 1 -#endif - -#ifdef isblank -#define ISBLANK(c) (isascii (c) && isblank (c)) -#else -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -#define ISGRAPH(c) (isascii (c) && isgraph (c)) -#else -#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) -#endif - -#define ISPRINT(c) (isascii (c) && isprint (c)) -#define ISDIGIT(c) (isascii (c) && isdigit (c)) -#define ISALNUM(c) (isascii (c) && isalnum (c)) -#define ISALPHA(c) (isascii (c) && isalpha (c)) -#define ISCNTRL(c) (isascii (c) && iscntrl (c)) -#define ISLOWER(c) (isascii (c) && islower (c)) -#define ISPUNCT(c) (isascii (c) && ispunct (c)) -#define ISSPACE(c) (isascii (c) && isspace (c)) -#define ISUPPER(c) (isascii (c) && isupper (c)) -#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) - -#ifndef NULL -#define NULL 0 -#endif - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else /* not __STDC__ */ -/* As in Harbison and Steele. 
*/ -#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -#endif - -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in - re_search* or re_match* could cause memory leaks when C-g is used in - Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - - Because we sometimes use alloca, some routines have to be macros, - not functions -- `alloca'-allocated space disappears at the end of the - function it is called in. */ - -#ifdef REGEX_MALLOC - -#define REGEX_ALLOCATE malloc -#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) - -#else /* not REGEX_MALLOC */ - -/* Emacs already defines alloca, sometimes. */ -#ifndef alloca - -/* Make alloca work the best possible way. */ -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not __GNUC__ */ -#if HAVE_ALLOCA_H -#include <alloca.h> -#else /* not __GNUC__ or HAVE_ALLOCA_H */ -#ifndef _AIX /* Already did AIX, up at the top. */ -char *alloca (); -#endif /* not _AIX */ -#endif /* not HAVE_ALLOCA_H */ -#endif /* not __GNUC__ */ - -#endif /* not alloca */ - -#define REGEX_ALLOCATE alloca - -/* Assumes a `char *destination' variable. */ -#define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = (char *) alloca (nsize), \ - bcopy (source, destination, osize), \ - destination) - -#endif /* not REGEX_MALLOC */ - - -/* True if `size1' is non-NULL and PTR is pointing anywhere inside - `string1' or just past its end. This works if PTR is NULL, which is - a good thing. */ -#define FIRST_STRING_P(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* (Re)Allocate N items of type T using malloc, or fail. 
*/ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) - -#define BYTEWIDTH 8 /* In bits. */ - -#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; -#define false 0 -#define true 1 - -/* These are the command codes that appear in compiled regular - expressions. Some opcodes are followed by argument bytes. A - command code can specify any interpretation whatsoever for its - arguments. Zero bytes may appear in the compiled regular expression. - - The value of `exactn' is needed in search.c (search_buffer) in Emacs. - So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of - `exactn' we use here must also be 1. */ - -typedef enum -{ - no_op = 0, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn = 1, - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. */ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) 
*/ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, - - /* Fail unless at end of line. */ - endline, - - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ - begbuf, - - /* Analogously, for end of buffer/string. */ - endbuf, - - /* Followed by two byte relative address to which to jump. */ - jump, - - /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. 
A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - dummy_failure_jump, - - /* Push a dummy failure point and continue. Used at the end of - alternatives. */ - push_dummy_failure, - - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - succeed_n, - - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - jump_n, - - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - set_number_at, - - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ - - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ - - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ - -#ifdef emacs - ,before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ - - /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, - - /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -#endif /* emacs */ -} re_opcode_t; - -/* Common operations on the compiled pattern. */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ - -#define STORE_NUMBER(destination, number) \ - do { \ - (destination)[0] = (number) & 0377; \ - (destination)[1] = (number) >> 8; \ - } while (0) - -/* Same as STORE_NUMBER, except increment DESTINATION to - the byte after where the number is stored. Therefore, DESTINATION - must be an lvalue. 
*/ - -#define STORE_NUMBER_AND_INCR(destination, number) \ - do { \ - STORE_NUMBER (destination, number); \ - (destination) += 2; \ - } while (0) - -/* Put into DESTINATION a number stored in two contiguous bytes starting - at SOURCE. */ - -#define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ - } while (0) - -#ifdef DEBUG -static void -extract_number (dest, source) - int *dest; - unsigned char *source; -{ - int temp = SIGN_EXTEND_CHAR (*(source + 1)); - *dest = *source & 0377; - *dest += temp << 8; -} - -#ifndef EXTRACT_MACROS /* To debug the macros. */ -#undef EXTRACT_NUMBER -#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. - SOURCE must be an lvalue. */ - -#define EXTRACT_NUMBER_AND_INCR(destination, source) \ - do { \ - EXTRACT_NUMBER (destination, source); \ - (source) += 2; \ - } while (0) - -#ifdef DEBUG -static void -extract_number_and_incr (destination, source) - int *destination; - unsigned char **source; -{ - extract_number (destination, *source); - *source += 2; -} - -#ifndef EXTRACT_MACROS -#undef EXTRACT_NUMBER_AND_INCR -#define EXTRACT_NUMBER_AND_INCR(dest, src) \ - extract_number_and_incr (&dest, &src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. */ - -#ifdef DEBUG - -/* We use standard I/O for debugging. */ -#include <stdio.h> - -/* It is useful to test things that ``must'' be true when debugging. 
*/ -#include <assert.h> - -static int debug = 0; - -#define DEBUG_STATEMENT(e) e -#define DEBUG_PRINT1(x) if (debug) printf (x) -#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) print_partial_compiled_pattern (s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) print_double_string (w, s1, sz1, s2, sz2) - - -extern void printchar (); - -/* Print the fastmap in human-readable form. */ - -void -print_fastmap (fastmap) - char *fastmap; -{ - unsigned was_a_range = 0; - unsigned i = 0; - - while (i < (1 << BYTEWIDTH)) - { - if (fastmap[i++]) - { - was_a_range = 0; - printchar (i - 1); - while (i < (1 << BYTEWIDTH) && fastmap[i]) - { - was_a_range = 1; - i++; - } - if (was_a_range) - { - printf ("-"); - printchar (i - 1); - } - } - } - putchar ('\n'); -} - - -/* Print a compiled pattern string in human-readable form, starting at - the START pointer into it and ending just before the pointer END. */ - -void -print_partial_compiled_pattern (start, end) - unsigned char *start; - unsigned char *end; -{ - int mcnt, mcnt2; - unsigned char *p = start; - unsigned char *pend = end; - - if (start == NULL) - { - printf ("(null)\n"); - return; - } - - /* Loop over pattern commands. 
*/ - while (p < pend) - { - switch ((re_opcode_t) *p++) - { - case no_op: - printf ("/no_op"); - break; - - case exactn: - mcnt = *p++; - printf ("/exactn/%d", mcnt); - do - { - putchar ('/'); - printchar (*p++); - } - while (--mcnt); - break; - - case start_memory: - mcnt = *p++; - printf ("/start_memory/%d/%d", mcnt, *p++); - break; - - case stop_memory: - mcnt = *p++; - printf ("/stop_memory/%d/%d", mcnt, *p++); - break; - - case duplicate: - printf ("/duplicate/%d", *p++); - break; - - case anychar: - printf ("/anychar"); - break; - - case charset: - case charset_not: - { - register int c; - - printf ("/charset%s", - (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); - - assert (p + *p < pend); - - for (c = 0; c < *p; c++) - { - unsigned bit; - unsigned char map_byte = p[1 + c]; - - putchar ('/'); - - for (bit = 0; bit < BYTEWIDTH; bit++) - if (map_byte & (1 << bit)) - printchar (c * BYTEWIDTH + bit); - } - p += 1 + *p; - break; - } - - case begline: - printf ("/begline"); - break; - - case endline: - printf ("/endline"); - break; - - case on_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_jump/0/%d", mcnt); - break; - - case on_failure_keep_string_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_keep_string_jump/0/%d", mcnt); - break; - - case dummy_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/dummy_failure_jump/0/%d", mcnt); - break; - - case push_dummy_failure: - printf ("/push_dummy_failure"); - break; - - case maybe_pop_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/maybe_pop_jump/0/%d", mcnt); - break; - - case pop_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/pop_failure_jump/0/%d", mcnt); - break; - - case jump_past_alt: - extract_number_and_incr (&mcnt, &p); - printf ("/jump_past_alt/0/%d", mcnt); - break; - - case jump: - extract_number_and_incr (&mcnt, &p); - printf ("/jump/0/%d", mcnt); - break; - - case succeed_n: - extract_number_and_incr (&mcnt, 
&p); - extract_number_and_incr (&mcnt2, &p); - printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2); - break; - - case jump_n: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2); - break; - - case set_number_at: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2); - break; - - case wordbound: - printf ("/wordbound"); - break; - - case notwordbound: - printf ("/notwordbound"); - break; - - case wordbeg: - printf ("/wordbeg"); - break; - - case wordend: - printf ("/wordend"); - -#ifdef emacs - case before_dot: - printf ("/before_dot"); - break; - - case at_dot: - printf ("/at_dot"); - break; - - case after_dot: - printf ("/after_dot"); - break; - - case syntaxspec: - printf ("/syntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; - - case notsyntaxspec: - printf ("/notsyntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; -#endif /* emacs */ - - case wordchar: - printf ("/wordchar"); - break; - - case notwordchar: - printf ("/notwordchar"); - break; - - case begbuf: - printf ("/begbuf"); - break; - - case endbuf: - printf ("/endbuf"); - break; - - default: - printf ("?%d", *(p-1)); - } - } - printf ("/\n"); -} - - -void -print_compiled_pattern (bufp) - struct re_pattern_buffer *bufp; -{ - unsigned char *buffer = bufp->buffer; - - print_partial_compiled_pattern (buffer, buffer + bufp->used); - printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); - - if (bufp->fastmap_accurate && bufp->fastmap) - { - printf ("fastmap: "); - print_fastmap (bufp->fastmap); - } - - printf ("re_nsub: %d\t", bufp->re_nsub); - printf ("regs_alloc: %d\t", bufp->regs_allocated); - printf ("can_be_null: %d\t", bufp->can_be_null); - printf ("newline_anchor: %d\n", bufp->newline_anchor); - printf ("no_sub: %d\t", bufp->no_sub); - printf ("not_bol: %d\t", bufp->not_bol); - printf ("not_eol: %d\t", bufp->not_eol); - 
printf ("syntax: %d\n", bufp->syntax); - /* Perhaps we should print the translate table? */ -} - - -void -print_double_string (where, string1, size1, string2, size2) - const char *where; - const char *string1; - const char *string2; - int size1; - int size2; -{ - unsigned this_char; - - if (where == NULL) - printf ("(null)"); - else - { - if (FIRST_STRING_P (where)) - { - for (this_char = where - string1; this_char < size1; this_char++) - printchar (string1[this_char]); - - where = string2; - } - - for (this_char = where - string2; this_char < size2; this_char++) - printchar (string2[this_char]); - } -} - -#else /* not DEBUG */ - -#undef assert -#define assert(e) - -#define DEBUG_STATEMENT(e) -#define DEBUG_PRINT1(x) -#define DEBUG_PRINT2(x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) - -#endif /* not DEBUG */ - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. 
*/ - -static const char *re_error_msg[] = - { NULL, /* REG_NOERROR */ - "No match", /* REG_NOMATCH */ - "Invalid regular expression", /* REG_BADPAT */ - "Invalid collation character", /* REG_ECOLLATE */ - "Invalid character class name", /* REG_ECTYPE */ - "Trailing backslash", /* REG_EESCAPE */ - "Invalid back reference", /* REG_ESUBREG */ - "Unmatched [ or [^", /* REG_EBRACK */ - "Unmatched ( or \\(", /* REG_EPAREN */ - "Unmatched \\{", /* REG_EBRACE */ - "Invalid content of \\{\\}", /* REG_BADBR */ - "Invalid range end", /* REG_ERANGE */ - "Memory exhausted", /* REG_ESPACE */ - "Invalid preceding regular expression", /* REG_BADRPT */ - "Premature end of regular expression", /* REG_EEND */ - "Regular expression too big", /* REG_ESIZE */ - "Unmatched ) or \\)", /* REG_ERPAREN */ - }; - -/* Subroutine declarations and macros for regex_compile. */ - -static void store_op1 (), store_op2 (); -static void insert_op1 (), insert_op2 (); -static boolean at_begline_loc_p (), at_endline_loc_p (); -static boolean group_in_compile_stack (); -static reg_errcode_t compile_range (); - -/* Fetch the next character in the uncompiled pattern---translating it - if necessary. Also cast from a signed character in the constant - string passed to us by the user to an unsigned char that we can use - as an array index (in, e.g., `translate'). */ -#define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - if (translate) c = translate[c]; \ - } while (0) - -/* Fetch the next character in the uncompiled pattern, with no - translation. */ -#define PATFETCH_RAW(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - } while (0) - -/* Go backwards one character in the pattern. */ -#define PATUNFETCH p-- - - -/* If `translate' is non-null, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. 
But - when we use a character as a subscript we must make it unsigned. */ -#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) - - -/* Macros for outputting the compiled pattern into `buffer'. */ - -/* If the buffer isn't allocated when it comes in, use this. */ -#define INIT_BUF_SIZE 32 - -/* Make sure we have at least N more bytes of space in buffer. */ -#define GET_BUFFER_SPACE(n) \ - while (b - bufp->buffer + (n) > bufp->allocated) \ - EXTEND_BUFFER () - -/* Make sure we have one more byte of buffer space and then add C to it. */ -#define BUF_PUSH(c) \ - do { \ - GET_BUFFER_SPACE (1); \ - *b++ = (unsigned char) (c); \ - } while (0) - - -/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ -#define BUF_PUSH_2(c1, c2) \ - do { \ - GET_BUFFER_SPACE (2); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - } while (0) - - -/* As with BUF_PUSH_2, except for three bytes. */ -#define BUF_PUSH_3(c1, c2, c3) \ - do { \ - GET_BUFFER_SPACE (3); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - *b++ = (unsigned char) (c3); \ - } while (0) - - -/* Store a jump with opcode OP at LOC to location TO. We store a - relative address offset by the three bytes the jump itself occupies. */ -#define STORE_JUMP(op, loc, to) \ - store_op1 (op, loc, (to) - (loc) - 3) - -/* Likewise, for a two-argument jump. */ -#define STORE_JUMP2(op, loc, to, arg) \ - store_op2 (op, loc, (to) - (loc) - 3, arg) - -/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP(op, loc, to) \ - insert_op1 (op, loc, (to) - (loc) - 3, b) - -/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP2(op, loc, to, arg) \ - insert_op2 (op, loc, (to) - (loc) - 3, arg, b) - - -/* This is not an arbitrary limit: the arguments which represent offsets - into the pattern are two bytes long. 
So if 2^16 bytes turns out to - be too small, many things would have to change. */ -#define MAX_BUF_SIZE (1L << 16) - - -/* Extend the buffer by twice its current size via realloc and - reset the pointers that pointed into the old block to point to the - correct places in the new one. If extending the buffer results in it - being larger than MAX_BUF_SIZE, then flag memory exhausted. */ -#define EXTEND_BUFFER() \ - do { \ - unsigned char *old_buffer = bufp->buffer; \ - if (bufp->allocated == MAX_BUF_SIZE) \ - return REG_ESIZE; \ - bufp->allocated <<= 1; \ - if (bufp->allocated > MAX_BUF_SIZE) \ - bufp->allocated = MAX_BUF_SIZE; \ - bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ - if (bufp->buffer == NULL) \ - return REG_ESPACE; \ - /* If the buffer moved, move all the pointers into it. */ \ - if (old_buffer != bufp->buffer) \ - { \ - b = (b - old_buffer) + bufp->buffer; \ - begalt = (begalt - old_buffer) + bufp->buffer; \ - if (fixup_alt_jump) \ - fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ - if (laststart) \ - laststart = (laststart - old_buffer) + bufp->buffer; \ - if (pending_exact) \ - pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ - } \ - } while (0) - - -/* Since we have one byte reserved for the register number argument to - {start,stop}_memory, the maximum number of groups we can report - things about is what fits in that byte. */ -#define MAX_REGNUM 255 - -/* But patterns can have more than `MAX_REGNUM' registers. We just - ignore the excess. */ -typedef unsigned regnum_t; - - -/* Macros for the compile stack. */ - -/* Since offsets can go either forwards or backwards, this type needs to - be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. 
*/ -typedef int pattern_offset_t; - -typedef struct -{ - pattern_offset_t begalt_offset; - pattern_offset_t fixup_alt_jump; - pattern_offset_t inner_group_offset; - pattern_offset_t laststart_offset; - regnum_t regnum; -} compile_stack_elt_t; - - -typedef struct -{ - compile_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} compile_stack_type; - - -#define INIT_COMPILE_STACK_SIZE 32 - -#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) -#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) - -/* The next available element. */ -#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) - - -/* Set the bit for character C in a list. */ -#define SET_LIST_BIT(c) \ - (b[((unsigned char) (c)) / BYTEWIDTH] \ - |= 1 << (((unsigned char) c) % BYTEWIDTH)) - - -/* Get the next unsigned number in the uncompiled pattern. */ -#define GET_UNSIGNED_NUMBER(num) \ - { if (p != pend) \ - { \ - PATFETCH (c); \ - while (ISDIGIT (c)) \ - { \ - if (num < 0) \ - num = 0; \ - num = num * 10 + c - '0'; \ - if (p == pend) \ - break; \ - PATFETCH (c); \ - } \ - } \ - } - -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ - -#define IS_CHAR_CLASS(string) \ - (STREQ (string, "alpha") || STREQ (string, "upper") \ - || STREQ (string, "lower") || STREQ (string, "digit") \ - || STREQ (string, "alnum") || STREQ (string, "xdigit") \ - || STREQ (string, "space") || STREQ (string, "print") \ - || STREQ (string, "punct") || STREQ (string, "graph") \ - || STREQ (string, "cntrl") || STREQ (string, "blank")) - -/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. - Returns one of error codes defined in `regex.h', or zero for success. - - Assumes the `allocated' (and perhaps `buffer') and `translate' - fields are set in BUFP on entry. 
- - If it succeeds, results are put in BUFP (if it returns an error, the - contents of BUFP are undefined): - `buffer' is the compiled pattern; - `syntax' is set to SYNTAX; - `used' is set to the length of the compiled pattern; - `fastmap_accurate' is zero; - `re_nsub' is the number of subexpressions in PATTERN; - `not_bol' and `not_eol' are zero; - - The `fastmap' and `newline_anchor' fields are neither - examined nor set. */ - -static reg_errcode_t -regex_compile (pattern, size, syntax, bufp) - const char *pattern; - int size; - reg_syntax_t syntax; - struct re_pattern_buffer *bufp; -{ - /* We fetch characters from PATTERN here. Even though PATTERN is - `char *' (i.e., signed), we declare these variables as unsigned, so - they can be reliably used as array indices. */ - register unsigned char c, c1; - - /* A random tempory spot in PATTERN. */ - const char *p1; - - /* Points to the end of the buffer, where we should append. */ - register unsigned char *b; - - /* Keeps track of unclosed groups. */ - compile_stack_type compile_stack; - - /* Points to the current (ending) position in the pattern. */ - const char *p = pattern; - const char *pend = pattern + size; - - /* How to translate the characters in the pattern. */ - char *translate = bufp->translate; - - /* Address of the count-byte of the most recently inserted `exactn' - command. This makes it possible to tell if a new exact-match - character can be added to that command or if the character requires - a new `exactn' command. */ - unsigned char *pending_exact = 0; - - /* Address of start of the most recently finished expression. - This tells, e.g., postfix * where to find the start of its - operand. Reset at the beginning of groups and alternatives. */ - unsigned char *laststart = 0; - - /* Address of beginning of regexp, or inside of last group. */ - unsigned char *begalt; - - /* Place in the uncompiled pattern (i.e., the {) to - which to go back if the interval is invalid. 
*/ - const char *beg_interval; - - /* Address of the place where a forward jump should go to the end of - the containing expression. Each alternative of an `or' -- except the - last -- ends with a forward jump of this sort. */ - unsigned char *fixup_alt_jump = 0; - - /* Counts open-groups as they are encountered. Remembered for the - matching close-group on the compile stack, so the same register - number is put in the stop_memory as the start_memory. */ - regnum_t regnum = 0; - -#ifdef DEBUG - DEBUG_PRINT1 ("\nCompiling pattern: "); - if (debug) - { - unsigned debug_count; - - for (debug_count = 0; debug_count < size; debug_count++) - printchar (pattern[debug_count]); - putchar ('\n'); - } -#endif /* DEBUG */ - - /* Initialize the compile stack. */ - compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); - if (compile_stack.stack == NULL) - return REG_ESPACE; - - compile_stack.size = INIT_COMPILE_STACK_SIZE; - compile_stack.avail = 0; - - /* Initialize the pattern buffer. */ - bufp->syntax = syntax; - bufp->fastmap_accurate = 0; - bufp->not_bol = bufp->not_eol = 0; - - /* Set `used' to zero, so that if we return an error, the pattern - printer (for debugging) will think there's no pattern. We reset it - at the end. */ - bufp->used = 0; - - /* Always count groups, whether or not bufp->no_sub is set. */ - bufp->re_nsub = 0; - -#if !defined (emacs) && !defined (SYNTAX_TABLE) - /* Initialize the syntax table. */ - init_syntax_once (); -#endif - - if (bufp->allocated == 0) - { - if (bufp->buffer) - { /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. */ - RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); - } - else - { /* Caller did not allocate a buffer. Do it for them. 
*/ - bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); - } - if (!bufp->buffer) return REG_ESPACE; - - bufp->allocated = INIT_BUF_SIZE; - } - - begalt = b = bufp->buffer; - - /* Loop through the uncompiled pattern until we're at the end. */ - while (p != pend) - { - PATFETCH (c); - - switch (c) - { - case '^': - { - if ( /* If at start of pattern, it's an operator. */ - p == pattern + 1 - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's come before. */ - || at_begline_loc_p (pattern, p, syntax)) - BUF_PUSH (begline); - else - goto normal_char; - } - break; - - - case '$': - { - if ( /* If at end of pattern, it's an operator. */ - p == pend - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's next. */ - || at_endline_loc_p (p, pend, syntax)) - BUF_PUSH (endline); - else - goto normal_char; - } - break; - - - case '+': - case '?': - if ((syntax & RE_BK_PLUS_QM) - || (syntax & RE_LIMITED_OPS)) - goto normal_char; - handle_plus: - case '*': - /* If there is no previous pattern... */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; - else if (!(syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - } - - { - /* Are we optimizing this jump? */ - boolean keep_string_p = false; - - /* 1 means zero (many) matches is allowed. */ - char zero_times_ok = 0, many_times_ok = 0; - - /* If there is a sequence of repetition chars, collapse it - down to just one (the right one). We can't combine - interval operators with these because of, e.g., `a{2}*', - which should only match an even number of `a's. 
*/ - - for (;;) - { - zero_times_ok |= c != '+'; - many_times_ok |= c != '?'; - - if (p == pend) - break; - - PATFETCH (c); - - if (c == '*' - || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) - ; - - else if (syntax & RE_BK_PLUS_QM && c == '\\') - { - if (p == pend) return REG_EESCAPE; - - PATFETCH (c1); - if (!(c1 == '+' || c1 == '?')) - { - PATUNFETCH; - PATUNFETCH; - break; - } - - c = c1; - } - else - { - PATUNFETCH; - break; - } - - /* If we get here, we found another repeat character. */ - } - - /* Star, etc. applied to an empty pattern is equivalent - to an empty pattern. */ - if (!laststart) - break; - - /* Now we know whether or not zero matches is allowed - and also whether or not two or more matches is allowed. */ - if (many_times_ok) - { /* More than one repetition is allowed, so put in at the - end a backward relative jump from `b' to before the next - jump we're going to put in below (which jumps from - laststart to after this jump). - - But if we are at the `*' in the exact sequence `.*\n', - insert an unconditional jump backwards to the ., - instead of the beginning of the loop. This way we only - push a failure point once, instead of every time - through the loop. */ - assert (p - 1 > pattern); - - /* Allocate the space for the jump. */ - GET_BUFFER_SPACE (3); - - /* We know we are not at the first character of the pattern, - because laststart was nonzero. And we've already - incremented `p', by the way, to be the character after - the `*'. Do we have to do something analogous here - for null bytes, because of RE_DOT_NOT_NULL? */ - if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') - && zero_times_ok - && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') - && !(syntax & RE_DOT_NEWLINE)) - { /* We have .*\n. */ - STORE_JUMP (jump, b, laststart); - keep_string_p = true; - } - else - /* Anything else. */ - STORE_JUMP (maybe_pop_jump, b, laststart - 3); - - /* We've added more stuff to the buffer. 
*/ - b += 3; - } - - /* On failure, jump from laststart to b + 3, which will be the - end of the buffer after this jump is inserted. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump - : on_failure_jump, - laststart, b + 3); - pending_exact = 0; - b += 3; - - if (!zero_times_ok) - { - /* At least one repetition is required, so insert a - `dummy_failure_jump' before the initial - `on_failure_jump' instruction of the loop. This - effects a skip over that instruction the first time - we hit that loop. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); - b += 3; - } - } - break; - - - case '.': - laststart = b; - BUF_PUSH (anychar); - break; - - - case '[': - { - boolean had_char_class = false; - - if (p == pend) return REG_EBRACK; - - /* Ensure that we have enough space to push a charset: the - opcode, the length count, and the bitset; 34 bytes in all. */ - GET_BUFFER_SPACE (34); - - laststart = b; - - /* We test `*p == '^' twice, instead of using an if - statement, so we only need one BUF_PUSH. */ - BUF_PUSH (*p == '^' ? charset_not : charset); - if (*p == '^') - p++; - - /* Remember the first position in the bracket expression. */ - p1 = p; - - /* Push the number of bytes in the bitmap. */ - BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); - - /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); - - /* charset_not matches newline according to a syntax bit. */ - if ((re_opcode_t) b[-2] == charset_not - && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) - SET_LIST_BIT ('\n'); - - /* Read in characters and ranges, setting map bits. */ - for (;;) - { - if (p == pend) return REG_EBRACK; - - PATFETCH (c); - - /* \ might escape characters inside [...] and [^...]. */ - if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') - { - if (p == pend) return REG_EESCAPE; - - PATFETCH (c1); - SET_LIST_BIT (c1); - continue; - } - - /* Could be the end of the bracket expression. 
If it's - not (i.e., when the bracket expression is `[]' so - far), the ']' character bit gets set way below. */ - if (c == ']' && p != p1 + 1) - break; - - /* Look ahead to see if it's a range when the last thing - was a character class. */ - if (had_char_class && c == '-' && *p != ']') - return REG_ERANGE; - - /* Look ahead to see if it's a range when the last thing - was a character: if this is a hyphen not at the - beginning or the end of a list, then it's the range - operator. */ - if (c == '-' - && !(p - 2 >= pattern && p[-2] == '[') - && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') - && *p != ']') - { - reg_errcode_t ret - = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; - } - - else if (p[0] == '-' && p[1] != ']') - { /* This handles ranges made up of characters only. */ - reg_errcode_t ret; - - /* Move past the `-'. */ - PATFETCH (c1); - - ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; - } - - /* See if we're at the beginning of a possible character - class. */ - - else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') - { /* Leave room for the null. */ - char str[CHAR_CLASS_MAX_LENGTH + 1]; - - PATFETCH (c); - c1 = 0; - - /* If pattern is `[[:'. */ - if (p == pend) return REG_EBRACK; - - for (;;) - { - PATFETCH (c); - if (c == ':' || c == ']' || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) - break; - str[c1++] = c; - } - str[c1] = '\0'; - - /* If isn't a word bracketed by `[:' and:`]': - undo the ending character, the letters, and leave - the leading `:' and `[' (but set bits for them). 
*/ - if (c == ':' && *p == ']') - { - int ch; - boolean is_alnum = STREQ (str, "alnum"); - boolean is_alpha = STREQ (str, "alpha"); - boolean is_blank = STREQ (str, "blank"); - boolean is_cntrl = STREQ (str, "cntrl"); - boolean is_digit = STREQ (str, "digit"); - boolean is_graph = STREQ (str, "graph"); - boolean is_lower = STREQ (str, "lower"); - boolean is_print = STREQ (str, "print"); - boolean is_punct = STREQ (str, "punct"); - boolean is_space = STREQ (str, "space"); - boolean is_upper = STREQ (str, "upper"); - boolean is_xdigit = STREQ (str, "xdigit"); - - if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; - - /* Throw away the ] at the end of the character - class. */ - PATFETCH (c); - - if (p == pend) return REG_EBRACK; - - for (ch = 0; ch < 1 << BYTEWIDTH; ch++) - { - if ( (is_alnum && ISALNUM (ch)) - || (is_alpha && ISALPHA (ch)) - || (is_blank && ISBLANK (ch)) - || (is_cntrl && ISCNTRL (ch)) - || (is_digit && ISDIGIT (ch)) - || (is_graph && ISGRAPH (ch)) - || (is_lower && ISLOWER (ch)) - || (is_print && ISPRINT (ch)) - || (is_punct && ISPUNCT (ch)) - || (is_space && ISSPACE (ch)) - || (is_upper && ISUPPER (ch)) - || (is_xdigit && ISXDIGIT (ch))) - SET_LIST_BIT (ch); - } - had_char_class = true; - } - else - { - c1++; - while (c1--) - PATUNFETCH; - SET_LIST_BIT ('['); - SET_LIST_BIT (':'); - had_char_class = false; - } - } - else - { - had_char_class = false; - SET_LIST_BIT (c); - } - } - - /* Discard any (non)matching list bytes that are all 0 at the - end of the map. Decrease the map-length byte too. 
*/ - while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) - b[-1]--; - b += b[-1]; - } - break; - - - case '(': - if (syntax & RE_NO_BK_PARENS) - goto handle_open; - else - goto normal_char; - - - case ')': - if (syntax & RE_NO_BK_PARENS) - goto handle_close; - else - goto normal_char; - - - case '\n': - if (syntax & RE_NEWLINE_ALT) - goto handle_alt; - else - goto normal_char; - - - case '|': - if (syntax & RE_NO_BK_VBAR) - goto handle_alt; - else - goto normal_char; - - - case '{': - if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) - goto handle_interval; - else - goto normal_char; - - - case '\\': - if (p == pend) return REG_EESCAPE; - - /* Do not translate the character after the \, so that we can - distinguish, e.g., \B from \b, even if we normally would - translate, e.g., B to b. */ - PATFETCH_RAW (c); - - switch (c) - { - case '(': - if (syntax & RE_NO_BK_PARENS) - goto normal_backslash; - - handle_open: - bufp->re_nsub++; - regnum++; - - if (COMPILE_STACK_FULL) - { - RETALLOC (compile_stack.stack, compile_stack.size << 1, - compile_stack_elt_t); - if (compile_stack.stack == NULL) return REG_ESPACE; - - compile_stack.size <<= 1; - } - - /* These are the values to restore when we hit end of this - group. They are all relative offsets, so that if the - whole pattern moves because of realloc, they will still - be valid. */ - COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; - COMPILE_STACK_TOP.fixup_alt_jump - = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; - COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; - COMPILE_STACK_TOP.regnum = regnum; - - /* We will eventually replace the 0 with the number of - groups inner to this one. But do not push a - start_memory for groups beyond the last one we can - represent in the compiled pattern. 
*/ - if (regnum <= MAX_REGNUM) - { - COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; - BUF_PUSH_3 (start_memory, regnum, 0); - } - - compile_stack.avail++; - - fixup_alt_jump = 0; - laststart = 0; - begalt = b; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - break; - - - case ')': - if (syntax & RE_NO_BK_PARENS) goto normal_backslash; - - if (COMPILE_STACK_EMPTY) - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_backslash; - else - return REG_ERPAREN; - - handle_close: - if (fixup_alt_jump) - { /* Push a dummy failure point at the end of the - alternative for a possible future - `pop_failure_jump' to pop. See comments at - `push_dummy_failure' in `re_match_2'. */ - BUF_PUSH (push_dummy_failure); - - /* We allocated space for this jump when we assigned - to `fixup_alt_jump', in the `handle_alt' case below. */ - STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); - } - - /* See similar code for backslashed left paren above. */ - if (COMPILE_STACK_EMPTY) - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - else - return REG_ERPAREN; - - /* Since we just checked for an empty stack above, this - ``can't happen''. */ - assert (compile_stack.avail != 0); - { - /* We don't just want to restore into `regnum', because - later groups should continue to be numbered higher, - as in `(ab)c(de)' -- the second group is #2. */ - regnum_t this_group_regnum; - - compile_stack.avail--; - begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; - fixup_alt_jump - = COMPILE_STACK_TOP.fixup_alt_jump - ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 - : 0; - laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; - this_group_regnum = COMPILE_STACK_TOP.regnum; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. 
*/ - pending_exact = 0; - - /* We're at the end of the group, so now we know how many - groups were inside this one. */ - if (this_group_regnum <= MAX_REGNUM) - { - unsigned char *inner_group_loc - = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; - - *inner_group_loc = regnum - this_group_regnum; - BUF_PUSH_3 (stop_memory, this_group_regnum, - regnum - this_group_regnum); - } - } - break; - - - case '|': /* `\|'. */ - if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) - goto normal_backslash; - handle_alt: - if (syntax & RE_LIMITED_OPS) - goto normal_char; - - /* Insert before the previous alternative a jump which - jumps to this alternative if the former fails. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (on_failure_jump, begalt, b + 6); - pending_exact = 0; - b += 3; - - /* The alternative before this one has a jump after it - which gets executed if it gets matched. Adjust that - jump so it will jump to this alternative's analogous - jump (put in below, which in turn will jump to the next - (if any) alternative's such jump, etc.). The last such - jump jumps to the correct final destination. A picture: - _____ _____ - | | | | - | v | v - a | b | c - - If we are at `b', then fixup_alt_jump right now points to a - three-byte space after `a'. We'll put in the jump, set - fixup_alt_jump to right after `b', and leave behind three - bytes which we'll fill in when we get to after `c'. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - /* Mark and leave space for a jump after this alternative, - to be filled in later either by next alternative or - when know we're at the end of a series of alternatives. */ - fixup_alt_jump = b; - GET_BUFFER_SPACE (3); - b += 3; - - laststart = 0; - begalt = b; - break; - - - case '{': - /* If \{ is a literal. */ - if (!(syntax & RE_INTERVALS) - /* If we're at `\{' and it's not the open-interval - operator. 
*/ - || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - || (p - 2 == pattern && p == pend)) - goto normal_backslash; - - handle_interval: - { - /* If got here, then the syntax allows intervals. */ - - /* At least (most) this many matches must be made. */ - int lower_bound = -1, upper_bound = -1; - - beg_interval = p - 1; - - if (p == pend) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_EBRACE; - } - - GET_UNSIGNED_NUMBER (lower_bound); - - if (c == ',') - { - GET_UNSIGNED_NUMBER (upper_bound); - if (upper_bound < 0) upper_bound = RE_DUP_MAX; - } - else - /* Interval such as `{1}' => match exactly once. */ - upper_bound = lower_bound; - - if (lower_bound < 0 || upper_bound > RE_DUP_MAX - || lower_bound > upper_bound) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_BADBR; - } - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (c != '\\') return REG_EBRACE; - - PATFETCH (c); - } - - if (c != '}') - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_BADBR; - } - - /* We just parsed a valid interval. */ - - /* If it's invalid to have no preceding re. */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; - else if (syntax & RE_CONTEXT_INDEP_OPS) - laststart = b; - else - goto unfetch_interval; - } - - /* If the upper bound is zero, don't want to succeed at - all; jump from `laststart' to `b + 3', which will be - the end of the buffer after we insert the jump. */ - if (upper_bound == 0) - { - GET_BUFFER_SPACE (3); - INSERT_JUMP (jump, laststart, b + 3); - b += 3; - } - - /* Otherwise, we have a nontrivial interval. When - we're all done, the pattern will look like: - set_number_at <jump count> <upper bound> - set_number_at <succeed_n count> <lower bound> - succeed_n <after jump addr> <succed_n count> - <body of loop> - jump_n <succeed_n addr> <jump count> - (The upper bound and `jump_n' are omitted if - `upper_bound' is 1, though.) 
*/ - else - { /* If the upper bound is > 1, we need to insert - more at the end of the loop. */ - unsigned nbytes = 10 + (upper_bound > 1) * 10; - - GET_BUFFER_SPACE (nbytes); - - /* Initialize lower bound of the `succeed_n', even - though it will be set during matching by its - attendant `set_number_at' (inserted next), - because `re_compile_fastmap' needs to know. - Jump to the `jump_n' we might insert below. */ - INSERT_JUMP2 (succeed_n, laststart, - b + 5 + (upper_bound > 1) * 5, - lower_bound); - b += 5; - - /* Code to initialize the lower bound. Insert - before the `succeed_n'. The `5' is the last two - bytes of this `set_number_at', plus 3 bytes of - the following `succeed_n'. */ - insert_op2 (set_number_at, laststart, 5, lower_bound, b); - b += 5; - - if (upper_bound > 1) - { /* More than one repetition is allowed, so - append a backward jump to the `succeed_n' - that starts this interval. - - When we've reached this during matching, - we'll have matched the interval once, so - jump back only `upper_bound - 1' times. */ - STORE_JUMP2 (jump_n, b, laststart + 5, - upper_bound - 1); - b += 5; - - /* The location we want to set is the second - parameter of the `jump_n'; that is `b-2' as - an absolute address. `laststart' will be - the `set_number_at' we're about to insert; - `laststart+3' the number to set, the source - for the relative address. But we are - inserting into the middle of the pattern -- - so everything is getting moved up by 5. - Conclusion: (b - 2) - (laststart + 3) + 5, - i.e., b - laststart. - - We insert this at the beginning of the loop - so that if we fail during matching, we'll - reinitialize the bounds. */ - insert_op2 (set_number_at, laststart, b - laststart, - upper_bound - 1, b); - b += 5; - } - } - pending_exact = 0; - beg_interval = NULL; - } - break; - - unfetch_interval: - /* If an invalid interval, match the characters as literals. 
*/ - assert (beg_interval); - p = beg_interval; - beg_interval = NULL; - - /* normal_char and normal_backslash need `c'. */ - PATFETCH (c); - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (p > pattern && p[-1] == '\\') - goto normal_backslash; - } - goto normal_char; - -#ifdef emacs - /* There is no way to specify the before_dot and after_dot - operators. rms says this is ok. --karl */ - case '=': - BUF_PUSH (at_dot); - break; - - case 's': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); - break; - - case 'S': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); - break; -#endif /* emacs */ - - - case 'w': - laststart = b; - BUF_PUSH (wordchar); - break; - - - case 'W': - laststart = b; - BUF_PUSH (notwordchar); - break; - - - case '<': - BUF_PUSH (wordbeg); - break; - - case '>': - BUF_PUSH (wordend); - break; - - case 'b': - BUF_PUSH (wordbound); - break; - - case 'B': - BUF_PUSH (notwordbound); - break; - - case '`': - BUF_PUSH (begbuf); - break; - - case '\'': - BUF_PUSH (endbuf); - break; - - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (syntax & RE_NO_BK_REFS) - goto normal_char; - - c1 = c - '0'; - - if (c1 > regnum) - return REG_ESUBREG; - - /* Can't back reference to a subexpression if inside of it. */ - if (group_in_compile_stack (compile_stack, c1)) - goto normal_char; - - laststart = b; - BUF_PUSH_2 (duplicate, c1); - break; - - - case '+': - case '?': - if (syntax & RE_BK_PLUS_QM) - goto handle_plus; - else - goto normal_backslash; - - default: - normal_backslash: - /* You might think it would be useful for \ to mean - not to translate; but if we don't translate it - it will never match anything. */ - c = TRANSLATE (c); - goto normal_char; - } - break; - - - default: - /* Expects the character in `c'. */ - normal_char: - /* If no exactn currently being built. */ - if (!pending_exact - - /* If last exactn not at current position. 
*/ - || pending_exact + *pending_exact + 1 != b - - /* We have only one byte following the exactn for the count. */ - || *pending_exact == (1 << BYTEWIDTH) - 1 - - /* If followed by a repetition operator. */ - || *p == '*' || *p == '^' - || ((syntax & RE_BK_PLUS_QM) - ? *p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) - || ((syntax & RE_INTERVALS) - && ((syntax & RE_NO_BK_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) - { - /* Start building a new exactn. */ - - laststart = b; - - BUF_PUSH_2 (exactn, 0); - pending_exact = b - 1; - } - - BUF_PUSH (c); - (*pending_exact)++; - break; - } /* switch (c) */ - } /* while p != pend */ - - - /* Through the pattern now. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - if (!COMPILE_STACK_EMPTY) - return REG_EPAREN; - - free (compile_stack.stack); - - /* We have succeeded; set the length of the buffer. */ - bufp->used = b - bufp->buffer; - -#ifdef DEBUG - if (debug) - { - DEBUG_PRINT1 ("\nCompiled pattern: "); - print_compiled_pattern (bufp); - } -#endif /* DEBUG */ - - return REG_NOERROR; -} /* regex_compile */ - -/* Subroutines for `regex_compile'. */ - -/* Store OP at LOC followed by two-byte integer parameter ARG. */ - -static void -store_op1 (op, loc, arg) - re_opcode_t op; - unsigned char *loc; - int arg; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg); -} - - -/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -store_op2 (op, loc, arg1, arg2) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg1); - STORE_NUMBER (loc + 3, arg2); -} - - -/* Copy the bytes from LOC to END to open up three bytes of space at LOC - for OP followed by two-byte integer parameter ARG. 
*/ - -static void -insert_op1 (op, loc, arg, end) - re_opcode_t op; - unsigned char *loc; - int arg; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 3; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op1 (op, loc, arg); -} - - -/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -insert_op2 (op, loc, arg1, arg2, end) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 5; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op2 (op, loc, arg1, arg2); -} - - -/* P points to just after a ^ in PATTERN. Return true if that ^ comes - after an alternative or a begin-subexpression. We assume there is at - least one character before the ^. */ - -static boolean -at_begline_loc_p (pattern, p, syntax) - const char *pattern, *p; - reg_syntax_t syntax; -{ - const char *prev = p - 2; - boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; - - return - /* After a subexpression? */ - (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) - /* After an alternative? */ - || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); -} - - -/* The dual of at_begline_loc_p. This one is for $. We assume there is - at least one character after the $, i.e., `P < PEND'. */ - -static boolean -at_endline_loc_p (p, pend, syntax) - const char *p, *pend; - int syntax; -{ - const char *next = p; - boolean next_backslash = *next == '\\'; - const char *next_next = p + 1 < pend ? p + 1 : NULL; - - return - /* Before a subexpression? */ - (syntax & RE_NO_BK_PARENS ? *next == ')' - : next_backslash && next_next && *next_next == ')') - /* Before an alternative? */ - || (syntax & RE_NO_BK_VBAR ? *next == '|' - : next_backslash && next_next && *next_next == '|'); -} - - -/* Returns true if REGNUM is in one of COMPILE_STACK's elements and - false if it's not. 
*/ - -static boolean -group_in_compile_stack (compile_stack, regnum) - compile_stack_type compile_stack; - regnum_t regnum; -{ - int this_element; - - for (this_element = compile_stack.avail - 1; - this_element >= 0; - this_element--) - if (compile_stack.stack[this_element].regnum == regnum) - return true; - - return false; -} - - -/* Read the ending character of a range (in a bracket expression) from the - uncompiled pattern *P_PTR (which ends at PEND). We assume the - starting character is in `P[-2]'. (`P[-1]' is the character `-'.) - Then we set the translation of all bits between the starting and - ending characters (inclusive) in the compiled pattern B. - - Return an error code. - - We use these short variable names so we can use the same macros as - `regex_compile' itself. */ - -static reg_errcode_t -compile_range (p_ptr, pend, translate, syntax, b) - const char **p_ptr, *pend; - char *translate; - reg_syntax_t syntax; - unsigned char *b; -{ - unsigned this_char; - - const char *p = *p_ptr; - int range_start, range_end; - - if (p == pend) - return REG_ERANGE; - - /* Even though the pattern is a signed `char *', we need to fetch - with unsigned char *'s; if the high bit of the pattern character - is set, the range endpoints will be negative if we fetch using a - signed char *. - - We also want to fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. */ - range_start = ((unsigned char *) p)[-2]; - range_end = ((unsigned char *) p)[0]; - - /* Have to increment the pointer into the pattern string, so the - caller isn't still at the ending character. */ - (*p_ptr)++; - - /* If the start is after the end, the range is empty. */ - if (range_start > range_end) - return syntax & RE_NO_EMPTY_RANGES ? 
REG_ERANGE : REG_NOERROR; - - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- the range is inclusive, so if `range_end' == 0xff - (assuming 8-bit characters), we would otherwise go into an infinite - loop, since all characters <= 0xff. */ - for (this_char = range_start; this_char <= range_end; this_char++) - { - SET_LIST_BIT (TRANSLATE (this_char)); - } - - return REG_NOERROR; -} - -/* Failure stack declarations and macros; both re_compile_fastmap and - re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE. */ - - -/* Number of failure points for which to initially allocate space - when matching. If this number is exceeded, we allocate more - space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -#define INIT_FAILURE_ALLOC 5 -#endif - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. - This is a variable only so users of regex can assign to it; we never - change it ourselves. */ -int re_max_failures = 2000; - -typedef const unsigned char *fail_stack_elt_t; - -typedef struct -{ - fail_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} fail_stack_type; - -#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) -#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) - - -/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */ - -#define INIT_FAIL_STACK() \ - do { \ - fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ - } while (0) - - -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 
- - Return 1 if succeeds, and 0 if either ran out of memory - allocating space for it or it was already too large. - - REGEX_REALLOCATE requires `destination' be declared. */ - -#define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ - ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ - REGEX_REALLOCATE ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size <<= 1, \ - 1))) - - -/* Push PATTERN_OP on FAIL_STACK. - - Return 1 if was able to do so and 0 if ran out of memory allocating - space to do so. */ -#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ - ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (fail_stack)) \ - ? 0 \ - : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ - 1)) - -/* This pushes an item onto the failure stack. Must be a four-byte - value. Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_ITEM(item) \ - fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item - -/* The complement operation. Assumes `fail_stack' is nonempty. */ -#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] - -/* Used to omit pushing failure point id's when we're not debugging. */ -#ifdef DEBUG -#define DEBUG_PUSH PUSH_FAILURE_ITEM -#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () -#else -#define DEBUG_PUSH(item) -#define DEBUG_POP(item_addr) -#endif - - -/* Push the information about the state we will need - if we ever fail back to it. - - Requires variables fail_stack, regstart, regend, reg_info, and - num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be - declared. - - Does `return FAILURE_CODE' if runs out of memory. 
*/ - -#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ - do { \ - char *destination; \ - /* Must be int, so when we don't save any registers, the arithmetic \ - of 0 + -1 isn't done as unsigned. */ \ - int this_reg; \ - \ - DEBUG_STATEMENT (failure_id++); \ - DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ - DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ - \ - DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ - \ - /* Ensure we have enough space allocated for what we will push. */ \ - while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ - { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ - return failure_code; \ - \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ - (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ - } \ - \ - /* Push the info, starting with the registers. 
*/ \ - DEBUG_PRINT1 ("\n"); \ - \ - for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ - this_reg++) \ - { \ - DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ - DEBUG_STATEMENT (num_regs_pushed++); \ - \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - PUSH_FAILURE_ITEM (regstart[this_reg]); \ - \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - PUSH_FAILURE_ITEM (regend[this_reg]); \ - \ - DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ - DEBUG_PRINT2 (" match_null=%d", \ - REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ - MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched=%d", \ - EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ - PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ - } \ - \ - DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ - PUSH_FAILURE_ITEM (lowest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ - PUSH_FAILURE_ITEM (highest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ - PUSH_FAILURE_ITEM (pattern_place); \ - \ - DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ - size2); \ - DEBUG_PRINT1 ("'\n"); \ - PUSH_FAILURE_ITEM (string_place); \ - \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ - DEBUG_PUSH (failure_id); \ - } while (0) - -/* This is the number of items that are pushed and popped on the stack - for each register. */ -#define NUM_REG_ITEMS 3 - -/* Individual items aside from the registers. */ -#ifdef DEBUG -#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ -#else -#define NUM_NONREG_ITEMS 4 -#endif - -/* We push at most this many items on the stack. 
*/ -#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) - -/* We actually push this many items. */ -#define NUM_FAILURE_ITEMS \ - ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ - + NUM_NONREG_ITEMS) - -/* How many items can still be added to the stack without overflowing it. */ -#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) - - -/* Pops what PUSH_FAIL_STACK pushes. - - We restore into the parameters, all of which should be lvalues: - STR -- the saved data position. - PAT -- the saved pattern position. - LOW_REG, HIGH_REG -- the highest and lowest active registers. - REGSTART, REGEND -- arrays of string positions. - REG_INFO -- array of information about each subexpression. - - Also assumes the variables `fail_stack' and (if debugging), `bufp', - `pend', `string1', `size1', `string2', and `size2'. */ - -#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ -{ \ - DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ - int this_reg; \ - const unsigned char *string_temp; \ - \ - assert (!FAIL_STACK_EMPTY ()); \ - \ - /* Remove failure points and point to how many regs pushed. */ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ - DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ - \ - assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ - \ - DEBUG_POP (&failure_id); \ - DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ - \ - /* If the saved string location is NULL, it came from an \ - on_failure_keep_string_jump opcode, and we want to throw away the \ - saved NULL, thus retaining our current position in the string. 
*/ \ - string_temp = POP_FAILURE_ITEM (); \ - if (string_temp != NULL) \ - str = (const char *) string_temp; \ - \ - DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ - \ - pat = (unsigned char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ - \ - /* Restore register info. */ \ - high_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ - \ - low_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ - \ - for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ - { \ - DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ - \ - reg_info[this_reg].word = POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ - \ - regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - \ - regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - } \ - \ - DEBUG_STATEMENT (nfailure_points_popped++); \ -} /* POP_FAILURE_POINT */ - -/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in - BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible - characters can start a string that matches the pattern. This fastmap - is used by re_search to skip quickly over impossible starting points. - - The caller must supply the address of a (1 << BYTEWIDTH)-byte data - area as BUFP->fastmap. - - We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in - the pattern buffer. - - Returns 0 if we succeed, -2 if an internal error. */ - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - int j, k; - fail_stack_type fail_stack; -#ifndef REGEX_MALLOC - char *destination; -#endif - /* We don't push any register information onto the failure stack. 
*/ - unsigned num_regs = 0; - - register char *fastmap = bufp->fastmap; - unsigned char *pattern = bufp->buffer; - unsigned long size = bufp->used; - const unsigned char *p = pattern; - register unsigned char *pend = pattern + size; - - /* Assume that each path through the pattern can be null until - proven otherwise. We set this false at the bottom of switch - statement, to which we get only if a particular path doesn't - match the empty string. */ - boolean path_can_be_null = true; - - /* We aren't doing a `succeed_n' to begin with. */ - boolean succeed_n_p = false; - - assert (fastmap != NULL && p != NULL); - - INIT_FAIL_STACK (); - bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ - bufp->fastmap_accurate = 1; /* It will be when we're done. */ - bufp->can_be_null = 0; - - while (p != pend || !FAIL_STACK_EMPTY ()) - { - if (p == pend) - { - bufp->can_be_null |= path_can_be_null; - - /* Reset for next path. */ - path_can_be_null = true; - - p = fail_stack.stack[--fail_stack.avail]; - } - - /* We should never be about to go beyond the end of the pattern. */ - assert (p < pend); - -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - - /* I guess the idea here is to simply not bother with a fastmap - if a backreference is used, since it's too hard to figure out - the fastmap for the corresponding group. Setting - `can_be_null' stops `re_search_2' from using the fastmap, so - that is all we do. */ - case duplicate: - bufp->can_be_null = 1; - return 0; - - - /* Following are the cases which match a character. These end - with `break'. */ - - case exactn: - fastmap[p[1]] = 1; - break; - - - case charset: - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) - fastmap[j] = 1; - break; - - - case charset_not: - /* Chars beyond end of map must be allowed. 
*/ - for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - fastmap[j] = 1; - break; - - - case wordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == Sword) - fastmap[j] = 1; - break; - - - case notwordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != Sword) - fastmap[j] = 1; - break; - - - case anychar: - /* `.' matches anything ... */ - for (j = 0; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - /* ... except perhaps newline. */ - if (!(bufp->syntax & RE_DOT_NEWLINE)) - fastmap['\n'] = 0; - - /* Return if we have already set `can_be_null'; if we have, - then the fastmap is irrelevant. Something's wrong here. */ - else if (bufp->can_be_null) - return 0; - - /* Otherwise, have to check alternative paths. */ - break; - - -#ifdef emacs - case syntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - case notsyntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - /* All cases after this match the empty string. These end with - `continue'. */ - - - case before_dot: - case at_dot: - case after_dot: - continue; -#endif /* not emacs */ - - - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbound: - case notwordbound: - case wordbeg: - case wordend: - case push_dummy_failure: - continue; - - - case jump_n: - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case jump_past_alt: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - if (j > 0) - continue; - - /* Jump backward implies we just went through the body of a - loop and matched nothing. Opcode jumped to should be - `on_failure_jump' or `succeed_n'. Just treat it like an - ordinary jump. 
For a * loop, it has pushed its failure - point already; if so, discard that as redundant. */ - if ((re_opcode_t) *p != on_failure_jump - && (re_opcode_t) *p != succeed_n) - continue; - - p++; - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - - /* If what's on the stack is where we are now, pop it. */ - if (!FAIL_STACK_EMPTY () - && fail_stack.stack[fail_stack.avail - 1] == p) - fail_stack.avail--; - - continue; - - - case on_failure_jump: - case on_failure_keep_string_jump: - handle_on_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - - /* For some patterns, e.g., `(a?)?', `p+j' here points to the - end of the pattern. We don't want to push such a point, - since when we restore it above, entering the switch will - increment `p' past the end of the pattern. We don't need - to push such a point since we obviously won't find any more - fastmap entries beyond `pend'. Such a pattern can match - the null string, though. */ - if (p + j < pend) - { - if (!PUSH_PATTERN_OP (p + j, fail_stack)) - return -2; - } - else - bufp->can_be_null = 1; - - if (succeed_n_p) - { - EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ - succeed_n_p = false; - } - - continue; - - - case succeed_n: - /* Get to the number of times to succeed. */ - p += 2; - - /* Increment p past the n for when k != 0. */ - EXTRACT_NUMBER_AND_INCR (k, p); - if (k == 0) - { - p -= 4; - succeed_n_p = true; /* Spaghetti code alert. */ - goto handle_on_failure_jump; - } - continue; - - - case set_number_at: - p += 4; - continue; - - - case start_memory: - case stop_memory: - p += 2; - continue; - - - default: - abort (); /* We have listed all the cases. */ - } /* switch *p++ */ - - /* Getting here means we have found the possible starting - characters for one path of the pattern -- and that the empty - string does not match. We need not follow this path further. - Instead, look at the next alternative (remembered on the - stack), or quit if no more. The test at the top of the loop - does these things. 
*/ - path_can_be_null = false; - p = pend; - } /* while p */ - - /* Set `can_be_null' for the last path (also the first path, if the - pattern is empty). */ - bufp->can_be_null |= path_can_be_null; - return 0; -} /* re_compile_fastmap */ - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = (regoff_t) 0; - } -} - -/* Searching routines. */ - -/* Like re_search_2, below, but only one string is specified, and - doesn't let you say where to stop matching. */ - -int -re_search (bufp, string, size, startpos, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, startpos, range; - struct re_registers *regs; -{ - return re_search_2 (bufp, NULL, 0, string, size, startpos, range, - regs, size); -} - - -/* Using the compiled pattern in BUFP->buffer, first tries to match the - virtual concatenation of STRING1 and STRING2, starting first at index - STARTPOS, then at STARTPOS + 1, and so on. - - STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. - - RANGE is how far to scan while trying to match. 
RANGE = 0 means try - only at STARTPOS; in general, the last start tried is STARTPOS + - RANGE. - - In REGS, return the indices of the virtual concatenation of STRING1 - and STRING2 that matched the entire BUFP->buffer and its contained - subexpressions. - - Do not consider matching one past the index STOP in the virtual - concatenation of STRING1 and STRING2. - - We return either the position in the strings at which the match was - found, -1 if no match, or -2 if error (such as failure - stack overflow). */ - -int -re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int startpos; - int range; - struct re_registers *regs; - int stop; -{ - int val; - register char *fastmap = bufp->fastmap; - register char *translate = bufp->translate; - int total_size = size1 + size2; - int endpos = startpos + range; - - /* Check for out-of-range STARTPOS. */ - if (startpos < 0 || startpos > total_size) - return -1; - - /* Fix up RANGE if it might eventually take us outside - the virtual concatenation of STRING1 and STRING2. */ - if (endpos < -1) - range = -1 - startpos; - else if (endpos > total_size) - range = total_size - startpos; - - /* If the search isn't to be a backwards one, don't waste time in a - search for a pattern that must be anchored. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) - { - if (startpos > 0) - return -1; - else - range = 1; - } - - /* Update the fastmap now if not correct already. */ - if (fastmap && !bufp->fastmap_accurate) - if (re_compile_fastmap (bufp) == -2) - return -2; - - /* Loop through the string, looking for a place to start matching. */ - for (;;) - { - /* If a fastmap is supplied, skip quickly over characters that - cannot be the start of a match. If the pattern can match the - null string, however, we don't need to skip characters; we want - the first null string. 
*/ - if (fastmap && startpos < total_size && !bufp->can_be_null) - { - if (range > 0) /* Searching forwards. */ - { - register const char *d; - register int lim = 0; - int irange = range; - - if (startpos < size1 && startpos + range >= size1) - lim = range - (size1 - startpos); - - d = (startpos >= size1 ? string2 - size1 : string1) + startpos; - - /* Written out as an if-else to avoid testing `translate' - inside the loop. */ - if (translate) - while (range > lim - && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) - range--; - else - while (range > lim && !fastmap[(unsigned char) *d++]) - range--; - - startpos += irange - range; - } - else /* Searching backwards. */ - { - register char c = (size1 == 0 || startpos >= size1 - ? string2[startpos - size1] - : string1[startpos]); - - if (!fastmap[(unsigned char) TRANSLATE (c)]) - goto advance; - } - } - - /* If can't match the null string, and that's all we have left, fail. */ - if (range >= 0 && startpos == total_size && fastmap - && !bufp->can_be_null) - return -1; - - val = re_match_2 (bufp, string1, size1, string2, size2, - startpos, regs, stop); - if (val >= 0) - return startpos; - - if (val == -2) - return -2; - - advance: - if (!range) - break; - else if (range > 0) - { - range--; - startpos++; - } - else - { - range++; - startpos--; - } - } - return -1; -} /* re_search_2 */ - -/* Declarations and macros for re_match_2. */ - -static int bcmp_translate (); -static boolean alt_match_null_string_p (), - common_op_match_null_string_p (), - group_match_null_string_p (); - -/* Structure for per-register (a.k.a. per-group) information. - This must not be longer than one word, because we push this value - onto the failure stack. Other register information, such as the - starting and ending positions (which are addresses), and the list of - inner groups (which is a bits list) are maintained in separate - variables. 
- - We are making a (strictly speaking) nonportable assumption here: that - the compiler will pack our bit fields into something that fits into - the type of `word', i.e., is something that fits into one item on the - failure stack. */ -typedef union -{ - fail_stack_elt_t word; - struct - { - /* This field is one if this group can match the empty string, - zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -#define MATCH_NULL_UNSET_VALUE 3 - unsigned match_null_string_p : 2; - unsigned is_active : 1; - unsigned matched_something : 1; - unsigned ever_matched_something : 1; - } bits; -} register_info_type; - -#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -#define IS_ACTIVE(R) ((R).bits.is_active) -#define MATCHED_SOMETHING(R) ((R).bits.matched_something) -#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) - - -/* Call this when have matched a real character; it sets `matched' flags - for the subexpressions which we are currently inside. Also records - that those subexprs have matched. */ -#define SET_REGS_MATCHED() \ - do \ - { \ - unsigned r; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ - } \ - while (0) - - -/* This converts PTR, a pointer into one of the search strings `string1' - and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) - -/* Registers are set to a sentinel when they haven't yet matched. */ -#define REG_UNSET_VALUE ((char *) -1) -#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - - -/* Macros for dealing with the split strings in re_match_2. */ - -#define MATCHING_IN_FIRST_STRING (dend == end_match_1) - -/* Call before fetching a character with *d. This switches over to - string2 if necessary. 
*/ -#define PREFETCH() \ - while (d == dend) \ - { \ - /* End of string2 => fail. */ \ - if (dend == end_match_2) \ - goto fail; \ - /* End of string1 => advance to string2. */ \ - d = string2; \ - dend = end_match_2; \ - } - - -/* Test if at very beginning or at very end of the virtual concatenation - of `string1' and `string2'. If only one string, it's `string2'. */ -#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) -#define AT_STRINGS_END(d) ((d) == end2) - - -/* Test if D points to a character which is word-constituent. We have - two special cases to check for: if past the end of string1, look at - the first character in string2; and if before the beginning of - string2, look at the last character in string1. */ -#define WORDCHAR_P(d) \ - (SYNTAX ((d) == end1 ? *string2 \ - : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ - == Sword) - -/* Test if the character before D and the one at D differ with respect - to being word-constituent. */ -#define AT_WORD_BOUNDARY(d) \ - (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ - || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - - -/* Free everything we malloc. */ -#ifdef REGEX_MALLOC -#define FREE_VAR(var) if (var) free (var); var = NULL -#define FREE_VARIABLES() \ - do { \ - FREE_VAR (fail_stack.stack); \ - FREE_VAR (regstart); \ - FREE_VAR (regend); \ - FREE_VAR (old_regstart); \ - FREE_VAR (old_regend); \ - FREE_VAR (best_regstart); \ - FREE_VAR (best_regend); \ - FREE_VAR (reg_info); \ - FREE_VAR (reg_dummy); \ - FREE_VAR (reg_info_dummy); \ - } while (0) -#else /* not REGEX_MALLOC */ -/* Some MIPS systems (at least) want this to free alloca'd storage. */ -#define FREE_VARIABLES() alloca (0) -#endif /* not REGEX_MALLOC */ - - -/* These values must meet several constraints. They must not be valid - register values; since we have a limit of 255 registers (because - we use only one byte in the pattern for the register number), we can - use numbers larger than 255. 
They must differ by 1, because of - NUM_FAILURE_ITEMS above. And the value for the lowest register must - be larger than the value for the highest register, so we do not try - to actually save any registers when none are active. */ -#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) -#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) - -/* Matching routines. */ - -#ifndef emacs /* Emacs never uses this. */ -/* re_match is like re_match_2 except it takes only a single string. */ - -int -re_match (bufp, string, size, pos, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, pos; - struct re_registers *regs; - { - return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); -} -#endif /* not emacs */ - - -/* re_match_2 matches the compiled pattern in BUFP against the - the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 - and SIZE2, respectively). We start matching at POS, and stop - matching at STOP. - - If REGS is non-null and the `no_sub' field of BUFP is nonzero, we - store offsets for the substring each group matched in REGS. See the - documentation for exactly how many groups we fill. - - We return -1 if no match, -2 if an internal error (such as the - failure stack overflowing). Otherwise, we return the length of the - matched substring. */ - -int -re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - /* General temporaries. */ - int mcnt; - unsigned char *p1; - - /* Just past the end of the corresponding string. */ - const char *end1, *end2; - - /* Pointers into string1 and string2, just past the last characters in - each to consider matching. */ - const char *end_match_1, *end_match_2; - - /* Where we are in the data, and the end of the current string. */ - const char *d, *dend; - - /* Where we are in the pattern, and the end of the pattern. 
*/ - unsigned char *p = bufp->buffer; - register unsigned char *pend = p + bufp->used; - - /* We use this to map every character in the string. */ - char *translate = bufp->translate; - - /* Failure point stack. Each place that can handle a failure further - down the line pushes a failure point on this stack. It consists of - restart, regend, and reg_info for all registers corresponding to - the subexpressions we're currently inside, plus the number of such - registers, and, finally, two char *'s. The first char * is where - to resume scanning the pattern; the second one is where to resume - scanning the strings. If the latter is zero, the failure point is - a ``dummy''; if a failure happens and the failure point is a dummy, - it gets discarded and the next next one is tried. */ - fail_stack_type fail_stack; -#ifdef DEBUG - static unsigned failure_id = 0; - unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; -#endif - - /* We fill all the registers internally, independent of what we - return, for use in backreferences. The number here includes - an element for register zero. */ - unsigned num_regs = bufp->re_nsub + 1; - - /* The currently active registers. */ - unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; - unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; - - /* Information on the contents of registers. These are pointers into - the input strings; they record just what was matched (on this - attempt) by a subexpression part of the pattern, that is, the - regnum-th regstart pointer points to where in the pattern we began - matching and the regnum-th regend points to right after where we - stopped matching the regnum-th subexpression. (The zeroth register - keeps track of what the whole pattern matches.) 
*/ - const char **regstart, **regend; - - /* If a group that's operated upon by a repetition operator fails to - match anything, then the register for its start will need to be - restored because it will have been set to wherever in the string we - are when we last see its open-group operator. Similarly for a - register's end. */ - const char **old_regstart, **old_regend; - - /* The is_active field of reg_info helps us keep track of which (possibly - nested) subexpressions we are currently in. The matched_something - field of reg_info[reg_num] helps us tell whether or not we have - matched any of the pattern so far this time through the reg_num-th - subexpression. These two fields get reset each time through any - loop their register is in. */ - register_info_type *reg_info; - - /* The following record the register info as found in the above - variables when we find a match better than any we've seen before. - This happens as we backtrack through the failure points, which in - turn happens only if we have not yet matched the entire string. */ - unsigned best_regs_set = false; - const char **best_regstart, **best_regend; - - /* Logically, this is `best_regend[0]'. But we don't want to have to - allocate space for that if we're not allocating space for anything - else (see below). Also, we never need info about register 0 for - any of the other register vectors, and it seems rather a kludge to - treat `best_regend' differently than the rest. So we keep track of - the end of the best match so far in a separate variable. We - initialize this to NULL so that when we backtrack the first time - and need to test it, it's not garbage. */ - const char *match_end = NULL; - - /* Used when we pop values we don't care about. */ - const char **reg_dummy; - register_info_type *reg_info_dummy; - -#ifdef DEBUG - /* Counts the total number of registers pushed. 
*/ - unsigned num_regs_pushed = 0; -#endif - - DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); - - INIT_FAIL_STACK (); - - /* Do not bother to initialize all the register variables if there are - no groups in the pattern, as it takes a fair amount of time. If - there are groups, we include space for register 0 (the whole - pattern), even though we never use it, since it simplifies the - array indexing. We should fix this. */ - if (bufp->re_nsub) - { - regstart = REGEX_TALLOC (num_regs, const char *); - regend = REGEX_TALLOC (num_regs, const char *); - old_regstart = REGEX_TALLOC (num_regs, const char *); - old_regend = REGEX_TALLOC (num_regs, const char *); - best_regstart = REGEX_TALLOC (num_regs, const char *); - best_regend = REGEX_TALLOC (num_regs, const char *); - reg_info = REGEX_TALLOC (num_regs, register_info_type); - reg_dummy = REGEX_TALLOC (num_regs, const char *); - reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); - - if (!(regstart && regend && old_regstart && old_regend && reg_info - && best_regstart && best_regend && reg_dummy && reg_info_dummy)) - { - FREE_VARIABLES (); - return -2; - } - } -#ifdef REGEX_MALLOC - else - { - /* We must initialize all our variables to NULL, so that - `FREE_VARIABLES' doesn't try to free them. */ - regstart = regend = old_regstart = old_regend = best_regstart - = best_regend = reg_dummy = NULL; - reg_info = reg_info_dummy = (register_info_type *) NULL; - } -#endif /* REGEX_MALLOC */ - - /* The starting position is bogus. */ - if (pos < 0 || pos > size1 + size2) - { - FREE_VARIABLES (); - return -1; - } - - /* Initialize subexpression text positions to -1 to mark ones that no - start_memory/stop_memory has been seen for. Also initialize the - register information struct. 
*/ - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = regend[mcnt] - = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; - - REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; - IS_ACTIVE (reg_info[mcnt]) = 0; - MATCHED_SOMETHING (reg_info[mcnt]) = 0; - EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; - } - - /* We move `string1' into `string2' if the latter's empty -- but not if - `string1' is null. */ - if (size2 == 0 && string1 != NULL) - { - string2 = string1; - size2 = size1; - string1 = 0; - size1 = 0; - } - end1 = string1 + size1; - end2 = string2 + size2; - - /* Compute where to stop matching, within the two strings. */ - if (stop <= size1) - { - end_match_1 = string1 + stop; - end_match_2 = string2; - } - else - { - end_match_1 = end1; - end_match_2 = string2 + stop - size1; - } - - /* `p' scans through the pattern as `d' scans through the data. - `dend' is the end of the input string that `d' points within. `d' - is advanced into the following input string whenever necessary, but - this happens before fetching; therefore, at the beginning of the - loop, `d' can be pointing at the end of a string, but it cannot - equal `string2'. */ - if (size1 > 0 && pos <= size1) - { - d = string1 + pos; - dend = end_match_1; - } - else - { - d = string2 + pos - size1; - dend = end_match_2; - } - - DEBUG_PRINT1 ("The compiled pattern is: "); - DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); - DEBUG_PRINT1 ("The string to match is: `"); - DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); - DEBUG_PRINT1 ("'\n"); - - /* This loops over pattern commands. It exits by returning from the - function if the match is complete, or it drops through if the match - fails at this starting point in the input data. */ - for (;;) - { - DEBUG_PRINT2 ("\n0x%x: ", p); - - if (p == pend) - { /* End of pattern means we might have succeeded. */ - DEBUG_PRINT1 ("end of pattern ... 
"); - - /* If we haven't matched the entire string, and we want the - longest match, try backtracking. */ - if (d != end_match_2) - { - DEBUG_PRINT1 ("backtracking.\n"); - - if (!FAIL_STACK_EMPTY ()) - { /* More failure points to try. */ - boolean same_str_p = (FIRST_STRING_P (match_end) - == MATCHING_IN_FIRST_STRING); - - /* If exceeds best match so far, save it. */ - if (!best_regs_set - || (same_str_p && d > match_end) - || (!same_str_p && !MATCHING_IN_FIRST_STRING)) - { - best_regs_set = true; - match_end = d; - - DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - best_regstart[mcnt] = regstart[mcnt]; - best_regend[mcnt] = regend[mcnt]; - } - } - goto fail; - } - - /* If no failure points, don't restore garbage. */ - else if (best_regs_set) - { - restore_best_regs: - /* Restore best match. It may happen that `dend == - end_match_1' while the restored d is in string2. - For example, the pattern `x.*y.*z' against the - strings `x-' and `y-z-', if the two strings are - not consecutive in memory. */ - DEBUG_PRINT1 ("Restoring best registers.\n"); - - d = match_end; - dend = ((d >= string1 && d <= end1) - ? end_match_1 : end_match_2); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = best_regstart[mcnt]; - regend[mcnt] = best_regend[mcnt]; - } - } - } /* d != end_match_2 */ - - DEBUG_PRINT1 ("Accepting match.\n"); - - /* If caller wants register contents data back, do it. */ - if (regs && !bufp->no_sub) - { - /* Have the register data arrays been allocated? */ - if (bufp->regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. We need one - extra element beyond `num_regs' for the `-1' marker - GNU code uses. 
*/ - regs->num_regs = MAX (RE_NREGS, num_regs + 1); - regs->start = TALLOC (regs->num_regs, regoff_t); - regs->end = TALLOC (regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - bufp->regs_allocated = REGS_REALLOCATE; - } - else if (bufp->regs_allocated == REGS_REALLOCATE) - { /* Yes. If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. */ - if (regs->num_regs < num_regs + 1) - { - regs->num_regs = num_regs + 1; - RETALLOC (regs->start, regs->num_regs, regoff_t); - RETALLOC (regs->end, regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - } - } - else - assert (bufp->regs_allocated == REGS_FIXED); - - /* Convert the pointer data in `regstart' and `regend' to - indices. Register zero has to be set differently, - since we haven't kept track of any info for it. */ - if (regs->num_regs > 0) - { - regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 - : d - string2 + size1); - } - - /* Go through the first `min (num_regs, regs->num_regs)' - registers, since that is all we initialized. */ - for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) - { - if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) - regs->start[mcnt] = regs->end[mcnt] = -1; - else - { - regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); - } - } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. 
*/ - for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; - } /* regs && !bufp->no_sub */ - - FREE_VARIABLES (); - DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", - nfailure_points_pushed, nfailure_points_popped, - nfailure_points_pushed - nfailure_points_popped); - DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); - - mcnt = d - pos - (MATCHING_IN_FIRST_STRING - ? string1 - : string2 - size1); - - DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); - - return mcnt; - } - - /* Otherwise match next pattern command. */ -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - /* Ignore these. Used to ignore the n of succeed_n's which - currently have n == 0. */ - case no_op: - DEBUG_PRINT1 ("EXECUTING no_op.\n"); - break; - - - /* Match the next n pattern characters exactly. The following - byte in the pattern defines n, and the n bytes after that - are the characters to match. */ - case exactn: - mcnt = *p++; - DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); - - /* This is written out as an if-else so we don't waste time - testing `translate' inside the loop. */ - if (translate) - { - do - { - PREFETCH (); - if (translate[(unsigned char) *d++] != (char) *p++) - goto fail; - } - while (--mcnt); - } - else - { - do - { - PREFETCH (); - if (*d++ != (char) *p++) goto fail; - } - while (--mcnt); - } - SET_REGS_MATCHED (); - break; - - - /* Match any character except possibly a newline or a null. 
*/ - case anychar: - DEBUG_PRINT1 ("EXECUTING anychar.\n"); - - PREFETCH (); - - if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') - || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) - goto fail; - - SET_REGS_MATCHED (); - DEBUG_PRINT2 (" Matched `%d'.\n", *d); - d++; - break; - - - case charset: - case charset_not: - { - register unsigned char c; - boolean not = (re_opcode_t) *(p - 1) == charset_not; - - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); - - PREFETCH (); - c = TRANSLATE (*d); /* The character to match. */ - - /* Cast to `unsigned' instead of `unsigned char' in case the - bit list is a full 32 bytes long. */ - if (c < (unsigned) (*p * BYTEWIDTH) - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - p += 1 + *p; - - if (!not) goto fail; - - SET_REGS_MATCHED (); - d++; - break; - } - - - /* The beginning of a group is represented by start_memory. - The arguments are the register number in the next byte, and the - number of groups inner to this one in the next. The text - matched within the group is recorded (in the internal - registers data structure) under the register number. */ - case start_memory: - DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); - - /* Find out if this group can match the empty string. */ - p1 = p; /* To send to group_match_null_string_p. */ - - if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[*p]) - = group_match_null_string_p (&p1, pend, reg_info); - - /* Save the position in the string where we were the last time - we were at this open-group operator in case the group is - operated upon by a repetition operator, e.g., with `(a*)*b' - against `ab'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regstart[*p]) ? 
d : regstart[*p] - : regstart[*p]; - DEBUG_PRINT2 (" old_regstart: %d\n", - POINTER_TO_OFFSET (old_regstart[*p])); - - regstart[*p] = d; - DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); - - IS_ACTIVE (reg_info[*p]) = 1; - MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* This is the new highest active register. */ - highest_active_reg = *p; - - /* If nothing was active before, this is the new lowest active - register. */ - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *p; - - /* Move past the register number and inner group count. */ - p += 2; - break; - - - /* The stop_memory opcode represents the end of a group. Its - arguments are the same as start_memory's: the register - number, and the number of inner groups. */ - case stop_memory: - DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); - - /* We need to save the string position the last time we were at - this close-group operator in case the group is operated - upon by a repetition operator, e.g., with `((a*)*(b*)*)*' - against `aba'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regend[*p]) ? d : regend[*p] - : regend[*p]; - DEBUG_PRINT2 (" old_regend: %d\n", - POINTER_TO_OFFSET (old_regend[*p])); - - regend[*p] = d; - DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); - - /* This register isn't active anymore. */ - IS_ACTIVE (reg_info[*p]) = 0; - - /* If this was the only register active, nothing is active - anymore. */ - if (lowest_active_reg == highest_active_reg) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - { /* We must scan for the new highest active register, since - it isn't necessarily one less than now: consider - (a(b)c(d(e)f)g). When group 3 ends, after the f), the - new highest active register is 1. 
*/ - unsigned char r = *p - 1; - while (r > 0 && !IS_ACTIVE (reg_info[r])) - r--; - - /* If we end up at register zero, that means that we saved - the registers as the result of an `on_failure_jump', not - a `start_memory', and we jumped to past the innermost - `stop_memory'. For example, in ((.)*) we save - registers 1 and 2 as a result of the *, but when we pop - back to the second ), we are at the stop_memory 1. - Thus, nothing is active. */ - if (r == 0) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - highest_active_reg = r; - } - - /* If just failed to match something this time around with a - group that's operated on by a repetition operator, try to - force exit from the ``loop'', and restore the register - information for this group that we had before trying this - last match. */ - if ((!MATCHED_SOMETHING (reg_info[*p]) - || (re_opcode_t) p[-3] == start_memory) - && (p + 2) < pend) - { - boolean is_a_jump_n = false; - - p1 = p + 2; - mcnt = 0; - switch ((re_opcode_t) *p1++) - { - case jump_n: - is_a_jump_n = true; - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (is_a_jump_n) - p1 += 2; - break; - - default: - /* do nothing */ ; - } - p1 += mcnt; - - /* If the next operation is a jump backwards in the pattern - to an on_failure_jump right before the start_memory - corresponding to this stop_memory, exit from the loop - by forcing a failure after pushing on the stack the - on_failure_jump's jump in the pattern, and d. */ - if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump - && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) - { - /* If this group ever matched anything, then restore - what its registers were before trying this last - failed match, e.g., with `(a*)*b' against `ab' for - regstart[1], and, e.g., with `((a*)*(b*)*)*' - against `aba' for regend[3]. 
- - Also restore the registers for inner groups for, - e.g., `((a*)(b*))*' against `aba' (register 3 would - otherwise get trashed). */ - - if (EVER_MATCHED_SOMETHING (reg_info[*p])) - { - unsigned r; - - EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* Restore this and inner groups' (if any) registers. */ - for (r = *p; r < *p + *(p + 1); r++) - { - regstart[r] = old_regstart[r]; - - /* xx why this test? */ - if ((int) old_regend[r] >= (int) regstart[r]) - regend[r] = old_regend[r]; - } - } - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - PUSH_FAILURE_POINT (p1 + mcnt, d, -2); - - goto fail; - } - } - - /* Move past the register number and the inner group count. */ - p += 2; - break; - - - /* \<digit> has been turned into a `duplicate' command which is - followed by the numeric value of <digit> as the register number. */ - case duplicate: - { - register const char *d2, *dend2; - int regno = *p++; /* Get which register to match against. */ - DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); - - /* Can't back reference a group which we've never matched. */ - if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) - goto fail; - - /* Where in input to try to start matching. */ - d2 = regstart[regno]; - - /* Where to stop matching; if both the place to start and - the place to stop matching are in the same string, then - set to the place to stop, otherwise, for now have to use - the end of the first string. */ - - dend2 = ((FIRST_STRING_P (regstart[regno]) - == FIRST_STRING_P (regend[regno])) - ? regend[regno] : end_match_1); - for (;;) - { - /* If necessary, advance to next segment in register - contents. */ - while (d2 == dend2) - { - if (dend2 == end_match_2) break; - if (dend2 == regend[regno]) break; - - /* End of string1 => advance to string2. */ - d2 = string2; - dend2 = regend[regno]; - } - /* At end of register contents => success */ - if (d2 == dend2) break; - - /* If necessary, advance to next segment in data. 
*/ - PREFETCH (); - - /* How many characters left in this segment to match. */ - mcnt = dend - d; - - /* Want how many consecutive characters we can match in - one shot, so, if necessary, adjust the count. */ - if (mcnt > dend2 - d2) - mcnt = dend2 - d2; - - /* Compare that many; failure if mismatch, else move - past them. */ - if (translate - ? bcmp_translate (d, d2, mcnt, translate) - : bcmp (d, d2, mcnt)) - goto fail; - d += mcnt, d2 += mcnt; - } - } - break; - - - /* begline matches the empty string at the beginning of the string - (unless `not_bol' is set in `bufp'), and, if - `newline_anchor' is set, after newlines. */ - case begline: - DEBUG_PRINT1 ("EXECUTING begline.\n"); - - if (AT_STRINGS_BEG (d)) - { - if (!bufp->not_bol) break; - } - else if (d[-1] == '\n' && bufp->newline_anchor) - { - break; - } - /* In all other cases, we fail. */ - goto fail; - - - /* endline is the dual of begline. */ - case endline: - DEBUG_PRINT1 ("EXECUTING endline.\n"); - - if (AT_STRINGS_END (d)) - { - if (!bufp->not_eol) break; - } - - /* We have to ``prefetch'' the next character. */ - else if ((d == end1 ? *string2 : *d) == '\n' - && bufp->newline_anchor) - { - break; - } - goto fail; - - - /* Match at the very beginning of the data. */ - case begbuf: - DEBUG_PRINT1 ("EXECUTING begbuf.\n"); - if (AT_STRINGS_BEG (d)) - break; - goto fail; - - - /* Match at the very end of the data. */ - case endbuf: - DEBUG_PRINT1 ("EXECUTING endbuf.\n"); - if (AT_STRINGS_END (d)) - break; - goto fail; - - - /* on_failure_keep_string_jump is used to optimize `.*\n'. It - pushes NULL as the value for the string on the stack. Then - `pop_failure_point' will keep the current value for the - string, instead of restoring it. To see why, consider - matching `foo\nbar' against `.*\n'. The .* matches the foo; - then the . fails against the \n. But the next thing we want - to do is match the \n against the \n; if we restored the - string value, we would be back at the foo. 
- - Because this is used only in specific cases, we don't need to - check all the things that `on_failure_jump' does, to make - sure the right things get saved on the stack. Hence we don't - share its code. The only reason to push anything on the - stack at all is that otherwise we would have to change - `anychar's code to do something besides goto fail in this - case; that seems worse than this. */ - case on_failure_keep_string_jump: - DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); - - PUSH_FAILURE_POINT (p + mcnt, NULL, -2); - break; - - - /* Uses of on_failure_jump: - - Each alternative starts with an on_failure_jump that points - to the beginning of the next alternative. Each alternative - except the last ends with a jump that in effect jumps past - the rest of the alternatives. (They really jump to the - ending jump of the following alternative, because tensioning - these jumps is a hassle.) - - Repeats start with an on_failure_jump that points past both - the repetition text and either the following jump or - pop_failure_jump back to this on_failure_jump. */ - case on_failure_jump: - on_failure: - DEBUG_PRINT1 ("EXECUTING on_failure_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); - - /* If this on_failure_jump comes right before a group (i.e., - the original * applied to a group), save the information - for that group and all inner ones, so that if we fail back - to this point, the group's information will be correct. - For example, in \(a*\)*\1, we need the preceding group, - and in \(\(a*\)b*\)\2, we need the inner group. */ - - /* We can't use `p' to check ahead because we push - a failure point to `p + mcnt' after we do this. 
*/ - p1 = p; - - /* We need to skip no_op's before we look for the - start_memory in case this on_failure_jump is happening as - the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 - against aba. */ - while (p1 < pend && (re_opcode_t) *p1 == no_op) - p1++; - - if (p1 < pend && (re_opcode_t) *p1 == start_memory) - { - /* We have a new highest active register now. This will - get reset at the start_memory we are about to get to, - but we will have saved all the registers relevant to - this repetition op, as described above. */ - highest_active_reg = *(p1 + 1) + *(p1 + 2); - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *(p1 + 1); - } - - DEBUG_PRINT1 (":\n"); - PUSH_FAILURE_POINT (p + mcnt, d, -2); - break; - - - /* A smart repeat ends with `maybe_pop_jump'. - We change it to either `pop_failure_jump' or `jump'. */ - case maybe_pop_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); - { - register unsigned char *p2 = p; - - /* Compare the beginning of the repeat with what in the - pattern follows its end. If we can establish that there - is nothing that they would both match, i.e., that we - would have to backtrack because of (as in, e.g., `a*a') - then we can change to pop_failure_jump, because we'll - never have to backtrack. - - This is not true in the case of alternatives: in - `(a|ab)*' we do need to backtrack to the `ab' alternative - (e.g., if the string was `ab'). But instead of trying to - detect that here, the alternative has put on a dummy - failure point which is what we will end up popping. */ - - /* Skip over open/close-group commands. */ - while (p2 + 2 < pend - && ((re_opcode_t) *p2 == stop_memory - || (re_opcode_t) *p2 == start_memory)) - p2 += 3; /* Skip over args, too. */ - - /* If we're at the end of the pattern, we can change. */ - if (p2 == pend) - { - /* Consider what happens when matching ":\(.*\)" - against ":/". I don't really understand this code - yet. 
*/ - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 - (" End of pattern: change to `pop_failure_jump'.\n"); - } - - else if ((re_opcode_t) *p2 == exactn - || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) - { - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; - p1 = p + mcnt; - - /* p1[0] ... p1[2] are the `on_failure_jump' corresponding - to the `maybe_finalize_jump' of this case. Examine what - follows. */ - if ((re_opcode_t) p1[3] == exactn && p1[5] != c) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", - c, p1[5]); - } - - else if ((re_opcode_t) p1[3] == charset - || (re_opcode_t) p1[3] == charset_not) - { - int not = (re_opcode_t) p1[3] == charset_not; - - if (c < (unsigned char) (p1[4] * BYTEWIDTH) - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - /* `not' is equal to 1 if c would match, which means - that we can't change to pop_failure_jump. */ - if (!not) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - } - p -= 2; /* Point at relative address again. */ - if ((re_opcode_t) p[-1] != pop_failure_jump) - { - p[-1] = (unsigned char) jump; - DEBUG_PRINT1 (" Match => jump.\n"); - goto unconditional_jump; - } - /* Note fall through. */ - - - /* The end of a simple repeat has a pop_failure_jump back to - its matching on_failure_jump, where the latter will push a - failure point. The pop_failure_jump takes off failure - points put on by this pop_failure_jump's matching - on_failure_jump; we got through the pattern to here from the - matching on_failure_jump, so didn't fail. */ - case pop_failure_jump: - { - /* We need to pass separate storage for the lowest and - highest registers, even though we don't care about the - actual values. Otherwise, we will restore only one - register from the stack, since lowest will == highest in - `pop_failure_point'. 
*/ - unsigned dummy_low_reg, dummy_high_reg; - unsigned char *pdummy; - const char *sdummy; - - DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); - POP_FAILURE_POINT (sdummy, pdummy, - dummy_low_reg, dummy_high_reg, - reg_dummy, reg_dummy, reg_info_dummy); - } - /* Note fall through. */ - - - /* Unconditionally jump (without popping any failure points). */ - case jump: - unconditional_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ - DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); - p += mcnt; /* Do the jump. */ - DEBUG_PRINT2 ("(to 0x%x).\n", p); - break; - - - /* We need this opcode so we can detect where alternatives end - in `group_match_null_string_p' et al. */ - case jump_past_alt: - DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); - goto unconditional_jump; - - - /* Normally, the on_failure_jump pushes a failure point, which - then gets popped at pop_failure_jump. We will end up at - pop_failure_jump, also, and with a pattern of, say, `a+', we - are skipping over the on_failure_jump, so we have to push - something meaningless for pop_failure_jump to pop. */ - case dummy_failure_jump: - DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); - /* It doesn't matter what we push for the string here. What - the code at `fail' tests is the value for the pattern. */ - PUSH_FAILURE_POINT (0, 0, -2); - goto unconditional_jump; - - - /* At the end of an alternative, we need to push a dummy failure - point in case we are followed by a `pop_failure_jump', because - we don't want the failure point for the alternative to be - popped. For example, matching `(a|ab)*' against `aab' - requires that we match the `ab' alternative. */ - case push_dummy_failure: - DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); - /* See comments just above at `dummy_failure_jump' about the - two zeroes. */ - PUSH_FAILURE_POINT (0, 0, -2); - break; - - /* Have to succeed matching what follows at least n times. - After that, handle like `on_failure_jump'. 
*/ - case succeed_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); - - assert (mcnt >= 0); - /* Originally, this is how many times we HAVE to succeed. */ - if (mcnt > 0) - { - mcnt--; - p += 2; - STORE_NUMBER_AND_INCR (p, mcnt); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); - } - else if (mcnt == 0) - { - DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); - p[2] = (unsigned char) no_op; - p[3] = (unsigned char) no_op; - goto on_failure; - } - break; - - case jump_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); - - /* Originally, this is how many times we CAN jump. */ - if (mcnt) - { - mcnt--; - STORE_NUMBER (p + 2, mcnt); - goto unconditional_jump; - } - /* If don't have to jump any more, skip over the rest of command. */ - else - p += 4; - break; - - case set_number_at: - { - DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - p1 = p + mcnt; - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); - STORE_NUMBER (p1, mcnt); - break; - } - - case wordbound: - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - break; - goto fail; - - case notwordbound: - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - goto fail; - break; - - case wordbeg: - DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); - if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) - break; - goto fail; - - case wordend: - DEBUG_PRINT1 ("EXECUTING wordend.\n"); - if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) - && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) - break; - goto fail; - -#ifdef emacs -#ifdef emacs19 - case before_dot: - DEBUG_PRINT1 ("EXECUTING before_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) >= point) - goto fail; - break; - - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) != point) - goto fail; - break; - - case after_dot: - 
DEBUG_PRINT1 ("EXECUTING after_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) <= point) - goto fail; - break; -#else /* not emacs19 */ - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point) - goto fail; - break; -#endif /* not emacs19 */ - - case syntaxspec: - DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchsyntax; - - case wordchar: - DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); - mcnt = (int) Sword; - matchsyntax: - PREFETCH (); - if (SYNTAX (*d++) != (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - - case notsyntaxspec: - DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchnotsyntax; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); - mcnt = (int) Sword; - matchnotsyntax: - PREFETCH (); - if (SYNTAX (*d++) == (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - -#else /* not emacs */ - case wordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); - PREFETCH (); - if (!WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); - PREFETCH (); - if (WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; -#endif /* not emacs */ - - default: - abort (); - } - continue; /* Successfully executed one pattern command; keep going. */ - - - /* We goto here if a matching operation fails. */ - fail: - if (!FAIL_STACK_EMPTY ()) - { /* A restart point is known. Restore to that state. */ - DEBUG_PRINT1 ("\nFAIL:\n"); - POP_FAILURE_POINT (d, p, - lowest_active_reg, highest_active_reg, - regstart, regend, reg_info); - - /* If this failure point is a dummy, try the next one. */ - if (!p) - goto fail; - - /* If we failed to the end of the pattern, don't examine *p. 
*/ - assert (p <= pend); - if (p < pend) - { - boolean is_a_jump_n = false; - - /* If failed to a backwards jump that's part of a repetition - loop, need to pop this failure point and use the next one. */ - switch ((re_opcode_t) *p) - { - case jump_n: - is_a_jump_n = true; - case maybe_pop_jump: - case pop_failure_jump: - case jump: - p1 = p + 1; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - - if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) - || (!is_a_jump_n - && (re_opcode_t) *p1 == on_failure_jump)) - goto fail; - break; - default: - /* do nothing */ ; - } - } - - if (d >= string1 && d <= end1) - dend = end_match_1; - } - else - break; /* Matching at this starting point really fails. */ - } /* for (;;) */ - - if (best_regs_set) - goto restore_best_regs; - - FREE_VARIABLES (); - - return -1; /* Failure to match. */ -} /* re_match_2 */ - -/* Subroutine definitions for re_match_2. */ - - -/* We are passed P pointing to a register number after a start_memory. - - Return true if the pattern up to the corresponding stop_memory can - match the empty string, and false otherwise. - - If we find the matching stop_memory, sets P to point to one past its number. - Otherwise, sets P to an undefined byte less than or equal to END. - - We don't handle duplicates properly (yet). */ - -static boolean -group_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - /* Point to after the args to the start_memory. */ - unsigned char *p1 = *p + 2; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and return true or - false, as appropriate, when we get to one that can't, or to the - matching stop_memory. */ - - switch ((re_opcode_t) *p1) - { - /* Could be either a loop or a series of alternatives. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - /* If the next operation is not a jump backwards in the - pattern. 
*/ - - if (mcnt >= 0) - { - /* Go through the on_failure_jumps of the alternatives, - seeing if any of the alternatives cannot match nothing. - The last alternative starts with only a jump, - whereas the rest start with on_failure_jump and end - with a jump, e.g., here is the pattern for `a|b|c': - - /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 - /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 - /exactn/1/c - - So, we have to first go through the first (n-1) - alternatives and then deal with the last one separately. */ - - - /* Deal with the first (n-1) alternatives, which start - with an on_failure_jump (see above) that jumps to right - past a jump_past_alt. */ - - while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) - { - /* `mcnt' holds how many bytes long the alternative - is, including the ending `jump_past_alt' and - its number. */ - - if (!alt_match_null_string_p (p1, p1 + mcnt - 3, - reg_info)) - return false; - - /* Move to right after this alternative, including the - jump_past_alt. */ - p1 += mcnt; - - /* Break if it's the beginning of an n-th alternative - that doesn't begin with an on_failure_jump. */ - if ((re_opcode_t) *p1 != on_failure_jump) - break; - - /* Still have to check that it's not an n-th - alternative that starts with an on_failure_jump. */ - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) - { - /* Get to the beginning of the n-th alternative. */ - p1 -= 3; - break; - } - } - - /* Deal with the last alternative: go back and get number - of the `jump_past_alt' just before it. `mcnt' contains - the length of the alternative. */ - EXTRACT_NUMBER (mcnt, p1 - 2); - - if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) - return false; - - p1 += mcnt; /* Get past the n-th alternative. 
*/ - } /* if mcnt > 0 */ - break; - - - case stop_memory: - assert (p1[1] == **p); - *p = p1 + 2; - return true; - - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return false; -} /* group_match_null_string_p */ - - -/* Similar to group_match_null_string_p, but doesn't deal with alternatives: - It expects P to be the first byte of a single alternative and END one - byte past the last. The alternative can contain groups. */ - -static boolean -alt_match_null_string_p (p, end, reg_info) - unsigned char *p, *end; - register_info_type *reg_info; -{ - int mcnt; - unsigned char *p1 = p; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and break when we get - to one that can't. */ - - switch ((re_opcode_t) *p1) - { - /* It's a loop. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - break; - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return true; -} /* alt_match_null_string_p */ - - -/* Deals with the ops common to group_match_null_string_p and - alt_match_null_string_p. - - Sets P to one after the op and its arguments, if any. */ - -static boolean -common_op_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - boolean ret; - int reg_no; - unsigned char *p1 = *p; - - switch ((re_opcode_t) *p1++) - { - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbeg: - case wordend: - case wordbound: - case notwordbound: -#ifdef emacs - case before_dot: - case at_dot: - case after_dot: -#endif - break; - - case start_memory: - reg_no = *p1; - assert (reg_no > 0 && reg_no <= MAX_REGNUM); - ret = group_match_null_string_p (&p1, end, reg_info); - - /* Have to set this here in case we're checking a group which - contains a group and a back reference to it. 
*/ - - if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; - - if (!ret) - return false; - break; - - /* If this is an optimized succeed_n for zero times, make the jump. */ - case jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (mcnt >= 0) - p1 += mcnt; - else - return false; - break; - - case succeed_n: - /* Get to the number of times to succeed. */ - p1 += 2; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - if (mcnt == 0) - { - p1 -= 4; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - } - else - return false; - break; - - case duplicate: - if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) - return false; - break; - - case set_number_at: - p1 += 4; - - default: - /* All other opcodes mean we cannot match the empty string. */ - return false; - } - - *p = p1; - return true; -} /* common_op_match_null_string_p */ - - -/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN - bytes; nonzero otherwise. */ - -static int -bcmp_translate (s1, s2, len, translate) - unsigned char *s1, *s2; - register int len; - char *translate; -{ - register unsigned char *p1 = s1, *p2 = s2; - while (len) - { - if (translate[*p1++] != translate[*p2++]) return 1; - len--; - } - return 0; -} - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length SIZE) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. - - We call regex_compile to do the actual compilation. */ - -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - int length; - struct re_pattern_buffer *bufp; -{ - reg_errcode_t ret; - - /* GNU code is written to assume at least RE_NREGS registers will be set - (and at least one extra will be -1). 
*/ - bufp->regs_allocated = REGS_UNALLOCATED; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub. */ - bufp->no_sub = 0; - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = regex_compile (pattern, length, re_syntax_options, bufp); - - return re_error_msg[(int) ret]; -} - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them if this is an Emacs or POSIX compilation. */ - -#if !defined (emacs) && !defined (_POSIX_SOURCE) - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - - if (!s) - { - if (!re_comp_buf.buffer) - return "No previous regular expression"; - return 0; - } - - if (!re_comp_buf.buffer) - { - re_comp_buf.buffer = (unsigned char *) malloc (200); - if (re_comp_buf.buffer == NULL) - return "Memory exhausted"; - re_comp_buf.allocated = 200; - - re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); - if (re_comp_buf.fastmap == NULL) - return "Memory exhausted"; - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - - /* Yes, we're discarding `const' here. */ - return (char *) re_error_msg[(int) ret]; -} - - -int -re_exec (s) - const char *s; -{ - const int len = strlen (s); - return - 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); -} -#endif /* not emacs and not _POSIX_SOURCE */ - -/* POSIX.2 functions. Don't define these for Emacs. */ - -#ifndef emacs - -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. 
Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' and `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -int -regcomp (preg, pattern, cflags) - regex_t *preg; - const char *pattern; - int cflags; -{ - reg_errcode_t ret; - unsigned syntax - = (cflags & REG_EXTENDED) ? - RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; - - /* regex_compile will allocate the space for the compiled pattern. */ - preg->buffer = 0; - preg->allocated = 0; - - /* Don't bother to use a fastmap when searching. This simplifies the - REG_NEWLINE case: if we used a fastmap, we'd have to put all the - characters after newlines into the fastmap. This way, we just try - every character. */ - preg->fastmap = 0; - - if (cflags & REG_ICASE) - { - unsigned i; - - preg->translate = (char *) malloc (CHAR_SET_SIZE); - if (preg->translate == NULL) - return (int) REG_ESPACE; - - /* Map uppercase characters to corresponding lowercase ones. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? 
tolower (i) : i; - } - else - preg->translate = NULL; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - - preg->no_sub = !!(cflags & REG_NOSUB); - - /* POSIX says a null character in the pattern terminates it, so we - can use strlen here in compiling the pattern. */ - ret = regex_compile (pattern, strlen (pattern), syntax, preg); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) ret = REG_EPAREN; - - return (int) ret; -} - - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *preg; - const char *string; - size_t nmatch; - regmatch_t pmatch[]; - int eflags; -{ - int ret; - struct re_registers regs; - regex_t private_preg; - int len = strlen (string); - boolean want_reg_info = !preg->no_sub && nmatch > 0; - - private_preg = *preg; - - private_preg.not_bol = !!(eflags & REG_NOTBOL); - private_preg.not_eol = !!(eflags & REG_NOTEOL); - - /* The user has told us exactly how many registers to return - information about, via `nmatch'. We have to pass that on to the - matching routines. 
*/ - private_preg.regs_allocated = REGS_FIXED; - - if (want_reg_info) - { - regs.num_regs = nmatch; - regs.start = TALLOC (nmatch, regoff_t); - regs.end = TALLOC (nmatch, regoff_t); - if (regs.start == NULL || regs.end == NULL) - return (int) REG_NOMATCH; - } - - /* Perform the searching operation. */ - ret = re_search (&private_preg, string, len, - /* start: */ 0, /* range: */ len, - want_reg_info ? &regs : (struct re_registers *) 0); - - /* Copy the register information to the POSIX structure. */ - if (want_reg_info) - { - if (ret >= 0) - { - unsigned r; - - for (r = 0; r < nmatch; r++) - { - pmatch[r].rm_so = regs.start[r]; - pmatch[r].rm_eo = regs.end[r]; - } - } - - /* If we needed the temporary register info, free the space now. */ - free (regs.start); - free (regs.end); - } - - /* We want zero return to mean success, unlike `re_search'. */ - return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; -} - - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -size_t -regerror (errcode, preg, errbuf, errbuf_size) - int errcode; - const regex_t *preg; - char *errbuf; - size_t errbuf_size; -{ - const char *msg; - size_t msg_size; - - if (errcode < 0 - || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = re_error_msg[errcode]; - - /* POSIX doesn't require that we do anything in this case, but why - not be nice. */ - if (! msg) - msg = "Success"; - - msg_size = strlen (msg) + 1; /* Includes the null. 
*/ - - if (errbuf_size != 0) - { - if (msg_size > errbuf_size) - { - strncpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; - } - else - strcpy (errbuf, msg); - } - - return msg_size; -} - - -/* Free dynamically allocated space used by PREG. */ - -void -regfree (preg) - regex_t *preg; -{ - if (preg->buffer != NULL) - free (preg->buffer); - preg->buffer = NULL; - - preg->allocated = 0; - preg->used = 0; - - if (preg->fastmap != NULL) - free (preg->fastmap); - preg->fastmap = NULL; - preg->fastmap_accurate = 0; - - if (preg->translate != NULL) - free (preg->translate); - preg->translate = NULL; -} - -#endif /* not emacs */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/libregex/regex.h b/gnu/libregex/regex.h deleted file mode 100644 index 408dd210348f..000000000000 --- a/gnu/libregex/regex.h +++ /dev/null @@ -1,490 +0,0 @@ -/* Definitions for data structures and routines for the regular - expression library, version 0.12. - - Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifndef __REGEXP_LIBRARY_H__ -#define __REGEXP_LIBRARY_H__ - -/* POSIX says that <sys/types.h> must be included (by the caller) before - <regex.h>. 
*/ - -#ifdef VMS -/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it - should be there. */ -#include <stddef.h> -#endif - - -/* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings are chosen so that Emacs syntax - remains the value 0. The bits are given in alphabetical order, and - the definitions shifted by one from the previous bit; thus, when we - add or remove a bit, only one other definition need change. */ -typedef unsigned reg_syntax_t; - -/* If this bit is not set, then \ inside a bracket expression is literal. - If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) - -/* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. - If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) - -/* If this bit is set, then character classes are supported. They are: - [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], - [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. - If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) - -/* If this bit is set, then ^ and $ are always anchors (outside bracket - expressions, of course). - If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. - - This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because - POSIX draft 11.2 says that * etc. in leading positions is undefined. - We already implemented a previous draft which made those constructs - invalid, though, so we haven't changed the code back. 
*/ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) - -/* If this bit is set, then special characters are always special - regardless of where they are in the pattern. - If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, - * + ? and intervals are only special when not after the beginning, - open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) - -/* If this bit is set, then *, +, ?, and { cannot be first in an re or - immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) - -/* If this bit is set, then . matches newline. - If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) - -/* If this bit is set, then . doesn't match NUL. - If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) - -/* If this bit is set, nonmatching lists [^...] do not match newline. - If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) - -/* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. - If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -/* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) - -/* If this bit is set, newline is an alternation operator. - If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) - -/* If this bit is set, then `{...}' defines an interval, and \{ and \} - are literals. - If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) - -/* If this bit is set, (...) defines a group, and \( and \) are literals. - If not set, \(...\) defines a group, and ( and ) are literals. 
*/ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) - -/* If this bit is set, then \<digit> matches <digit>. - If not set, then \<digit> is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) - -/* If this bit is set, then | is an alternation operator, and \| is literal. - If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) - -/* If this bit is set, then an ending range point collating higher - than the starting range point, as in [z-a], is invalid. - If not set, then when ending range point collates higher than the - starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) - -/* If this bit is set, then an unmatched ) is ordinary. - If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) - -/* This global variable defines the particular regexp syntax to use (for - some interfaces). When a regexp is compiled, the syntax used is - stored in the pattern buffer, so changing this does not affect - already-compiled regexps. */ -extern reg_syntax_t re_syntax_options; - -/* Define combinations of the above bits for the standard possibilities. - (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) 
*/ -/* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. 
*/ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* [[[end syntaxes]]] */ - -/* Maximum number of duplicates an interval can allow. Some systems - (erroneously) define this in other header files, but we want our - value, so remove any previous define. */ -#ifdef RE_DUP_MAX -#undef RE_DUP_MAX -#endif -#define RE_DUP_MAX ((1 << 15) - 1) - - -/* POSIX `cflags' bits (i.e., information for `regcomp'). */ - -/* If this bit is set, then use extended regular expression syntax. - If not set, then use basic regular expression syntax. */ -#define REG_EXTENDED 1 - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define REG_ICASE (REG_EXTENDED << 1) - -/* If this bit is set, then anchors do not match at newline - characters in the string. - If not set, then anchors do match at newlines. */ -#define REG_NEWLINE (REG_ICASE << 1) - -/* If this bit is set, then report only success or fail in regexec. - If not set, then returns differ between not matching and errors. */ -#define REG_NOSUB (REG_NEWLINE << 1) - - -/* POSIX `eflags' bits (i.e., information for regexec). */ - -/* If this bit is set, then the beginning-of-line operator doesn't match - the beginning of the string (presumably because it's not the - beginning of a line). - If not set, then the beginning-of-line operator does match the - beginning of the string. */ -#define REG_NOTBOL 1 - -/* Like REG_NOTBOL, except for the end-of-line. */ -#define REG_NOTEOL (1 << 1) - - -/* If any error codes are removed, changed, or added, update the - `re_error_msg' table in regex.c. */ -typedef enum -{ - REG_NOERROR = 0, /* Success. */ - REG_NOMATCH, /* Didn't find a match (for regexec). */ - - /* POSIX regcomp return error codes. (In the order listed in the - standard.) 
*/ - REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Not implemented. */ - REG_ECTYPE, /* Invalid character class name. */ - REG_EESCAPE, /* Trailing backslash. */ - REG_ESUBREG, /* Invalid back reference. */ - REG_EBRACK, /* Unmatched left bracket. */ - REG_EPAREN, /* Parenthesis imbalance. */ - REG_EBRACE, /* Unmatched \{. */ - REG_BADBR, /* Invalid contents of \{\}. */ - REG_ERANGE, /* Invalid range end. */ - REG_ESPACE, /* Ran out of memory. */ - REG_BADRPT, /* No preceding re for repetition op. */ - - /* Error codes we've added. */ - REG_EEND, /* Premature end. */ - REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ - REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ -} reg_errcode_t; - -/* This data structure represents a compiled pattern. Before calling - the pattern compiler, the fields `buffer', `allocated', `fastmap', - `translate', and `no_sub' can be set. After the pattern has been - compiled, the `re_nsub' field is available. All other fields are - private to the regex routines. */ - -struct re_pattern_buffer -{ -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; - - /* Number of subexpressions found by the compiler. 
*/ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - -/* [[[end pattern_buffer]]] */ -}; - -typedef struct re_pattern_buffer regex_t; - - -/* search.c (search_buffer) in Emacs needs this one opcode value. It is - defined both in `regex.c' and here. */ -#define RE_EXACTN_VALUE 1 - -/* Type for byte offsets within the string. POSIX mandates this. */ -typedef int regoff_t; - - -/* This is the structure we store register match data in. See - regex.texinfo for a full description of what registers match. */ -struct re_registers -{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -}; - - -/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, - `re_match_2' returns information about at least this many registers - the first time a `regs' structure is passed. */ -#ifndef RE_NREGS -#define RE_NREGS 30 -#endif - - -/* POSIX specification for registers. 
Aside from the different names than - `re_registers', POSIX uses an array of structures, instead of a - structure of arrays. */ -typedef struct -{ - regoff_t rm_so; /* Byte offset from string's start to substring's start. */ - regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ -} regmatch_t; - -/* Declarations for routines. */ - -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -#define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -#define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - -/* Sets the current default syntax to SYNTAX, and return the old syntax. - You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); - -/* Compile the regular expression PATTERN, with length LENGTH - and syntax given by the global `re_syntax_options', into the buffer - BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, int length, - struct re_pattern_buffer *buffer)); - - -/* Compile a fastmap for the compiled pattern in BUFFER; used to - accelerate searches. Return 0 if successful and -2 if was an - internal error. */ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); - - -/* Search in the string STRING (with length LENGTH) for the pattern - compiled into BUFFER. Start searching at position START, for RANGE - characters. Return the starting position of the match, -1 for no - match, or -2 for an internal error. Also return register - information in REGS (if REGS and BUFFER->no_sub are nonzero). 
*/ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); - - -/* Like `re_search', but search in the concatenation of STRING1 and - STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); - - -/* Like `re_search', but return how many characters in STRING the regexp - in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); - - -/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); - - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using BUFFER and REGS will use this memory - for recording register information. STARTS and ENDS must be - allocated with malloc, and must each be at least `NUM_REGS * sizeof - (regoff_t)' bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); - -/* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); - -/* POSIX compatibility. 
*/ -extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); -extern int regexec - _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags)); -extern size_t regerror - _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size)); -extern void regfree _RE_ARGS ((regex_t *preg)); - -#endif /* not __REGEXP_LIBRARY_H__ */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/libregex/test/ChangeLog b/gnu/libregex/test/ChangeLog deleted file mode 100644 index f0265bb26a6a..000000000000 --- a/gnu/libregex/test/ChangeLog +++ /dev/null @@ -1,77 +0,0 @@ -Thu Mar 25 21:23:43 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * debugmalloc.c: #include <string.h>, and remove declaration of - memcpy. - -Sun Dec 13 20:59:32 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * tregress.c (test_regress): Add regression test for matching - "[a-a]" against "a" with the upcase translation map. - - * iregex.c (print_regs): Don't print a newline after the register - contents. - (main): Instead, write out newlines here after printing match and - search results; this way, we get a newline whether or not the - pattern matched. - -Fri Dec 11 03:30:50 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu) - - * tregress.c (test_regress): Add new test to catch bug fixed by - change to regex.c today. - - * Makefile.in (dregex.o): Depend on `../regex.[ch]', not `regex.[ch]'. - -Sun Nov 15 07:51:40 1992 Karl Berry (karl@cs.umb.edu) - - * debugmalloc.c (memcpy): Declare; also, include <assert.h>. - - * psx-interf.c (fill_pmatch): Declare offsets as `regoff_t' - instead of `off_t'. - -Thu Nov 12 11:29:58 1992 Karl Berry (karl@cs.umb.edu) - - * iregex.c (main): Remove unused variable `c'; initialize - the char array in C code; only call print_regs if the match and - search succeeded. - (strlen): Declare. 
- - * tregress.c (test_regress): Bug from enami. - -Tue Nov 10 10:36:53 1992 Karl Berry (karl@cs.umb.edu) - - * tregress.c (test_regress): Remove Emacs 19 diff bug from rms, as - it was never the right thing to test anyway, and the test itself - had bugs in it. - -Mon Nov 9 10:09:40 1992 Karl Berry (karl@cs.umb.edu) - - * tregress.c (test_regress): Bug from meyering. - -Thu Sep 24 10:48:34 1992 Karl Berry (karl@cs.umb.edu) - - * Makefile.in: avoid $< (except in implicit rule). - -Sat Sep 19 15:38:29 1992 Karl Berry (karl@hayley) - - * Makefile.in (TAGS): include regex.c and regex.h. - -Wed Sep 16 09:29:27 1992 Karl Berry (karl@hayley) - - * xmalloc.c (xmalloc): use char *, not void *, as some compilers - bomb out on the latter. - - * Makefile.in (LOADLIBES): use LIBS instead, as that what's - Autoconf wants to define. - - * other.c: remove tests for ^/$ around newlines. - -Tue Sep 15 11:01:15 1992 Karl Berry (karl@hayley) - - * fileregex.c (main): call re_search_2 instead of re_search. - - * Makefile.in (regex.o): make target dregex.o, so VPATH doesn't - find ../regex.o. - -Sun Sep 13 06:50:03 1992 Karl Berry (karl@hayley) - - * Created. diff --git a/gnu/libregex/test/Makefile b/gnu/libregex/test/Makefile deleted file mode 100644 index 5a8656a76e9a..000000000000 --- a/gnu/libregex/test/Makefile +++ /dev/null @@ -1,169 +0,0 @@ -# Generated automatically from Makefile.in by configure. -# Makefile for regex testing. -# -# Copyright (C) 1992 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -CPPFLAGS = -CFLAGS = -g -LDFLAGS = - -srcdir = . -VPATH = .:../. - -CC = gcc -DEFS = -DHAVE_STRING_H=1 -LIBS = $(LOADLIBES) - -ETAGS = etags -SHELL = /bin/sh - -debug = -DDEBUG -ALL_CPPFLAGS = -I. -I$(srcdir) -I../$(srcdir) $(DEFS) $(CPPFLAGS) $(debug) - -.c.o: - $(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c $< - - -# Define this as `../regex.o' to get the optimized version. -regex_o = dregex.o -test_h = test.h -test_o = test.o bsd-interf.o other.o tregress.o psx-basic.o psx-extend.o \ - psx-generic.o psx-group.o psx-interf.o psx-interv.o -common_o = printchar.o upcase.o xmalloc.o $(malloc) - -# We have a lot of mallocs we can try when we run afoul of strange bugs. -malloc = -#malloc = # the libc malloc -#malloc = g++malloc.o -#malloc = debugmalloc.o -#malloc = emacsmalloc.o -emacsmallocflags = -Drcheck -Dbotch=abort -DUSG - -# default is to do nothing. -default: - -all: regex syntax - -regex: $(regex_o) $(common_o) $(test_o) main.o - $(CC) -o $@ $(LDFLAGS) $^ $(LIBS) - -# As long as we're doing tests, we enable debugging. -dregex.o: ../regex.c ../regex.h - rm -f $@ - $(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c ../$(srcdir)/regex.c - mv regex.o $@ - -# iregex is the interactive regex. -iregex: $(common_o) $(regex_o) iregex.o - $(CC) -o $@ $(LDFLAGS) $^ $(LIBS) - -# fileregex searches for an r.e. in every line of a given file. -fileregex_o = fileregex.o printchar.o $(regex_o) -fileregex: $(fileregex_o) - $(CC) -o $@ $(LDFLAGS) $(fileregex_o) $(LIBS) - -# cppregex is regex with a preprocessed regex.c. Useful when the -# problem is inside some macro. 
-cppregex: regexcpp.o $(common_o) $(test_o) main.o - $(CC) -o $@ $(LDFLAGS) $^ $(LIBS) - -regexcpp.o: regexcpp.c - -regexcpp.c: regex.c regexcpp.sed - rm -f regexcpp.c - $(CC) -E $(ALL_CPPFLAGS) ../$(srcdir)/regex.c \ - | egrep -v '^#|^ *$$' \ - | sed -f regexcpp.sed \ - > regexcpp.c - chmod a-w regexcpp.c - -# Have to give this malloc special flags. -emacsmalloc.o: emacsmalloc.c - $(CC) -c $(CFLAGS) $(ALL_CPPFLAGS) $(emacsmallocflags) \ - ../$(srcdir)/test/emacsmalloc.c - -syntax: syntax.o - $(CC) $(CFLAGS) -o $@ syntax.o - -syntax.c: syntax.skel bits - sed '/\[\[\[replace.*\]\]\]/r bits' syntax.skel > $@ - -bits: regex.h - sed -n 1,/RE_SYNTAX_EMACS/p ../$(srcdir)/regex.h \ - | grep "#define RE_.*1" \ - | sed 's/^#define \(RE_[A-Z_]*\) .*/ TEST_BIT (\1);/' > $@ - -check: regex - ./regex - -TAGS: regex.c regex.h *.h *.c - $(ETAGS) -t $^ - -depend: - gcc -MM $(ALL_CPPFLAGS) *.c > /tmp/depend -.PHONY: depend - -install: -.PHONY: install - -clean mostlyclean: - rm -f *.o regex cppregex iregex fileregex regexcpp.c syntax - -distclean: clean - rm -f bits syntax.c Makefile - -extraclean: distclean - rm -f *~* *\#* patch* *.orig *.rej *.bak core a.out - -realclean: distclean - rm -f TAGS - -Makefile: Makefile.in ../config.status - (cd ..; sh config.status) - -# Prevent GNU make 3 from overflowing arg limit on system V. -.NOEXPORT: - -# Assumes $(distdir) is the place to put our files. -distfiles = ChangeLog TAGS *.in *.c *.h regexcpp.sed syntax.skel -dist: Makefile TAGS - mkdir $(distdir) - ln $(distfiles) $(distdir) - -# Automatically-generated dependencies below here. 
-alloca.o : alloca.c -bsd-interf.o : bsd-interf.c -debugmalloc.o : debugmalloc.c -emacsmalloc.o : emacsmalloc.c getpagesize.h -fileregex.o : fileregex.c .././regex.h -g++malloc.o : g++malloc.c //usr/include/stdio.h getpagesize.h -iregex.o : iregex.c .././regex.h -main.o : main.c test.h .././regex.h -malloc-test.o : malloc-test.c -other.o : other.c test.h .././regex.h -printchar.o : printchar.c -psx-basic.o : psx-basic.c test.h .././regex.h -psx-extend.o : psx-extend.c test.h .././regex.h -psx-generic.o : psx-generic.c test.h .././regex.h -psx-group.o : psx-group.c test.h .././regex.h -psx-interf.o : psx-interf.c test.h .././regex.h -psx-interv.o : psx-interv.c test.h .././regex.h -syntax.o : syntax.c .././regex.h -test.o : test.c test.h .././regex.h -tregress.o : tregress.c test.h .././regex.h -upcase.o : upcase.c -xmalloc.o : xmalloc.c diff --git a/gnu/libregex/test/Makefile.in b/gnu/libregex/test/Makefile.in deleted file mode 100644 index b6a413384f08..000000000000 --- a/gnu/libregex/test/Makefile.in +++ /dev/null @@ -1,168 +0,0 @@ -# Makefile for regex testing. -# -# Copyright (C) 1992 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- -CPPFLAGS = -CFLAGS = -g -LDFLAGS = - -srcdir = @srcdir@ -VPATH = @srcdir@:../@srcdir@ - -CC = @CC@ -DEFS = @DEFS@ -LIBS = @LIBS@ $(LOADLIBES) - -ETAGS = etags -SHELL = /bin/sh - -debug = -DDEBUG -ALL_CPPFLAGS = -I. -I$(srcdir) -I../$(srcdir) $(DEFS) $(CPPFLAGS) $(debug) - -.c.o: - $(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c $< - - -# Define this as `../regex.o' to get the optimized version. -regex_o = dregex.o -test_h = test.h -test_o = test.o bsd-interf.o other.o tregress.o psx-basic.o psx-extend.o \ - psx-generic.o psx-group.o psx-interf.o psx-interv.o -common_o = printchar.o upcase.o xmalloc.o $(malloc) - -# We have a lot of mallocs we can try when we run afoul of strange bugs. -malloc = @ALLOCA@ -#malloc = # the libc malloc -#malloc = g++malloc.o -#malloc = debugmalloc.o -#malloc = emacsmalloc.o -emacsmallocflags = -Drcheck -Dbotch=abort -DUSG - -# default is to do nothing. -default: - -all: regex syntax - -regex: $(regex_o) $(common_o) $(test_o) main.o - $(CC) -o $@ $(LDFLAGS) $^ $(LIBS) - -# As long as we're doing tests, we enable debugging. -dregex.o: ../regex.c ../regex.h - rm -f $@ - $(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c ../$(srcdir)/regex.c - mv regex.o $@ - -# iregex is the interactive regex. -iregex: $(common_o) $(regex_o) iregex.o - $(CC) -o $@ $(LDFLAGS) $^ $(LIBS) - -# fileregex searches for an r.e. in every line of a given file. -fileregex_o = fileregex.o printchar.o $(regex_o) -fileregex: $(fileregex_o) - $(CC) -o $@ $(LDFLAGS) $(fileregex_o) $(LIBS) - -# cppregex is regex with a preprocessed regex.c. Useful when the -# problem is inside some macro. -cppregex: regexcpp.o $(common_o) $(test_o) main.o - $(CC) -o $@ $(LDFLAGS) $^ $(LIBS) - -regexcpp.o: regexcpp.c - -regexcpp.c: regex.c regexcpp.sed - rm -f regexcpp.c - $(CC) -E $(ALL_CPPFLAGS) ../$(srcdir)/regex.c \ - | egrep -v '^#|^ *$$' \ - | sed -f regexcpp.sed \ - > regexcpp.c - chmod a-w regexcpp.c - -# Have to give this malloc special flags. 
-emacsmalloc.o: emacsmalloc.c - $(CC) -c $(CFLAGS) $(ALL_CPPFLAGS) $(emacsmallocflags) \ - ../$(srcdir)/test/emacsmalloc.c - -syntax: syntax.o - $(CC) $(CFLAGS) -o $@ syntax.o - -syntax.c: syntax.skel bits - sed '/\[\[\[replace.*\]\]\]/r bits' syntax.skel > $@ - -bits: regex.h - sed -n 1,/RE_SYNTAX_EMACS/p ../$(srcdir)/regex.h \ - | grep "#define RE_.*1" \ - | sed 's/^#define \(RE_[A-Z_]*\) .*/ TEST_BIT (\1);/' > $@ - -check: regex - ./regex - -TAGS: regex.c regex.h *.h *.c - $(ETAGS) -t $^ - -depend: - gcc -MM $(ALL_CPPFLAGS) *.c > /tmp/depend -.PHONY: depend - -install: -.PHONY: install - -clean mostlyclean: - rm -f *.o regex cppregex iregex fileregex regexcpp.c syntax - -distclean: clean - rm -f bits syntax.c Makefile - -extraclean: distclean - rm -f *~* *\#* patch* *.orig *.rej *.bak core a.out - -realclean: distclean - rm -f TAGS - -Makefile: Makefile.in ../config.status - (cd ..; sh config.status) - -# Prevent GNU make 3 from overflowing arg limit on system V. -.NOEXPORT: - -# Assumes $(distdir) is the place to put our files. -distfiles = ChangeLog TAGS *.in *.c *.h regexcpp.sed syntax.skel -dist: Makefile TAGS - mkdir $(distdir) - ln $(distfiles) $(distdir) - -# Automatically-generated dependencies below here. 
-alloca.o : alloca.c -bsd-interf.o : bsd-interf.c -debugmalloc.o : debugmalloc.c -emacsmalloc.o : emacsmalloc.c getpagesize.h -fileregex.o : fileregex.c .././regex.h -g++malloc.o : g++malloc.c //usr/include/stdio.h getpagesize.h -iregex.o : iregex.c .././regex.h -main.o : main.c test.h .././regex.h -malloc-test.o : malloc-test.c -other.o : other.c test.h .././regex.h -printchar.o : printchar.c -psx-basic.o : psx-basic.c test.h .././regex.h -psx-extend.o : psx-extend.c test.h .././regex.h -psx-generic.o : psx-generic.c test.h .././regex.h -psx-group.o : psx-group.c test.h .././regex.h -psx-interf.o : psx-interf.c test.h .././regex.h -psx-interv.o : psx-interv.c test.h .././regex.h -syntax.o : syntax.c .././regex.h -test.o : test.c test.h .././regex.h -tregress.o : tregress.c test.h .././regex.h -upcase.o : upcase.c -xmalloc.o : xmalloc.c diff --git a/gnu/libregex/test/TAGS b/gnu/libregex/test/TAGS deleted file mode 100644 index d3aad750dcba..000000000000 --- a/gnu/libregex/test/TAGS +++ /dev/null @@ -1,373 +0,0 @@ - -.././regex.c,4137 -#define AT_STRINGS_BEG(3078,98376 -#define AT_STRINGS_END(3079,98449 -#define AT_WORD_BOUNDARY(3093,99002 -#define BUF_PUSH(887,24995 -#define BUF_PUSH_2(895,25208 -#define BUF_PUSH_3(904,25437 -#define DEBUG_POP(2336,74614 -#define DEBUG_PRINT1(471,14296 -#define DEBUG_PRINT1(785,21263 -#define DEBUG_PRINT2(472,14342 -#define DEBUG_PRINT3(473,14398 -#define DEBUG_PRINT3(787,21316 -#define DEBUG_PRINT4(474,14462 -#define DEBUG_PRINT_COMPILED_PATTERN(475,14534 -#define DEBUG_PRINT_COMPILED_PATTERN(789,21386 -#define DEBUG_PRINT_DOUBLE_STRING(477,14637 -#define DEBUG_PUSH(2338,74684 -#define DEBUG_STATEMENT(470,14267 -#define DOUBLE_FAIL_STACK(2299,73230 -#define EVER_MATCHED_SOMETHING(3028,96680 -#define EXTEND_BUFFER(941,26834 -#define EXTRACT_NUMBER(403,12499 -#define EXTRACT_NUMBER(422,12960 -#define EXTRACT_NUMBER_AND_INCR(430,13181 -#define EXTRACT_NUMBER_AND_INCR(448,13583 -#define FAIL_STACK_EMPTY(2271,72289 -#define 
FAIL_STACK_FULL(2273,72404 -#define FAIL_STACK_PTR_EMPTY(2272,72344 -#define FAIL_STACK_TOP(2274,72473 -#define FIRST_STRING_P(221,5848 -#define FREE_VAR(3100,99186 -#define FREE_VARIABLES(3101,99240 -#define FREE_VARIABLES(3116,99751 -#define GET_BUFFER_SPACE(882,24802 -#define GET_UNSIGNED_NUMBER(1017,29312 -#define INIT_FAIL_STACK(2279,72612 -#define INSERT_JUMP(923,26079 -#define INSERT_JUMP2(927,26236 -#define ISALNUM(147,3407 -#define ISALPHA(148,3455 -#define ISBLANK(135,3062 -#define ISBLANK(137,3116 -#define ISCNTRL(149,3503 -#define ISDIGIT(146,3359 -#define ISGRAPH(140,3185 -#define ISGRAPH(142,3239 -#define ISLOWER(150,3551 -#define ISPRINT(145,3311 -#define ISPUNCT(151,3599 -#define ISSPACE(152,3647 -#define ISUPPER(153,3695 -#define ISXDIGIT(154,3743 -#define IS_ACTIVE(3026,96578 -#define IS_CHAR_CLASS(1035,29793 -#define MATCHED_SOMETHING(3027,96621 -#define MAX(233,6292 -#define MIN(234,6334 -#define PATFETCH(852,23769 -#define PATFETCH_RAW(860,24020 -#define POINTER_TO_OFFSET(3050,97433 -#define POP_FAILURE_ITEM(2331,74426 -#define POP_FAILURE_POINT(2461,79538 -#define PREFETCH(3064,97916 -#define PUSH_FAILURE_ITEM(2327,74253 -#define PUSH_FAILURE_POINT(2352,75048 -#define PUSH_PATTERN_OP(2317,73841 -#define REGEX_REALLOCATE(185,4875 -#define REGEX_REALLOCATE(210,5495 -#define REGEX_TALLOC(227,6137 -#define REG_MATCH_NULL_STRING_P(3025,96511 -#define REG_UNSET(3055,97649 -#define RETALLOC(226,6058 -#define SET_LIST_BIT(1011,29089 -#define SET_REGS_MATCHED(3034,96936 -#define SIGN_EXTEND_CHAR(166,4109 -#define SIGN_EXTEND_CHAR(169,4217 -#define STORE_JUMP(915,25800 -#define STORE_JUMP2(919,25917 -#define STORE_NUMBER(384,11919 -#define STORE_NUMBER_AND_INCR(394,12242 -#define STREQ(231,6244 -#define SYNTAX(120,2790 -#define TALLOC(225,6003 -#define TRANSLATE(873,24503 -#define WORDCHAR_P(3086,98755 -alt_match_null_string_p 4466,149039 -#define assert(782,21217 -at_begline_loc_p 2131,67979 -at_endline_loc_p 2150,68557 -#define bcmp(54,1656 
-bcmp_translate 4591,151831 -#define bcopy(57,1726 -typedef char boolean;236,6377 -#define bzero(60,1793 -common_op_match_null_string_p 4503,149895 -compile_range 2200,69997 -} compile_stack_elt_t;990,28602 -} compile_stack_type;998,28748 -extract_number 411,12714 -extract_number_and_incr 438,13370 -} fail_stack_type;2269,72269 -group_in_compile_stack 2172,69174 -group_match_null_string_p 4357,145267 -init_syntax_once 94,2365 -insert_op1 2091,67107 -insert_op2 2110,67475 -#define isascii(131,3018 -typedef int pattern_offset_t;981,28388 -print_compiled_pattern 726,19792 -print_double_string 753,20605 -print_fastmap 486,14835 -print_partial_compiled_pattern 518,15475 -re_comp 4650,153479 -re_compile_fastmap 2532,82428 -re_compile_pattern 4617,152520 -re_exec 4688,154373 -re_match 3136,100557 -re_match_2 3161,101399 -} re_opcode_t;378,11781 -re_search 2844,90872 -re_search_2 2877,91998 -re_set_registers 2817,90247 -re_set_syntax 808,22087 -regcomp 4736,155972 -regerror 4876,160188 -regex_compile 1062,30922 -regexec 4811,158371 -regfree 4920,161247 -} register_info_type;3023,96488 -typedef unsigned regnum_t;974,28172 -store_op1 2063,66535 -store_op2 2076,66768 -typedef const unsigned 2262,72103 - -.././regex.h,230 -#define _RE_ARGS(394,14981 -#define _RE_ARGS(398,15036 -} reg_errcode_t;270,10874 -typedef unsigned reg_syntax_t;38,1503 -typedef struct re_pattern_buffer regex_t;346,13556 -} regmatch_t;382,14634 -typedef int regoff_t;354,13814 - -getpagesize.h,84 -#define getpagesize(12,137 -#define getpagesize(15,191 -#define getpagesize(20,302 - -test.h,436 -#define BRACES_TO_OPS(107,3169 -#define INVALID_PATTERN(110,3328 -#define MATCH_SELF(114,3429 -#define PARENS_TO_OPS(108,3248 -#define SAFE_STRLEN(14,201 -#define TEST_POSITIONED_MATCH(116,3470 -#define TEST_REGISTERS(104,3011 -#define TEST_REGISTERS_2(97,2703 -#define TEST_SEARCH(127,3875 -#define TEST_SEARCH_2(123,3720 -#define TEST_TRUNCATED_MATCH(120,3608 -typedef enum { false = 0, true = 1 } boolean;16,255 -} 
test_type;33,572 - -alloca.c,128 -alloca 141,3996 -find_stack_direction 85,2553 -} header;127,3538 -typedef void *pointer;51,1721 -typedef char *pointer;53,1778 - -bsd-interf.c,51 -test_berk_search 8,106 -test_bsd_interface 33,738 - -debugmalloc.c,395 -#define TRACE(8,143 -#define TRACE1(9,197 -#define TRACE2(10,254 -#define TRACE3(11,319 -#define TRACE4(12,392 -#define USER_ALLOC(61,1440 -typedef char *address;15,480 -} *chunk;54,1225 -chunk_delete 115,2778 -chunk_insert 96,2294 -chunk_to_mem 79,1916 -free 261,5604 -free_list_available 175,3947 -malloc 203,4343 -mem_to_chunk 68,1703 -realloc 242,5309 -validate_list 153,3478 -xsbrk 21,545 - -emacsmalloc.c,574 -#define ASSERT(178,5884 -#define ASSERT(181,5985 -#define CHAIN(166,5430 -#define bcmp(73,2821 -#define bcopy(72,2777 -#define bzero(74,2868 -calloc 603,15983 -free 484,13255 -get_lim_data 736,18517 -get_lim_data 752,18767 -get_lim_data 759,18860 -getpool 374,10263 -malloc 413,11133 -malloc_init 218,6863 -malloc_mem_free 707,17940 -malloc_mem_used 688,17683 -malloc_stats 663,17320 -malloc_usable_size 233,7147 -memalign 618,16164 -morecore 244,7380 -realloc 541,14424 -#define start_of_data(110,3486 -#define start_of_data(115,3546 -sys_sbrk 815,20804 -valloc 645,17031 - -fileregex.c,13 -main 11,156 - -g++malloc.c,1543 -#define UPDATE_STATS(33,1090 -#define UPDATE_STATS(35,1131 -static inline int aligned_OK(343,11189 -void* calloc(1039,28692 -void cfree(1048,28894 -static inline void* chunk2mem(619,19336 -#define clear_inuse(592,18767 -static inline void consollink(716,21398 -static void do_free_stats(544,18016 -static void do_malloc_stats(534,17741 -766,22304 -extern 762,22235 - for 1260,34165 -void free(1028,28553 -static inline void frontlink(732,21717 -static unsigned int gcd(557,18251 - if 1212,32427 - if 1216,32582 - if 1220,32737 - if 1224,32880 - if 1229,33094 - if 1233,33251 - if 1238,33463 - if 1242,33609 - if 1247,33739 -#define inuse(590,18680 -static inline unsigned int lcm(580,18540 -void* 
malloc(939,26370 -static mchunkptr malloc_find_space(858,24561 -void malloc_stats(1201,32256 -unsigned int malloc_usable_size(1054,28936 -static volatile void malloc_user_error(286,9757 -static void malloc_user_error(288,9804 -typedef struct malloc_bin* mbinptr;320,10636 -typedef struct malloc_chunk* mchunkptr;309,10247 -static inline mchunkptr mem2chunk(643,19759 -void* memalign(1118,30363 -#define next_chunk(600,18910 -#define prev_chunk(604,19023 -void* realloc(1071,29263 -static inline unsigned int request2size(335,10993 -mchunkptr sanity_check(628,19486 -#define set_inuse(591,18723 -static inline void set_size(609,19149 -static inline mbinptr size2bin(499,16914 -static inline void split(685,20463 -static 768,22312 -static inline void unlink(671,20263 -void* valloc(1194,32107 -typedef volatile void 760,22184 -764,22271 - -iregex.c,54 -main 20,390 -print_regs 141,2638 -scanstring 87,1839 - -main.c,13 -main 12,242 - -malloc-test.c,112 -#define BITS_BLOCK(12,168 -#define BITS_MASK(13,228 -} bits_list_type;6,56 -init_bits_list 16,311 -main(32,621 - -other.c,18 -test_others 6,96 - -printchar.c,15 -printchar 2,5 - -psx-basic.c,23 -test_posix_basic 7,84 - -psx-extend.c,26 -test_posix_extended 7,88 - -psx-generic.c,26 -test_posix_generic 8,117 - -psx-group.c,20 -test_grouping 7,92 - -psx-interf.c,416 -fill_pmatch 174,4802 -get_error_string 18,260 -init_pattern_buffer 49,1434 -test_compile 67,1925 -test_eflags 245,6876 -test_error_code_allocation 562,16619 -test_error_code_message 524,15247 -test_ignore_case 303,8525 -test_newline 330,9199 -test_nsub 117,3319 -test_pmatch 188,5121 -test_posix_interface 614,18719 -test_posix_match 359,9938 -test_regcomp 138,3725 -test_regerror 592,17621 -test_regexec 394,10783 - -psx-interv.c,21 -test_intervals 6,93 - -test.c,607 -#define SET_FASTMAP(447,13999 -#define bcmp(18,362 -#define bcopy(19,415 -#define bzero(20,473 -compile_and_print_pattern 666,19653 -concat 97,2673 -delimiters_to_ops 571,17477 -general_test 115,2996 
-invalid_pattern 542,16821 -#define memcmp(26,611 -#define memcpy(27,660 -print_pattern_info 635,18998 -set_all_registers 58,1390 -test_all_registers 506,15567 -test_case_fold 682,19993 -test_fastmap 460,14363 -test_fastmap_search 474,14668 -test_match 776,22235 -test_match_2 766,22040 -test_match_n_times 715,20798 -test_search_return 408,13011 -valid_nonposix_pattern 646,19239 -valid_pattern 557,17182 - -tregress.c,208 -#define SIMPLE_MATCH(74,1463 -#define SIMPLE_NONMATCH(75,1528 -do_match 78,1599 -itoa 10,199 -simple_compile 44,882 -simple_fail 21,353 -simple_fastmap 55,1115 -simple_search 100,2020 -test_regress 124,2513 - -upcase.c,0 - -xmalloc.c,14 -xmalloc 9,87 diff --git a/gnu/libregex/test/alloca.c b/gnu/libregex/test/alloca.c deleted file mode 100644 index c1ff22227f8e..000000000000 --- a/gnu/libregex/test/alloca.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - alloca -- (mostly) portable public-domain implementation -- D A Gwyn - - last edit: 86/05/30 rms - include config.h, since on VMS it renames some symbols. - Use xmalloc instead of malloc. - - This implementation of the PWB library alloca() function, - which is used to allocate space off the run-time stack so - that it is automatically reclaimed upon procedure exit, - was inspired by discussions with J. Q. Johnson of Cornell. - - It should work under any C implementation that uses an - actual procedure stack (as opposed to a linked list of - frames). There are some preprocessor constants that can - be defined when compiling for your specific system, for - improved efficiency; however, the defaults should be okay. - - The general concept of this implementation is to keep - track of all alloca()-allocated blocks, and reclaim any - that are found to be deeper in the stack than the current - invocation. This heuristic does not reclaim storage as - soon as it becomes invalid, but it will do so eventually. - - As a special case, alloca(0) reclaims storage without - allocating any. 
It is a good idea to use alloca(0) in - your main control loop, etc. to force garbage collection. -*/ -#ifndef lint -static char SCCSid[] = "@(#)alloca.c 1.1"; /* for the "what" utility */ -#endif - -#ifdef emacs -#include "config.h" -#ifdef static -/* actually, only want this if static is defined as "" - -- this is for usg, in which emacs must undefine static - in order to make unexec workable - */ -#ifndef STACK_DIRECTION -you -lose --- must know STACK_DIRECTION at compile-time -#endif /* STACK_DIRECTION undefined */ -#endif /* static */ -#endif /* emacs */ - -#ifndef alloca /* If compiling with GCC, this file's not needed. */ - -#ifdef __STDC__ -typedef void *pointer; /* generic pointer type */ -#else -typedef char *pointer; /* generic pointer type */ -#endif - -#define NULL 0 /* null pointer constant */ - -extern void free(); -extern pointer xmalloc(); - -/* - Define STACK_DIRECTION if you know the direction of stack - growth for your system; otherwise it will be automatically - deduced at run-time. 
- - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown -*/ - -#ifndef STACK_DIRECTION -#define STACK_DIRECTION 0 /* direction unknown */ -#endif - -#if STACK_DIRECTION != 0 - -#define STACK_DIR STACK_DIRECTION /* known at compile-time */ - -#else /* STACK_DIRECTION == 0; need run-time code */ - -static int stack_dir; /* 1 or -1 once known */ -#define STACK_DIR stack_dir - -static void -find_stack_direction (/* void */) -{ - static char *addr = NULL; /* address of first - `dummy', once known */ - auto char dummy; /* to get stack address */ - - if (addr == NULL) - { /* initial entry */ - addr = &dummy; - - find_stack_direction (); /* recurse once */ - } - else /* second entry */ - if (&dummy > addr) - stack_dir = 1; /* stack grew upward */ - else - stack_dir = -1; /* stack grew downward */ -} - -#endif /* STACK_DIRECTION == 0 */ - -/* - An "alloca header" is used to: - (a) chain together all alloca()ed blocks; - (b) keep track of stack depth. - - It is very important that sizeof(header) agree with malloc() - alignment chunk size. The following default should work okay. -*/ - -#ifndef ALIGN_SIZE -#define ALIGN_SIZE sizeof(double) -#endif - -typedef union hdr -{ - char align[ALIGN_SIZE]; /* to force sizeof(header) */ - struct - { - union hdr *next; /* for chaining headers */ - char *deep; /* for stack depth measure */ - } h; -} header; - -/* - alloca( size ) returns a pointer to at least `size' bytes of - storage which will be automatically reclaimed upon exit from - the procedure that called alloca(). Originally, this space - was supposed to be taken from the current stack frame of the - caller, but that method cannot be made to work for some - implementations of C, for example under Gould's UTX/32. 
-*/ - -static header *last_alloca_header = NULL; /* -> last alloca header */ - -pointer -alloca (size) /* returns pointer to storage */ - unsigned size; /* # bytes to allocate */ -{ - auto char probe; /* probes stack depth: */ - register char *depth = &probe; - -#if STACK_DIRECTION == 0 - if (STACK_DIR == 0) /* unknown growth direction */ - find_stack_direction (); -#endif - - /* Reclaim garbage, defined as all alloca()ed storage that - was allocated from deeper in the stack than currently. */ - - { - register header *hp; /* traverses linked list */ - - for (hp = last_alloca_header; hp != NULL;) - if ((STACK_DIR > 0 && hp->h.deep > depth) - || (STACK_DIR < 0 && hp->h.deep < depth)) - { - register header *np = hp->h.next; - - free ((pointer) hp); /* collect garbage */ - - hp = np; /* -> next header */ - } - else - break; /* rest are not deeper */ - - last_alloca_header = hp; /* -> last valid storage */ - } - - if (size == 0) - return NULL; /* no allocation required */ - - /* Allocate combined header + user data storage. */ - - { - register pointer new = xmalloc (sizeof (header) + size); - /* address of header */ - - ((header *)new)->h.next = last_alloca_header; - ((header *)new)->h.deep = depth; - - last_alloca_header = (header *)new; - - /* User storage begins just after header. */ - - return (pointer)((char *)new + sizeof(header)); - } -} - -#endif /* no alloca */ diff --git a/gnu/libregex/test/bsd-interf.c b/gnu/libregex/test/bsd-interf.c deleted file mode 100644 index 56f9e2a2fe68..000000000000 --- a/gnu/libregex/test/bsd-interf.c +++ /dev/null @@ -1,38 +0,0 @@ -/* bsd-interf.c: test BSD interface. 
*/ - -#ifndef _POSIX_SOURCE /* whole file */ - -#include "test.h" - -void -test_berk_search (pattern, string) - const char *pattern; - char *string; -{ - const char *return_value = re_comp (pattern); - - if (return_value != 0) - { - printf ("This didn't compile: `%s'.\n", pattern); - printf (" The error message was: `%s'.\n", return_value); - } - else - if (test_should_match && re_exec (string) != strlen (string)) - { - printf ("Should have matched but didn't:\n"); - printf (" The pattern was: %s.\n", pattern); - if (string) - printf (" The string was: `%s'.'n", string); - else - printf (" The string was empty.\n"); - } -} - - -void -test_bsd_interface () -{ - test_berk_search ("a", "ab"); -} - -#endif /* _POSIX_SOURCE */ diff --git a/gnu/libregex/test/debugmalloc.c b/gnu/libregex/test/debugmalloc.c deleted file mode 100644 index 5c468e212439..000000000000 --- a/gnu/libregex/test/debugmalloc.c +++ /dev/null @@ -1,273 +0,0 @@ -/* debugmalloc.c: a malloc for debugging purposes. */ - -#include <stdio.h> -#include <assert.h> -#include <string.h> - -static unsigned trace = 0; -#define TRACE(s) if (trace) fprintf (stderr, "%s", s) -#define TRACE1(s, e1) if (trace) fprintf (stderr, s, e1) -#define TRACE2(s, e1, e2) if (trace) fprintf (stderr, s, e1, e2) -#define TRACE3(s, e1, e2, e3) if (trace) fprintf (stderr, s, e1, e2, e3) -#define TRACE4(s, e1, e2, e3, e4) \ - if (trace) fprintf (stderr, s, e1, e2, e3, e4) - -typedef char *address; - - -/* Wrap our calls to sbrk. */ - -address -xsbrk (incr) - int incr; -{ - extern char *sbrk (); - address ret = sbrk (incr); - - if (ret == (address) -1) - { - perror ("sbrk"); /* Actually, we should return NULL, not quit. */ - abort (); - } - - return ret; -} - - - -typedef struct chunk_struct -{ - /* This is the size (in bytes) that has actually been actually - allocated, not the size that the user requested. */ - unsigned alloc_size; - - /* This is the size the user requested. 
*/ - unsigned user_size; - - /* Points to the next block in one of the lists. */ - struct chunk_struct *next; - - /* Now comes the user's memory. */ - address user_mem; - - /* After the user's memory is a constant. */ -} *chunk; - -#define MALLOC_OVERHEAD 16 - -/* We might play around with the `user_size' field, but the amount of - memory that is actually available in the chunk is always the size - allocated minus the overhead. */ -#define USER_ALLOC(c) ((c)->alloc_size - MALLOC_OVERHEAD) - -/* Given a pointer to a malloc-allocated block, the beginning of the - chunk should always be MALLOC_OVERHEAD - 4 bytes back, since the only - overhead after the user memory is the constant. */ - -chunk -mem_to_chunk (mem) - address mem; -{ - return (chunk) (mem - (MALLOC_OVERHEAD - 4)); -} - - -/* The other direction is even easier, since the user's memory starts at - the `user_mem' member in the chunk. */ - -address -chunk_to_mem (c) - chunk c; -{ - return (address) &(c->user_mem); -} - - - -/* We keep both all the allocated chunks and all the free chunks on - lists. Since we put the next pointers in the chunk structure, we - don't need a separate chunk_list structure. */ -chunk alloc_list = NULL, free_list = NULL; - - -/* We always append the new chunk at the beginning of the list. */ - -void -chunk_insert (chunk_list, new_c) - chunk *chunk_list; - chunk new_c; -{ - chunk c = *chunk_list; /* old beginning of list */ - - TRACE3 (" Inserting 0x%x at the beginning of 0x%x, before 0x%x.\n", - new_c, chunk_list, c); - - *chunk_list = new_c; - new_c->next = c; -} - - -/* Thus, removing an element means we have to search until we find it. - Have to delete before we insert, since insertion changes the next - pointer, which we need to put it on the other list. 
*/ - -void -chunk_delete (chunk_list, dead_c) - chunk *chunk_list; - chunk dead_c; -{ - chunk c = *chunk_list; - chunk prev_c = NULL; - - TRACE2 (" Deleting 0x%x from 0x%x:", dead_c, chunk_list); - - while (c != dead_c && c != NULL) - { - TRACE1 (" 0x%x", c); - prev_c = c; - c = c->next; - } - - if (c == NULL) - { - fprintf (stderr, "Chunk at 0x%x not found on list.\n", dead_c); - abort (); - } - - if (prev_c == NULL) - { - TRACE1 (".\n Setting head to 0x%x.\n", c->next); - *chunk_list = c->next; - } - else - { - TRACE2 (".\n Linking next(0x%x) to 0x%x.\n", prev_c, c->next); - prev_c->next = c->next; - } -} - - -/* See if a list is hunky-dory. */ - -void -validate_list (chunk_list) - chunk *chunk_list; -{ - chunk c; - - TRACE1 (" Validating list at 0x%x:", chunk_list); - - for (c = *chunk_list; c != NULL; c = c->next) - { - assert (c->user_size < c->alloc_size); - assert (memcmp (chunk_to_mem (c) + c->user_size, "Karl", 4)); - TRACE2 (" 0x%x/%d", c, c->user_size); - } - - TRACE (".\n"); -} - - -/* See if we have a free chunk of a given size. We'll take the first - one that is big enough. */ - -chunk -free_list_available (needed) - unsigned needed; -{ - chunk c; - - TRACE1 (" Checking free list for %d bytes:", needed); - - if (free_list == NULL) - { - return NULL; - } - - c = free_list; - - while (c != NULL && USER_ALLOC (c) < needed) - { - TRACE2 (" 0x%x/%d", c, USER_ALLOC (c)); - c = c->next; - } - - TRACE1 ("\n Returning 0x%x.\n", c); - return c; -} - - - - -address -malloc (n) - unsigned n; -{ - address new_mem; - chunk c; - - TRACE1 ("Mallocing %d bytes.\n", n); - - validate_list (&free_list); - validate_list (&alloc_list); - - c = free_list_available (n); - - if (c == NULL) - { /* Nothing suitable on free list. Allocate a new chunk. */ - TRACE (" not on free list.\n"); - c = (chunk) xsbrk (n + MALLOC_OVERHEAD); - c->alloc_size = n + MALLOC_OVERHEAD; - } - else - { /* Found something on free list. Don't split it, just use as is. 
*/ - TRACE (" found on free list.\n"); - chunk_delete (&free_list, c); - } - - /* If we took this from the free list, then the user size might be - different now, and consequently the constant at the end might be in - the wrong place. */ - c->user_size = n; - new_mem = chunk_to_mem (c); - memcpy (new_mem + n, "Karl", 4); - chunk_insert (&alloc_list, c); - - TRACE2 ("Malloc returning 0x%x (chunk 0x%x).\n", new_mem, c); - return new_mem; -} - - -address -realloc (mem, n) - address mem; - unsigned n; -{ - void free (); - chunk c = mem_to_chunk (mem); - address new_mem; - - TRACE3 ("Reallocing %d bytes at 0x%x (chunk 0x%x).\n", n, mem, c); - - new_mem = malloc (n); - memcpy (new_mem, mem, c->user_size); - free (mem); - - return new_mem; -} - - -void -free (mem) - address mem; -{ - chunk c = mem_to_chunk (mem); - - TRACE2 ("Freeing memory at 0x%x (chunk at 0x%x).\n", mem, c); - - validate_list (&free_list); - validate_list (&alloc_list); - - chunk_delete (&alloc_list, c); - chunk_insert (&free_list, c); -} diff --git a/gnu/libregex/test/emacsmalloc.c b/gnu/libregex/test/emacsmalloc.c deleted file mode 100644 index 6eee1fae1acb..000000000000 --- a/gnu/libregex/test/emacsmalloc.c +++ /dev/null @@ -1,844 +0,0 @@ -/* dynamic memory allocation for GNU. - Copyright (C) 1985, 1987 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- -In other words, you are welcome to use, share and improve this program. -You are forbidden to forbid anyone else to use, share and improve -what you give them. Help stamp out software-hoarding! */ - - -/* - * @(#)nmalloc.c 1 (Caltech) 2/21/82 - * - * U of M Modified: 20 Jun 1983 ACT: strange hacks for Emacs - * - * Nov 1983, Mike@BRL, Added support for 4.1C/4.2 BSD. - * - * This is a very fast storage allocator. It allocates blocks of a small - * number of different sizes, and keeps free lists of each size. Blocks - * that don't exactly fit are passed up to the next larger size. In this - * implementation, the available sizes are (2^n)-4 (or -16) bytes long. - * This is designed for use in a program that uses vast quantities of - * memory, but bombs when it runs out. To make it a little better, it - * warns the user when he starts to get near the end. - * - * June 84, ACT: modified rcheck code to check the range given to malloc, - * rather than the range determined by the 2-power used. - * - * Jan 85, RMS: calls malloc_warning to issue warning on nearly full. - * No longer Emacs-specific; can serve as all-purpose malloc for GNU. - * You should call malloc_init to reinitialize after loading dumped Emacs. - * Call malloc_stats to get info on memory stats if MSTATS turned on. - * realloc knows how to return same block given, just changing its size, - * if the power of 2 is correct. - */ - -/* - * nextf[i] is the pointer to the next free block of size 2^(i+3). The - * smallest allocatable block is 8 bytes. The overhead information will - * go in the first int of the block, and the returned pointer will point - * to the second. - * -#ifdef MSTATS - * nmalloc[i] is the difference between the number of mallocs and frees - * for a given block size. -#endif MSTATS - */ - -#ifdef emacs -/* config.h specifies which kind of system this is. */ -#include "config.h" -#include <signal.h> -#else - -/* Determine which kind of system this is. 
*/ -#include <sys/types.h> -#include <signal.h> - -#include <string.h> -#define bcopy(s,d,n) memcpy ((d), (s), (n)) -#define bcmp(s1,s2,n) memcmp ((s1), (s2), (n)) -#define bzero(s,n) memset ((s), 0, (n)) - -#ifndef SIGTSTP -#ifndef VMS -#ifndef USG -#define USG -#endif -#endif /* not VMS */ -#else /* SIGTSTP */ -#ifdef SIGIO -#define BSD4_2 -#endif /* SIGIO */ -#endif /* SIGTSTP */ - -#endif /* not emacs */ - -/* Define getpagesize () if the system does not. */ -#include "getpagesize.h" - -#ifdef BSD -#ifdef BSD4_1 -#include <sys/vlimit.h> /* warn the user when near the end */ -#else /* if 4.2 or newer */ -#include <sys/time.h> -#include <sys/resource.h> -#endif /* if 4.2 or newer */ -#endif - -#ifdef VMS -#include "vlimit.h" -#endif - -extern char *start_of_data (); - -#ifdef BSD -#ifndef DATA_SEG_BITS -#define start_of_data() &etext -#endif -#endif - -#ifndef emacs -#define start_of_data() &etext -#endif - -#define ISALLOC ((char) 0xf7) /* magic byte that implies allocation */ -#define ISFREE ((char) 0x54) /* magic byte that implies free block */ - /* this is for error checking only */ -#define ISMEMALIGN ((char) 0xd6) /* Stored before the value returned by - memalign, with the rest of the word - being the distance to the true - beginning of the block. */ - -extern char etext; - -/* These two are for user programs to look at, when they are interested. 
*/ - -unsigned int malloc_sbrk_used; /* amount of data space used now */ -unsigned int malloc_sbrk_unused; /* amount more we can have */ - -/* start of data space; can be changed by calling init_malloc */ -static char *data_space_start; - -#ifdef MSTATS -static int nmalloc[30]; -static int nmal, nfre; -#endif /* MSTATS */ - -/* If range checking is not turned on, all we have is a flag indicating - whether memory is allocated, an index in nextf[], and a size field; to - realloc() memory we copy either size bytes or 1<<(index+3) bytes depending - on whether the former can hold the exact size (given the value of - 'index'). If range checking is on, we always need to know how much space - is allocated, so the 'size' field is never used. */ - -struct mhead { - char mh_alloc; /* ISALLOC or ISFREE */ - char mh_index; /* index in nextf[] */ -/* Remainder are valid only when block is allocated */ - unsigned short mh_size; /* size, if < 0x10000 */ -#ifdef rcheck - unsigned mh_nbytes; /* number of bytes allocated */ - int mh_magic4; /* should be == MAGIC4 */ -#endif /* rcheck */ -}; - -/* Access free-list pointer of a block. - It is stored at block + 4. - This is not a field in the mhead structure - because we want sizeof (struct mhead) - to describe the overhead for when the block is in use, - and we do not want the free-list pointer to count in that. */ - -#define CHAIN(a) \ - (*(struct mhead **) (sizeof (char *) + (char *) (a))) - -#ifdef rcheck - -/* To implement range checking, we write magic values in at the beginning and - end of each allocated block, and make sure they are undisturbed whenever a - free or a realloc occurs. 
*/ -/* Written in each of the 4 bytes following the block's real space */ -#define MAGIC1 0x55 -/* Written in the 4 bytes before the block's real space */ -#define MAGIC4 0x55555555 -#define ASSERT(p) if (!(p)) botch("p"); else -#define EXTRA 4 /* 4 bytes extra for MAGIC1s */ -#else -#define ASSERT(p) if (!(p)) abort (); else -#define EXTRA 0 -#endif /* rcheck */ - - -/* nextf[i] is free list of blocks of size 2**(i + 3) */ - -static struct mhead *nextf[30]; - -/* busy[i] is nonzero while allocation of block size i is in progress. */ - -static char busy[30]; - -/* Number of bytes of writable memory we can expect to be able to get */ -static unsigned int lim_data; - -/* Level number of warnings already issued. - 0 -- no warnings issued. - 1 -- 75% warning already issued. - 2 -- 85% warning already issued. -*/ -static int warnlevel; - -/* Function to call to issue a warning; - 0 means don't issue them. */ -static void (*warnfunction) (); - -/* nonzero once initial bunch of free blocks made */ -static int gotpool; - -char *_malloc_base; - -static void getpool (); - -/* Cause reinitialization based on job parameters; - also declare where the end of pure storage is. */ -void -malloc_init (start, warnfun) - char *start; - void (*warnfun) (); -{ - if (start) - data_space_start = start; - lim_data = 0; - warnlevel = 0; - warnfunction = warnfun; -} - -/* Return the maximum size to which MEM can be realloc'd - without actually requiring copying. 
*/ - -int -malloc_usable_size (mem) - char *mem; -{ - struct mhead *p - = (struct mhead *) (mem - ((sizeof (struct mhead) + 7) & ~7)); - int blocksize = 8 << p->mh_index; - - return blocksize - sizeof (struct mhead) - EXTRA; -} - -static void -morecore (nu) /* ask system for more memory */ - register int nu; /* size index to get more of */ -{ - char *sbrk (); - register char *cp; - register int nblks; - register unsigned int siz; - int oldmask; - -#ifdef BSD -#ifndef BSD4_1 - int newmask = -1; - /* Blocking these signals interferes with debugging, at least on BSD on - the HP 9000/300. */ -#ifdef SIGTRAP - newmask &= ~(1 << SIGTRAP); -#endif -#ifdef SIGILL - newmask &= ~(1 << SIGILL); -#endif -#ifdef SIGTSTP - newmask &= ~(1 << SIGTSTP); -#endif -#ifdef SIGSTOP - newmask &= ~(1 << SIGSTOP); -#endif - oldmask = sigsetmask (newmask); -#endif -#endif - - if (!data_space_start) - { - data_space_start = start_of_data (); - } - - if (lim_data == 0) - get_lim_data (); - - /* On initial startup, get two blocks of each size up to 1k bytes */ - if (!gotpool) - { getpool (); getpool (); gotpool = 1; } - - /* Find current end of memory and issue warning if getting near max */ - -#ifndef VMS - /* Maximum virtual memory on VMS is difficult to calculate since it - * depends on several dynmacially changing things. Also, alignment - * isn't that important. That is why much of the code here is ifdef'ed - * out for VMS systems. 
- */ - cp = sbrk (0); - siz = cp - data_space_start; - - if (warnfunction) - switch (warnlevel) - { - case 0: - if (siz > (lim_data / 4) * 3) - { - warnlevel++; - (*warnfunction) ("Warning: past 75% of memory limit"); - } - break; - case 1: - if (siz > (lim_data / 20) * 17) - { - warnlevel++; - (*warnfunction) ("Warning: past 85% of memory limit"); - } - break; - case 2: - if (siz > (lim_data / 20) * 19) - { - warnlevel++; - (*warnfunction) ("Warning: past 95% of memory limit"); - } - break; - } - - if ((int) cp & 0x3ff) /* land on 1K boundaries */ - sbrk (1024 - ((int) cp & 0x3ff)); -#endif /* not VMS */ - - /* Take at least 2k, and figure out how many blocks of the desired size - we're about to get */ - nblks = 1; - if ((siz = nu) < 8) - nblks = 1 << ((siz = 8) - nu); - - if ((cp = sbrk (1 << (siz + 3))) == (char *) -1) - { -#ifdef BSD -#ifndef BSD4_1 - sigsetmask (oldmask); -#endif -#endif - return; /* no more room! */ - } - malloc_sbrk_used = siz; - malloc_sbrk_unused = lim_data - siz; - -#ifndef VMS - if ((int) cp & 7) - { /* shouldn't happen, but just in case */ - cp = (char *) (((int) cp + 8) & ~7); - nblks--; - } -#endif /* not VMS */ - - /* save new header and link the nblks blocks together */ - nextf[nu] = (struct mhead *) cp; - siz = 1 << (nu + 3); - while (1) - { - ((struct mhead *) cp) -> mh_alloc = ISFREE; - ((struct mhead *) cp) -> mh_index = nu; - if (--nblks <= 0) break; - CHAIN ((struct mhead *) cp) = (struct mhead *) (cp + siz); - cp += siz; - } - CHAIN ((struct mhead *) cp) = 0; - -#ifdef BSD -#ifndef BSD4_1 - sigsetmask (oldmask); -#endif -#endif -} - -static void -getpool () -{ - register int nu; - char * sbrk (); - register char *cp = sbrk (0); - - if ((int) cp & 0x3ff) /* land on 1K boundaries */ - sbrk (1024 - ((int) cp & 0x3ff)); - - /* Record address of start of space allocated by malloc. 
*/ - if (_malloc_base == 0) - _malloc_base = cp; - - /* Get 2k of storage */ - - cp = sbrk (04000); - if (cp == (char *) -1) - return; - - /* Divide it into an initial 8-word block - plus one block of size 2**nu for nu = 3 ... 10. */ - - CHAIN (cp) = nextf[0]; - nextf[0] = (struct mhead *) cp; - ((struct mhead *) cp) -> mh_alloc = ISFREE; - ((struct mhead *) cp) -> mh_index = 0; - cp += 8; - - for (nu = 0; nu < 7; nu++) - { - CHAIN (cp) = nextf[nu]; - nextf[nu] = (struct mhead *) cp; - ((struct mhead *) cp) -> mh_alloc = ISFREE; - ((struct mhead *) cp) -> mh_index = nu; - cp += 8 << nu; - } -} - -char * -malloc (n) /* get a block */ - unsigned n; -{ - register struct mhead *p; - register unsigned int nbytes; - register int nunits = 0; - - /* Figure out how many bytes are required, rounding up to the nearest - multiple of 8, then figure out which nestf[] area to use. - Both the beginning of the header and the beginning of the - block should be on an eight byte boundary. */ - nbytes = (n + ((sizeof *p + 7) & ~7) + EXTRA + 7) & ~7; - { - register unsigned int shiftr = (nbytes - 1) >> 2; - - while (shiftr >>= 1) - nunits++; - } - - /* In case this is reentrant use of malloc from signal handler, - pick a block size that no other malloc level is currently - trying to allocate. That's the easiest harmless way not to - interfere with the other level of execution. */ - while (busy[nunits]) nunits++; - busy[nunits] = 1; - - /* If there are no blocks of the appropriate size, go get some */ - /* COULD SPLIT UP A LARGER BLOCK HERE ... 
ACT */ - if (nextf[nunits] == 0) - morecore (nunits); - - /* Get one block off the list, and set the new list head */ - if ((p = nextf[nunits]) == 0) - { - busy[nunits] = 0; - return 0; - } - nextf[nunits] = CHAIN (p); - busy[nunits] = 0; - - /* Check for free block clobbered */ - /* If not for this check, we would gobble a clobbered free chain ptr */ - /* and bomb out on the NEXT allocate of this size block */ - if (p -> mh_alloc != ISFREE || p -> mh_index != nunits) -#ifdef rcheck - botch ("block on free list clobbered"); -#else /* not rcheck */ - abort (); -#endif /* not rcheck */ - - /* Fill in the info, and if range checking, set up the magic numbers */ - p -> mh_alloc = ISALLOC; -#ifdef rcheck - p -> mh_nbytes = n; - p -> mh_magic4 = MAGIC4; - { - /* Get the location n after the beginning of the user's space. */ - register char *m = (char *) p + ((sizeof *p + 7) & ~7) + n; - - *m++ = MAGIC1, *m++ = MAGIC1, *m++ = MAGIC1, *m = MAGIC1; - } -#else /* not rcheck */ - p -> mh_size = n; -#endif /* not rcheck */ -#ifdef MSTATS - nmalloc[nunits]++; - nmal++; -#endif /* MSTATS */ - return (char *) p + ((sizeof *p + 7) & ~7); -} - -free (mem) - char *mem; -{ - register struct mhead *p; - { - register char *ap = mem; - - if (ap == 0) - return; - - p = (struct mhead *) (ap - ((sizeof *p + 7) & ~7)); - if (p -> mh_alloc == ISMEMALIGN) - { - ap -= p->mh_size; - p = (struct mhead *) (ap - ((sizeof *p + 7) & ~7)); - } - -#ifndef rcheck - if (p -> mh_alloc != ISALLOC) - abort (); - -#else rcheck - if (p -> mh_alloc != ISALLOC) - { - if (p -> mh_alloc == ISFREE) - botch ("free: Called with already freed block argument\n"); - else - botch ("free: Called with bad argument\n"); - } - - ASSERT (p -> mh_magic4 == MAGIC4); - ap += p -> mh_nbytes; - ASSERT (*ap++ == MAGIC1); ASSERT (*ap++ == MAGIC1); - ASSERT (*ap++ == MAGIC1); ASSERT (*ap == MAGIC1); -#endif /* rcheck */ - } - { - register int nunits = p -> mh_index; - - ASSERT (nunits <= 29); - p -> mh_alloc = ISFREE; - - /* 
Protect against signal handlers calling malloc. */ - busy[nunits] = 1; - /* Put this block on the free list. */ - CHAIN (p) = nextf[nunits]; - nextf[nunits] = p; - busy[nunits] = 0; - -#ifdef MSTATS - nmalloc[nunits]--; - nfre++; -#endif /* MSTATS */ - } -} - -char * -realloc (mem, n) - char *mem; - register unsigned n; -{ - register struct mhead *p; - register unsigned int tocopy; - register unsigned int nbytes; - register int nunits; - - if (mem == 0) - return malloc (n); - p = (struct mhead *) (mem - ((sizeof *p + 7) & ~7)); - nunits = p -> mh_index; - ASSERT (p -> mh_alloc == ISALLOC); -#ifdef rcheck - ASSERT (p -> mh_magic4 == MAGIC4); - { - register char *m = mem + (tocopy = p -> mh_nbytes); - ASSERT (*m++ == MAGIC1); ASSERT (*m++ == MAGIC1); - ASSERT (*m++ == MAGIC1); ASSERT (*m == MAGIC1); - } -#else /* not rcheck */ - if (p -> mh_index >= 13) - tocopy = (1 << (p -> mh_index + 3)) - ((sizeof *p + 7) & ~7); - else - tocopy = p -> mh_size; -#endif /* not rcheck */ - - /* See if desired size rounds to same power of 2 as actual size. */ - nbytes = (n + ((sizeof *p + 7) & ~7) + EXTRA + 7) & ~7; - - /* If ok, use the same block, just marking its size as changed. */ - if (nbytes > (4 << nunits) && nbytes <= (8 << nunits)) - { -#ifdef rcheck - register char *m = mem + tocopy; - *m++ = 0; *m++ = 0; *m++ = 0; *m++ = 0; - p-> mh_nbytes = n; - m = mem + n; - *m++ = MAGIC1; *m++ = MAGIC1; *m++ = MAGIC1; *m++ = MAGIC1; -#else /* not rcheck */ - p -> mh_size = n; -#endif /* not rcheck */ - return mem; - } - - if (n < tocopy) - tocopy = n; - { - register char *new; - - if ((new = malloc (n)) == 0) - return 0; - bcopy (mem, new, tocopy); - free (mem); - return new; - } -} - -/* This is in case something linked with Emacs calls calloc. 
*/ - -char * -calloc (num, size) - unsigned num, size; -{ - register char *mem; - - num *= size; - mem = malloc (num); - if (mem != 0) - bzero (mem, num); - return mem; -} - -#ifndef VMS - -char * -memalign (alignment, size) - unsigned alignment, size; -{ - register char *ptr = malloc (size + alignment); - register char *aligned; - register struct mhead *p; - - if (ptr == 0) - return 0; - /* If entire block has the desired alignment, just accept it. */ - if (((int) ptr & (alignment - 1)) == 0) - return ptr; - /* Otherwise, get address of byte in the block that has that alignment. */ - aligned = (char *) (((int) ptr + alignment - 1) & -alignment); - - /* Store a suitable indication of how to free the block, - so that free can find the true beginning of it. */ - p = (struct mhead *) (aligned - ((7 + sizeof (struct mhead)) & ~7)); - p -> mh_size = aligned - ptr; - p -> mh_alloc = ISMEMALIGN; - return aligned; -} - -#ifndef HPUX -/* This runs into trouble with getpagesize on HPUX. - Patching out seems cleaner than the ugly fix needed. */ -char * -valloc (size) -{ - return memalign (getpagesize (), size); -} -#endif /* not HPUX */ -#endif /* not VMS */ - -#ifdef MSTATS -/* Return statistics describing allocation of blocks of size 2**n. 
*/ - -struct mstats_value - { - int blocksize; - int nfree; - int nused; - }; - -struct mstats_value -malloc_stats (size) - int size; -{ - struct mstats_value v; - register int i; - register struct mhead *p; - - v.nfree = 0; - - if (size < 0 || size >= 30) - { - v.blocksize = 0; - v.nused = 0; - return v; - } - - v.blocksize = 1 << (size + 3); - v.nused = nmalloc[size]; - - for (p = nextf[size]; p; p = CHAIN (p)) - v.nfree++; - - return v; -} -int -malloc_mem_used () -{ - int i; - int size_used; - - size_used = 0; - - for (i = 0; i < 30; i++) - { - int allocation_size = 1 << (i + 3); - struct mhead *p; - - size_used += nmalloc[i] * allocation_size; - } - - return size_used; -} - -int -malloc_mem_free () -{ - int i; - int size_unused; - - size_unused = 0; - - for (i = 0; i < 30; i++) - { - int allocation_size = 1 << (i + 3); - struct mhead *p; - - for (p = nextf[i]; p ; p = CHAIN (p)) - size_unused += allocation_size; - } - - return size_unused; -} -#endif /* MSTATS */ - -/* - * This function returns the total number of bytes that the process - * will be allowed to allocate via the sbrk(2) system call. On - * BSD systems this is the total space allocatable to stack and - * data. On USG systems this is the data space only. - */ - -#ifdef USG - -get_lim_data () -{ - extern long ulimit (); - -#ifdef ULIMIT_BREAK_VALUE - lim_data = ULIMIT_BREAK_VALUE; -#else - lim_data = ulimit (3, 0); -#endif - - lim_data -= (long) data_space_start; -} - -#else /* not USG */ -#if defined (BSD4_1) || defined (VMS) - -get_lim_data () -{ - lim_data = vlimit (LIM_DATA, -1); -} - -#else /* not BSD4_1 and not VMS */ - -get_lim_data () -{ - struct rlimit XXrlimit; - - getrlimit (RLIMIT_DATA, &XXrlimit); -#ifdef RLIM_INFINITY - lim_data = XXrlimit.rlim_cur & RLIM_INFINITY; /* soft limit */ -#else - lim_data = XXrlimit.rlim_cur; /* soft limit */ -#endif -} - -#endif /* not BSD4_1 and not VMS */ -#endif /* not USG */ - -#ifdef VMS -/* There is a problem when dumping and restoring things on VMS. 
Calls - * to SBRK don't necessarily result in contiguous allocation. Dumping - * doesn't work when it isn't. Therefore, we make the initial - * allocation contiguous by allocating a big chunk, and do SBRKs from - * there. Once Emacs has dumped there is no reason to continue - * contiguous allocation, malloc doesn't depend on it. - * - * There is a further problem of using brk and sbrk while using VMS C - * run time library routines malloc, calloc, etc. The documentation - * says that this is a no-no, although I'm not sure why this would be - * a problem. In any case, we remove the necessity to call brk and - * sbrk, by calling calloc (to assure zero filled data) rather than - * sbrk. - * - * VMS_ALLOCATION_SIZE is the size of the allocation array. This - * should be larger than the malloc size before dumping. Making this - * too large will result in the startup procedure slowing down since - * it will require more space and time to map it in. - * - * The value for VMS_ALLOCATION_SIZE in the following define was determined - * by running emacs linked (and a large allocation) with the debugger and - * looking to see how much storage was used. The allocation was 201 pages, - * so I rounded it up to a power of two. - */ -#ifndef VMS_ALLOCATION_SIZE -#define VMS_ALLOCATION_SIZE (512*256) -#endif - -/* Use VMS RTL definitions */ -#undef sbrk -#undef brk -#undef malloc -int vms_out_initial = 0; -char vms_initial_buffer[VMS_ALLOCATION_SIZE]; -static char *vms_current_brk = &vms_initial_buffer; -static char *vms_end_brk = &vms_initial_buffer[VMS_ALLOCATION_SIZE-1]; - -#include <stdio.h> - -char * -sys_sbrk (incr) - int incr; -{ - char *sbrk(), *temp, *ptr; - - if (vms_out_initial) - { - /* out of initial allocation... 
*/ - if (!(temp = malloc (incr))) - temp = (char *) -1; - } - else - { - /* otherwise, go out of our area */ - ptr = vms_current_brk + incr; /* new current_brk */ - if (ptr <= vms_end_brk) - { - temp = vms_current_brk; - vms_current_brk = ptr; - } - else - { - vms_out_initial = 1; /* mark as out of initial allocation */ - if (!(temp = malloc (incr))) - temp = (char *) -1; - } - } - return temp; -} -#endif /* VMS */ diff --git a/gnu/libregex/test/fileregex.c b/gnu/libregex/test/fileregex.c deleted file mode 100644 index 2c27a0f5ddcd..000000000000 --- a/gnu/libregex/test/fileregex.c +++ /dev/null @@ -1,77 +0,0 @@ -#include <sys/types.h> -#include <stdio.h> -#include "regex.h" - -#define BYTEWIDTH 8 - -/* Sorry, but this is just a test program. */ -#define LINE_MAX 500 - -int -main (argc, argv) - int argc; - char *argv[]; -{ - FILE *f; - char *filename; - char pat[500]; /* Sorry for that maximum size, too. */ - char line[LINE_MAX]; - struct re_pattern_buffer buf; - char fastmap[(1 << BYTEWIDTH)]; - const char *compile_ret; - unsigned lineno = 1; - unsigned nfound = 0; - - /* Actually, it might be useful to allow the data file to be standard - input, and to specify the pattern on the command line. 
*/ - if (argc != 2) - { - fprintf (stderr, "Usage: %s <filename>.\n", argv[0]); - exit (1); - } - - filename = argv[1]; - f = fopen (filename, "r"); - if (f == NULL) - perror (filename); - - buf.allocated = 0; - buf.buffer = NULL; - buf.fastmap = fastmap; - - printf ("Pattern = ", pat); - gets (pat); - - if (feof (stdin)) - { - putchar ('\n'); - exit (0); - } - - compile_ret = re_compile_pattern (pat, strlen (pat), &buf); - if (compile_ret != NULL) - { - fprintf (stderr, "%s: %s\n", pat, compile_ret); - exit (1); - } - - while (fgets (line, LINE_MAX, f) != NULL) - { - size_t len = strlen (line); - struct re_registers regs; - int search_ret - = re_search_2 (&buf, NULL, 0, line, len, 0, len, &regs, len); - - if (search_ret == -2) - { - fprintf (stderr, "%s:%d: re_search failed.\n", filename, lineno); - exit (1); - } - - nfound += search_ret != -1; - lineno++; - } - - printf ("Matches found: %u (out of %u lines).\n", nfound, lineno - 1); - return 0; -} diff --git a/gnu/libregex/test/g++malloc.c b/gnu/libregex/test/g++malloc.c deleted file mode 100644 index d55ce45643d3..000000000000 --- a/gnu/libregex/test/g++malloc.c +++ /dev/null @@ -1,1288 +0,0 @@ -#define inline - -/* -Copyright (C) 1989 Free Software Foundation - written by Doug Lea (dl@oswego.edu) - -This file is part of GNU CC. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY. No author or distributor -accepts responsibility to anyone for the consequences of using it -or for whether it serves any particular purpose or works at all, -unless he says so in writing. Refer to the GNU CC General Public -License for full details. - -Everyone is granted permission to copy, modify and redistribute -GNU CC, but only under the conditions described in the -GNU CC General Public License. A copy of this license is -supposed to have been given to you along with GNU CC so you -can know your rights and responsibilities. It should be in a -file named COPYING. 
Among other things, the copyright notice -and this notice must be preserved on all copies. -*/ - - - -#ifndef NO_LIBGXX_MALLOC /* ignore whole file otherwise */ - -/* compile with -DMALLOC_STATS to collect statistics */ -/* collecting statistics slows down malloc by at least 15% */ - -#ifdef MALLOC_STATS -#define UPDATE_STATS(ARGS) {ARGS;} -#else -#define UPDATE_STATS(ARGS) -#endif - -/* History - - - Tue Jan 16 04:54:27 1990 Doug Lea (dl at g.oswego.edu) - - version 1 released in libg++ - - Sun Jan 21 05:52:47 1990 Doug Lea (dl at g.oswego.edu) - - bins are now own struct for, sanity. - - new victim search strategy: scan up and consolidate. - Both faster and less fragmentation. - - refined when to scan bins for consolidation, via consollink, etc. - - realloc: always try to expand chunk, avoiding some fragmentation. - - changed a few inlines into macros - - hardwired SBRK_UNIT to 4096 for uniformity across systems - - Tue Mar 20 14:18:23 1990 Doug Lea (dl at g.oswego.edu) - - calloc and cfree now correctly parameterized. - - Sun Apr 1 10:00:48 1990 Doug Lea (dl at g.oswego.edu) - - added memalign and valloc. - - Sun Jun 24 05:46:48 1990 Doug Lea (dl at g.oswego.edu) - - #include gepagesize.h only ifndef sun - cache pagesize after first call - - Wed Jul 25 08:35:19 1990 Doug Lea (dl at g.oswego.edu) - - No longer rely on a `designated victim': - - 1. It sometimes caused splits of large chunks - when smaller ones would do, leading to - bad worst-case fragmentation. - - 2. Scanning through the av array fast anyway, - so the overhead isn't worth it. - - To compensate, several other minor changes: - - 1. Unusable chunks are checked for consolidation during - searches inside bins, better distributing chunks - across bins. - - 2. Chunks are returned when found in malloc_find_space, - rather than finishing cleaning everything up, to - avoid wasted iterations due to (1). -*/ - -/* - A version of malloc/free/realloc tuned for C++ applications. 
- - Here's what you probably want to know first: - - In various tests, this appears to be about as fast as, - and usually substantially less memory-wasteful than BSD/GNUemacs malloc. - - Generally, it is slower (by perhaps 20%) than bsd-style malloc - only when bsd malloc would waste a great deal of space in - fragmented blocks, which this malloc recovers; or when, by - chance or design, nearly all requests are near the bsd malloc - power-of-2 allocation bin boundaries, and as many chunks are - used as are allocated. - - It uses more space than bsd malloc only when, again by chance - or design, only bsdmalloc bin-sized requests are malloced, or when - little dynamic space is malloced, since this malloc may grab larger - chunks from the system at a time than bsd. - - In other words, this malloc seems generally superior to bsd - except perhaps for programs that are specially tuned to - deal with bsdmalloc's characteristics. But even here, the - performance differences are slight. - - - This malloc, like any other, is a compromised design. - - - Chunks of memory are maintained using a `boundary tag' method as - described in e.g., Knuth or Standish. This means that the size of - the chunk is stored both in the front of the chunk and at the end. - This makes consolidating fragmented chunks into bigger chunks very fast. - The size field is also used to hold bits representing whether a - chunk is free or in use. - - Malloced chunks have space overhead of 8 bytes: The preceding - and trailing size fields. When they are freed, the list pointer - fields are also needed. - - Available chunks are kept in doubly linked lists. The lists are - maintained in an array of bins using a power-of-two method, except - that instead of 32 bins (one for each 1 << i), there are 128: each - power of two is split in quarters. The use of very fine bin sizes - closely approximates the use of one bin per actually used size, - without necessitating the overhead of locating such bins. 
It is - especially desirable in common C++ applications where large numbers - of identically-sized blocks are malloced/freed in some dynamic - manner, and then later are all freed. The finer bin sizes make - finding blocks fast, with little wasted overallocation. The - consolidation methods ensure that once the collection of blocks is - no longer useful, fragments are gathered into bigger chunks awaiting new - roles. - - The bins av[i] serve as heads of the lists. Bins contain a dummy - header for the chunk lists, and a `dirty' field used to indicate - whether the list may need to be scanned for consolidation. - - On allocation, the bin corresponding to the request size is - scanned, and if there is a chunk with size >= requested, it - is split, if too big, and used. Chunks on the list which are - too small are examined for consolidation during this traversal. - - If no chunk exists in the list bigger bins are scanned in search of - a victim. - - If no victim can be found, then smaller bins are examined for - consolidation in order to construct a victim. - - Finally, if consolidation fails to come up with a usable chunk, - more space is obtained from the system. - - After a split, the remainder is placed on - the back of the appropriate bin list. (All freed chunks are placed - on fronts of lists. All remaindered or consolidated chunks are - placed on the rear. Correspondingly, searching within a bin - starts at the front, but finding victims is from the back. All - of this approximates the effect of having 2 kinds of lists per - bin: returned chunks vs unallocated chunks, but without the overhead - of maintaining 2 lists.) - - Deallocation (free) consists only of placing the chunk on - a list. - - Reallocation proceeds in the usual way. If a chunk can be extended, - it is, else a malloc-copy-free sequence is taken. 
- - memalign requests more than enough space from malloc, finds a - spot within that chunk that meets the alignment request, and - then possibly frees the leading and trailing space. Overreliance - on memalign is a sure way to fragment space. - - - Some other implementation matters: - - 8 byte alignment is currently hardwired into the design. Calling - memalign will return a chunk that is both 8-byte aligned, and - meets the requested alignment. - - The basic overhead of a used chunk is 8 bytes: 4 at the front and - 4 at the end. - - When a chunk is free, 8 additional bytes are needed for free list - pointers. Thus, the minimum allocatable size is 16 bytes. - - The existence of front and back overhead permits some reasonably - effective fence-bashing checks: The front and back fields must - be identical. This is checked only within free() and realloc(). - The checks are fast enough to be made non-optional. - - The overwriting of parts of freed memory with the freelist pointers - can also be very effective (albeit in an annoying way) in helping - users track down dangling pointers. - - User overwriting of freed space will often result in crashes - within malloc or free. - - These routines are also tuned to C++ in that free(0) is a noop and - a failed malloc automatically calls (*new_handler)(). - - malloc(0) returns a pointer to something of the minimum allocatable size. - - Additional memory is gathered from the system (via sbrk) in a - way that allows chunks obtained across different sbrk calls to - be consolidated, but does not require contiguous memory: Thus, - it should be safe to intersperse mallocs with other sbrk calls. - - This malloc is NOT designed to work in multiprocessing applications. - No semaphores or other concurrency control are provided to ensure - that multiple malloc or free calls don't run at the same time, - which could be disasterous. - - VERY heavy use of inlines is made, for clarity. 
If this malloc - is ported via a compiler without inlining capabilities, all - inlines should be transformed into macros -- making them non-inline - makes malloc at least twice as slow. - - -*/ - - -/* preliminaries */ - -#ifdef __cplusplus -#include <stdio.h> -#else -#include "//usr/include/stdio.h" /* needed for error reporting */ -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef USG -extern void* memset(void*, int, int); -extern void* memcpy(void*, const void*, int); -/*inline void bzero(void* s, int l) { memset(s, 0, l); }*/ -#else -/*extern void bzero(void*, unsigned int);*/ -#endif - -/*extern void bcopy(void*, void*, unsigned int);*/ - -extern void* sbrk(unsigned int); - -/* Put this in instead of commmented out stuff above. */ -#define bcopy(s,d,n) memcpy((d),(s),(n)) -#define bcmp(s1,s2,n) memcmp((s1),(s2),(n)) -#define bzero(s,n) memset((s),0,(n)) - - -#ifdef __GNUC__ -extern volatile void abort(); -#else -extern void abort(); -#endif - -#ifdef __cplusplus -}; /* end of extern "C" */ -#endif - - -/* A good multiple to call sbrk with */ - -#define SBRK_UNIT 4096 - - - -/* how to die on detected error */ - -#ifdef __GNUC__ -static volatile void malloc_user_error() -#else -static void malloc_user_error() -#endif -{ - fputs("malloc/free/realloc: clobbered space detected\n", stderr); abort(); -} - - - -/* Basic overhead for each malloc'ed chunk */ - - -struct malloc_chunk -{ - unsigned int size; /* Size in bytes, including overhead. */ - /* Or'ed with INUSE if in use. */ - - struct malloc_chunk* fd; /* double links -- used only if free. 
*/ - struct malloc_chunk* bk; - -}; - -typedef struct malloc_chunk* mchunkptr; - -struct malloc_bin -{ - struct malloc_chunk hd; /* dummy list header */ - unsigned int dirty; /* True if maybe consolidatable */ - /* Wasting a word here makes */ - /* sizeof(bin) a power of 2, */ - /* which makes size2bin() faster */ -}; - -typedef struct malloc_bin* mbinptr; - - -/* sizes, alignments */ - - -#define SIZE_SZ (sizeof(unsigned int)) -#define MALLOC_MIN_OVERHEAD (SIZE_SZ + SIZE_SZ) -#define MALLOC_ALIGN_MASK (MALLOC_MIN_OVERHEAD - 1) - -#define MINSIZE (sizeof(struct malloc_chunk) + SIZE_SZ) /* MUST == 16! */ - - -/* pad request bytes into a usable size */ - -static inline unsigned int request2size(unsigned int request) -{ - return (request == 0) ? MINSIZE : - ((request + MALLOC_MIN_OVERHEAD + MALLOC_ALIGN_MASK) - & ~(MALLOC_ALIGN_MASK)); -} - - -static inline int aligned_OK(void* m) -{ - return ((unsigned int)(m) & (MALLOC_ALIGN_MASK)) == 0; -} - - -/* size field or'd with INUSE when in use */ -#define INUSE 0x1 - - - -/* the bins, initialized to have null double linked lists */ - -#define MAXBIN 120 /* 1 more than needed for 32 bit addresses */ - -#define FIRSTBIN (&(av[0])) - -static struct malloc_bin av[MAXBIN] = -{ - { { 0, &(av[0].hd), &(av[0].hd) }, 0 }, - { { 0, &(av[1].hd), &(av[1].hd) }, 0 }, - { { 0, &(av[2].hd), &(av[2].hd) }, 0 }, - { { 0, &(av[3].hd), &(av[3].hd) }, 0 }, - { { 0, &(av[4].hd), &(av[4].hd) }, 0 }, - { { 0, &(av[5].hd), &(av[5].hd) }, 0 }, - { { 0, &(av[6].hd), &(av[6].hd) }, 0 }, - { { 0, &(av[7].hd), &(av[7].hd) }, 0 }, - { { 0, &(av[8].hd), &(av[8].hd) }, 0 }, - { { 0, &(av[9].hd), &(av[9].hd) }, 0 }, - - { { 0, &(av[10].hd), &(av[10].hd) }, 0 }, - { { 0, &(av[11].hd), &(av[11].hd) }, 0 }, - { { 0, &(av[12].hd), &(av[12].hd) }, 0 }, - { { 0, &(av[13].hd), &(av[13].hd) }, 0 }, - { { 0, &(av[14].hd), &(av[14].hd) }, 0 }, - { { 0, &(av[15].hd), &(av[15].hd) }, 0 }, - { { 0, &(av[16].hd), &(av[16].hd) }, 0 }, - { { 0, &(av[17].hd), &(av[17].hd) 
}, 0 }, - { { 0, &(av[18].hd), &(av[18].hd) }, 0 }, - { { 0, &(av[19].hd), &(av[19].hd) }, 0 }, - - { { 0, &(av[20].hd), &(av[20].hd) }, 0 }, - { { 0, &(av[21].hd), &(av[21].hd) }, 0 }, - { { 0, &(av[22].hd), &(av[22].hd) }, 0 }, - { { 0, &(av[23].hd), &(av[23].hd) }, 0 }, - { { 0, &(av[24].hd), &(av[24].hd) }, 0 }, - { { 0, &(av[25].hd), &(av[25].hd) }, 0 }, - { { 0, &(av[26].hd), &(av[26].hd) }, 0 }, - { { 0, &(av[27].hd), &(av[27].hd) }, 0 }, - { { 0, &(av[28].hd), &(av[28].hd) }, 0 }, - { { 0, &(av[29].hd), &(av[29].hd) }, 0 }, - - { { 0, &(av[30].hd), &(av[30].hd) }, 0 }, - { { 0, &(av[31].hd), &(av[31].hd) }, 0 }, - { { 0, &(av[32].hd), &(av[32].hd) }, 0 }, - { { 0, &(av[33].hd), &(av[33].hd) }, 0 }, - { { 0, &(av[34].hd), &(av[34].hd) }, 0 }, - { { 0, &(av[35].hd), &(av[35].hd) }, 0 }, - { { 0, &(av[36].hd), &(av[36].hd) }, 0 }, - { { 0, &(av[37].hd), &(av[37].hd) }, 0 }, - { { 0, &(av[38].hd), &(av[38].hd) }, 0 }, - { { 0, &(av[39].hd), &(av[39].hd) }, 0 }, - - { { 0, &(av[40].hd), &(av[40].hd) }, 0 }, - { { 0, &(av[41].hd), &(av[41].hd) }, 0 }, - { { 0, &(av[42].hd), &(av[42].hd) }, 0 }, - { { 0, &(av[43].hd), &(av[43].hd) }, 0 }, - { { 0, &(av[44].hd), &(av[44].hd) }, 0 }, - { { 0, &(av[45].hd), &(av[45].hd) }, 0 }, - { { 0, &(av[46].hd), &(av[46].hd) }, 0 }, - { { 0, &(av[47].hd), &(av[47].hd) }, 0 }, - { { 0, &(av[48].hd), &(av[48].hd) }, 0 }, - { { 0, &(av[49].hd), &(av[49].hd) }, 0 }, - - { { 0, &(av[50].hd), &(av[50].hd) }, 0 }, - { { 0, &(av[51].hd), &(av[51].hd) }, 0 }, - { { 0, &(av[52].hd), &(av[52].hd) }, 0 }, - { { 0, &(av[53].hd), &(av[53].hd) }, 0 }, - { { 0, &(av[54].hd), &(av[54].hd) }, 0 }, - { { 0, &(av[55].hd), &(av[55].hd) }, 0 }, - { { 0, &(av[56].hd), &(av[56].hd) }, 0 }, - { { 0, &(av[57].hd), &(av[57].hd) }, 0 }, - { { 0, &(av[58].hd), &(av[58].hd) }, 0 }, - { { 0, &(av[59].hd), &(av[59].hd) }, 0 }, - - { { 0, &(av[60].hd), &(av[60].hd) }, 0 }, - { { 0, &(av[61].hd), &(av[61].hd) }, 0 }, - { { 0, &(av[62].hd), &(av[62].hd) }, 0 }, - 
{ { 0, &(av[63].hd), &(av[63].hd) }, 0 }, - { { 0, &(av[64].hd), &(av[64].hd) }, 0 }, - { { 0, &(av[65].hd), &(av[65].hd) }, 0 }, - { { 0, &(av[66].hd), &(av[66].hd) }, 0 }, - { { 0, &(av[67].hd), &(av[67].hd) }, 0 }, - { { 0, &(av[68].hd), &(av[68].hd) }, 0 }, - { { 0, &(av[69].hd), &(av[69].hd) }, 0 }, - - { { 0, &(av[70].hd), &(av[70].hd) }, 0 }, - { { 0, &(av[71].hd), &(av[71].hd) }, 0 }, - { { 0, &(av[72].hd), &(av[72].hd) }, 0 }, - { { 0, &(av[73].hd), &(av[73].hd) }, 0 }, - { { 0, &(av[74].hd), &(av[74].hd) }, 0 }, - { { 0, &(av[75].hd), &(av[75].hd) }, 0 }, - { { 0, &(av[76].hd), &(av[76].hd) }, 0 }, - { { 0, &(av[77].hd), &(av[77].hd) }, 0 }, - { { 0, &(av[78].hd), &(av[78].hd) }, 0 }, - { { 0, &(av[79].hd), &(av[79].hd) }, 0 }, - - { { 0, &(av[80].hd), &(av[80].hd) }, 0 }, - { { 0, &(av[81].hd), &(av[81].hd) }, 0 }, - { { 0, &(av[82].hd), &(av[82].hd) }, 0 }, - { { 0, &(av[83].hd), &(av[83].hd) }, 0 }, - { { 0, &(av[84].hd), &(av[84].hd) }, 0 }, - { { 0, &(av[85].hd), &(av[85].hd) }, 0 }, - { { 0, &(av[86].hd), &(av[86].hd) }, 0 }, - { { 0, &(av[87].hd), &(av[87].hd) }, 0 }, - { { 0, &(av[88].hd), &(av[88].hd) }, 0 }, - { { 0, &(av[89].hd), &(av[89].hd) }, 0 }, - - { { 0, &(av[90].hd), &(av[90].hd) }, 0 }, - { { 0, &(av[91].hd), &(av[91].hd) }, 0 }, - { { 0, &(av[92].hd), &(av[92].hd) }, 0 }, - { { 0, &(av[93].hd), &(av[93].hd) }, 0 }, - { { 0, &(av[94].hd), &(av[94].hd) }, 0 }, - { { 0, &(av[95].hd), &(av[95].hd) }, 0 }, - { { 0, &(av[96].hd), &(av[96].hd) }, 0 }, - { { 0, &(av[97].hd), &(av[97].hd) }, 0 }, - { { 0, &(av[98].hd), &(av[98].hd) }, 0 }, - { { 0, &(av[99].hd), &(av[99].hd) }, 0 }, - - { { 0, &(av[100].hd), &(av[100].hd) }, 0 }, - { { 0, &(av[101].hd), &(av[101].hd) }, 0 }, - { { 0, &(av[102].hd), &(av[102].hd) }, 0 }, - { { 0, &(av[103].hd), &(av[103].hd) }, 0 }, - { { 0, &(av[104].hd), &(av[104].hd) }, 0 }, - { { 0, &(av[105].hd), &(av[105].hd) }, 0 }, - { { 0, &(av[106].hd), &(av[106].hd) }, 0 }, - { { 0, &(av[107].hd), &(av[107].hd) }, 0 
}, - { { 0, &(av[108].hd), &(av[108].hd) }, 0 }, - { { 0, &(av[109].hd), &(av[109].hd) }, 0 }, - - { { 0, &(av[110].hd), &(av[110].hd) }, 0 }, - { { 0, &(av[111].hd), &(av[111].hd) }, 0 }, - { { 0, &(av[112].hd), &(av[112].hd) }, 0 }, - { { 0, &(av[113].hd), &(av[113].hd) }, 0 }, - { { 0, &(av[114].hd), &(av[114].hd) }, 0 }, - { { 0, &(av[115].hd), &(av[115].hd) }, 0 }, - { { 0, &(av[116].hd), &(av[116].hd) }, 0 }, - { { 0, &(av[117].hd), &(av[117].hd) }, 0 }, - { { 0, &(av[118].hd), &(av[118].hd) }, 0 }, - { { 0, &(av[119].hd), &(av[119].hd) }, 0 } -}; - -/* - indexing into bins -*/ - -static inline mbinptr size2bin(unsigned int sz) -{ - mbinptr b = av; - while (sz >= (MINSIZE * 2)) { b += 4; sz >>= 1; } /* find power of 2 */ - b += (sz - MINSIZE) >> 2; /* find quadrant */ - return b; -} - - - -/* counts maintained if MALLOC_STATS defined */ - -#ifdef MALLOC_STATS - -static unsigned int sbrked_mem; -static unsigned int requested_mem; -static unsigned int malloced_mem; -static unsigned int freed_mem; -static unsigned int max_used_mem; - -static unsigned int n_sbrks; -static unsigned int n_mallocs; -static unsigned int n_frees; -static unsigned int n_reallocs; -static unsigned int n_reallocs_with_copy; -static unsigned int n_avail; -static unsigned int max_inuse; - -static unsigned int n_malloc_chunks; -static unsigned int n_malloc_bins; - -static unsigned int n_split; -static unsigned int n_consol; - - -static void do_malloc_stats(const mchunkptr p) -{ - ++n_mallocs; - if ((n_mallocs-n_frees) > max_inuse) - max_inuse = n_mallocs - n_frees; - malloced_mem += (p->size & ~(INUSE)); - if (malloced_mem - freed_mem > max_used_mem) - max_used_mem = malloced_mem - freed_mem; -} - -static void do_free_stats(const mchunkptr p) -{ - ++n_frees; - freed_mem += (p->size & ~(INUSE)); -} - -#endif - - - -/* Utilities needed below for memalign */ -/* This is redundant with libg++ support, but not if used stand-alone */ - -static unsigned int gcd(unsigned int a, unsigned int b) -{ - 
unsigned int tmp; - - if (b > a) - { - tmp = a; a = b; b = tmp; - } - for(;;) - { - if (b == 0) - return a; - else if (b == 1) - return b; - else - { - tmp = b; - b = a % b; - a = tmp; - } - } -} - -static inline unsigned int lcm(unsigned int x, unsigned int y) -{ - return x / gcd(x, y) * y; -} - - - -/* maintaining INUSE via size field */ - - -#define inuse(p) ((p)->size & INUSE) -#define set_inuse(p) ((p)->size |= INUSE) -#define clear_inuse(b) ((p)->size &= ~INUSE) - - -/* operations on malloc_chunk addresses */ - - -/* return ptr to next physical malloc_chunk */ - -#define next_chunk(p) ((mchunkptr)((char*)(p) + (p)->size)) - -/* return ptr to previous physical malloc_chunk */ - -#define prev_chunk(p) ((mchunkptr)((char*)(p)-((((int*)(p))[-1]) & ~(INUSE)))) - -/* place size at front and back of chunk */ - - -static inline void set_size(mchunkptr p, unsigned int sz) -{ - p->size = *((int*)((char*)(p) + sz - SIZE_SZ)) = sz; -} - - - - -/* conversion from malloc headers to user pointers, and back */ - -static inline void* chunk2mem(mchunkptr p) -{ - void *mem; - set_inuse(p); -mem = (void*)((char*)(p) + SIZE_SZ); - return mem; -} - -/* xxxx my own */ -mchunkptr sanity_check(void* mem) -{ - mchunkptr p = (mchunkptr)((char*)(mem) - SIZE_SZ); - - /* a quick sanity check */ - unsigned int sz = p->size & ~(INUSE); - if (p->size == sz || sz != *((int*)((char*)(p) + sz - SIZE_SZ))) - malloc_user_error(); - - return p; -} - - - - -static inline mchunkptr mem2chunk(void* mem) -{ - mchunkptr p = (mchunkptr)((char*)(mem) - SIZE_SZ); - - /* a quick sanity check */ - unsigned int sz = p->size & ~(INUSE); - if (p->size == sz || sz != *((int*)((char*)(p) + sz - SIZE_SZ))) - malloc_user_error(); - - p->size = sz; /* clears INUSE */ - return p; -} - - - -/* maintaining bins & pointers */ - - -/* maximum bin actually used */ - -static mbinptr malloc_maxbin = FIRSTBIN; - - -/* operations on lists inside bins */ - - -/* take a chunk off a list */ - -static inline void 
unlink(mchunkptr p) -{ - mchunkptr b = p->bk; - mchunkptr f = p->fd; - - f->bk = b; b->fd = f; - - UPDATE_STATS (--n_avail); -} - - - -/* split a chunk and place on the back of a list */ - -static inline void split(mchunkptr p, unsigned int offset) -{ - unsigned int room = p->size - offset; - if (room >= MINSIZE) - { - mbinptr bn = size2bin(room); /* new bin */ - mchunkptr h = &(bn->hd); /* its head */ - mchunkptr b = h->bk; /* old back element */ - mchunkptr t = (mchunkptr)((char*)(p) + offset); /* remaindered chunk */ - - /* set size */ - t->size = *((int*)((char*)(t) + room - SIZE_SZ)) = room; - - /* link up */ - t->bk = b; t->fd = h; h->bk = b->fd = t; - - /* adjust maxbin (h == b means was empty) */ - if (h == b && bn > malloc_maxbin) malloc_maxbin = bn; - - /* adjust size of chunk to be returned */ - p->size = *((int*)((char*)(p) + offset - SIZE_SZ)) = offset; - - UPDATE_STATS ((++n_split, ++n_avail)); - } -} - - - -/* place a consolidated chunk on the back of a list */ -/* like above, except no split */ - -static inline void consollink(mchunkptr p) -{ - mbinptr bn = size2bin(p->size); - mchunkptr h = &(bn->hd); - mchunkptr b = h->bk; - - p->bk = b; p->fd = h; h->bk = b->fd = p; - - if (h == b && bn > malloc_maxbin) malloc_maxbin = bn; - - UPDATE_STATS(++n_avail); -} - - -/* place a freed chunk on the front of a list */ - -static inline void frontlink(mchunkptr p) -{ - mbinptr bn = size2bin(p->size); - mchunkptr h = &(bn->hd); - mchunkptr f = h->fd; - - p->bk = h; p->fd = f; f->bk = h->fd = p; - - if (h == f && bn > malloc_maxbin) malloc_maxbin = bn; - - bn->dirty = 1; - - UPDATE_STATS(++n_avail); -} - - - -/* Dealing with sbrk */ - - -/* To link consecutive sbrk regions when possible */ - -static int* last_sbrk_end; - - -/* who to call when sbrk returns failure */ - -#ifndef NO_NEW_HANDLER -typedef volatile void (*vfp)(); -#ifdef __cplusplus -extern "C" vfp __new_handler; -#else -extern vfp __new_handler; -#endif -#endif - -static mchunkptr 
malloc_from_sys(unsigned nb) -{ - mchunkptr p; - unsigned int sbrk_size; - int* ip; - - /* Minimally, we need to pad with enough space */ - /* to place dummy size/use fields to ends if needed */ - - sbrk_size = ((nb + SBRK_UNIT - 1 + SIZE_SZ + SIZE_SZ) - / SBRK_UNIT) * SBRK_UNIT; - - ip = (int*)(sbrk(sbrk_size)); - if ((char*)ip == (char*)(-1)) /* sbrk returns -1 on failure */ - { -#ifndef NO_NEW_HANDLER - (*__new_handler) (); -#endif - return 0; - } - - UPDATE_STATS ((++n_sbrks, sbrked_mem += sbrk_size)); - - - if (last_sbrk_end != &ip[-1]) - { - /* It's either first time through or someone else called sbrk. */ - /* Arrange end-markers at front & back */ - - /* Shouldn't be necessary, but better to be safe */ - while (!aligned_OK(ip)) { ++ip; sbrk_size -= SIZE_SZ; } - - - /* Mark the front as in use to prevent merging. */ - /* Note we can get away with only 1 word, not MINSIZE overhead here */ - - *ip++ = SIZE_SZ | INUSE; - - p = (mchunkptr)ip; - set_size(p,sbrk_size - (SIZE_SZ + SIZE_SZ)); - - } - else - { - mchunkptr l; - - /* We can safely make the header start at end of prev sbrked chunk. */ - /* We will still have space left at the end from a previous call */ - /* to place the end marker, below */ - - p = (mchunkptr)(last_sbrk_end); - set_size(p, sbrk_size); - - - /* Even better, maybe we can merge with last fragment: */ - - l = prev_chunk(p); - if (!inuse(l)) - { - unlink(l); - set_size(l, p->size + l->size); - p = l; - } - - } - - /* mark the end of sbrked space as in use to prevent merging */ - - last_sbrk_end = (int*)((char*)p + p->size); - *last_sbrk_end = SIZE_SZ | INUSE; - - UPDATE_STATS((++n_avail, ++n_malloc_chunks)); - - /* make it safe to unlink in malloc */ - UPDATE_STATS(++n_avail); - p->fd = p->bk = p; - - return p; -} - - - -/* Consolidate dirty bins. 
*/ -/* Stop if found a chunk big enough to satisfy current malloc request */ - -/* (It requires much less bookkeeping to consolidate entire bins */ -/* at once than to keep records of which chunks might be */ -/* consolidatable. So long as the lists are short, which we */ -/* try to ensure via small bin ranges, there is little wasted effort.) */ - -static mchunkptr malloc_find_space(unsigned int nb) -{ - mbinptr b; - - /* first, re-adjust max used bin */ - - while (malloc_maxbin >= FIRSTBIN && - malloc_maxbin->hd.bk == &(malloc_maxbin->hd)) - { - malloc_maxbin->dirty = 0; - --malloc_maxbin; - } - - for (b = malloc_maxbin; b >= FIRSTBIN; --b) - { - UPDATE_STATS(++n_malloc_bins); - - if (b->dirty) - { - mchunkptr h = &(b->hd); /* head of list */ - mchunkptr p = h->fd; /* chunk traverser */ - - while (p != h) - { - mchunkptr nextp = p->fd; /* save, in case of relinks */ - int consolidated = 0; /* only unlink/relink if consolidated */ - - mchunkptr t; - - UPDATE_STATS(++n_malloc_chunks); - - while (!inuse(t = prev_chunk(p))) /* consolidate backward */ - { - if (!consolidated) { consolidated = 1; unlink(p); } - if (t == nextp) nextp = t->fd; - unlink(t); - set_size(t, t->size + p->size); - p = t; - UPDATE_STATS (++n_consol); - } - - while (!inuse(t = next_chunk(p))) /* consolidate forward */ - { - if (!consolidated) { consolidated = 1; unlink(p); } - if (t == nextp) nextp = t->fd; - unlink(t); - set_size(p, p->size + t->size); - UPDATE_STATS (++n_consol); - } - - if (consolidated) - { - if (p->size >= nb) - { - /* make it safe to unlink in malloc */ - UPDATE_STATS(++n_avail); - p->fd = p->bk = p; - return p; - } - else - consollink(p); - } - - p = nextp; - - } - - b->dirty = 0; - - } - } - - /* nothing available - sbrk some more */ - - return malloc_from_sys(nb); -} - - - -/* Finally, the user-level functions */ - -void* malloc(unsigned int bytes) -{ - unsigned int nb = request2size(bytes); /* padded request size */ - mbinptr b = size2bin(nb); /* corresponding bin */ - 
mchunkptr hd = &(b->hd); /* head of its list */ - mchunkptr p = hd->fd; /* chunk traverser */ - - UPDATE_STATS((requested_mem+=bytes, ++n_malloc_bins)); - - /* Try a (near) exact match in own bin */ - /* clean out unusable but consolidatable chunks in bin while traversing */ - - while (p != hd) - { - UPDATE_STATS(++n_malloc_chunks); - if (p->size >= nb) - goto found; - else /* try to consolidate; same code as malloc_find_space */ - { - mchunkptr nextp = p->fd; /* save, in case of relinks */ - int consolidated = 0; /* only unlink/relink if consolidated */ - - mchunkptr t; - - while (!inuse(t = prev_chunk(p))) /* consolidate backward */ - { - if (!consolidated) { consolidated = 1; unlink(p); } - if (t == nextp) nextp = t->fd; - unlink(t); - set_size(t, t->size + p->size); - p = t; - UPDATE_STATS (++n_consol); - } - - while (!inuse(t = next_chunk(p))) /* consolidate forward */ - { - if (!consolidated) { consolidated = 1; unlink(p); } - if (t == nextp) nextp = t->fd; - unlink(t); - set_size(p, p->size + t->size); - UPDATE_STATS (++n_consol); - } - - if (consolidated) - { - if (p->size >= nb) - { - /* make it safe to unlink again below */ - UPDATE_STATS(++n_avail); - p->fd = p->bk = p; - goto found; - } - else - consollink(p); - } - - p = nextp; - - } - } - - b->dirty = 0; /* true if got here */ - - /* Scan bigger bins for a victim */ - - while (++b <= malloc_maxbin) - { - UPDATE_STATS(++n_malloc_bins); - if ((p = b->hd.bk) != &(b->hd)) /* no need to check size */ - goto found; - } - - /* Consolidate or sbrk */ - - p = malloc_find_space(nb); - - if (p == 0) return 0; /* allocation failure */ - - found: /* Use what we found */ - - unlink(p); - split(p, nb); - UPDATE_STATS(do_malloc_stats(p)); - return chunk2mem(p); -} - - - - -void free(void* mem) -{ - if (mem != 0) - { - mchunkptr p = mem2chunk(mem); - UPDATE_STATS(do_free_stats(p)); - frontlink(p); - } -} - - -void* calloc(unsigned int n, unsigned int elem_size) -{ - unsigned int sz = n * elem_size; - void* p = 
malloc(sz); - bzero(p, sz); - return p; -}; - -/* This is here for compatibility with older systems */ -void cfree(void *mem) -{ - free(mem); -} - - -unsigned int malloc_usable_size(void* mem) -{ - if (mem == 0) - return 0; - else - { - mchunkptr p = (mchunkptr)((char*)(mem) - SIZE_SZ); - unsigned int sz = p->size & ~(INUSE); - if (p->size == sz || sz != *((int*)((char*)(p) + sz - SIZE_SZ))) - return 0; - else - return sz - MALLOC_MIN_OVERHEAD; - } -} - - - -void* realloc(void* mem, unsigned int bytes) -{ - if (mem == 0) - return malloc(bytes); - else - { - unsigned int nb = request2size(bytes); - mchunkptr p = mem2chunk(mem); - unsigned int oldsize = p->size; - int room; - mchunkptr nxt; - - UPDATE_STATS((++n_reallocs, requested_mem += bytes-oldsize)); - - /* try to expand (even if already big enough), to clean up chunk */ - - while (!inuse(nxt = next_chunk(p))) - { - UPDATE_STATS ((malloced_mem += nxt->size, ++n_consol)); - unlink(nxt); - set_size(p, p->size + nxt->size); - } - - room = p->size - nb; - if (room >= 0) - { - split(p, nb); - UPDATE_STATS(malloced_mem -= room); - return chunk2mem(p); - } - else /* do the obvious */ - { - void* newmem; - set_inuse(p); /* don't let malloc consolidate us yet! 
*/ - newmem = malloc(nb); - bcopy(mem, newmem, oldsize - SIZE_SZ); - free(mem); - UPDATE_STATS(++n_reallocs_with_copy); - return newmem; - } - } -} - - - -/* return a pointer to space with at least the alignment requested */ - -void* memalign(unsigned int alignment, unsigned int bytes) -{ - mchunkptr p; - unsigned int nb = request2size(bytes); - - /* find an alignment that both we and the user can live with: */ - /* least common multiple guarantees mutual happiness */ - unsigned int align = lcm(alignment, MALLOC_MIN_OVERHEAD); - unsigned int mask = align - 1; - - /* call malloc with worst case padding to hit alignment; */ - /* we will give back extra */ - - unsigned int req = nb + align + MINSIZE; - void* m = malloc(req); - - if (m == 0) return m; - - p = mem2chunk(m); - - /* keep statistics on track */ - - UPDATE_STATS(--n_mallocs); - UPDATE_STATS(malloced_mem -= p->size); - UPDATE_STATS(requested_mem -= req); - UPDATE_STATS(requested_mem += bytes); - - if (((int)(m) & (mask)) != 0) /* misaligned */ - { - - /* find an aligned spot inside chunk */ - - mchunkptr ap = (mchunkptr)(( ((int)(m) + mask) & -align) - SIZE_SZ); - - unsigned int gap = (unsigned int)(ap) - (unsigned int)(p); - unsigned int room; - - /* we need to give back leading space in a chunk of at least MINSIZE */ - - if (gap < MINSIZE) - { - /* This works since align >= MINSIZE */ - /* and we've malloc'd enough total room */ - - ap = (mchunkptr)( (int)(ap) + align ); - gap += align; - } - - if (gap + nb > p->size) /* can't happen unless chunk sizes corrupted */ - malloc_user_error(); - - room = p->size - gap; - - /* give back leader */ - set_size(p, gap); - consollink(p); - - /* use the rest */ - p = ap; - set_size(p, room); - } - - /* also give back spare room at the end */ - - split(p, nb); - UPDATE_STATS(do_malloc_stats(p)); - return chunk2mem(p); - -} - -#ifndef sun -#include "getpagesize.h" -#endif - -static unsigned int malloc_pagesize = 0; - -void* valloc(unsigned int bytes) -{ - if 
(malloc_pagesize == 0) malloc_pagesize = getpagesize(); - return memalign (malloc_pagesize, bytes); -} - - -void malloc_stats() -{ -#ifndef MALLOC_STATS -} -#else - int i; - mchunkptr p; - double nm = (double)(n_mallocs + n_reallocs); - - fprintf(stderr, "\nmalloc statistics\n\n"); - - if (n_mallocs != 0) - fprintf(stderr, "requests = %10u total size = %10u\tave = %10u\n", - n_mallocs, requested_mem, requested_mem/n_mallocs); - - if (n_mallocs != 0) - fprintf(stderr, "mallocs = %10u total size = %10u\tave = %10u\n", - n_mallocs, malloced_mem, malloced_mem/n_mallocs); - - if (n_frees != 0) - fprintf(stderr, "frees = %10u total size = %10u\tave = %10u\n", - n_frees, freed_mem, freed_mem/n_frees); - - if (n_mallocs-n_frees != 0) - fprintf(stderr, "in use = %10u total size = %10u\tave = %10u\n", - n_mallocs-n_frees, malloced_mem-freed_mem, - (malloced_mem-freed_mem) / (n_mallocs-n_frees)); - - if (max_inuse != 0) - fprintf(stderr, "max in use= %10u total size = %10u\tave = %10u\n", - max_inuse, max_used_mem, max_used_mem / max_inuse); - - if (n_avail != 0) - fprintf(stderr, "available = %10u total size = %10u\tave = %10u\n", - n_avail, sbrked_mem - (malloced_mem-freed_mem), - (sbrked_mem - (malloced_mem-freed_mem)) / n_avail); - - if (n_sbrks != 0) - fprintf(stderr, "sbrks = %10u total size = %10u\tave = %10u\n\n", - n_sbrks, sbrked_mem, sbrked_mem/ n_sbrks); - - if (n_reallocs != 0) - fprintf(stderr, "reallocs = %10u with copy = %10u\n\n", - n_reallocs, n_reallocs_with_copy); - - - if (nm != 0) - { - fprintf(stderr, "chunks scanned per malloc = %6.3f\n", - n_malloc_chunks / nm); - fprintf(stderr, "bins scanned per malloc = %6.3f\n", - n_malloc_bins / nm); - fprintf(stderr, "splits per malloc = %6.3f\n", - n_split / nm); - fprintf(stderr, "consolidations per malloc = %6.3f\n", - n_consol / nm); - } - - fprintf(stderr, "\nfree chunks:\n"); - for (i = 0; i < MAXBIN; ++i) - { - p = av[i].hd.fd; - if (p != &(av[i].hd)) - { - unsigned int count = 1; - unsigned int sz = 
p->size; - for (p = p->fd; p != &(av[i].hd); p = p->fd) - { - if (p->size == sz) - ++count; - else - { - fprintf(stderr, "\tsize = %10u count = %5u\n", sz, count); - count = 1; - sz = p->size; - } - } - - fprintf(stderr, "\tsize = %10u count = %5u\n", sz, count); - - } - } -} -#endif /* MALLOC_STATS */ - -#endif /* NO_LIBGXX_MALLOC */ - - diff --git a/gnu/libregex/test/getpagesize.h b/gnu/libregex/test/getpagesize.h deleted file mode 100644 index 32adae61efa2..000000000000 --- a/gnu/libregex/test/getpagesize.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifdef BSD -#ifndef BSD4_1 -#define HAVE_GETPAGESIZE -#endif -#endif - -#ifndef HAVE_GETPAGESIZE - -#include <sys/param.h> - -#ifdef EXEC_PAGESIZE -#define getpagesize() EXEC_PAGESIZE -#else -#ifdef NBPG -#define getpagesize() NBPG * CLSIZE -#ifndef CLSIZE -#define CLSIZE 1 -#endif /* no CLSIZE */ -#else /* no NBPG */ -#define getpagesize() NBPC -#endif /* no NBPG */ -#endif /* no EXEC_PAGESIZE */ - -#endif /* not HAVE_GETPAGESIZE */ - diff --git a/gnu/libregex/test/iregex.c b/gnu/libregex/test/iregex.c deleted file mode 100644 index 2346d441fcdb..000000000000 --- a/gnu/libregex/test/iregex.c +++ /dev/null @@ -1,164 +0,0 @@ -/* Main program for interactive testing. For maximum output, compile - this and regex.c with -DDEBUG. */ - -#include <stdio.h> -#include <sys/types.h> -#include "regex.h" - -/* Don't bother to guess about <string.h> vs <strings.h>, etc. */ -extern int strlen (); - -#define BYTEWIDTH 8 - -extern void printchar (); -extern char upcase[]; - -static void scanstring (); -static void print_regs (); - -int -main (argc, argv) - int argc; - char **argv; -{ - int i; - struct re_pattern_buffer buf; - char fastmap[(1 << BYTEWIDTH)]; - - /* Allow a command argument to specify the style of syntax. You can - use the `syntax' program to decode integer syntax values. 
*/ - if (argc > 1) - re_set_syntax (atoi (argv[1])); - - buf.allocated = 0; - buf.buffer = NULL; - buf.fastmap = fastmap; - buf.translate = upcase; - - for (;;) - { - char pat[500], str[500]; - struct re_registers regs; - - /* Some C compilers don't like `char pat[500] = ""'. */ - pat[0] = 0; - - printf ("Pattern (%s) = ", pat); - gets (pat); - scanstring (pat); - - if (feof (stdin)) - { - putchar ('\n'); - exit (0); - } - - if (*pat) - { - re_compile_pattern (pat, strlen (pat), &buf); - re_compile_fastmap (&buf); -#ifdef DEBUG - print_compiled_pattern (&buf); -#endif - } - - printf ("String = "); - gets (str); /* Now read the string to match against */ - scanstring (str); - - i = re_match (&buf, str, strlen (str), 0, &regs); - printf ("Match value %d.\t", i); - if (i >= 0) - print_regs (regs); - putchar ('\n'); - - i = re_search (&buf, str, strlen (str), 0, strlen (str), &regs); - printf ("Search value %d.\t", i); - if (i >= 0) - print_regs (regs); - putchar ('\n'); - } - - /* We never get here, but what the heck. */ - return 0; -} - -void -scanstring (s) - char *s; -{ - char *write = s; - - while (*s != '\0') - { - if (*s == '\\') - { - s++; - - switch (*s) - { - case '\0': - break; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - *write = *s++ - '0'; - - if ('0' <= *s && *s <= '9') - { - *write = (*write << 3) + (*s++ - '0'); - if ('0' <= *s && *s <= '9') - *write = (*write << 3) + (*s++ - '0'); - } - write++; - break; - - case 'n': - *write++ = '\n'; - s++; - break; - - case 't': - *write++ = '\t'; - s++; - break; - - default: - *write++ = *s++; - break; - } - } - else - *write++ = *s++; - } - - *write++ = '\0'; -} - -/* Print REGS in human-readable form. */ - -void -print_regs (regs) - struct re_registers regs; -{ - int i, end; - - printf ("Registers: "); - - if (regs.num_regs == 0 || regs.start[0] == -1) - { - printf ("(none)"); - } - else - { - /* Find the last register pair that matched. 
*/ - for (end = regs.num_regs - 1; end >= 0; end--) - if (regs.start[end] != -1) - break; - - printf ("[%d ", regs.start[0]); - for (i = 1; i <= end; i++) - printf ("(%d %d) ", regs.start[i], regs.end[i]); - printf ("%d]", regs.end[0]); - } -} diff --git a/gnu/libregex/test/main.c b/gnu/libregex/test/main.c deleted file mode 100644 index 28ae31528579..000000000000 --- a/gnu/libregex/test/main.c +++ /dev/null @@ -1,49 +0,0 @@ -/* Main routine for running various tests. Meant only to be linked with - all the auxiliary test source files, with `test' undefined. */ - -#include "test.h" - -test_type t = all_test; - - -/* Use this to run the tests we've thought of. */ - -int -main () -{ - switch (t) - { - case all_test: - test_regress (); - test_others (); - test_posix_basic (); - test_posix_extended (); - test_posix_interface (); - break; - - case other_test: - test_others (); - break; - - case posix_basic_test: - test_posix_basic (); - break; - - case posix_extended_test: - test_posix_extended (); - break; - - case posix_interface_test: - test_posix_interface (); - break; - - case regress_test: - test_regress (); - break; - - default: - fprintf (stderr, "Unknown test %d.\n", t); - } - - return 0; -} diff --git a/gnu/libregex/test/malloc-test.c b/gnu/libregex/test/malloc-test.c deleted file mode 100644 index 7e27a15a8905..000000000000 --- a/gnu/libregex/test/malloc-test.c +++ /dev/null @@ -1,47 +0,0 @@ - - -typedef struct { - unsigned *bits; - unsigned size; -} bits_list_type; - -#define BYTEWIDTH 8 -#define NULL 0 - -#define BITS_BLOCK_SIZE (sizeof (unsigned) * BYTEWIDTH) -#define BITS_BLOCK(position) ((position) / BITS_BLOCK_SIZE) -#define BITS_MASK(position) (1 << ((position) % BITS_BLOCK_SIZE)) - -static unsigned -init_bits_list (bits_list_ptr) - bits_list_type *bits_list_ptr; -{ - bits_list_ptr->bits = NULL; - bits_list_ptr->bits = (unsigned *) malloc (sizeof (unsigned)); - - if (bits_list_ptr->bits == NULL) - return 0; - - bits_list_ptr->bits[0] = (unsigned)0; - 
bits_list_ptr->size = BITS_BLOCK_SIZE; - - return 1; -} - - -main() -{ - bits_list_type dummy; - bits_list_type dummy_1; - bits_list_type dummy_2; - bits_list_type dummy_3; - - init_bits_list (&dummy); -printf("init 1\n"); - init_bits_list (&dummy_1); -printf("init 2\n"); - init_bits_list (&dummy_2); -printf("init 3\n"); - init_bits_list (&dummy_3); -printf("init 4\n"); -} diff --git a/gnu/libregex/test/other.c b/gnu/libregex/test/other.c deleted file mode 100644 index d2ceb3844883..000000000000 --- a/gnu/libregex/test/other.c +++ /dev/null @@ -1,503 +0,0 @@ -/* other.c: test (not exhaustively) non-POSIX regular expressions. */ - -#include "test.h" - -void -test_others () -{ - struct re_registers regs; - - printf ("\nStarting non-POSIX tests.\n"); - t = other_test; - - test_should_match = true; - - /* The big question: does the group participate in the match, or match - the empty string? */ - re_set_syntax (RE_NO_BK_PARENS); - test_match ("(a*)*ab", "ab"); - TEST_REGISTERS ("(a*)*ab", "ab", 0, 2, 0, 0, -1, -1); - test_match ("(a*)*", ""); - TEST_REGISTERS ("(a*)*ab", "ab", 0, 0, 0, 0, -1, -1); - - /* This tests finding the highest and lowest active registers. */ - test_match ("(a(b)c(d(e)f)g)h(i(j)k(l(m)n)o)\\1\\2\\3\\4\\5\\6\\7\\8", - "abcdefghijklmnoabcdefgbdefeijklmnojlmnm"); - - /* Test that \< and \> match at the beginning and end of the string. */ - test_match ("\\<abc\\>", "abc"); - - /* May as well test \` and \' while we're at it. */ - test_match ("\\`abc\\'", "abc"); - -#if 0 - /* Test backreferencing and the fastmap -- which doesn't work. */ - test_fastmap ("(a)*\\1", "a", 0, 0); -#endif - - /* But at least we shouldn't search improperly. 
*/ - test_search_return (-1, "(a)\\1", ""); - - re_set_syntax (RE_SYNTAX_EMACS); - - MATCH_SELF("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - MATCH_SELF ("a^"); - MATCH_SELF ("a^b"); - MATCH_SELF ("$a"); - MATCH_SELF ("a$b"); - - re_set_syntax (RE_BACKSLASH_ESCAPE_IN_LISTS); - test_match ("[\\^a]", "a"); - test_match ("[\\^a]", "^"); - - /* These op characters should be ordinary if RE_CONTEXT_INVALID_OPS - isn't set. */ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_BRACES | RE_INTERVALS - | RE_NO_BK_PARENS); - MATCH_SELF ("*"); - test_match ("a|*", "*"); - test_match ("(*)", "*"); - - MATCH_SELF ("+"); - test_match ("a|+", "+"); - test_match ("(+)", "+"); - - MATCH_SELF ("?"); - test_match ("a|?", "?"); - test_match ("(?)", "?"); - - MATCH_SELF ("{1}"); - test_match ("a|{1}", "a"); - test_match ("a|{1}", "{1}"); - test_match ("({1})", "{1}"); - - test_match ("\\{", "{"); - - - re_set_syntax (RE_LIMITED_OPS); - MATCH_SELF ("|"); - MATCH_SELF ("a|"); - MATCH_SELF ("a|"); - MATCH_SELF ("a||"); - MATCH_SELF ("a||"); - MATCH_SELF ("(|)"); - - re_set_syntax (RE_SYNTAX_EMACS); - TEST_SEARCH ("^a", "b\na", 0, 3); - TEST_SEARCH ("b$", "b\na", 0, 3); - -#if 0 - /* Newline is no longer special for anchors (16 Sep 92). --karl */ - test_match_2 ("a\n^b", "a", "\nb"); - test_match_2 ("a$\nb", "a\n", "b"); -#endif - - /* Test grouping. 
*/ - re_set_syntax (RE_NO_BK_PARENS); - - test_match ("()", ""); - test_fastmap ("()", "", 0, 0); - TEST_REGISTERS ("()", "", 0, 0, 0, 0, -1, -1); - - test_match ("((((((((()))))))))", ""); - test_fastmap ("((((((((()))))))))", "", 0, 0); - test_match ("a()b", "ab"); - TEST_REGISTERS ("a()b", "ab", 0, 2, 1, 1, -1, -1); - - test_match ("(((((((((())))))))))", ""); - test_fastmap ("(((((((((())))))))))", "", 0, 0); - - test_match ("()*", ""); - TEST_REGISTERS ("()*", "", 0, 0, 0, 0, -1, -1); /* empty string */ - test_match ("(())*", ""); - - re_set_syntax (RE_CONTEXT_INDEP_OPS); - test_match ("*", ""); - - re_set_syntax (RE_INTERVALS | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES); - test_match ("{1}", ""); /* Should remain an interval. */ - MATCH_SELF ("{1"); /* Not a valid interval. */ - - re_set_syntax (RE_NEWLINE_ALT); - test_match ("a\nb", "a"); - test_match ("a\nb", "b"); - - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS); - test_match ("^a", "a"); - test_match ("(^a)", "a"); - test_match ("(a|^b)", "b"); - test_match ("a$", "a"); - test_match ("(a$)", "a"); - test_match ("a$|b", "a"); - - /* You should be able to have empty alternatives if RE_NO_EMPTY_ALTS - isn't set. 
*/ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS); - - test_match ("|", ""); - test_match ("^|a", ""); - test_match ("^|a", "a"); - test_match ("a|", ""); - test_match ("a|", "a"); - test_match ("a|$", ""); - test_match ("a|$", "a"); - test_match ("a||b", "a"); - test_match ("a||b", ""); - test_match ("a||b", "b"); - test_match ("(|a)", ""); - test_match ("(|a)", "a"); - test_match ("(a|)", ""); - test_match ("(a|)", "a"); - - TEST_SEARCH ("a|$", "xa", 0, 2); - TEST_SEARCH ("a|$", "x", 0, 1); - TEST_SEARCH ("$|b", "x", 0, 1); - TEST_SEARCH ("$|b", "xb", 0, 2); - TEST_SEARCH ("c(a|$)", "xca", 0, 3); - TEST_SEARCH ("c(a|$)", "xc", 0, 2); - TEST_SEARCH ("c($|b)", "xcb", 0, 3); - TEST_SEARCH ("c($|b)", "xc", 0, 2); - TEST_SEARCH ("c($|b$)", "xcb", 0, 3); - TEST_SEARCH ("c($|b$)", "xc", 0, 2); - TEST_SEARCH ("c(a$|$)", "xca", 0, 3); - TEST_SEARCH ("c(a$|$)", "xc", 0, 2); - TEST_SEARCH ("(a$|b$)|$", "x", 0, 1); - TEST_SEARCH ("(a$|b$)|$", "xa", 0, 2); - TEST_SEARCH ("(a$|b$)|$", "xb", 0, 2); - TEST_SEARCH ("(a$|$)|c$", "x", 0, 1); - TEST_SEARCH ("(a$|$)|c$", "xa", 0, 2); - TEST_SEARCH ("(a$|$)|c$", "xc", 0, 2); - TEST_SEARCH ("($|b$)|c$", "x", 0, 1); - TEST_SEARCH ("($|b$)|c$", "xb", 0, 2); - TEST_SEARCH ("($|b$)|c$", "xc", 0, 2); - TEST_SEARCH ("c$|(a$|$)", "x", 0, 1); - TEST_SEARCH ("c$|(a$|$)", "xa", 0, 2); - TEST_SEARCH ("c$|(a$|$)", "xc", 0, 2); - TEST_SEARCH ("c$|($|b$)", "x", 0, 1); - TEST_SEARCH ("c$|($|b$)", "xb", 0, 2); - TEST_SEARCH ("c$|($|b$)", "xc", 0, 2); - TEST_SEARCH ("$|(a$|b$)", "x", 0, 1); - TEST_SEARCH ("$|(a$|b$)", "xa", 0, 2); - TEST_SEARCH ("$|(a$|b$)", "xb", 0, 2); - TEST_SEARCH ("c(a$|b$)|$", "x", 0, 1); - TEST_SEARCH ("c(a$|b$)|$", "xca", 0, 3); - TEST_SEARCH ("c(a$|b$)|$", "xcb", 0, 3); - TEST_SEARCH ("c(a$|$)|d$", "xc", 0, 2); - TEST_SEARCH ("c(a$|$)|d$", "xca", 0, 3); - TEST_SEARCH ("c(a$|$)|d$", "xd", 0, 2); - TEST_SEARCH ("c($|b$)|d$", "xc", 0, 2); - TEST_SEARCH ("c($|b$)|d$", "xcb", 0, 3); - TEST_SEARCH ("c($|b$)|d$", "xd", 0, 2); - 
TEST_SEARCH ("d(c$|e((a$|$)))", "xdc", 0, 3); - TEST_SEARCH ("d(c$|e((a$|$)))", "xde", 0, 3); - TEST_SEARCH ("d(c$|e((a$|$)))", "xdea", 0, 4); - TEST_SEARCH ("d(c$|e(($|b$)))", "xdc", 0, 3); - TEST_SEARCH ("d(c$|e(($|b$)))", "xde", 0, 3); - TEST_SEARCH ("d(c$|e(($|b$)))", "xdeb", 0, 4); - TEST_SEARCH ("d($|e((a$|b$)))", "xd", 0, 2); - TEST_SEARCH ("d($|e((a$|b$)))", "xdea", 0, 4); - TEST_SEARCH ("d($|e((a$|b$)))", "xdeb", 0, 4); - TEST_SEARCH ("a(b$|c$)|$", "x", 0, 1); - TEST_SEARCH ("a(b$|c$)|$", "xab", 0, 3); - TEST_SEARCH ("a(b$|c$)|$", "xac", 0, 3); - TEST_SEARCH ("a(b$|$)|d$", "xa", 0, 2); - TEST_SEARCH ("a(b$|$)|d$", "xab", 0, 3); - TEST_SEARCH ("a(b$|$)|d$", "xd", 0, 2); - TEST_SEARCH ("a($|c$)|d$", "xa", 0, 2); - TEST_SEARCH ("a($|c$)|d$", "xac", 0, 3); - TEST_SEARCH ("a($|c$)|d$", "xd", 0, 2); - TEST_SEARCH ("d$|a(b$|$)", "xd", 0, 2); - TEST_SEARCH ("d$|a(b$|$)", "xa", 0, 2); - TEST_SEARCH ("d$|a(b$|$)", "xab", 0, 3); - TEST_SEARCH ("d$|a($|c$)", "xd", 0, 2); - TEST_SEARCH ("d$|a($|c$)", "xa", 0, 2); - TEST_SEARCH ("d$|a($|c$)", "xac", 0, 3); - TEST_SEARCH ("$|a(b$|c$)", "x", 0, 1); - TEST_SEARCH ("$|a(b$|c$)", "xab", 0, 3); - TEST_SEARCH ("$|a(b$|c$)", "xac", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "xab", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "xac", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "xd", 0, 2); - TEST_SEARCH ("(a)(b$|$)|d$", "xa", 0, 2); - TEST_SEARCH ("(a)(b$|$)|d$", "xab", 0, 3); - TEST_SEARCH ("(a)(b$|$)|d$", "xd", 0, 2); - TEST_SEARCH ("(a)($|c$)|d$", "xa", 0, 2); - TEST_SEARCH ("(a)($|c$)|d$", "xac", 0, 3); - TEST_SEARCH ("(a)($|c$)|d$", "xd", 0, 2); - TEST_SEARCH ("d$|(a)(b$|$)", "xd", 0, 2); - TEST_SEARCH ("d$|(a)(b$|$)", "xa", 0, 2); - TEST_SEARCH ("d$|(a)(b$|$)", "xab", 0, 3); - TEST_SEARCH ("d$|(a)($|c$)", "xd", 0, 2); - TEST_SEARCH ("d$|(a)($|c$)", "xa", 0, 2); - TEST_SEARCH ("d$|(a)($|c$)", "xac", 0, 3); - TEST_SEARCH ("$|(a)(b$|c$)", "x", 0, 1); - TEST_SEARCH ("$|(a)(b$|c$)", "xab", 0, 3); - TEST_SEARCH ("$|(a)(b$|c$)", "xac", 0, 3); - 
TEST_SEARCH ("d$|(c$|(a$|$))", "x", 0, 1); - TEST_SEARCH ("d$|(c$|(a$|$))", "xd", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|$))", "xc", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|$))", "xa", 0, 2); - TEST_SEARCH ("d$|(c$|($|b$))", "x", 0, 1); - TEST_SEARCH ("d$|(c$|($|b$))", "xd", 0, 2); - TEST_SEARCH ("d$|(c$|($|b$))", "xc", 0, 2); - TEST_SEARCH ("d$|(c$|($|b$))", "xb", 0, 2); - TEST_SEARCH ("d$|($|(a$|b$))", "x", 0, 1); - TEST_SEARCH ("d$|($|(a$|b$))", "xd", 0, 2); - TEST_SEARCH ("d$|($|(a$|b$))", "xa", 0, 2); - TEST_SEARCH ("d$|($|(a$|b$))", "xb", 0, 2); - TEST_SEARCH ("$|(c$|(a$|b$))", "x", 0, 1); - TEST_SEARCH ("$|(c$|(a$|b$))", "xc", 0, 2); - TEST_SEARCH ("$|(c$|(a$|b$))", "xa", 0, 2); - TEST_SEARCH ("$|(c$|(a$|b$))", "xb", 0, 2); - TEST_SEARCH ("d$|c(a$|$)", "xd", 0, 2); - TEST_SEARCH ("d$|c(a$|$)", "xc", 0, 2); - TEST_SEARCH ("d$|c(a$|$)", "xca", 0, 3); - TEST_SEARCH ("d$|c($|b$)", "xd", 0, 2); - TEST_SEARCH ("d$|c($|b$)", "xc", 0, 2); - TEST_SEARCH ("d$|c($|b$)", "xcb", 0, 3); - TEST_SEARCH ("$|c(a$|b$)", "x", 0, 1); - TEST_SEARCH ("$|c(a$|b$)", "xca", 0, 3); - TEST_SEARCH ("$|c(a$|b$)", "xcb", 0, 3); - TEST_SEARCH ("e(d$|c((a$|$)))", "xed", 0, 3); - TEST_SEARCH ("e(d$|c((a$|$)))", "xec", 0, 3); - TEST_SEARCH ("e(d$|c((a$|$)))", "xeca", 0, 3); - TEST_SEARCH ("e(d$|c(($|b$)))", "xed", 0, 3); - TEST_SEARCH ("e(d$|c(($|b$)))", "xec", 0, 3); - TEST_SEARCH ("e(d$|c(($|b$)))", "xecb", 0, 4); - TEST_SEARCH ("e($|c((a$|b$)))", "xe", 0, 2); - TEST_SEARCH ("e($|c((a$|b$)))", "xeca", 0, 4); - TEST_SEARCH ("e($|c((a$|b$)))", "xecb", 0, 4); - TEST_SEARCH ("ed$|(c((a$|$)))", "xed", 0, 3); - TEST_SEARCH ("ed$|(c((a$|$)))", "xc", 0, 2); - TEST_SEARCH ("ed$|(c((a$|$)))", "xca", 0, 3); - TEST_SEARCH ("ed$|(c(($|b$)))", "xed", 0, 3); - TEST_SEARCH ("ed$|(c(($|b$)))", "xc", 0, 2); - TEST_SEARCH ("ed$|(c(($|b$)))", "xcb", 0, 3); - TEST_SEARCH ("$|(c((a$|b$)))", "x", 0, 1); - TEST_SEARCH ("$|(c((a$|b$)))", "xca", 0, 3); - TEST_SEARCH ("$|(c((a$|b$)))", "xcb", 0, 3); - TEST_SEARCH 
("d$|($|(a|b)$)", "x", 0, 1); - TEST_SEARCH ("d$|($|(a|b)$)", "xa", 0, 2); - TEST_SEARCH ("d$|($|(a|b)$)", "xb", 0, 2); - TEST_SEARCH ("$|(c$|(a|b)$)", "x", 0, 1); - TEST_SEARCH ("$|(c$|(a|b)$)", "xc", 0, 2); - TEST_SEARCH ("$|(c$|(a|b)$)", "xa", 0, 2); - TEST_SEARCH ("$|(c$|(a|b)$)", "xb", 0, 2); - - re_set_syntax (0); - test_match ("[^\n]", "a"); - test_match ("[^a]", "\n"); - - TEST_SEARCH ("^a", "b\na", 0, 3); - TEST_SEARCH ("b$", "b\na", 0, 3); - - test_case_fold ("[!-`]", "A"); - test_case_fold ("[!-`]", "a"); - - re_set_syntax (RE_CONTEXT_INDEP_OPS | RE_NO_BK_VBAR | RE_NO_BK_PARENS - | RE_NO_BK_BRACES | RE_INTERVALS); - valid_nonposix_pattern ("()^a"); - valid_nonposix_pattern ("()\\1^a"); - - /* Per Cederqvist (cedar@lysator.liu.se) bug. */ - - re_set_syntax (RE_SYNTAX_EMACS); - - /* One `a' before the \n and 638 a's after it. */ - test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "a\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - - /* No a's before the \n and 639 a's after it. 
*/ - test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - - /* One `a' before the \n and 639 a's after it. */ - test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "a\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - - /* No a's before the \n and 640 a's after it. 
*/ - test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS); - TEST_SEARCH ("^(^a)", "ab", 0, 2); - TEST_SEARCH ("(a$)$", "ba", 0, 2); - test_match ("a|$b", "$b"); - - /* Mike's curiosity item. */ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS); - test_all_registers ("(foo|foobar)(foo|bar)*\\1(foo|bar)*", - "foobarfoobar", "", - 0, 12, 0, 3, 3, 6, 9, 12, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1); - - /* Another one from Mike. */ - test_match ("(foo|foobarfoo)(bar)*", "foobarfoo"); - - /* And another. */ - test_match("(foo|foobar)(bar|barfoo)?\\1", "foobarfoobar"); - - re_set_syntax (RE_NO_BK_PARENS | RE_INTERVALS | RE_NO_BK_VBAR - | RE_NO_BK_BRACES); /* xx get new ones from ext.*/ - test_match ("((a{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)*", "bb"); - test_all_registers ("((a{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)*", "", "bb", - 0, 2, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1); - - test_match ("((a+?*{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)", "b"); - test_all_registers ("((a+?*{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)", "", "b", - 0, 1, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1); - - /* Valid anchoring. */ - /* See generic_test.c and extended_test.c for more search - tests. 
xx Not sure all these tests are represented in the - search tests. */ - - re_set_syntax (RE_NO_BK_PARENS | RE_NO_BK_VBAR); - valid_nonposix_pattern - ("(((((((((((((((((((((((((((((((((^a)))))))))))))))))))))))))))))))))"); - valid_nonposix_pattern - ("(((((((((((((((((((((((((((((((((a$)))))))))))))))))))))))))))))))))"); - valid_nonposix_pattern ("\\b\\B\\<\\>\\`\\'^a"); - valid_nonposix_pattern ("a$\\b\\B\\<\\>\\`\\'"); - valid_nonposix_pattern ("(^a)"); - valid_nonposix_pattern ("(a$)"); - valid_nonposix_pattern ("(^a)b"); - valid_nonposix_pattern ("b(a$)"); - valid_nonposix_pattern ("(^a|^b)c"); - valid_nonposix_pattern ("c(a$|b$)"); - valid_nonposix_pattern ("(^a|^b)|^c"); - valid_nonposix_pattern ("(a$|b$)|c$"); - valid_nonposix_pattern ("^c|(^a|^b)"); - valid_nonposix_pattern ("c$|(a$|b$)"); - valid_nonposix_pattern ("(^a|^b)c|^d"); - valid_nonposix_pattern ("c(a$|b$)|d$"); - valid_nonposix_pattern ("(((^a|^b))c|^d)e"); - valid_nonposix_pattern ("(c((a|b))|d)e$"); - valid_nonposix_pattern ("^d(c|e((a|b)))"); - valid_nonposix_pattern ("d(c$|e((a$|b$)))"); - valid_nonposix_pattern ("(((^a|^b))c)|^de"); - valid_nonposix_pattern ("(((a|b))c$)|de$"); - - valid_nonposix_pattern ("((a$)$)$"); - valid_nonposix_pattern ("^(^(^a))"); - - valid_nonposix_pattern ("^de|^(c((a|b)))"); - valid_nonposix_pattern ("^de|(^c((a|b)))"); - valid_nonposix_pattern ("de$|(c((a|b)$))"); - valid_nonposix_pattern ("de$|(c((a|b))$)"); - valid_nonposix_pattern ("de$|(c((a|b)))$"); - - valid_nonposix_pattern ("^a(b|c)|^d"); - valid_nonposix_pattern ("a(b$|c$)|d$"); - valid_nonposix_pattern ("^d|^a(b|c)"); - valid_nonposix_pattern ("d$|a(b$|c$)"); - valid_nonposix_pattern ("^d|^(b|c)a"); - valid_nonposix_pattern ("d$|(b|c)a$"); - valid_nonposix_pattern ("^(a)(b|c)|^d"); - valid_nonposix_pattern ("(a)(b|c)$|d$"); - valid_nonposix_pattern ("(^a)(b|c)|^d"); - valid_nonposix_pattern ("(a)(b$|c$)|d$"); - valid_nonposix_pattern ("^d|^(b|c)(a)"); - valid_nonposix_pattern ("d$|(b|c)(a)$"); - 
valid_nonposix_pattern ("^d|(^b|^c)(a)"); - valid_nonposix_pattern ("d$|(b|c)(a$)"); - valid_nonposix_pattern ("^d|^(a)(b|c)"); - valid_nonposix_pattern ("^d|(^a)(b|c)"); - valid_nonposix_pattern ("d$|(a)(b$|c$)"); - valid_nonposix_pattern ("((^a|^b)|^c)|^d"); - valid_nonposix_pattern ("d$|(c$|(a$|b$))"); - - - /* Tests shouldn't match. */ - test_should_match = false; - - /* Test that RE_CONTEXT_INVALID_OPS has precedence over - RE_CONTEXT_INDEP_OPS. */ - - re_set_syntax (RE_CONTEXT_INDEP_OPS | RE_CONTEXT_INVALID_OPS - | RE_NO_BK_VBAR | RE_NO_BK_PARENS - | RE_NO_BK_BRACES | RE_INTERVALS); - INVALID_PATTERN ("*"); - INVALID_PATTERN ("^*"); - INVALID_PATTERN ("a|*"); - INVALID_PATTERN ("(*)"); - - INVALID_PATTERN ("^+"); - INVALID_PATTERN ("+"); - INVALID_PATTERN ("a|+"); - INVALID_PATTERN ("(+)"); - - INVALID_PATTERN ("^?"); - INVALID_PATTERN ("?"); - INVALID_PATTERN ("a|?"); - INVALID_PATTERN ("(?)"); - - INVALID_PATTERN ("^{1}"); - INVALID_PATTERN ("{1}"); - INVALID_PATTERN ("a|{1}"); - INVALID_PATTERN ("({1})"); - -#if 0 - /* No longer have this syntax option -- POSIX says empty alternatives - are undefined as of draft 11.2. */ - - /* You can't have empty alternatives if RE_NO_EMPTY_ALTS is set. */ - - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_ALTS); - - INVALID_PATTERN ("|"); - INVALID_PATTERN ("^|a"); - INVALID_PATTERN ("a|"); - INVALID_PATTERN ("a||"); - INVALID_PATTERN ("a||b"); - INVALID_PATTERN ("(|a)"); - INVALID_PATTERN ("(a|)"); - INVALID_PATTERN ("(a|)"); - - - /* Test above with `\(' and `\)'. */ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_EMPTY_ALTS); - INVALID_PATTERN ("\\(|a\\)"); - INVALID_PATTERN ("\\(a|\\)"); - - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_ALTS); - INVALID_PATTERN ("(|)()$|d$"); -#endif - - /* Test grouping. */ - test_match ("()", "a"); - - /* Test backslashed intervals that are CONTEXTly invalid if have - nothing on which to operate. 
*/ - - re_set_syntax (RE_INTERVALS | RE_CONTEXT_INVALID_OPS); - INVALID_PATTERN ("\\{1\\}"); - - re_set_syntax (0); - test_match ("z-a", "a"); - - re_set_syntax (RE_BK_PLUS_QM); - INVALID_PATTERN ("a*\\"); - - re_set_syntax (0); - INVALID_PATTERN ("a*\\"); - - re_set_syntax (RE_BACKSLASH_ESCAPE_IN_LISTS); - INVALID_PATTERN ("[\\"); - -#if 0 - /* Empty groups are always ok now. (13 Sep 92) */ - re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_GROUPS); - INVALID_PATTERN ("(|)()$|d$"); -#endif - - printf ("\nFinished non-POSIX tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/libregex/test/printchar.c b/gnu/libregex/test/printchar.c deleted file mode 100644 index 1b756f441be3..000000000000 --- a/gnu/libregex/test/printchar.c +++ /dev/null @@ -1,14 +0,0 @@ -void -printchar (c) - char c; -{ - if (c < 040 || c >= 0177) - { - putchar ('\\'); - putchar (((c >> 6) & 3) + '0'); - putchar (((c >> 3) & 7) + '0'); - putchar ((c & 7) + '0'); - } - else - putchar (c); -} diff --git a/gnu/libregex/test/psx-basic.c b/gnu/libregex/test/psx-basic.c deleted file mode 100644 index 52535b6b3076..000000000000 --- a/gnu/libregex/test/psx-basic.c +++ /dev/null @@ -1,253 +0,0 @@ -/* psx-basic.c: Test POSIX basic regular expressions. */ - -#include "test.h" - - -void -test_posix_basic () -{ - /* Intervals can only match up to RE_DUP_MAX occurences of anything. */ - char dup_max_plus_one[6]; - sprintf (dup_max_plus_one, "%d", RE_DUP_MAX + 1); - - printf ("\nStarting POSIX basic tests.\n"); - t = posix_basic_test; - - re_set_syntax (RE_SYNTAX_POSIX_MINIMAL_BASIC); - - test_posix_generic (); - - printf ("\nContinuing POSIX basic tests.\n"); - -/* Grouping tests that are not the same. */ - - test_should_match = false; - invalid_pattern (REG_EPAREN, PARENS_TO_OPS ("a)")); - - test_should_match = true; - /* Special characters. 
*/ - MATCH_SELF ("*"); - test_match ("\\(*\\)", "*"); - test_match ("\\(^*\\)", "*"); - test_match ("**", "***"); - test_match ("***", "****"); - - MATCH_SELF ("{"); /* of extended... */ - MATCH_SELF ("()"); /* also non-Posix. */ - MATCH_SELF ("a+"); - MATCH_SELF ("a?"); - MATCH_SELF ("a|b"); - MATCH_SELF ("a|"); /* No alternations, */ - MATCH_SELF ("|a"); /* so OK if empty. */ - MATCH_SELF ("a||"); - test_match ("\\(|a\\)", "|a"); - test_match ("\\(a|\\)", "a|"); - test_match ("a\\+", "a+"); - test_match ("a\\?", "a?"); - test_match ("a\\|b", "a|b"); - test_match ("^*", "*"); - test_match ("^+", "+"); - test_match ("^?", "?"); - test_match ("^{", "{"); - /* Valid subexpressions - (empty) in basic only. */ - test_match ("\\(\\)", ""); - - test_match ("a\\(\\)", "a"); - test_match ("\\(\\)b", "b"); - test_match ("a\\(\\)b", "ab"); - TEST_REGISTERS ("a\\(\\)b", "ab", 0, 2, 1, 1, -1, -1); - - test_match ("\\(\\)*", ""); - test_match ("\\(\\(\\)\\)*", ""); - /* Valid back references. */ - - /* N.B.: back references to subexpressions that include a * are - undefined in the spec. The tests are in here to see if we handle - the situation consistently, but if it fails any of them, it doesn't - matter. 
*/ - - test_match ("\\(\\)\\1", ""); - TEST_REGISTERS ("\\(\\)\\1", "", 0, 0, 0, 0, -1, -1); - - test_match ("\\(\\(\\)\\)\\(\\)\\2", ""); - - test_match ("\\(a\\)\\1", "aa"); - TEST_REGISTERS ("\\(a\\)\\1", "aa", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)\\1", "xaax", 1, 3, 1, 2, -1, -1); - - test_match ("\\(\\(a\\)\\)\\1", "aa"); - test_match ("\\(a\\)\\(b\\)\\2\\1", "abba"); - - test_match ("\\(a\\)*\\1", "aa"); - TEST_REGISTERS ("\\(a\\)*\\1", "aa", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)*\\1", "xaax", 0, 0, -1, -1, -1, -1); - - test_match ("\\(\\(a\\)\\2b\\)*", "aab"); - TEST_REGISTERS ("\\(\\(a\\)\\2b\\)*", "aab", 0, 3, 0, 3, 0, 1); - TEST_REGISTERS ("\\(\\(a\\)\\2b\\)*", "xaabx", 0, 0, -1, -1, -1, -1); - - test_match ("\\(a*\\)*\\1", ""); - test_match ("\\(a*\\)*\\1", "aa"); - TEST_REGISTERS ("\\(a*\\)*\\1", "aa", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a*\\)*\\1", "xaax", 0, 0, 0, 0, -1, -1); - - test_match ("\\(a*\\)*\\1", ""); - test_match ("\\(a*\\)*\\1", "aa"); - test_match ("\\(\\(a*\\)*\\)*\\1", "aa"); - test_match ("\\(ab*\\)*\\1", "abab"); - TEST_REGISTERS ("\\(ab*\\)*\\1", "abab", 0, 4, 0, 2, -1, -1); - TEST_REGISTERS ("\\(ab*\\)*\\1", "xababx", 0, 0, -1, -1, -1, -1); - - test_match ("\\(a*\\)ab\\1", "aaba"); - TEST_REGISTERS ("\\(a*\\)ab\\1", "aaba", 0, 4, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a*\\)ab\\1", "xaabax", 1, 5, 1, 2, -1, -1); - - test_match ("\\(a*\\)*ab\\1", "aaba"); - TEST_REGISTERS ("\\(a*\\)*ab\\1", "aaba", 0, 4, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a*\\)*ab\\1", "xaabax", 1, 5, 1, 2, -1, -1); - - test_match ("\\(\\(a*\\)b\\)*\\2", "abb"); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "abb", 0, 3, 2, 3, 2, 2); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "xabbx", 0, 0, -1, -1, -1, -1); - - /* Different from above. 
*/ - test_match ("\\(\\(a*\\)b*\\)*\\2", "aa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aa", 0, 2, 0, 1, 0, 1); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b*\\)*\\2", "aba"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aba", 0, 3, 0, 2, 0, 1); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xabax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b\\)*\\2", "aababa"); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "aababa", 0, 6, 3, 5, 3, 4); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "xaababax", 0, 0, -1, -1, -1, -1); - - test_match ("\\(\\(a*\\)b*\\)*\\2", "aabaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aabaa", 0, 5, 0, 3, 0, 2); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaabaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b*\\)*\\2", "aabbaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aabbaa", 0, 6, 0, 4, 0, 2); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaabbaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b*\\)*\\2", "abaabaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "abaabaa", 0, 7, 2, 5, 2, 4); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaababaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\(\\(a*\\)b*\\)*a\\2", "aabaaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*a\\)*\\2", "aabaaa", 0, 6, 0, 3, 0, 2); - TEST_REGISTERS ("\\(\\(a*\\)b*a\\)*\\2", "xaabaax", 0, 0, -1, -1, -1, -1); - - test_match ("\\(\\(a*\\)b*\\)*\\2a", "aabaaa"); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2a", "aabaaa", 0, 6, 0, 3, 0, 2); - TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2a", "xaabaaax", 1, 7, 1, 4, 1, 3); - - test_match ("\\(\\(a*\\)b\\)*\\2\\1", "abaabaaaab"); - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2\\1", "abaabaaaab", 0, 10, 2, 5, 2, 4); - /* We are matching the empty string here. 
*/ - TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2\\1", "xabaabaaaabx", 0, 0, -1, -1, -1, -1); - - test_match ("\\(a*b\\)\\1", "abab"); - test_match ("\\(a\\)\\1\\1", "aaa"); - test_match ("\\(a\\(c\\)d\\)\\1\\2", "acdacdc"); - - test_match ("\\(a\\)\\1*", "aaa"); - TEST_REGISTERS ("\\(a\\)\\1*", "aaa", 0, 3, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)\\1*", "xaaax", 1, 4, 1, 2, -1, -1); - - test_match ("\\(a\\)\\{1,3\\}b\\1", "aba"); - TEST_REGISTERS ("\\(a\\)\\{1,3\\}b\\1", "aba", 0, 3, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)\\{1,3\\}b\\1", "xabax", 1, 4, 1, 2, -1, -1); - - test_match ("\\(\\(a\\)\\2\\)*", "aaaa"); /* rms? */ - TEST_REGISTERS ("\\(\\(a*b\\)\\2\\)*", "bbabab", 0, 6, 2, 6, 2, 4); /* rms? */ - - test_match ("\\(\\(a\\)\\1\\)*", "a1a1"); - - test_match ("\\(\\(a\\)\\2\\)\\1", "aaaa"); - - test_match ("\\(\\(a*\\)\\2\\)\\1", "aaaa"); - TEST_REGISTERS ("\\(\\(a*\\)\\2\\)\\1", "aaaa", 0, 4, 0, 2, 0, 1); - TEST_REGISTERS ("\\(\\(a*\\)\\2\\)\\1", "xaaaax", 0, 0, 0, 0, 0, 0); - - test_match ("\\{1\\}", "{1}"); - test_match ("^\\{1\\}", "{1}"); - - test_match ("\\(a\\)\\1\\{1,2\\}", "aaa"); - TEST_REGISTERS ("\\(a\\)\\1\\{1,2\\}", "aaa", 0, 3, 0, 1, -1, -1); - TEST_REGISTERS ("\\(a\\)\\1\\{1,2\\}", "xaaax", 1, 4, 1, 2, -1, -1); - - - /* Per POSIX D11.1 p. 109, leftmost longest match. */ - - test_match (PARENS_TO_OPS ("(.*).*\\1"), "abcabc"); - - - /* Per POSIX D11.1, p. 125, leftmost longest match. */ - - test_match (PARENS_TO_OPS ("(ac*)c*d[ac]*\\1"), "acdacaaa"); - TEST_REGISTERS (PARENS_TO_OPS ("(ac*)c*d[ac]*\\1"), "acdacaaa", - 0, 8, 0, 1, -1, -1); - - /* Anchors become ordinary, sometimes. 
*/ - MATCH_SELF ("a^"); - MATCH_SELF ("$a"); - MATCH_SELF ("$^"); - test_fastmap ("$a^", "$", 0, 0); - test_match ("$^*", "$^^"); - test_match ("\\($^\\)", "$^"); - test_match ("$*", "$$"); - /* xx -- known bug, solution pending test_match ("^^$", "^"); */ - test_match ("$\\{0,\\}", "$$"); - TEST_SEARCH ("^$*", "$$", 0, 2); - TEST_SEARCH ("^$\\{0,\\}", "$$", 0, 2); - MATCH_SELF ("2^10"); - MATCH_SELF ("$HOME"); - MATCH_SELF ("$1.35"); - - - /* Basic regular expressions, continued; these don't match their strings. */ - test_should_match = false; - - invalid_pattern (REG_EESCAPE, "\\(a\\"); - /* Invalid back references. */ - test_match ("\\(a\\)\\1", "ab"); - test_match ("\\(a\\)\\1\\1", "aab"); - test_match ("\\(a\\)\\(b\\)\\2\\1", "abab"); - test_match ("\\(a\\(c\\)d\\)\\1\\2", "acdc"); - test_match ("\\(a*b\\)\\1", "abaab"); - test_match ("\\(a\\)\\1*", "aaaaaaaaaab"); - test_match ("\\(\\(a\\)\\1\\)*", "aaa"); - invalid_pattern (REG_ESUBREG, "\\1"); - invalid_pattern (REG_ESUBREG, "\\(a\\)\\2"); - test_match ("\\(\\(a\\)\\2\\)*", "abaa"); - test_match ("\\(\\(a\\)\\1\\)*", "a"); - test_match ("\\(\\(a\\)\\2\\)\\1", "abaa"); - test_match ("\\(\\(a*\\)\\2\\)\\1", "abaa"); - /* Invalid intervals. 
*/ - invalid_pattern (REG_EBRACE, "a\\{"); - - invalid_pattern (REG_BADBR, "a\\{-1"); - invalid_pattern (REG_BADBR, concat ("a\\{", (char *)dup_max_plus_one)); - invalid_pattern (REG_BADBR, concat (concat ("a\\{", (char *)dup_max_plus_one), ",")); - invalid_pattern (REG_BADBR, "a\\{1,0"); - - invalid_pattern (REG_EBRACE, "a\\{1"); - invalid_pattern (REG_EBRACE, "a\\{0,"); - invalid_pattern (REG_EBRACE, "a\\{0,1"); - invalid_pattern (REG_EBRACE, "a\\{0,1}"); - - printf ("\nFinished POSIX basic tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/libregex/test/psx-extend.c b/gnu/libregex/test/psx-extend.c deleted file mode 100644 index 6f02d67b8219..000000000000 --- a/gnu/libregex/test/psx-extend.c +++ /dev/null @@ -1,1244 +0,0 @@ -/* psx-extend.c: Test POSIX extended regular expressions. */ - -#include "test.h" - - -void -test_posix_extended () -{ - /* Intervals can only match up to RE_DUP_MAX occurences of anything. */ - char dup_max_plus_one[6]; - sprintf (dup_max_plus_one, "%d", RE_DUP_MAX + 1); - - - printf ("\nStarting POSIX extended tests.\n"); - t = posix_extended_test; - - re_set_syntax (RE_SYNTAX_POSIX_MINIMAL_EXTENDED); - - test_posix_generic (); - - printf ("\nContinuing POSIX extended tests.\n"); - - /* Grouping tests that differ from basic's. */ - - test_should_match = true; - MATCH_SELF ("a)"); - - /* Valid use of special characters. 
*/ - test_match ("\\(a", "(a"); - test_match ("a\\+", "a+"); - test_match ("a\\?", "a?"); - test_match ("\\{a", "{a"); - test_match ("\\|a", "|a"); - test_match ("a\\|b", "a|b"); - test_match ("a\\|?", "a"); - test_match ("a\\|?", "a|"); - test_match ("a\\|*", "a"); - test_match ("a\\|*", "a||"); - test_match ("\\(*\\)", ")"); - test_match ("\\(*\\)", "(()"); - test_match ("a\\|+", "a|"); - test_match ("a\\|+", "a||"); - test_match ("\\(+\\)", "()"); - test_match ("\\(+\\)", "(()"); - test_match ("a\\||b", "a|"); - test_match ("\\(?\\)", ")"); - test_match ("\\(?\\)", "()"); - - test_match ("a+", "a"); - test_match ("a+", "aa"); - test_match ("a?", ""); - test_match ("a?", "a"); - - /* Bracket expressions. */ - test_match ("[(]", "("); - test_match ("[+]", "+"); - test_match ("[?]", "?"); - test_match ("[{]", "{"); - test_match ("[|]", "|"); - /* Subexpressions. */ - test_match ("(a+)*", ""); - test_match ("(a+)*", "aa"); - test_match ("(a?)*", ""); - test_match ("(a?)*", "aa"); - /* (No) back references. */ - test_match ("(a)\\1", "a1"); - /* Invalid as intervals, - but are valid patterns. */ - MATCH_SELF ("{"); - test_match ("^{", "{"); - test_match ("a|{", "{"); - test_match ("({)", "{"); - MATCH_SELF ("a{"); - MATCH_SELF ("a{}"); - MATCH_SELF ("a{-1"); - MATCH_SELF ("a{-1}"); - MATCH_SELF ("a{0"); - MATCH_SELF ("a{0,"); - MATCH_SELF (concat ("a{", dup_max_plus_one)); - MATCH_SELF (concat (concat ("a{", dup_max_plus_one), ",")); - MATCH_SELF ("a{1,0"); - MATCH_SELF ("a{1,0}"); - MATCH_SELF ("a{0,1"); - test_match ("[a{0,1}]", "}"); - test_match ("a{1,3}{-1}", "aaa{-1}"); - test_match (concat ("a{1,3}{", dup_max_plus_one), - concat ("aaa{", dup_max_plus_one)); - test_match ("a{1,3}{2,1}", "aaa{2,1}"); - test_match ("a{1,3}{1,2", "aaa{1,2"); - /* Valid consecutive repetitions. 
*/ - test_match ("a*+", "a"); - test_match ("a*?", "a"); - test_match ("a++", "a"); - test_match ("a+*", "a"); - test_match ("a+?", "a"); - test_match ("a??", "a"); - test_match ("a?*", "a"); - test_match ("a?+", "a"); - - test_match ("a{2}?", ""); - test_match ("a{2}?", "aa"); - test_match ("a{2}+", "aa"); - test_match ("a{2}{2}", "aaaa"); - - test_match ("a{1}?*", ""); - test_match ("a{1}?*", "aa"); - - test_match ("(a?){0,3}b", "aaab"); - test_fastmap ("(a?){0,3}b", "ab", 0, 0); - test_match ("(a+){0,3}b", "b"); - test_fastmap ("(a+){0,3}b", "ab", 0, 0); - test_match ("(a+){0,3}b", "ab"); - test_fastmap ("(a+){0,3}b", "ab", 0, 0); - test_match ("(a+){1,3}b", "aaab"); - test_match ("(a?){1,3}b", "aaab"); - - test_match ("\\\\{1}", "\\"); /* Extended only. */ - - test_match ("(a?)?", "a"); - test_match ("(a?b)?c", "abc"); - test_match ("(a+)*b", "b"); - /* Alternatives. */ - test_match ("a|b", "a"); - test_match ("a|b", "b"); - test_fastmap ("a|b", "ab", 0, 0); - - TEST_SEARCH ("a|b", "cb", 0, 2); - TEST_SEARCH ("a|b", "cb", 0, 2); - - test_match ("(a|b|c)", "a"); - test_match ("(a|b|c)", "b"); - test_match ("(a|b|c)", "c"); - - test_match ("(a|b|c)*", "abccba"); - - test_match ("(a(b*))|c", "a"); /* xx do registers. 
*/ - test_match ("(a(b*))|c", "ab"); - test_match ("(a(b*))|c", "c"); - - test_fastmap ("(a+?*|b)", "ab", 0, 0); - test_match ("(a+?*|b)", "b"); - TEST_REGISTERS ("(a+?*|b)", "b", 0, 1, 0, 1, -1, -1); - - test_fastmap ("(a+?*|b)*", "ab", 0, 0); - test_match ("(a+?*|b)*", "bb"); - TEST_REGISTERS ("(a+?*|b)*", "bb", 0, 2, 1, 2, -1, -1); - - test_fastmap ("(a*|b)*", "ab", 0, 0); - test_match ("(a*|b)*", "bb"); - TEST_REGISTERS ("(a*|b)*", "bb", 0, 2, 1, 2, -1, -1); - - test_fastmap ("((a*)|b)*", "ab", 0, 0); - test_match ("((a*)|b)*", "bb"); - TEST_REGISTERS ("((a*)|b)*", "bb", 0, 2, 1, 2, 1, 1); - - test_fastmap ("(a{0,}|b)*", "ab", 0, 0); - test_match ("(a{0,}|b)*", "bb"); - TEST_REGISTERS ("(a{0,}|b)*", "bb", 0, 2, 1, 2, -1, -1); - - test_fastmap ("((a{0,})|b)*", "ab", 0, 0); - test_match ("((a{0,})|b)*", "bb"); - TEST_REGISTERS ("((a{0,})|b)*", "bb", 0, 2, 1, 2, 1, 1); - - /* With c's */ - test_fastmap ("(a+?*|b)c", "abc", 0, 0); - test_match ("(a+?*|b)c", "bc"); - TEST_REGISTERS ("(a+?*|b)c", "bc", 0, 2, 0, 1, -1, -1); - - test_fastmap ("(a+?*|b)*c", "abc", 0, 0); - test_match ("(a+?*|b)*c", "bbc"); - TEST_REGISTERS ("(a+?*|b)*c", "bbc", 0, 3, 1, 2, -1, -1); - - test_fastmap ("(a*|b)*c", "abc", 0, 0); - test_match ("(a*|b)*c", "bbc"); - TEST_REGISTERS ("(a*|b)*c", "bbc", 0, 3, 1, 2, -1, -1); - - test_fastmap ("((a*)|b)*c", "abc", 0, 0); - test_match ("((a*)|b)*c", "bbc"); - TEST_REGISTERS ("((a*)|b)*c", "bbc", 0, 3, 1, 2, 1, 1); - - test_fastmap ("(a{0,}|b)*c", "abc", 0, 0); - test_match ("(a{0,}|b)*c", "bbc"); - TEST_REGISTERS ("(a{0,}|b)*c", "bbc", 0, 3, 1, 2, -1, -1); - - test_fastmap ("((a{0,})|b)*c", "abc", 0, 0); - test_match ("((a{0,})|b)*c", "bbc"); - TEST_REGISTERS ("((a{0,})|b)*c", "bbc", 0, 3, 1, 2, 1, 1); - - - test_fastmap ("((a{0,}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a{0,}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a{0,}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a{0,}\\b\\<)|b)*", "ab", 0, 0); - test_match ("((a{0,}\\b\\<)|b)*", 
"b"); - TEST_REGISTERS ("((a{0,}\\b\\<)|b)*", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,1}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,1}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,1}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,2}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,2}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,2}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - - test_fastmap ("((a+?*{0,4095}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,4095}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,4095}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,5119}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,5119}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,5119}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,6143}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,6143}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,6143}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,8191}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,8191}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,8191}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,16383}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,16383}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,16383}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - - test_fastmap ("((a+?*{0,}\\b\\<)|b)", "ab", 0, 0); - test_match ("((a+?*{0,}\\b\\<)|b)", "b"); - TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,}\\b\\<)|b)*", "ab", 0, 0); - test_match ("((a+?*{0,}\\b\\<)|b)*", "b"); - TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)*", "b", - 0, 1, 0, 1, 0, 0); - - test_fastmap ("((a+?*{0,}\\b\\<)|b)*", "ab", 0, 0); - test_match ("((a+?*{0,}\\b\\<)|b)*", "bb"); - TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)*", "bb", - 0, 2, 1, 2, 0, 0); - - - /* `*' after group. 
*/ - test_match ("(a*|b*)*c", "c"); - TEST_REGISTERS ("(a*|b*)*c", "c", 0, 1, 0, 0, -1, -1); - - test_match ("(a*|b*)*c", "ac"); - TEST_REGISTERS ("(a*|b*)*c", "ac", 0, 2, 0, 1, -1, -1); - - test_match ("(a*|b*)*c", "aac"); - TEST_REGISTERS ("(a*|b*)*c", "aac", 0, 3, 0, 2, -1, -1); - - test_match ("(a*|b*)*c", "bbc"); - TEST_REGISTERS ("(a*|b*)*c", "bbc", 0, 3, 0, 2, -1, -1); - - test_match ("(a*|b*)*c", "abc"); - TEST_REGISTERS ("(a*|b*)*c", "abc", 0, 3, 1, 2, -1, -1); - - /* No `*' after group. */ - test_match ("(a*|b*)c", "c"); - TEST_REGISTERS ("(a*|b*)c", "c", 0, 1, 0, 0, -1, -1); - - test_match ("(a*|b*)c", "ac"); - TEST_REGISTERS ("(a*|b*)c", "ac", 0, 2, 0, 1, -1, -1); - - test_match ("(a*|b*)c", "bc"); - TEST_REGISTERS ("(a*|b*)c", "bc", 0, 2, 0, 1, -1, -1); - - test_match ("(a*|b*)c", "aac"); - TEST_REGISTERS ("(a*|b*)c", "aac", 0, 3, 0, 2, -1, -1); - - /* Same as above, but with no `*'s in alternatives. - - test_match ("(a|b)*c", "c"); /* `*' after group. */ - TEST_REGISTERS ("(a|b)*c", "c", 0, 1, -1, -1, -1, -1); - - test_match ("(a|b)*c", "ac"); - TEST_REGISTERS ("(a|b)*c", "ac", 0, 2, 0, 1, -1, -1); - - test_match ("(a|b)*c", "bc"); - TEST_REGISTERS ("(a|b)*c", "bc", 0, 2, 0, 1, -1, -1); - - test_match ("(a|b)*c", "abc"); - TEST_REGISTERS ("(a|b)*c", "abc", 0, 3, 1, 2, -1, -1); - - - test_match ("(a*|b*)c", "bbc"); - TEST_REGISTERS ("(a*|b*)c", "bbc", 0, 3, 0, 2, -1, -1); - - /* Complicated second alternative. 
*/ - - test_match ("(a*|(b*)*)*c", "bc"); - TEST_REGISTERS ("(a*|(b*)*)*c", "bc", 0, 2, 0, 1, 0, 1); - - test_match ("(a*|(b*|c*)*)*d", "bd"); - TEST_REGISTERS ("(a*|(b*|c*)*)*d", "bd", 0, 2, 0, 1, 0, 1); - - test_match ("(a*|(b*|c*)*)*d", "bbd"); - TEST_REGISTERS ("(a*|(b*|c*)*)*d", "bbd", 0, 3, 0, 2, 0, 2); - - test_match ("(a*|(b*|c*)*)*d", "cd"); - TEST_REGISTERS ("(a*|(b*|c*)*)*d", "cd", 0, 2, 0, 1, 0, 1); - - test_match ("(a*|(b*|c*)*)*d", "ccd"); - TEST_REGISTERS ("(a*|(b*|c*)*)*d", "ccd", 0, 3, 0, 2, 0, 2); - - test_match ("(a*|b*|c*)*d", "aad"); - TEST_REGISTERS ("(a*|b*|c*)*d", "aad", 0, 3, 0, 2, 0, 2); - - test_match ("(a*|b*|c*)*d", "bbd"); - TEST_REGISTERS ("(a*|b*|c*)*d", "bbd", 0, 3, 0, 2, 0, 2); - - test_match ("(a*|b*|c*)*d", "ccd"); - TEST_REGISTERS ("(a*|b*|c*)*d", "ccd", 0, 3, 0, 2, 0, 2); - - /* Valid anchoring. */ - valid_pattern ("a^"); - valid_pattern ("a^b"); - valid_pattern ("$a"); - valid_pattern ("a$b"); - valid_pattern ("foo^bar"); - valid_pattern ("foo$bar"); - valid_pattern ("(^)"); - valid_pattern ("($)"); - valid_pattern ("(^$)"); - - /* These are the same (but valid) as those (invalid) in other_test.c. 
*/ - valid_pattern - ("(((((((((((((((((((((((((((((((((a^)))))))))))))))))))))))))))))))))"); - valid_pattern - ("((((((((((((((((((((((((((((((((($a)))))))))))))))))))))))))))))))))"); - valid_pattern ("\\(^a\\)"); - valid_pattern ("a\\|^b"); - valid_pattern ("\\w^a"); - valid_pattern ("\\W^a"); - valid_pattern ("(a^)"); - valid_pattern ("($a)"); - valid_pattern ("a(^b)"); - valid_pattern ("a$(b)"); - valid_pattern ("(a)^b"); - valid_pattern ("(a)$b"); - valid_pattern ("(a)(^b)"); - valid_pattern ("(a$)(b)"); - valid_pattern ("(a|b)^c"); - valid_pattern ("(a|b)$c"); - valid_pattern ("(a$|b)c"); - valid_pattern ("(a|b$)c"); - valid_pattern ("a(b|^c)"); - valid_pattern ("a(^b|c)"); - valid_pattern ("a$(b|c)"); - valid_pattern ("(a)(^b|c)"); - valid_pattern ("(a)(b|^c)"); - valid_pattern ("(b$|c)(a)"); - valid_pattern ("(b|c$)(a)"); - valid_pattern ("(a(^b|c))"); - valid_pattern ("(a(b|^c))"); - valid_pattern ("((b$|c)a)"); - valid_pattern ("((b|c$)a)"); - valid_pattern ("((^a|^b)^c)"); - valid_pattern ("(c$(a$|b$))"); - valid_pattern ("((^a|^b)^c)"); - valid_pattern ("((a$|b$)c)"); - valid_pattern ("(c$(a$|b$))"); - valid_pattern ("((^a|^b)|^c)^d"); - valid_pattern ("((a$|b$)|c$)d$"); - valid_pattern ("d$(c$|(a$|b$))"); - valid_pattern ("((^a|^b)|^c)(^d)"); - valid_pattern ("((a$|b$)|c$)(d$)"); - valid_pattern ("(d$)((a$|b$)|c$)"); - valid_pattern ("((^a|^b)|^c)((^d))"); - valid_pattern ("((a$|b$)|c$)((d$))"); - valid_pattern ("((d$))((a$|b$)|c$)"); - valid_pattern ("(((^a|^b))c|^d)^e"); - valid_pattern ("(((a$|b$))c|d$)$e$"); - valid_pattern ("e$(d$|c((a$|b$)))"); - valid_pattern ("(^a)((^b))"); - valid_pattern ("(a$)((b$))"); - valid_pattern ("((^a))(^b)"); - valid_pattern ("((a$))(b$)"); - valid_pattern ("((^a))((^b))"); - valid_pattern ("((a$))((b$))"); - valid_pattern ("((^a)^b)"); - valid_pattern ("((a$)b$)"); - valid_pattern ("(b$(a$))"); - valid_pattern ("(((^a)b)^c)"); - valid_pattern ("(((a$)b)c$)"); - valid_pattern ("(c$(b(a$)))"); - valid_pattern 
("(((^a)b)c)^d"); - valid_pattern ("(((a$)b)c)d$"); - valid_pattern ("d$(c(b(a$)))"); - valid_pattern (".^a"); - valid_pattern ("a$."); - valid_pattern ("[a]^b"); - valid_pattern ("b$[a]"); - valid_pattern ("\\(a$\\)"); - valid_pattern ("a$\\|b"); - valid_pattern ("(^a|^b)^c"); - valid_pattern ("c$(a$|b$)"); - valid_pattern ("(^a|^b)^|^c"); - valid_pattern ("(a$|b$)$|$c$"); - valid_pattern ("(a$|$b$)$|c$"); - valid_pattern ("($a$|b$)$|c$"); - valid_pattern ("$(a$|b$)$|c$"); - valid_pattern ("^c|d(^a|^b)"); - valid_pattern ("(^a|^b)|d^c"); - valid_pattern ("c$|(a$|b$)d"); - valid_pattern ("c$d|(a$|b$)"); - valid_pattern ("c(^a|^b)|^d"); - valid_pattern ("(a$|b$)c|d$"); - valid_pattern ("c(((^a|^b))|^d)e"); - valid_pattern ("(c((^a|^b))|^d)e"); - valid_pattern ("((c(^a|^b))|^d)e"); - valid_pattern ("(((^a|^b))|c^d)e"); - valid_pattern ("(((^a|^b))|^d)^e"); - valid_pattern ("(c$((a|b))|d)e$"); - valid_pattern ("(c((a$|b$))|d)e$"); - valid_pattern ("(c((a|b)$)|d)e$"); - valid_pattern ("(c((a|b))|d$)e$"); - valid_pattern ("^d(^c|e((a|b)))"); - valid_pattern ("^d(c|^e((a|b)))"); - valid_pattern ("^d(c|e(^(a|b)))"); - valid_pattern ("^d(c|e((^a|b)))"); - valid_pattern ("^d(c|e((a|^b)))"); - valid_pattern ("^d(c|e((a|b^)))"); - valid_pattern ("^d(c|e((a|b)^))"); - valid_pattern ("^d(c|e((a|b))^)"); - valid_pattern ("^d(c|e((a|b)))^"); - valid_pattern ("d$(c$|e((a$|b$)))"); - valid_pattern ("d(c$|e$((a$|b$)))"); - valid_pattern ("(((^a|^b))^c)|^de"); - valid_pattern ("(((^a|^b))c)|^d^e"); - valid_pattern ("(((a$|b))c$)|de$"); - valid_pattern ("(((a|b$))c$)|de$"); - valid_pattern ("(((a|b))c$)|d$e$"); - valid_pattern ("^d^e|^(c((a|b)))"); - valid_pattern ("^de|^(c^((a|b)))"); - valid_pattern ("^de|^(c(^(a|b)))"); - valid_pattern ("^de|^(c((^a|b)))"); - valid_pattern ("^de|^(c((a|^b)))"); - valid_pattern ("^de|(^c(^(a|b)))"); - valid_pattern ("^de|(^c((^a|b)))"); - valid_pattern ("^de|(^c((a|^b)))"); - valid_pattern ("de$|(c($(a|b)$))"); - valid_pattern ("de$|(c$((a|b)$))"); 
- valid_pattern ("de$|($c((a|b)$))"); - valid_pattern ("de$|$(c((a|b)$))"); - valid_pattern ("de$|(c($(a|b))$)"); - valid_pattern ("de$|(c$((a|b))$)"); - valid_pattern ("de$|$(c((a|b))$)"); - valid_pattern ("de$|(c($(a|b)))$"); - valid_pattern ("de$|(c$((a|b)))$"); - valid_pattern ("de$|($c((a|b)))$"); - valid_pattern ("de$|$(c((a|b)))$"); - valid_pattern ("^a(^b|c)|^d"); - valid_pattern ("^a(b|^c)|^d"); - valid_pattern ("^a(b|c^)|^d"); - valid_pattern ("^a(b|c)^|^d"); - valid_pattern ("a$(b$|c$)|d$"); - valid_pattern ("^d|^a(^b|c)"); - valid_pattern ("^d|^a(b|^c)"); - valid_pattern ("d$|a$(b$|c$)"); - valid_pattern ("^d|^(b|c)^a"); - valid_pattern ("d$|(b|c$)a$"); - valid_pattern ("d$|(b$|c)a$"); - valid_pattern ("^(a)^(b|c)|^d"); - valid_pattern ("^(a)(^b|c)|^d"); - valid_pattern ("^(a)(b|^c)|^d"); - valid_pattern ("(a)$(b|c)$|d$"); - valid_pattern ("(a$)(b|c)$|d$"); - valid_pattern ("(^a)(^b|c)|^d"); - valid_pattern ("(^a)(b|^c)|^d"); - valid_pattern ("(a)$(b$|c$)|d$"); - valid_pattern ("(a$)(b$|c$)|d$"); - valid_pattern ("^d|^(b|c)^(a)"); - valid_pattern ("^d|^(b|c)(^a)"); - valid_pattern ("d$|(b|c$)(a)$"); - valid_pattern ("d$|(b$|c)(a)$"); - valid_pattern ("^d|(^b|^c)^(a)"); - valid_pattern ("^d|(^b|^c)(^a)"); - valid_pattern ("d$|(b|c)$(a$)"); - valid_pattern ("d$|(b|c$)(a$)"); - valid_pattern ("d$|(b$|c)(a$)"); - valid_pattern ("^d|^(a)^(b|c)"); - valid_pattern ("^d|^(a)(^b|c)"); - valid_pattern ("^d|^(a)(b|^c)"); - valid_pattern ("^d|(^a)^(b|c)"); - valid_pattern ("^d|(^a)(^b|c)"); - valid_pattern ("^d|(^a)(b|^c)"); - valid_pattern ("d$|(a)$(b$|c$)"); - valid_pattern ("d$|(a$)(b$|c$)"); - valid_pattern ("((e^a|^b)|^c)|^d"); - valid_pattern ("((^a|e^b)|^c)|^d"); - valid_pattern ("((^a|^b)|e^c)|^d"); - valid_pattern ("((^a|^b)|^c)|e^d"); - valid_pattern ("d$e|(c$|(a$|b$))"); - valid_pattern ("d$|(c$e|(a$|b$))"); - valid_pattern ("d$|(c$|(a$e|b$))"); - valid_pattern ("d$|(c$|(a$|b$e))"); - valid_pattern ("d$|(c$|(a$|b$)e)"); - valid_pattern 
("d$|(c$|(a$|b$))e"); - valid_pattern ("(a|b)^|c"); - valid_pattern ("(a|b)|c^"); - valid_pattern ("$(a|b)|c"); - valid_pattern ("(a|b)|$c"); - valid_pattern ("(a^|^b)|^c"); - valid_pattern ("(^a|b^)|^c"); - valid_pattern ("(^a|^b)|c^"); - valid_pattern ("($a|b$)|c$"); - valid_pattern ("(a$|$b)|c$"); - valid_pattern ("(a$|b$)|$c"); - valid_pattern ("c^|(^a|^b)"); - valid_pattern ("^c|(a^|^b)"); - valid_pattern ("^c|(^a|b^)"); - valid_pattern ("$c|(a$|b$)"); - valid_pattern ("c$|($a|b$)"); - valid_pattern ("c$|(a$|$b)"); - valid_pattern ("c^|^(a|b)"); - valid_pattern ("^c|(a|b)^"); - valid_pattern ("$c|(a|b)$"); - valid_pattern ("c$|$(a|b)"); - valid_pattern ("(a^|^b)c|^d"); - valid_pattern ("(^a|b^)c|^d"); - valid_pattern ("(^a|^b)c|d^"); - valid_pattern ("(^a|^b)^c|^d"); - valid_pattern ("(a|b)c$|$d"); - valid_pattern ("(a|b)$c$|d$"); - valid_pattern ("(a|b)$c$|d$"); - valid_pattern ("(a|b$)c$|d$"); - valid_pattern ("(a$|b)c$|d$"); - valid_pattern ("($a|b)c$|d$"); - valid_pattern ("$(a|b)c$|d$"); - valid_pattern ("^d|^c^(a|b)"); - valid_pattern ("^d|^c(^a|b)"); - valid_pattern ("^d|^c(a|^b)"); - valid_pattern ("^d|^c(a|b^)"); - valid_pattern ("^d|^c(a|b)^"); - valid_pattern ("$d|c(a$|b$)"); - valid_pattern ("d$|c($a$|b$)"); - valid_pattern ("d$|c$(a$|b$)"); - valid_pattern ("d$|$c(a$|b$)"); - - valid_pattern ("(((a^|^b))c|^d)e"); - valid_pattern ("(((^a|b^))c|^d)e"); - valid_pattern ("(((^a|^b))^c|^d)e"); - valid_pattern ("((^(a|b))c|d^)e"); - valid_pattern ("(^((a|b))c|^d)^e"); - valid_pattern ("(^((a|b)^)c|^d)e"); - valid_pattern ("(^((a^|b))c|^d)e"); - valid_pattern ("(^((a|b^))c|^d)e"); - valid_pattern ("(^((a|b)^)c|^d)e"); - valid_pattern ("(^((a|b))^c|^d)e"); - valid_pattern ("(^((a|b))c^|^d)e"); - valid_pattern ("(^((a|b))c|^d^)e"); - valid_pattern ("(^((a|b))c|^d)^e"); - valid_pattern ("(((a|b))c|d)$e$"); - valid_pattern ("(((a|b))c|d$)e$"); - valid_pattern ("(((a|b))c|$d)e$"); - valid_pattern ("(((a|b))c$|d)e$"); - valid_pattern ("(((a|b))$c|d)e$"); - 
valid_pattern ("(((a|b)$)c|d)e$"); - valid_pattern ("(((a|b$))c|d)e$"); - valid_pattern ("(((a$|b))c|d)e$"); - valid_pattern ("((($a|b))c|d)e$"); - valid_pattern ("(($(a|b))c|d)e$"); - valid_pattern ("($((a|b))c|d)e$"); - valid_pattern ("$(((a|b))c|d)e$"); - valid_pattern ("(^((a|b)^)c|^d)e"); - valid_pattern ("(^((a|b))^c|^d)e"); - valid_pattern ("(^((a|b))c|^d^)e"); - valid_pattern ("(^((a|b))c|^d)^e"); - - valid_pattern ("^e(^d|c((a|b)))"); - valid_pattern ("^e(d|^c((a|b)))"); - valid_pattern ("^e(d|c^((a|b)))"); - valid_pattern ("^e(d|c(^(a|b)))"); - valid_pattern ("^e(d|c((^a|b)))"); - valid_pattern ("^e(d|c((a|^b)))"); - valid_pattern ("^e(d|c((a|b^)))"); - valid_pattern ("^e(d|c((a|b)^))"); - valid_pattern ("^e(d|c((a|b))^)"); - valid_pattern ("^e(d|c((a|b)))^"); - valid_pattern ("e$(d$|c((a$|b$)))"); - valid_pattern ("e(d$|c$((a$|b$)))"); - valid_pattern ("e(d$|c($(a$|b$)))"); - valid_pattern ("e(d$|c(($a$|b$)))"); - valid_pattern ("e$(d$|c((a|b)$))"); - valid_pattern ("e($d$|c((a|b)$))"); - valid_pattern ("e(d$|$c((a|b)$))"); - valid_pattern ("e(d$|c$((a|b)$))"); - valid_pattern ("e(d$|c($(a|b)$))"); - valid_pattern ("e(d$|c(($a|b)$))"); - valid_pattern ("e(d$|c((a|$b)$))"); - valid_pattern ("e(d$|c((a$|$b$)))"); - - valid_pattern ("e$(d$|c((a|b))$)"); - valid_pattern ("e($d$|c((a|b))$)"); - valid_pattern ("e(d$|$c((a|b))$)"); - valid_pattern ("e(d$|c$((a|b))$)"); - valid_pattern ("e(d$|c($(a|b))$)"); - valid_pattern ("e(d$|c(($a|b))$)"); - valid_pattern ("e(d$|c((a|$b))$)"); - valid_pattern ("e$(d$|c((a|b)))$"); - valid_pattern ("e($d$|c((a|b)))$"); - valid_pattern ("e(d$|$c((a|b)))$"); - valid_pattern ("e(d$|c$((a|b)))$"); - valid_pattern ("e(d$|c($(a|b)))$"); - valid_pattern ("e(d$|c(($a|b)))$"); - valid_pattern ("e(d$|c((a|$b)))$"); - valid_pattern ("(((^a|^b)^)c)|^de"); - valid_pattern ("(((^a|^b))^c)|^de"); - valid_pattern ("(((^a|^b))c)^|^de"); - valid_pattern ("$(((a|b))c$)|de$"); - valid_pattern ("($((a|b))c$)|de$"); - valid_pattern 
("(($(a|b))c$)|de$"); - valid_pattern ("((($a|b))c$)|de$"); - valid_pattern ("(((a|$b))c$)|de$"); - valid_pattern ("(((a|b)$)c$)|de$"); - valid_pattern ("(((a|b))$c$)|de$"); - valid_pattern ("$(((a|b))c)$|de$"); - valid_pattern ("($((a|b))c)$|de$"); - valid_pattern ("(($(a|b))c)$|de$"); - valid_pattern ("((($a|b))c)$|de$"); - valid_pattern ("(((a|$b))c)$|de$"); - valid_pattern ("(((a|b)$)c)$|de$"); - valid_pattern ("(((a|b))$c)$|de$"); - valid_pattern ("^ed|^(c((a|b)))^"); - valid_pattern ("^ed|^(c((a|b))^)"); - valid_pattern ("^ed|^(c((a|b)^))"); - valid_pattern ("^ed|^(c((a|b^)))"); - valid_pattern ("^ed|^(c((a^|b)))"); - valid_pattern ("^ed|^(c((^a|b)))"); - valid_pattern ("^ed|^(c(^(a|b)))"); - valid_pattern ("^ed|^(c^((a|b)))"); - valid_pattern ("^ed|(^c((a|b)))^"); - valid_pattern ("^ed|(^c((a|b))^)"); - valid_pattern ("^ed|(^c((a|b)^))"); - valid_pattern ("^ed|(^c((a|b^)))"); - valid_pattern ("^ed|(^c((a|^b)))"); - valid_pattern ("^ed|(^c((a^|b)))"); - valid_pattern ("^ed|(^c((^a|b)))"); - valid_pattern ("^ed|(^c(^(a|b)))"); - valid_pattern ("^ed|(^c(^(a|b)))"); - valid_pattern ("^ed|(^c^((a|b)))"); - valid_pattern ("ed$|$(c((a|b)))$"); - valid_pattern ("ed$|($c((a|b)))$"); - valid_pattern ("ed$|(c$((a|b)))$"); - valid_pattern ("ed$|(c($(a|b)))$"); - valid_pattern ("ed$|(c(($a|b)))$"); - valid_pattern ("ed$|(c((a|$b)))$"); - valid_pattern ("ed$|$(c((a|b))$)"); - valid_pattern ("ed$|($c((a|b))$)"); - valid_pattern ("ed$|(c$((a|b))$)"); - valid_pattern ("ed$|(c($(a|b))$)"); - valid_pattern ("ed$|(c(($a|b))$)"); - valid_pattern ("ed$|(c((a|$b))$)"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - 
valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a|b)$))"); - valid_pattern ("ed$|($c((a|b)$))"); - valid_pattern ("ed$|(c$((a|b)$))"); - valid_pattern ("ed$|(c($(a|b)$))"); - valid_pattern ("ed$|(c(($a|b)$))"); - valid_pattern ("ed$|(c((a|$b)$))"); - valid_pattern ("ed$|$(c((a$|b$)))"); - valid_pattern ("ed$|($c((a$|b$)))"); - valid_pattern ("ed$|(c$((a$|b$)))"); - valid_pattern ("ed$|(c($(a$|b$)))"); - valid_pattern ("ed$|(c(($a$|b$)))"); - valid_pattern ("ed$|(c((a$|$b$)))"); - valid_pattern ("^a(b|c)^|^d"); - valid_pattern ("^a(b|c^)|^d"); - valid_pattern ("^a(b|^c)|^d"); - valid_pattern ("^a(b^|c)|^d"); - valid_pattern ("^a(^b|c)|^d"); - valid_pattern ("^a^(b|c)|^d"); - valid_pattern ("$a(b$|c$)|d$"); - valid_pattern ("a$(b$|c$)|d$"); - valid_pattern ("a($b$|c$)|d$"); - valid_pattern ("a(b$|$c$)|d$"); - valid_pattern ("a(b$|c$)|$d$"); - valid_pattern ("^(a^)(b|c)|^d"); - valid_pattern ("^(a)^(b|c)|^d"); - valid_pattern ("^(a)(^b|c)|^d"); - valid_pattern ("^(a)(b^|c)|^d"); - valid_pattern ("^(a)(b|^c)|^d"); - valid_pattern ("^(a)(b|c^)|^d"); - valid_pattern ("^(a)(b|c)^|^d"); - valid_pattern ("(^a^)(b|c)|^d"); - valid_pattern ("(^a)^(b|c)|^d"); - valid_pattern ("(^a)(^b|c)|^d"); - valid_pattern ("(^a)(b^|c)|^d"); - valid_pattern ("(^a)(b|^c)|^d"); - valid_pattern ("(^a)(b|c^)|^d"); - valid_pattern ("(^a)(b|c)^|^d"); - - valid_pattern ("(a)(b$|c$)d$"); - valid_pattern ("(a)(b|$c)$|d$"); - valid_pattern ("(a)($b|c)$|d$"); - valid_pattern ("(a)$(b|c)$|d$"); - valid_pattern ("(a$)(b|c)$|d$"); - 
valid_pattern ("($a)(b|c)$|d$"); - valid_pattern ("$(a)(b|c)$|d$"); - valid_pattern ("(b|c)($a)$|d$"); - valid_pattern ("(b|c)$(a)$|d$"); - valid_pattern ("(b|c$)(a)$|d$"); - valid_pattern ("(b|$c)(a)$|d$"); - valid_pattern ("(b$|c)(a)$|d$"); - valid_pattern ("($b|c)(a)$|d$"); - valid_pattern ("$(b|c)(a)$|d$"); - valid_pattern ("(b|c)($a$)|d$"); - valid_pattern ("(b|c)$(a$)|d$"); - valid_pattern ("(b|c$)(a$)|d$"); - valid_pattern ("(b|$c)(a$)|d$"); - valid_pattern ("(b$|c)(a$)|d$"); - valid_pattern ("($b|c)(a$)|d$"); - valid_pattern ("$(b|c)(a$)|d$"); - valid_pattern ("(a)$(b$|c$)|d$"); - valid_pattern ("(a$)(b$|c$)|d$"); - valid_pattern ("($a)(b$|c$)|d$"); - valid_pattern ("$(a)(b$|c$)|d$"); - valid_pattern ("^d|^(b^|c)(a)"); - valid_pattern ("^d|^(b|c^)(a)"); - valid_pattern ("^d|^(b|c)^(a)"); - valid_pattern ("^d|^(b|c)(^a)"); - valid_pattern ("^d|^(b|c)(a^)"); - valid_pattern ("^d|^(b|c)(a)^"); - valid_pattern ("^d|(^b|^c^)(a)"); - valid_pattern ("^d|(^b|^c)^(a)"); - valid_pattern ("^d|(^b|^c)(^a)"); - valid_pattern ("^d|(^b|^c)(a^)"); - valid_pattern ("^d|(^b|^c)(a)^"); - valid_pattern ("d$|(b|c)($a$)"); - valid_pattern ("d$|(b|c)$(a$)"); - valid_pattern ("d$|(b|c$)(a$)"); - valid_pattern ("d$|(b$|c)(a$)"); - valid_pattern ("d$|($b|c)(a$)"); - valid_pattern ("d$|$(b|c)(a$)"); - valid_pattern ("d$|(b|c)($a)$"); - valid_pattern ("d$|(b|c)$(a)$"); - valid_pattern ("d$|(b|c$)(a)$"); - valid_pattern ("d$|(b$|c)(a)$"); - valid_pattern ("d$|($b|c)(a)$"); - valid_pattern ("d$|$(b|c)(a)$"); - valid_pattern ("^d|^(a^)(b|c)"); - valid_pattern ("^d|^(a)^(b|c)"); - valid_pattern ("^d|^(a)(^b|c)"); - valid_pattern ("^d|^(a)(b^|c)"); - valid_pattern ("^d|^(a)(b|^c)"); - valid_pattern ("^d|^(a)(b|c^)"); - valid_pattern ("^d|^(a)(b|c)^"); - valid_pattern ("^d|(^a^)(b|c)"); - valid_pattern ("^d|(^a)^(b|c)"); - valid_pattern ("^d|(^a)(^b|c)"); - valid_pattern ("^d|(^a)(b^|c)"); - valid_pattern ("^d|(^a)(b|^c)"); - valid_pattern ("^d|(^a)(b|c^)"); - valid_pattern 
("^d|(^a)(b|c)^"); - valid_pattern ("d$|(a)$(b$|c$)"); - valid_pattern ("d$|(a$)(b$|c$)"); - valid_pattern ("d$|($a)(b$|c$)"); - valid_pattern ("d$|$(a)(b$|c$)"); - valid_pattern ("d$|(a)(b|$c)$"); - valid_pattern ("d$|(a)($b|c)$"); - valid_pattern ("d$|(a)$(b|c)$"); - valid_pattern ("d$|(a$)(b|c)$"); - valid_pattern ("d$|($a)(b|c)$"); - valid_pattern ("d$|$(a)(b|c)$"); - valid_pattern ("((^a|^b)|^c)|^d^"); - valid_pattern ("((^a|^b)|^c)^|^d"); - valid_pattern ("((^a|^b)|^c^)|^d"); - valid_pattern ("((^a|^b)^|^c)|^d"); - valid_pattern ("((^a|^b^)|^c)|^d"); - valid_pattern ("((^a^|^b)|^c)|^d"); - valid_pattern ("((a|b)|c)|$d$"); - valid_pattern ("((a|b)|$c)|d$"); - valid_pattern ("((a|$b)|c)|d$"); - valid_pattern ("(($a|b)|c)|d$"); - valid_pattern ("($(a|b)|c)|d$"); - valid_pattern ("$((a|b)|c)|d$"); - valid_pattern ("^d^|(c|(a|b))"); - valid_pattern ("^d|(c^|(a|b))"); - valid_pattern ("^d|(c|(a^|b))"); - valid_pattern ("^d|(c|(a|b^))"); - valid_pattern ("^d|(c|(a|b)^)"); - valid_pattern ("^d|(c|(a|b))^"); - valid_pattern ("d$|(c$|(a$|$b$))"); - valid_pattern ("d$|(c$|($a$|b$))"); - valid_pattern ("d$|($c$|(a$|b$))"); - valid_pattern ("d$|$(c$|(a$|b$))"); - valid_pattern ("$d$|(c$|(a$|b$))"); - valid_pattern ("d$|(c$|(a|$b)$)"); - valid_pattern ("d$|(c$|($a|b)$)"); - valid_pattern ("d$|($c$|(a|b)$)"); - valid_pattern ("d$|$(c$|(a|b)$)"); - valid_pattern ("$d$|(c$|(a|b)$)"); - valid_pattern ("d$|(c$|(a|$b))$"); - valid_pattern ("d$|(c$|($a|b))$"); - valid_pattern ("d$|($c$|(a|b))$"); - valid_pattern ("d$|$(c$|(a|b))$"); - valid_pattern ("$d$|(c$|(a|b))$"); - valid_pattern ("^c^|(^a|^b)"); - valid_pattern ("^c|(^a^|^b)"); - valid_pattern ("^c|(^a|^b^)"); - valid_pattern ("^c|(^a|^b)^"); - valid_pattern ("c$|(a$|$b$)"); - valid_pattern ("c$|($a$|b$)"); - valid_pattern ("c$|$(a$|b$)"); - valid_pattern ("$c$|(a$|b$)"); - valid_pattern ("^d^(c|e((a|b)))"); - valid_pattern ("^d(^c|e((a|b)))"); - valid_pattern ("^d(c^|e((a|b)))"); - valid_pattern ("^d(c|^e((a|b)))"); - 
valid_pattern ("^d(c|e^((a|b)))"); - valid_pattern ("^d(c|e(^(a|b)))"); - valid_pattern ("^d(c|e((^a|b)))"); - valid_pattern ("^d(c|e((a|^b)))"); - valid_pattern ("^d(c|e((a|b^)))"); - valid_pattern ("^d(c|e((a|b)^))"); - valid_pattern ("^d(c|e((a|b))^)"); - valid_pattern ("^d(c|e((a|b)))^"); - valid_pattern ("d(c$|e($(a$|b$)))"); - valid_pattern ("d(c$|e$((a$|b$)))"); - valid_pattern ("d(c$|$e((a$|b$)))"); - valid_pattern ("d($c$|e((a$|b$)))"); - valid_pattern ("d$(c$|e((a$|b$)))"); - valid_pattern ("$d(c$|e((a$|b$)))"); - valid_pattern ("^d|^a^(b|c)"); - valid_pattern ("^d|^a(^b|c)"); - valid_pattern ("^d|^a(b^|c)"); - valid_pattern ("^d|^a(b|^c)"); - valid_pattern ("^d|^a(b|c^)"); - valid_pattern ("^d|^a(b|c)^"); - valid_pattern ("d$|a($b$|c$)"); - valid_pattern ("d$|a$(b$|c$)"); - valid_pattern ("d$|$a(b$|c$)"); - valid_pattern ("$d$|a(b$|c$)"); - valid_pattern ("^d|^(b^|c)a"); - valid_pattern ("^d|^(b|c^)a"); - valid_pattern ("^d|^(b|c)^a"); - valid_pattern ("^d|^(b|c)a^"); - valid_pattern ("d$|(b|c)$a$"); - valid_pattern ("d$|(b|c$)a$"); - valid_pattern ("d$|(b|$c)a$"); - valid_pattern ("d$|(b$|c)a$"); - valid_pattern ("d$|($b|c)a$"); - valid_pattern ("d$|$(b|c)a$"); - valid_pattern ("$d$|(b|c)a$"); - - /* xx Do these use all the valid_nonposix_pattern ones in other_test.c? 
*/ - - TEST_SEARCH ("(^a|^b)c", "ac", 0, 2); - TEST_SEARCH ("(^a|^b)c", "bc", 0, 2); - TEST_SEARCH ("c(a$|b$)", "ca", 0, 2); - TEST_SEARCH ("c(a$|b$)", "cb", 0, 2); - TEST_SEARCH ("^(a|b)|^c", "ad", 0, 2); - TEST_SEARCH ("^(a|b)|^c", "bd", 0, 2); - TEST_SEARCH ("(a|b)$|c$", "da", 0, 2); - TEST_SEARCH ("(a|b)$|c$", "db", 0, 2); - TEST_SEARCH ("(a|b)$|c$", "dc", 0, 2); - TEST_SEARCH ("(^a|^b)|^c", "ad", 0, 2); - TEST_SEARCH ("(^a|^b)|^c", "bd", 0, 2); - TEST_SEARCH ("(^a|^b)|^c", "cd", 0, 2); - TEST_SEARCH ("(a$|b$)|c$", "da", 0, 2); - TEST_SEARCH ("(a$|b$)|c$", "db", 0, 2); - TEST_SEARCH ("(a$|b$)|c$", "dc", 0, 2); - TEST_SEARCH ("^c|(^a|^b)", "ad", 0, 2); - TEST_SEARCH ("^c|(^a|^b)", "bd", 0, 2); - TEST_SEARCH ("^c|(^a|^b)", "cd", 0, 2); - TEST_SEARCH ("c$|(a$|b$)", "da", 0, 2); - TEST_SEARCH ("c$|(a$|b$)", "db", 0, 2); - TEST_SEARCH ("c$|(a$|b$)", "dc", 0, 2); - TEST_SEARCH ("^c|^(a|b)", "ad", 0, 2); - TEST_SEARCH ("^c|^(a|b)", "bd", 0, 2); - TEST_SEARCH ("^c|^(a|b)", "cd", 0, 2); - TEST_SEARCH ("c$|(a|b)$", "da", 0, 2); - TEST_SEARCH ("c$|(a|b)$", "db", 0, 2); - TEST_SEARCH ("c$|(a|b)$", "dc", 0, 2); - TEST_SEARCH ("(^a|^b)c|^d", "ace", 0, 3); - TEST_SEARCH ("(^a|^b)c|^d", "bce", 0, 3); - TEST_SEARCH ("(^a|^b)c|^d", "de", 0, 2); - TEST_SEARCH ("(a|b)c$|d$", "eac", 0, 3); - TEST_SEARCH ("(a|b)c$|d$", "ebc", 0, 3); - TEST_SEARCH ("(a|b)c$|d$", "ed", 0, 3); - TEST_SEARCH ("^d|^c(a|b)", "cae", 0, 3); - TEST_SEARCH ("^d|^c(a|b)", "cbe", 0, 3); - TEST_SEARCH ("^d|^c(a|b)", "de", 0, 3); - TEST_SEARCH ("d$|c(a$|b$)", "eca", 0, 3); - TEST_SEARCH ("d$|c(a$|b$)", "ecb", 0, 3); - TEST_SEARCH ("d$|c(a$|b$)", "ed", 0, 3); - - TEST_SEARCH ("(((^a|^b))c|^d)e", "acef", 0, 4); - TEST_SEARCH ("(((^a|^b))c|^d)e", "bcef", 0, 4); - TEST_SEARCH ("(((^a|^b))c|^d)e", "def", 0, 3); - - TEST_SEARCH ("((^(a|b))c|^d)e", "acef", 0, 4); - TEST_SEARCH ("((^(a|b))c|^d)e", "bcef", 0, 4); - TEST_SEARCH ("((^(a|b))c|^d)e", "def", 0, 3); - - TEST_SEARCH ("(^((a|b))c|^d)e", "acef", 0, 4); - 
TEST_SEARCH ("(^((a|b))c|^d)e", "bcef", 0, 4); - TEST_SEARCH ("(^((a|b))c|^d)e", "def", 0, 3); - - TEST_SEARCH ("(((a|b))c|d)e$", "face", 0, 4); - TEST_SEARCH ("(((a|b))c|d)e$", "fbce", 0, 4); - TEST_SEARCH ("(((a|b))c|d)e$", "fde", 0, 3); - - TEST_SEARCH ("^e(d|c((a|b)))", "edf", 0, 3); - TEST_SEARCH ("^e(d|c((a|b)))", "ecaf", 0, 4); - TEST_SEARCH ("^e(d|c((a|b)))", "ecbf", 0, 4); - - TEST_SEARCH ("e(d$|c((a$|b$)))", "fed", 0, 3); - TEST_SEARCH ("e(d$|c((a$|b$)))", "feca", 0, 4); - TEST_SEARCH ("e(d$|c((a$|b$)))", "fecb", 0, 4); - - TEST_SEARCH ("e(d$|c((a|b)$))", "fed", 0, 3); - TEST_SEARCH ("e(d$|c((a|b)$))", "feca", 0, 4); - TEST_SEARCH ("e(d$|c((a|b)$))", "fecb", 0, 4); - - TEST_SEARCH ("e(d$|c((a|b))$)", "fed", 0, 3); - TEST_SEARCH ("e(d$|c((a|b))$)", "feca", 0, 3); - TEST_SEARCH ("e(d$|c((a|b))$)", "fecb", 0, 3); - - TEST_SEARCH ("e(d$|c((a|b)))$", "fed", 0, 3); - TEST_SEARCH ("e(d$|c((a|b)))$", "feca", 0, 3); - TEST_SEARCH ("e(d$|c((a|b)))$", "fecb", 0, 3); - - TEST_SEARCH ("(((^a|^b))c)|^de", "acf", 0, 3); - TEST_SEARCH ("(((^a|^b))c)|^de", "bcf", 0, 3); - TEST_SEARCH ("(((^a|^b))c)|^de", "def", 0, 3); - - TEST_SEARCH ("(((a|b))c$)|de$", "fac", 0, 3); - TEST_SEARCH ("(((a|b))c$)|de$", "fbc", 0, 3); - TEST_SEARCH ("(((a|b))c$)|de$", "fde", 0, 3); - - TEST_SEARCH ("(((a|b))c)$|de$", "fac", 0, 3); - TEST_SEARCH ("(((a|b))c)$|de$", "fbc", 0, 3); - TEST_SEARCH ("(((a|b))c)$|de$", "fde", 0, 3); - - TEST_SEARCH ("^ed|^(c((a|b)))", "edf", 0, 3); - TEST_SEARCH ("^ed|^(c((a|b)))", "caf", 0, 3); - TEST_SEARCH ("^ed|^(c((a|b)))", "cbf", 0, 3); - - TEST_SEARCH ("^ed|(^c((a|b)))", "edf", 0, 3); - TEST_SEARCH ("^ed|(^c((a|b)))", "caf", 0, 3); - TEST_SEARCH ("^ed|(^c((a|b)))", "cbf", 0, 3); - - TEST_SEARCH ("ed$|(c((a|b)))$", "fed", 0, 3); - TEST_SEARCH ("ed$|(c((a|b)))$", "fca", 0, 3); - TEST_SEARCH ("ed$|(c((a|b)))$", "fcb", 0, 3); - - TEST_SEARCH ("ed$|(c((a|b))$)", "fed", 0, 3); - TEST_SEARCH ("ed$|(c((a|b))$)", "fca", 0, 3); - TEST_SEARCH ("ed$|(c((a|b))$)", "fcb", 
0, 3); - - TEST_SEARCH ("ed$|(c((a|b)$))", "fed", 0, 3); - TEST_SEARCH ("ed$|(c((a|b)$))", "fca", 0, 3); - TEST_SEARCH ("ed$|(c((a|b)$))", "fcb", 0, 3); - - TEST_SEARCH ("ed$|(c((a$|b$)))", "fed", 0, 3); - TEST_SEARCH ("ed$|(c((a$|b$)))", "fca", 0, 3); - TEST_SEARCH ("ed$|(c((a$|b$)))", "fcb", 0, 3); - - TEST_SEARCH ("^a(b|c)|^d", "abe", 0, 3); - TEST_SEARCH ("^a(b|c)|^d", "ace", 0, 3); - TEST_SEARCH ("^a(b|c)|^d", "df", 0, 2); - - TEST_SEARCH ("a(b$|c$)|d$", "fab", 0, 3); - TEST_SEARCH ("a(b$|c$)|d$", "fac", 0, 3); - TEST_SEARCH ("a(b$|c$)|d$", "fd", 0, 2); - - TEST_SEARCH ("^(a)(b|c)|^d", "abe", 0, 3); - TEST_SEARCH ("^(a)(b|c)|^d", "ace", 0, 3); - TEST_SEARCH ("^(a)(b|c)|^d", "df", 0, 2); - - TEST_SEARCH ("(^a)(b|c)|^d", "abe", 0, 3); - TEST_SEARCH ("(^a)(b|c)|^d", "ace", 0, 3); - TEST_SEARCH ("(^a)(b|c)|^d", "df", 0, 2); - - TEST_SEARCH ("(a)(b|c)$|d$", "fab", 0, 3); - TEST_SEARCH ("(a)(b|c)$|d$", "fac", 0, 3); - TEST_SEARCH ("(a)(b|c)$|d$", "fd", 0, 2); - - TEST_SEARCH ("(b|c)(a)$|d$", "fba", 0, 3); - TEST_SEARCH ("(b|c)(a)$|d$", "fca", 0, 3); - TEST_SEARCH ("(b|c)(a)$|d$", "fd", 0, 2); - - TEST_SEARCH ("(b|c)(a$)|d$", "fba", 0, 3); - TEST_SEARCH ("(b|c)(a$)|d$", "fca", 0, 3); - TEST_SEARCH ("(b|c)(a$)|d$", "fd", 0, 2); - - TEST_SEARCH ("(a)(b$|c$)|d$", "fab", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "fac", 0, 3); - TEST_SEARCH ("(a)(b$|c$)|d$", "fd", 0, 2); - - TEST_SEARCH ("^d|^(b|c)(a)", "df", 0, 2); - TEST_SEARCH ("^d|^(b|c)(a)", "baf", 0, 3); - TEST_SEARCH ("^d|^(b|c)(a)", "caf", 0, 3); - - TEST_SEARCH ("^d|(^b|^c)(a)", "df", 0, 2); - TEST_SEARCH ("^d|(^b|^c)(a)", "baf", 0, 3); - TEST_SEARCH ("^d|(^b|^c)(a)", "caf", 0, 3); - - TEST_SEARCH ("d$|(b|c)(a$)", "fd", 0, 2); - TEST_SEARCH ("d$|(b|c)(a$)", "fba", 0, 3); - TEST_SEARCH ("d$|(b|c)(a$)", "fca", 0, 3); - - TEST_SEARCH ("d$|(b|c)(a)$", "fd", 0, 2); - TEST_SEARCH ("d$|(b|c)(a)$", "fba", 0, 3); - TEST_SEARCH ("d$|(b|c)(a)$", "fca", 0, 3); - - TEST_SEARCH ("d$|(b|c)(a$)", "fd", 0, 2); - TEST_SEARCH 
("d$|(b|c)(a$)", "fba", 0, 3); - TEST_SEARCH ("d$|(b|c)(a$)", "fca", 0, 3); - - TEST_SEARCH ("^d|^(a)(b|c)", "df", 0, 2); - TEST_SEARCH ("^d|^(a)(b|c)", "abf", 0, 3); - TEST_SEARCH ("^d|^(a)(b|c)", "acf", 0, 3); - - TEST_SEARCH ("^d|(^a)(b|c)", "df", 0, 2); - TEST_SEARCH ("^d|(^a)(b|c)", "abf", 0, 3); - TEST_SEARCH ("^d|(^a)(b|c)", "acf", 0, 3); - - TEST_SEARCH ("d$|(a)(b$|c$)", "fd", 0, 2); - TEST_SEARCH ("d$|(a)(b$|c$)", "fab", 0, 3); - TEST_SEARCH ("d$|(a)(b$|c$)", "fac", 0, 3); - - TEST_SEARCH ("d$|(a)(b|c)$", "fd", 0, 2); - TEST_SEARCH ("d$|(a)(b|c)$", "fab", 0, 3); - TEST_SEARCH ("d$|(a)(b|c)$", "fac", 0, 3); - - TEST_SEARCH ("((^a|^b)|^c)|^d", "ae", 0, 2); - TEST_SEARCH ("((^a|^b)|^c)|^d", "be", 0, 2); - TEST_SEARCH ("((^a|^b)|^c)|^d", "ce", 0, 2); - TEST_SEARCH ("((^a|^b)|^c)|^d", "de", 0, 2); - - TEST_SEARCH ("((a|b)|c)|d$", "ed", 0, 2); - TEST_SEARCH ("((a|b)|c)|d$", "ea", 0, 2); - TEST_SEARCH ("((a|b)|c)|d$", "eb", 0, 2); - TEST_SEARCH ("((a|b)|c)|d$", "ec", 0, 2); - - TEST_SEARCH ("^d|(c|(a|b))", "de", 0, 2); - - TEST_SEARCH ("d$|(c$|(a$|b$))", "ed", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|b$))", "ec", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|b$))", "ea", 0, 2); - TEST_SEARCH ("d$|(c$|(a$|b$))", "eb", 0, 2); - - TEST_SEARCH ("d$|(c$|(a|b)$)", "ed", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b)$)", "ec", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b)$)", "ea", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b)$)", "eb", 0, 2); - - TEST_SEARCH ("d$|(c$|(a|b))$", "ed", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b))$", "ec", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b))$", "ea", 0, 2); - TEST_SEARCH ("d$|(c$|(a|b))$", "eb", 0, 2); - - test_match ("a|^b", "b"); - test_match ("a|b$", "b"); - test_match ("^b|a", "b"); - test_match ("b$|a", "b"); - test_match ("(^a)", "a"); - test_match ("(a$)", "a"); - TEST_SEARCH ("c|^ab", "aba", 0, 3); - TEST_SEARCH ("c|ba$", "aba", 0, 3); - TEST_SEARCH ("^ab|c", "aba", 0, 3); - TEST_SEARCH ("ba$|c", "aba", 0, 3); - TEST_SEARCH ("(^a)", "ab", 0, 2); - TEST_SEARCH ("(a$)", "ba", 0, 2); - - 
TEST_SEARCH ("(^a$)", "a", 0, 1); - TEST_SEARCH ("(^a)", "ab", 0, 2); - TEST_SEARCH ("(b$)", "ab", 0, 2); - - /* Backtracking. */ - /* Per POSIX D11.1 p. 108, leftmost longest match. */ - test_match ("(wee|week)(knights|night)", "weeknights"); - - test_match ("(fooq|foo)qbar", "fooqbar"); - test_match ("(fooq|foo)(qbarx|bar)", "fooqbarx"); - - /* Take first alternative that does the longest match. */ - test_all_registers ("(fooq|(foo)|(fo))((qbarx)|(oqbarx)|bar)", "fooqbarx", - "", 0, 8, 0, 3, 0, 3, -1, -1, 3, 8, 3, 8, -1, -1, -1, -1, -1, -1, - -1, -1); - - test_match ("(fooq|foo)*qbar", "fooqbar"); - test_match ("(fooq|foo)*(qbar)", "fooqbar"); - test_match ("(fooq|foo)*(qbar)*", "fooqbar"); - - test_match ("(fooq|fo|o)*qbar", "fooqbar"); - test_match ("(fooq|fo|o)*(qbar)", "fooqbar"); - test_match ("(fooq|fo|o)*(qbar)*", "fooqbar"); - - test_match ("(fooq|fo|o)*(qbar|q)*", "fooqbar"); - test_match ("(fooq|foo)*(qbarx|bar)", "fooqbarx"); - test_match ("(fooq|foo)*(qbarx|bar)*", "fooqbarx"); - - test_match ("(fooq|fo|o)+(qbar|q)+", "fooqbar"); - test_match ("(fooq|foo)+(qbarx|bar)", "fooqbarx"); - test_match ("(fooq|foo)+(qbarx|bar)+", "fooqbarx"); - - /* Per Mike Haertel. */ - test_match ("(foo|foobarfoo)(bar)*", "foobarfoo"); - - /* Combination. */ - test_match ("[ab]?c", "ac"); - test_match ("[ab]*c", "ac"); - test_match ("[ab]+c", "ac"); - test_match ("(a|b)?c", "ac"); - test_match ("(a|b)*c", "ac"); - test_match ("(a|b)+c", "ac"); - test_match ("(a*c)?b", "b"); - test_match ("(a*c)+b", "aacb"); - /* Registers. */ - /* Per David A. Willcox. */ - test_match ("a((b)|(c))d", "acd"); - test_all_registers ("a((b)|(c))d", "acd", "", 0, 3, 1, 2, -1, -1, 1, 2, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - - /* Extended regular expressions, continued; these don't match their strings. */ - test_should_match = false; - -#if 0 - /* Invalid use of special characters. 
*/ - /* These are not invalid anymore, since POSIX says the behavior is - undefined, and we prefer context-independent to context-invalid. */ - invalid_pattern (REG_BADRPT, "*"); - invalid_pattern (REG_BADRPT, "a|*"); - invalid_pattern (REG_BADRPT, "(*)"); - invalid_pattern (REG_BADRPT, "^*"); - invalid_pattern (REG_BADRPT, "+"); - invalid_pattern (REG_BADRPT, "a|+"); - invalid_pattern (REG_BADRPT, "(+)"); - invalid_pattern (REG_BADRPT, "^+"); - - invalid_pattern (REG_BADRPT, "?"); - invalid_pattern (REG_BADRPT, "a|?"); - invalid_pattern (REG_BADRPT, "(?)"); - invalid_pattern (REG_BADRPT, "^?"); - - invalid_pattern (REG_BADPAT, "|"); - invalid_pattern (REG_BADPAT, "a|"); - invalid_pattern (REG_BADPAT, "a||"); - invalid_pattern (REG_BADPAT, "(|a)"); - invalid_pattern (REG_BADPAT, "(a|)"); - - invalid_pattern (REG_BADPAT, PARENS_TO_OPS ("(|)")); - - invalid_pattern (REG_BADRPT, "{1}"); - invalid_pattern (REG_BADRPT, "a|{1}"); - invalid_pattern (REG_BADRPT, "^{1}"); - invalid_pattern (REG_BADRPT, "({1})"); - - invalid_pattern (REG_BADPAT, "|b"); - - invalid_pattern (REG_BADRPT, "^{0,}*"); - invalid_pattern (REG_BADRPT, "$*"); - invalid_pattern (REG_BADRPT, "${0,}*"); -#endif /* 0 */ - - invalid_pattern (REG_EESCAPE, "\\"); - - test_match ("a?b", "a"); - - - test_match ("a+", ""); - test_match ("a+b", "a"); - test_match ("a?", "b"); - -#if 0 - /* We make empty groups valid now, since they are undefined in POSIX. - (13 Sep 92) */ - /* Subexpressions. */ - invalid_pattern (REG_BADPAT, "()"); - invalid_pattern (REG_BADPAT, "a()"); - invalid_pattern (REG_BADPAT, "()b"); - invalid_pattern (REG_BADPAT, "a()b"); - invalid_pattern (REG_BADPAT, "()*"); - invalid_pattern (REG_BADPAT, "(()*"); -#endif - /* Invalid intervals. */ - test_match ("a{2}*", "aaa"); - test_match ("a{2}?", "aaa"); - test_match ("a{2}+", "aaa"); - test_match ("a{2}{2}", "aaa"); - test_match ("a{1}{1}{2}", "aaa"); - test_match ("a{1}{1}{2}", "a"); - /* Invalid alternation. 
*/ - test_match ("a|b", "c"); - - TEST_SEARCH ("c|^ba", "aba", 0, 3); - TEST_SEARCH ("c|ab$", "aba", 0, 3); - TEST_SEARCH ("^ba|c", "aba", 0, 3); - TEST_SEARCH ("ab$|c", "aba", 0, 3); - /* Invalid anchoring. */ - TEST_SEARCH ("(^a)", "ba", 0, 2); - TEST_SEARCH ("(b$)", "ba", 0, 2); - - printf ("\nFinished POSIX extended tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/libregex/test/psx-generic.c b/gnu/libregex/test/psx-generic.c deleted file mode 100644 index 340e93875515..000000000000 --- a/gnu/libregex/test/psx-generic.c +++ /dev/null @@ -1,336 +0,0 @@ -/* psx-generic.c: test POSIX re's independent of us using basic or - extended syntax. */ - -#include "test.h" - - -void -test_posix_generic () -{ - int omit_generic_tests = 0; /* reset in debugger to skip */ - - if (omit_generic_tests) - return; - /* Tests somewhat in the order of P1003.2. */ - - /* Both posix basic and extended; should match. */ - - printf ("\nStarting generic POSIX tests.\n"); - test_grouping (); - test_intervals (); - - test_should_match = true; - /* Ordinary characters. 
*/ - printf ("\nContinuing generic POSIX tests.\n"); - - MATCH_SELF (""); - test_fastmap ("", "", 0, 0); - test_fastmap_search ("", "", "", 0, 0, 2, 0, 0); - TEST_REGISTERS ("", "", 0, 0, -1, -1, -1, -1); - TEST_SEARCH ("", "", 0, 0); - TEST_SEARCH_2 ("", "", "", 0, 1, 0); - - MATCH_SELF ("abc"); - test_fastmap ("abc", "a", 0, 0); - TEST_REGISTERS ("abc", "abc", 0, 3, -1, -1, -1, -1); - TEST_REGISTERS ("abc", "xabcx", 1, 4, -1, -1, -1, -1); - - test_match ("\\a","a"); - test_match ("\\0", "0"); - - TEST_SEARCH ("a", "ab", 0, 2); - TEST_SEARCH ("b", "ab", 0, 2); - TEST_SEARCH ("a", "ab", 1, -2); - TEST_SEARCH_2 ("a", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("b", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("a", "a", "b", 1, -2, 2); - - test_match ("\n", "\n"); - test_match ("a\n", "a\n"); - test_match ("\nb", "\nb"); - test_match ("a\nb", "a\nb"); - - TEST_SEARCH ("b", "baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 236, -237); - /* Valid use of special characters. */ - test_match ("a*", "aa"); - test_fastmap ("a*", "a", 0, 0); - TEST_REGISTERS ("a*", "aa", 0, 2, -1, -1, -1, -1); - - test_match ("a*b", "aab"); - test_fastmap ("a*b", "ab", 0, 0); - - test_match ("a*ab", "aab"); - TEST_REGISTERS ("a*a", "aa", 0, 2, -1, -1, -1, -1); - TEST_REGISTERS ("a*a", "xaax", 1, 3, -1, -1, -1, -1); - - test_match ("\\{", "{"); - test_match ("\\^", "^"); - test_match ("\\.", "."); - test_match ("\\*", "*"); - test_match ("\\[", "["); - test_match ("\\$", "$"); - test_match ("\\\\", "\\"); - - test_match ("ab*", "a"); - test_match ("ab*", "abb"); - - /* Valid consecutive repetitions. */ - test_match ("a**", "a"); - /* Valid period. 
*/ - test_match (".", "a"); - TEST_REGISTERS (".", "a", 0, 1, -1, -1, -1, -1); - test_match (".", "\004"); - test_match (".", "\n"); - /* Valid bracket expressions. */ - test_match ("[ab]", "a"); - test_match ("[ab]", "b"); - test_fastmap ("[ab]", "ab", 0, 0); - TEST_REGISTERS ("[ab]", "a", 0, 1, -1, -1, -1, -1); - TEST_REGISTERS ("[ab]", "xax", 1, 2, -1, -1, -1, -1); - - test_fastmap ("[^ab]", "ab", 1, 1); - test_match ("[^ab]", "c"); - test_match ("[^a]", "\n"); - - test_match ("[a]*a", "aa"); - - test_match ("[[]", "["); - test_match ("[]]", "]"); - test_match ("[.]", "."); - test_match ("[*]", "*"); - test_match ("[\\]", "\\"); - test_match ("[\\(]", "("); - test_match ("[\\)]", ")"); - test_match ("[^]]", "a"); - test_match ("[a^]", "^"); - test_match ("[a$]", "$"); - test_match ("[]a]", "]"); - test_match ("[a][]]", "a]"); - test_match ("[\n]", "\n"); - test_match ("[^a]", "\n"); - test_match ("[a-]", "a"); - - TEST_REGISTERS ("\\`[ \t\n]*", " karl (Karl Berry)", 0, 1, -1, -1, -1, -1); - TEST_REGISTERS ("[ \t\n]*\\'", " karl (Karl Berry)", 18, 18, -1, -1, -1, -1); - - /* Collating, noncollating, - equivalence classes aren't - implemented yet. */ - - - /* Character classes. 
*/ - test_match ("[:alpha:]", "p"); - test_match ("[[:alpha:]]", "a"); - test_match ("[[:alpha:]]", "z"); - test_match ("[[:alpha:]]", "A"); - test_match ("[[:alpha:]]", "Z"); - test_match ("[[:upper:]]", "A"); - test_match ("[[:upper:]]", "Z"); - test_match ("[[:lower:]]", "a"); - test_match ("[[:lower:]]", "z"); - - test_match ("[[:digit:]]", "0"); - test_match ("[[:digit:]]", "9"); - test_fastmap ("[[:digit:]]", "0123456789", 0, 0); - - test_match ("[[:alnum:]]", "0"); - test_match ("[[:alnum:]]", "9"); - test_match ("[[:alnum:]]", "a"); - test_match ("[[:alnum:]]", "z"); - test_match ("[[:alnum:]]", "A"); - test_match ("[[:alnum:]]", "Z"); - test_match ("[[:xdigit:]]", "0"); - test_match ("[[:xdigit:]]", "9"); - test_match ("[[:xdigit:]]", "A"); - test_match ("[[:xdigit:]]", "F"); - test_match ("[[:xdigit:]]", "a"); - test_match ("[[:xdigit:]]", "f"); - test_match ("[[:space:]]", " "); - test_match ("[[:print:]]", " "); - test_match ("[[:print:]]", "~"); - test_match ("[[:punct:]]", ","); - test_match ("[[:graph:]]", "!"); - test_match ("[[:graph:]]", "~"); - test_match ("[[:cntrl:]]", "\177"); - test_match ("[[:digit:]a]", "a"); - test_match ("[[:digit:]a]", "2"); - test_match ("[a[:digit:]]", "a"); - test_match ("[a[:digit:]]", "2"); - test_match ("[[:]", "["); - test_match ("[:]", ":"); - test_match ("[[:a]", "["); - test_match ("[[:alpha:a]", "["); - /* Valid ranges. */ - test_match ("[a-a]", "a"); - test_fastmap ("[a-a]", "a", 0, 0); - TEST_REGISTERS ("[a-a]", "xax", 1, 2, -1, -1, -1, -1); - - test_match ("[a-z]", "z"); - test_fastmap ("[a-z]", "abcdefghijklmnopqrstuvwxyz", 0, 0); - test_match ("[-a]", "-"); /* First */ - test_match ("[-a]", "a"); - test_match ("[a-]", "-"); /* Last */ - test_match ("[a-]", "a"); - test_match ("[--@]", "@"); /* First and starting point. */ - - test_match ("[%--a]", "%"); /* Ending point. */ - test_match ("[%--a]", "-"); /* Ditto. */ - - test_match ("[a%--]", "%"); /* Both ending point and last. 
*/ - test_match ("[a%--]", "-"); - test_match ("[%--a]", "a"); /* Ending point only. */ - test_match ("[a-c-f]", "e"); /* Piggyback. */ - - test_match ("[)-+--/]", "*"); - test_match ("[)-+--/]", ","); - test_match ("[)-+--/]", "/"); - test_match ("[[:digit:]-]", "-"); - /* Concatenation ????*/ - test_match ("[ab][cd]", "ac"); - test_fastmap ("[ab][cd]", "ab", 0, 0); - TEST_REGISTERS ("[ab][cd]", "ad", 0, 2, -1, -1, -1, -1); - TEST_REGISTERS ("[ab][cd]", "xadx", 1, 3, -1, -1, -1, -1); - - /* Valid expression anchoring. */ - test_match ("^a", "a"); - test_fastmap ("^a", "a", 0, 0); - TEST_REGISTERS ("^a", "ax", 0, 1, -1, -1, -1, -1); - - test_match ("^", ""); - TEST_REGISTERS ("^", "", 0, 0, -1, -1, -1, -1); - test_match ("$", ""); - TEST_REGISTERS ("$", "", 0, 0, -1, -1, -1, -1); - - test_match ("a$", "a"); - test_fastmap ("a$", "a", 0, 0); - TEST_REGISTERS ("a$", "xa", 1, 2, -1, -1, -1, -1); - - test_match ("^ab$", "ab"); - test_fastmap ("^ab$", "a", 0, 0); - TEST_REGISTERS ("^a$", "a", 0, 1, -1, -1, -1, -1); - - test_fastmap ("^$", "", 0, 0); - test_match ("^$", ""); - TEST_REGISTERS ("^$", "", 0, 0, -1, -1, -1, -1); - - TEST_SEARCH (PARENS_TO_OPS ("(^a)"), "ab", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ba", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("^(^a)"), "ab", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)$"), "ba", 0, 2); - - /* Two strings. */ - test_match_2 ("ab", "a", "b"); - TEST_REGISTERS_2 ("ab", "a", "b", 0, 2, -1, -1, -1, -1); - - test_match_2 ("a", "", "a"); - test_match_2 ("a", "a", ""); - test_match_2 ("ab", "a", "b"); - /* (start)pos. */ - TEST_POSITIONED_MATCH ("b", "ab", 1); - /* mstop. */ - TEST_TRUNCATED_MATCH ("a", "ab", 1); - - - /* Both basic and extended, continued; should not match. */ - - test_should_match = false; - /* Ordinary characters. 
*/ - test_match ("abc", "ab"); - - TEST_SEARCH ("c", "ab", 0, 2); - TEST_SEARCH ("c", "ab", 0, 2); - TEST_SEARCH ("c", "ab", 1, -2); - TEST_SEARCH ("c", "ab", 0, 10); - TEST_SEARCH ("c", "ab", 1, -10); - TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2); - TEST_SEARCH_2 ("c", "a", "b", 1, -2, 2); - TEST_SEARCH_2 ("c", "a", "b", 1, -2, 2); - - TEST_SEARCH ("c", "baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 236, -237); - - /* Invalid use of special characters. */ - invalid_pattern (REG_EESCAPE, "\\"); - invalid_pattern (REG_EESCAPE, "a\\"); - invalid_pattern (REG_EESCAPE, "a*\\"); - /* Invalid period. */ - test_match (".", ""); - /* Invalid bracket expressions. */ - test_match ("[ab]", "c"); - test_match ("[^b]", "b"); - test_match ("[^]]", "]"); - - invalid_pattern (REG_EBRACK, "["); - invalid_pattern (REG_EBRACK, "[^"); - invalid_pattern (REG_EBRACK, "[a"); - invalid_pattern (REG_EBRACK, "[]"); - invalid_pattern (REG_EBRACK, "[]a"); - invalid_pattern (REG_EBRACK, "a[]a"); - - - test_match ("[:alpha:]", "q"); /* Character classes. 
*/ - test_match ("[[:alpha:]]", "2"); - test_match ("[[:upper:]]", "a"); - test_match ("[[:lower:]]", "A"); - test_match ("[[:digit:]]", "a"); - test_match ("[[:alnum:]]", ":"); - test_match ("[[:xdigit:]]", "g"); - test_match ("[[:space:]]", "a"); - test_match ("[[:print:]]", "\177"); - test_match ("[[:punct:]]", "a"); - test_match ("[[:graph:]]", " "); - test_match ("[[:cntrl:]]", "a"); - invalid_pattern (REG_EBRACK, "[[:"); - invalid_pattern (REG_EBRACK, "[[:alpha:"); - invalid_pattern (REG_EBRACK, "[[:alpha:]"); - invalid_pattern (REG_ECTYPE, "[[::]]"); - invalid_pattern (REG_ECTYPE, "[[:a:]]"); - invalid_pattern (REG_ECTYPE, "[[:alpo:]]"); - invalid_pattern (REG_ECTYPE, "[[:a:]"); - - test_match ("[a-z]", "2"); /* Invalid ranges. */ - test_match ("[^-a]", "-"); - test_match ("[^a-]", "-"); - test_match ("[)-+--/]", "."); - invalid_pattern (REG_ERANGE, "[z-a]"); /* Empty */ - invalid_pattern (REG_ERANGE, "[a--]"); /* Empty */ - invalid_pattern (REG_ERANGE, "[[:digit:]-9]"); - invalid_pattern (REG_ERANGE, "[a-[:alpha:]]"); - invalid_pattern (REG_ERANGE, "[a-"); - invalid_pattern (REG_EBRACK, "[a-z"); - - test_match ("[ab][cd]", "ae"); /* Concatenation. */ - test_match ("b*c", "b"); /* Star. */ - - /* Invalid anchoring. 
*/ - test_match ("^", "a"); - test_match ("^a", "ba"); - test_match ("$", "b"); - test_match ("a$", "ab"); - test_match ("^$", "a"); - test_match ("^ab$", "a"); - - TEST_SEARCH ("^a", "b\na", 0, 3); - TEST_SEARCH ("b$", "b\na", 0, 3); - - test_match_2 ("^a", "\n", "a"); - test_match_2 ("a$", "a", "\n"); - - TEST_SEARCH (PARENS_TO_OPS ("(^a)"), "ba", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ab", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("^(^a)"), "ba", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)$"), "ab", 0, 2); - - printf ("\nFinished generic POSIX tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/libregex/test/psx-group.c b/gnu/libregex/test/psx-group.c deleted file mode 100644 index 08ae8a28d37a..000000000000 --- a/gnu/libregex/test/psx-group.c +++ /dev/null @@ -1,440 +0,0 @@ -/* psx-group.c: test POSIX grouping, both basic and extended. */ - -#include "test.h" - - -void -test_grouping () -{ - printf ("\nStarting POSIX grouping tests.\n"); - - test_should_match = true; - - test_fastmap (PARENS_TO_OPS ("(a)"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)"), "a", 0, 1, 0, 1, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a)"), "xax", 1, 2, 1, 2, -1, -1); - - test_match (PARENS_TO_OPS ("((a))"), "a"); - test_fastmap (PARENS_TO_OPS ("((a))"), "a", 0, 0); - TEST_REGISTERS (PARENS_TO_OPS ("((a))"), "a", 0, 1, 0, 1, 0, 1); - TEST_REGISTERS (PARENS_TO_OPS ("((a))"), "xax", 1, 2, 1, 2, 1, 2); - - test_fastmap (PARENS_TO_OPS ("(a)(b)"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)(b)"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)(b)"), "ab", 0, 2, 0, 1, 1, 2); - - TEST_REGISTERS (PARENS_TO_OPS ("(a)(b)"), "xabx", 1, 3, 1, 2, 2, 3); - - test_all_registers (PARENS_TO_OPS ("((a)(b))"), "ab", "", 0, 2, 0, 2, 0, 1, - 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - - /* Test that we simply ignore groups past the 255th. 
*/ - test_match (PARENS_TO_OPS ("((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((a))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"), "a"); - - - /* Per POSIX D11.1, p. 125. */ - - test_fastmap (PARENS_TO_OPS ("(a)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*"), "", 0, 0, -1, -1, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*"), "aa", 0, 2, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a*)"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)"), "", 0, 0, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a*)"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)"), "a", 0, 1, 0, 1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)b"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*)b"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)b"), "b", 0, 1, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)b"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)b"), "ab", 0, 2, 0, 1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)b)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a*)b)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a*)b)*"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "ab", 0, 2, 0, 2, 0, 1); - - test_match (PARENS_TO_OPS ("((a*)b)*"), "abb"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abb", 0, 3, 2, 3, 2, 2); - - test_match (PARENS_TO_OPS ("((a*)b)*"), "aabab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "aabab", 0, 5, 3, 5, 3, 4); 
- - test_match (PARENS_TO_OPS ("((a*)b)*"), "abbab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abbab", 0, 5, 3, 5, 3, 4); - - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "xabbabx", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a*)b)*"), "abaabaaaab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abaabaaab", 0, 9, 5, 9, 5, 8); - - test_fastmap (PARENS_TO_OPS ("(ab)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(ab)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(ab)*"), "abab"); - TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "abab", 0, 4, 2, 4, -1, -1); - - /* We match the empty string here. */ - TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "xababx", 0, 0, -1, -1, -1, -1); - - /* Per David A. Willcox. */ - TEST_REGISTERS (PARENS_TO_OPS ("a(b*)c"), "ac", 0, 2, 1, 1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a)*b"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a)*b"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "b", 0, 1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(a)*b"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "ab", 0, 2, 0, 1, -1, -1); - - test_match_2 (PARENS_TO_OPS ("(a)*b"), "a", "ab"); - TEST_REGISTERS_2 (PARENS_TO_OPS ("(a)*b"), "a", "ab", 0, 3, 1, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a)*b"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "aab", 0, 3, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a)*a"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)*a"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*a"), "a", 0, 1, -1, -1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "", 0, 0, 0, 0, 0, 0); - - test_match (PARENS_TO_OPS ("((a*))*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a*))*"), "", 0, 0, 0, 0, 0, 0); - test_match (PARENS_TO_OPS ("((a*))*"), "aa"); - - test_fastmap (PARENS_TO_OPS ("(a*)*b"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*)*b"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "b", 0, 1, 0, 0, -1, -1); - - TEST_REGISTERS 
(PARENS_TO_OPS ("(a*)*b"), "xbx", 1, 2, 1, 1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)*b"), "ab"); /* Per rms. */ - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "ab", 0, 2, 0, 1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "xabx", 1, 3, 1, 2, -1, -1); - - /* Test register restores. */ - test_match (PARENS_TO_OPS ("(a*)*b"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "aab", 0, 3, 0, 2, -1, -1); - - TEST_REGISTERS_2 (PARENS_TO_OPS ("(a*)*b"), "a", "ab", 0, 3, 0, 2, -1, -1); - - /* We are matching the empty string, with backtracking. */ - test_fastmap (PARENS_TO_OPS ("(a*)a"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a*)a"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)a"), "a", 0, 1, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)a"), "aa"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)a"), "aa", 0, 2, 0, 1, -1, -1); - - /* We are matching the empty string, with backtracking. */ -/*fails test_match (PARENS_TO_OPS ("(a*)*a"), "a"); */ - test_match (PARENS_TO_OPS ("(a*)*a"), "aa"); - /* Match the empty string. 
*/ - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "a", 0, 1, 0, 0, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "xax", 1, 2, 1, 1, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "aa", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "xaax", 1, 3, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a)*ab"), "a", 0 , 0); - test_match (PARENS_TO_OPS ("(a)*ab"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*ab"), "ab", 0, 2, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(a)*ab"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a)*ab"), "aab", 0, 3, 0, 1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS("(a)*ab"), "xaabx", 1, 4, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)ab"), "a", 0 , 0); - test_match (PARENS_TO_OPS ("(a*)ab"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "ab", 0, 2, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)ab"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "aab", 0, 3, 0, 1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "xaabx", 1, 4, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)*ab"), "a", 0 , 0); - test_match (PARENS_TO_OPS ("(a*)*ab"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*ab"), "ab", 0, 2, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)*ab"), "aab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*ab"), "aab", 0, 3, 0, 1, -1, -1); - - TEST_REGISTERS (PARENS_TO_OPS("(a*)*ab"), "xaabx", 1, 4, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)*b*c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("(a*)*b*c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b*c"), "c", 0, 1, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a)*(ab)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a)*(ab)*"), "ab"); - /* Register 1 doesn't match at all (vs. matching the empty string) - because of backtracking, hence -1's. 
*/ - TEST_REGISTERS (PARENS_TO_OPS ("(a)*(ab)*"), "ab", 0, 2, -1, -1, 0, 2); - - test_match (PARENS_TO_OPS ("(a*)*(ab)*"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*(ab)*"), "ab", 0, 2, 0, 0, 0, 2); - - test_fastmap (PARENS_TO_OPS ("(a*b)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*b)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "b", 0, 1, 0, 1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*"), "baab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "baab", 0, 4, 1, 4, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*b*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "", 0, 0, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "a", 0, 1, 0, 1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "ba"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "ba", 0, 2, 1, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 2, 0, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "aa"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "aa", 0, 2, 0, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "bb"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "bb", 0, 2, 0, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)*"), "aba"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "aba", 0, 3, 2, 3, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b*)b"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)b"), "b", 0, 1, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)*(b*)*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), ""); - test_all_registers (PARENS_TO_OPS ("((a*)*(b*)*)*"), "", "", 0, 0, 0, 0, - 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), "aba"); - /* Perhaps register 3 should be 3/3 here? 
Not sure if standard - specifies this. xx*/ - test_all_registers (PARENS_TO_OPS ("((a*)*(b*)*)*"), "aba", "", 0, 3, 2, 3, - 2, 3, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)(b*))*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a*)(b*))*"), ""); - - test_all_registers (PARENS_TO_OPS ("((a*)(b*))*"), "", "", 0, 0, 0, 0, - 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), ""); - - test_match (PARENS_TO_OPS ("((a*)(b*))*"), "aba"); - test_all_registers (PARENS_TO_OPS ("((a*)(b*))*"), "aba", "", 0, 3, 2, 3, - 2, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a)*(b)*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a)*(b)*)*"), ""); - test_all_registers (PARENS_TO_OPS ("((a)*(b)*)*"), "", "", 0, 0, 0, 0, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a)*(b)*)*"), "aba"); - - test_all_registers (PARENS_TO_OPS ("((a)*(b)*)*"), "aba", "", 0, 3, 2, 3, - 2, 3, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c", 0, 0); - test_match (PARENS_TO_OPS ("(c(a)*(b)*)*"), ""); - test_all_registers (PARENS_TO_OPS ("(c(a)*(b)*)*"), "", "", 0, 0, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c"); - test_all_registers (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c", "", 0, 1, 0, 1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("c((a)*(b)*)*"), "c", 0, 0); - test_match (PARENS_TO_OPS ("c((a)*(b)*)*"), "c"); - test_all_registers (PARENS_TO_OPS ("c((a)*(b)*)*"), "c", "", 0, 1, 1, 1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(((a)*(b)*)*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(((a)*(b)*)*)*"), ""); - test_all_registers (PARENS_TO_OPS 
("(((a)*(b)*)*)*"), "", "", 0, 0, 0, 0, - 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), ""); - test_fastmap (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), "c", 0, 0); - - test_all_registers (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), "", "", 0, 0, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a)*b)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a)*b)*"), ""); - - test_match (PARENS_TO_OPS ("((a)*b)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("((a)*b)*"), "abb"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "abb", 0, 3, 2, 3, 0, 1); /*zz*/ - - test_match (PARENS_TO_OPS ("((a)*b)*"), "abbab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "abbab", 0, 5, 3, 5, 3, 4); - - /* We match the empty string here. */ - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "xabbabx", 0, 0, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(a*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "", 0, 0, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(a*)*"), "aa"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "aa", 0, 2, 0, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)*)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("((a*)*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)*)*"), "", 0, 0, 0, 0, 0, 0); - - test_match (PARENS_TO_OPS ("((a*)*)*"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)*)*"), "a", 0, 1, 0, 1, 0, 1); - - test_fastmap (PARENS_TO_OPS ("(ab*)*"), "a", 0, 0); - test_match (PARENS_TO_OPS ("(ab*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*"), "", 0, 0, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(ab*)*"), "aa"); - TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*"), "aa", 0, 2, 1, 2, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(ab*)*c"), "ac", 0, 0); - test_match (PARENS_TO_OPS ("(ab*)*c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS 
("(ab*)*c"), "c", 0, 1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(ab*)*c"), "abbac"); - TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*c"), "abbac", 0, 5, 3, 4, -1, -1); - - test_match (PARENS_TO_OPS ("(ab*)*c"), "abac"); - TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*c"), "abac", 0, 4, 2, 3, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*b)*c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("(a*b)*c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "c", 0, 1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*c"), "bbc"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "bbc", 0, 3, 1, 2, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*c"), "aababc"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "aababc", 0, 6, 3, 5, -1, -1); - - test_match (PARENS_TO_OPS ("(a*b)*c"), "aabaabc"); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "aabaabc", 0, 7, 3, 6, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a*)b*)"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a*)b*)"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "", 0, 0, 0, 0, 0, 0); - - test_match (PARENS_TO_OPS ("((a*)b*)"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "a", 0, 1, 0, 1, 0, 1); - - test_match (PARENS_TO_OPS ("((a*)b*)"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "b", 0, 1, 0, 1, 0, 0); - - test_fastmap (PARENS_TO_OPS ("((a)*b*)"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("((a)*b*)"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "", 0, 0, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("((a)*b*)"), "a"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "a", 0, 1, 0, 1, 0, 1); - - test_match (PARENS_TO_OPS ("((a)*b*)"), "b"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "b", 0, 1, 0, 1, -1, -1); - - test_match (PARENS_TO_OPS ("((a)*b*)"), "ab"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "ab", 0, 2, 0, 2, 0, 1); - - test_fastmap (PARENS_TO_OPS ("((a*)b*)c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("((a*)b*)c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)c"), "c", 0, 1, 0, 0, 0, 0); - 
- test_fastmap (PARENS_TO_OPS ("((a)*b*)c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("((a)*b*)c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)c"), "c", 0, 1, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(a*b*)*"), ""); - TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "", 0, 0, 0, 0, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(((a*))((b*)))*"), "ab", 0, 0); - test_match (PARENS_TO_OPS ("(((a*))((b*)))*"), ""); - test_all_registers (PARENS_TO_OPS ("(((a*))((b*)))*"), "", "", 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), "abcde", 0, 0); - test_match (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), ""); - test_all_registers (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), "", "", 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); - - test_fastmap (PARENS_TO_OPS ("((a)*b)*c"), "abc", 0, 0); - test_match (PARENS_TO_OPS ("((a)*b)*c"), "c"); - TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*c"), "c", 0, 1, -1, -1, -1, -1); - - test_match (PARENS_TO_OPS ("(ab)*"), ""); - test_match (PARENS_TO_OPS ("((ab)*)"), ""); - test_match (PARENS_TO_OPS ("(((ab)*))"), ""); - test_match (PARENS_TO_OPS ("((((ab)*)))"), ""); - test_match (PARENS_TO_OPS ("(((((ab)*))))"), ""); - test_match (PARENS_TO_OPS ("((((((ab)*)))))"), ""); - test_match (PARENS_TO_OPS ("(((((((ab)*))))))"), ""); - test_match (PARENS_TO_OPS ("((((((((ab)*)))))))"), ""); - test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), ""); - - - test_fastmap (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "a", 0, 0); - test_match (PARENS_TO_OPS ("((((((((((ab)*)))))))))"), ""); - test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), ""); - test_all_registers (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "", NULL, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1); - - test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "abab"); - test_all_registers (PARENS_TO_OPS 
("(((((((((ab)*))))))))"), "abab", NULL, - 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 2, 4); - - - test_should_match = false; - - invalid_pattern (REG_EPAREN, PARENS_TO_OPS ("(a")); - - test_match (PARENS_TO_OPS ("(a)"), ""); - test_match (PARENS_TO_OPS ("((a))"), "b"); - test_match (PARENS_TO_OPS ("(a)(b)"), "ac"); - test_match (PARENS_TO_OPS ("(ab)*"), "acab"); - test_match (PARENS_TO_OPS ("(a*)*b"), "c"); - test_match (PARENS_TO_OPS ("(a*b)*"), "baa"); - test_match (PARENS_TO_OPS ("(a*b)*"), "baabc"); - test_match (PARENS_TO_OPS ("(a*b*)*"), "c"); - test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), "c"); - test_match (PARENS_TO_OPS ("(a*)*"), "ab"); - test_match (PARENS_TO_OPS ("((a*)*)*"), "ab"); - test_match (PARENS_TO_OPS ("((a*)*)*"), "b"); - test_match (PARENS_TO_OPS ("(ab*)*"), "abc"); - test_match (PARENS_TO_OPS ("(ab*)*c"), "abbad"); - test_match (PARENS_TO_OPS ("(a*c)*b"), "aacaacd"); - test_match (PARENS_TO_OPS ("(a*)"), "b"); - test_match (PARENS_TO_OPS ("((a*)b*)"), "c"); - - /* Expression anchoring. */ - TEST_SEARCH (PARENS_TO_OPS ("(^b)"), "ab", 0, 2); - TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ab", 0, 2); - - printf ("\nFinished POSIX grouping tests.\n"); -} diff --git a/gnu/libregex/test/psx-interf.c b/gnu/libregex/test/psx-interf.c deleted file mode 100644 index 8312d5e7d74a..000000000000 --- a/gnu/libregex/test/psx-interf.c +++ /dev/null @@ -1,624 +0,0 @@ -/* psx-interf.c: test POSIX interface. */ - -#include <string.h> -#include <assert.h> - -#include "test.h" - -#define ERROR_CODE_LENGTH 20 -#define TEST_ERRBUF_SIZE 15 - - -void test_compile (); - - -/* ANSWER should be at least ERROR_CODE_LENGTH long. 
*/ - -static char * -get_error_string (error_code, answer) - int error_code; - char answer[]; -{ - switch (error_code) - { - case 0: strcpy (answer, "No error"); break; - case REG_NOMATCH: strcpy (answer, "REG_NOMATCH"); break; - case REG_BADPAT: strcpy (answer, "REG_BADPAT"); break; - case REG_EPAREN: strcpy (answer, "REG_EPAREN"); break; - case REG_ESPACE: strcpy (answer, "REG_ESPACE"); break; - case REG_ECOLLATE: strcpy (answer, "REG_ECOLLATE"); break; - case REG_ECTYPE: strcpy (answer, "REG_ECTYPE"); break; - case REG_EESCAPE: strcpy (answer, "REG_EESCAPE"); break; - case REG_ESUBREG: strcpy (answer, "REG_ESUBREG"); break; - case REG_EBRACK: strcpy (answer, "REG_EBRACK"); break; - case REG_EBRACE: strcpy (answer, "REG_EBRACE"); break; - case REG_BADBR: strcpy (answer, "REG_BADBR"); break; - case REG_ERANGE: strcpy (answer, "REG_ERANGE"); break; - case REG_BADRPT: strcpy (answer, "REG_BADRPT"); break; - case REG_EEND: strcpy (answer, "REG_EEND"); break; - default: strcpy (answer, "Bad error code"); - } - return answer; -} - - -/* I don't think we actually need to initialize all these things. 
- --karl */ - -void -init_pattern_buffer (pattern_buffer_ptr) - regex_t *pattern_buffer_ptr; -{ - pattern_buffer_ptr->buffer = NULL; - pattern_buffer_ptr->allocated = 0; - pattern_buffer_ptr->used = 0; - pattern_buffer_ptr->fastmap = NULL; - pattern_buffer_ptr->fastmap_accurate = 0; - pattern_buffer_ptr->translate = NULL; - pattern_buffer_ptr->can_be_null = 0; - pattern_buffer_ptr->re_nsub = 0; - pattern_buffer_ptr->no_sub = 0; - pattern_buffer_ptr->not_bol = 0; - pattern_buffer_ptr->not_eol = 0; -} - - -void -test_compile (valid_pattern, error_code_expected, pattern, - pattern_buffer_ptr, cflags) - unsigned valid_pattern; - int error_code_expected; - const char *pattern; - regex_t *pattern_buffer_ptr; - int cflags; -{ - int error_code_returned; - boolean error = false; - char errbuf[TEST_ERRBUF_SIZE]; - - init_pattern_buffer (pattern_buffer_ptr); - error_code_returned = regcomp (pattern_buffer_ptr, pattern, cflags); - - if (valid_pattern && error_code_returned) - { - printf ("\nShould have been a valid pattern but wasn't.\n"); - regerror (error_code_returned, pattern_buffer_ptr, errbuf, - TEST_ERRBUF_SIZE); - printf ("%s", errbuf); - error = true; - } - - if (!valid_pattern && !error_code_returned) - { - printf ("\n\nInvalid pattern compiled as valid:\n"); - error = true; - } - - if (error_code_returned != error_code_expected) - { - char expected_error_string[ERROR_CODE_LENGTH]; - char returned_error_string[ERROR_CODE_LENGTH]; - - get_error_string (error_code_expected, expected_error_string), - get_error_string (error_code_returned, returned_error_string); - - printf (" Expected error code %s but got `%s'.\n", - expected_error_string, returned_error_string); - - error = true; - } - - if (error) - print_pattern_info (pattern, pattern_buffer_ptr); -} - - -static void -test_nsub (sub_count, pattern, cflags) - unsigned sub_count; - char *pattern; - int cflags; - -{ - regex_t pattern_buffer; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - - if 
(pattern_buffer.re_nsub != sub_count) - { - printf ("\nShould have counted %d subexpressions but counted %d \ -instead.\n", sub_count, pattern_buffer.re_nsub); - } - - regfree (&pattern_buffer); -} - - -static void -test_regcomp () -{ - regex_t pattern_buffer; - int cflags = 0; - - - printf ("\nStarting regcomp tests.\n"); - - cflags = 0; - test_compile (0, REG_ESUBREG, "\\(a\\)\\2", &pattern_buffer, cflags); - test_compile (0, REG_EBRACE, "a\\{", &pattern_buffer, cflags); - test_compile (0, REG_BADBR, "a\\{-1\\}", &pattern_buffer, cflags); - test_compile (0, REG_EBRACE, "a\\{", &pattern_buffer, cflags); - test_compile (0, REG_EBRACE, "a\\{1", &pattern_buffer, cflags); - - cflags = REG_EXTENDED; - test_compile (0, REG_ECTYPE, "[[:alpo:]]", &pattern_buffer, cflags); - test_compile (0, REG_EESCAPE, "\\", &pattern_buffer, cflags); - test_compile (0, REG_EBRACK, "[a", &pattern_buffer, cflags); - test_compile (0, REG_EPAREN, "(", &pattern_buffer, cflags); - test_compile (0, REG_ERANGE, "[z-a]", &pattern_buffer, cflags); - - test_nsub (1, "(a)", cflags); - test_nsub (2, "((a))", cflags); - test_nsub (2, "(a)(b)", cflags); - - cflags = REG_EXTENDED | REG_NOSUB; - test_nsub (1, "(a)", cflags); - - regfree (&pattern_buffer); - - printf ("\nFinished regcomp tests.\n"); -} - - -static void -fill_pmatch (pmatch, start0, end0, start1, end1, start2, end2) - regmatch_t pmatch[]; - regoff_t start0, end0, start1, end1, start2, end2; -{ - pmatch[0].rm_so = start0; - pmatch[0].rm_eo = end0; - pmatch[1].rm_so = start1; - pmatch[1].rm_eo = end1; - pmatch[2].rm_so = start2; - pmatch[2].rm_eo = end2; -} - - -static void -test_pmatch (pattern, string, nmatch, pmatch, correct_pmatch, cflags) - char *pattern; - char *string; - unsigned nmatch; - regmatch_t pmatch[]; - regmatch_t correct_pmatch[]; - int cflags; -{ - regex_t pattern_buffer; - unsigned this_match; - int error_code_returned; - boolean found_nonmatch = false; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - 
error_code_returned = regexec (&pattern_buffer, string, nmatch, pmatch, 0); - - if (error_code_returned == REG_NOMATCH) - printf ("Matching failed in test_pmatch.\n"); - else - { - for (this_match = 0; this_match < nmatch; this_match++) - { - if (pmatch[this_match].rm_so != correct_pmatch[this_match].rm_so) - { - if (found_nonmatch == false) - printf ("\n"); - - printf ("Pmatch start %d wrong: was %d when should have \ -been %d.\n", this_match, pmatch[this_match].rm_so, - correct_pmatch[this_match].rm_so); - found_nonmatch = true; - } - if (pmatch[this_match].rm_eo != correct_pmatch[this_match].rm_eo) - { - if (found_nonmatch == false) - printf ("\n"); - - printf ("Pmatch end %d wrong: was %d when should have been \ -%d.\n", this_match, pmatch[this_match].rm_eo, - correct_pmatch[this_match].rm_eo); - found_nonmatch = true; - } - } - - if (found_nonmatch) - { - printf (" The number of pmatches requested was: %d.\n", nmatch); - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - } - } /* error_code_returned == REG_NOMATCH */ - - regfree (&pattern_buffer); -} - - -static void -test_eflags (must_match_bol, must_match_eol, pattern, string, cflags, eflags) - boolean must_match_bol; - boolean must_match_eol; - char *pattern; - char *string; - int cflags; - int eflags; -{ - regex_t pattern_buffer; - int error_code_returned; - boolean was_error = false; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - error_code_returned = regexec (&pattern_buffer, string, 0, 0, eflags); - - if (error_code_returned == REG_NOMATCH) - { - /* If wasn't true that both 1) the anchored part of the pattern - had to match this string and 2) this string was a proper - substring... */ - - if (!( (must_match_bol && (eflags & REG_NOTBOL)) - || (must_match_eol && (eflags & REG_NOTEOL)) )) - { - printf ("\nEflags test failed: didn't match when should have.\n"); - was_error = true; - } - } - else /* We got a match. 
*/ - { - /* If wasn't true that either 1) the anchored part of the pattern - didn't have to match this string or 2) this string wasn't a - proper substring... */ - - if ((must_match_bol == (eflags & REG_NOTBOL)) - || (must_match_eol == (eflags & REG_NOTEOL))) - { - printf ("\nEflags test failed: matched when shouldn't have.\n"); - was_error = true; - } - } - - if (was_error) - { - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - - if (eflags & REG_NOTBOL) - printf (" The eflag REG_BOL was set.\n"); - if (eflags & REG_NOTEOL) - printf (" The eflag REG_EOL was set.\n"); - } - - regfree (&pattern_buffer); -} - - -static void -test_ignore_case (should_match, pattern, string, cflags) - boolean should_match; - char *pattern; - char *string; - int cflags; -{ - regex_t pattern_buffer; - int error_code_returned; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - error_code_returned = regexec (&pattern_buffer, string, 0, 0, 0); - - if (should_match && error_code_returned == REG_NOMATCH) - { - printf ("\nIgnore-case test failed:\n"); - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - - if (cflags & REG_ICASE) - printf (" The cflag REG_ICASE was set.\n"); - } - - regfree (&pattern_buffer); -} - - -static void -test_newline (should_match, pattern, string, cflags) - boolean should_match; - char *pattern; - char *string; - int cflags; -{ - regex_t pattern_buffer; - int error_code_returned; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - error_code_returned = regexec (&pattern_buffer, string, 0, 0, 0); - - if (should_match && error_code_returned == REG_NOMATCH) - { - printf ("\nNewline test failed:\n"); - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - - if (cflags & REG_NEWLINE) - printf (" The cflag REG_NEWLINE was set.\n"); - else - printf (" The cflag REG_NEWLINE wasn't set.\n"); - } - - 
regfree (&pattern_buffer); -} - - -static void -test_posix_match (should_match, pattern, string, cflags) - boolean should_match; - char *pattern; - char *string; - int cflags; -{ - regex_t pattern_buffer; - int error_code_returned; - boolean was_error = false; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); - error_code_returned = regexec (&pattern_buffer, string, 0, 0, 0); - - if (should_match && error_code_returned == REG_NOMATCH) - { - printf ("\nShould have matched but didn't:\n"); - was_error = true; - } - else if (!should_match && error_code_returned != REG_NOMATCH) - { - printf ("\nShould not have matched but did:\n"); - was_error = true; - } - - if (was_error) - { - printf (" The string to match was: `%s'.\n", string); - print_pattern_info (pattern, &pattern_buffer); - } - - regfree (&pattern_buffer); -} - - -static void -test_regexec () -{ - regmatch_t pmatch[3]; - regmatch_t correct_pmatch[3]; - int cflags = 0; - int eflags = 0; - - printf ("\nStarting regexec tests.\n"); - - cflags = REG_NOSUB; /* shouldn't look at any of pmatch. */ - test_pmatch ("a", "a", 0, pmatch, correct_pmatch, cflags); - - /* Ask for less `pmatch'es than there are pattern subexpressions. - (Shouldn't look at pmatch[2]. */ - cflags = REG_EXTENDED; - fill_pmatch (correct_pmatch, 0, 1, 0, 1, 100, 101); - test_pmatch ("((a))", "a", 2, pmatch, correct_pmatch, cflags); - - /* Ask for same number of `pmatch'es as there are pattern subexpressions. */ - cflags = REG_EXTENDED; - fill_pmatch(correct_pmatch, 0, 1, 0, 1, -1, -1); - test_pmatch ("(a)", "a", 2, pmatch, correct_pmatch, cflags); - - /* Ask for more `pmatch'es than there are pattern subexpressions. 
*/ - cflags = REG_EXTENDED; - fill_pmatch (correct_pmatch, 0, 1, -1, -1, -1, -1); - test_pmatch ("a", "a", 2, pmatch, correct_pmatch, cflags); - - eflags = REG_NOTBOL; - test_eflags (true, false, "^a", "a", cflags, eflags); - test_eflags (true, false, "(^a)", "a", cflags, eflags); - test_eflags (true, false, "a|^b", "b", cflags, eflags); - test_eflags (true, false, "^b|a", "b", cflags, eflags); - - eflags = REG_NOTEOL; - test_eflags (false, true, "a$", "a", cflags, eflags); - test_eflags (false, true, "(a$)", "a", cflags, eflags); - test_eflags (false, true, "a|b$", "b", cflags, eflags); - test_eflags (false, true, "b$|a", "b", cflags, eflags); - - eflags = REG_NOTBOL | REG_NOTEOL; - test_eflags (true, true, "^a$", "a", cflags, eflags); - test_eflags (true, true, "(^a$)", "a", cflags, eflags); - test_eflags (true, true, "a|(^b$)", "b", cflags, eflags); - test_eflags (true, true, "(^b$)|a", "b", cflags, eflags); - - cflags = REG_ICASE; - test_ignore_case (true, "a", "a", cflags); - test_ignore_case (true, "A", "A", cflags); - test_ignore_case (true, "A", "a", cflags); - test_ignore_case (true, "a", "A", cflags); - - test_ignore_case (true, "@", "@", cflags); - test_ignore_case (true, "\\[", "[", cflags); - test_ignore_case (true, "`", "`", cflags); - test_ignore_case (true, "{", "{", cflags); - - test_ignore_case (true, "[!-`]", "A", cflags); - test_ignore_case (true, "[!-`]", "a", cflags); - - cflags = 0; - test_ignore_case (false, "a", "a", cflags); - test_ignore_case (false, "A", "A", cflags); - test_ignore_case (false, "A", "a", cflags); - test_ignore_case (false, "a", "A", cflags); - - test_ignore_case (true, "@", "@", cflags); - test_ignore_case (true, "\\[", "[", cflags); - test_ignore_case (true, "`", "`", cflags); - test_ignore_case (true, "{", "{", cflags); - - test_ignore_case (true, "[!-`]", "A", cflags); - test_ignore_case (false, "[!-`]", "a", cflags); - - - /* Test newline stuff. 
*/ - cflags = REG_EXTENDED | REG_NEWLINE; - test_newline (true, "\n", "\n", cflags); - test_newline (true, "a\n", "a\n", cflags); - test_newline (true, "\nb", "\nb", cflags); - test_newline (true, "a\nb", "a\nb", cflags); - - test_newline (false, ".", "\n", cflags); - test_newline (false, "[^a]", "\n", cflags); - - test_newline (true, "\n^a", "\na", cflags); - test_newline (true, "\n(^a|b)", "\na", cflags); - test_newline (true, "a$\n", "a\n", cflags); - test_newline (true, "(a$|b)\n", "a\n", cflags); - test_newline (true, "(a$|b|c)\n", "a\n", cflags); - test_newline (true, "((a$|b|c)$)\n", "a\n", cflags); - test_newline (true, "((a$|b|c)$)\n", "b\n", cflags); - test_newline (true, "(a$|b)\n|a\n", "a\n", cflags); - - test_newline (true, "^a", "\na", cflags); - test_newline (true, "a$", "a\n", cflags); - - /* Now test normal behavior. */ - cflags = REG_EXTENDED; - test_newline (true, "\n", "\n", cflags); - test_newline (true, "a\n", "a\n", cflags); - test_newline (true, "\nb", "\nb", cflags); - test_newline (true, "a\nb", "a\nb", cflags); - - test_newline (true, ".", "\n", cflags); - test_newline (true, "[^a]", "\n", cflags); - - test_newline (false, "\n^a", "\na", cflags); - test_newline (false, "a$\n", "a\n", cflags); - - test_newline (false, "^a", "\na", cflags); - test_newline (false, "a$", "a\n", cflags); - - - /* Test that matches whole string only. */ - cflags = 0; - test_posix_match (true, "a", "a", cflags); - - /* Tests that match substrings. */ - test_posix_match (true, "a", "ab", cflags); - test_posix_match (true, "b", "ab", cflags); - - /* Test that doesn't match. 
*/ - test_posix_match (false, "a", "b", cflags); - - printf ("\nFinished regexec tests.\n"); -} - - -static void -test_error_code_message (error_code, expected_error_message) - int error_code; - char *expected_error_message; -{ - char returned_error_message[TEST_ERRBUF_SIZE]; - char error_code_string[ERROR_CODE_LENGTH]; - size_t expected_error_message_length = strlen (expected_error_message) + 1; - size_t returned_error_message_length = regerror (error_code, 0, - returned_error_message, - TEST_ERRBUF_SIZE); - - if (returned_error_message_length != expected_error_message_length) - { - printf ("\n\n Testing returned error codes, with expected error \ -message `%s':\n", expected_error_message); - - printf ("\n\n and returned error message `%s':\n", - returned_error_message); - printf (" should have returned a length of %d but returned %d.\n", - expected_error_message_length, returned_error_message_length); - } - - if (strncmp (expected_error_message, returned_error_message, - TEST_ERRBUF_SIZE - 1) != 0) - { - - get_error_string (error_code, error_code_string), - printf ("\n\n With error code %s (%d), expected error message:\n", - error_code_string, error_code); - - printf (" `%s'\n", expected_error_message); - printf (" but got:\n"); - printf (" `%s'\n", returned_error_message); - } -} - - -static void -test_error_code_allocation (error_code, expected_error_message) - int error_code; - char *expected_error_message; -{ - char *returned_error_message = NULL; - char error_code_string[ERROR_CODE_LENGTH]; - size_t returned_error_message_length = regerror (error_code, 0, - returned_error_message, - (size_t)0); - - returned_error_message = xmalloc (returned_error_message_length + 1); - - regerror (error_code, 0, returned_error_message, - returned_error_message_length); - - if (strcmp (expected_error_message, returned_error_message) != 0) - { - get_error_string (error_code, error_code_string), - - printf ("\n\n Testing error code allocation,\n"); - printf ("with error code %s 
(%d), expected error message:\n", - error_code_string, error_code); - printf (" `%s'\n", expected_error_message); - printf (" but got:\n"); - printf (" `%s'\n", returned_error_message); - } -} - - -static void -test_regerror () -{ - test_error_code_message (REG_NOMATCH, "No match"); - test_error_code_message (REG_BADPAT, "Invalid regular expression"); - test_error_code_message (REG_ECOLLATE, "Invalid collation character"); - test_error_code_message (REG_ECTYPE, "Invalid character class name"); - test_error_code_message (REG_EESCAPE, "Trailing backslash"); - test_error_code_message (REG_ESUBREG, "Invalid back reference"); - test_error_code_message (REG_EBRACK, "Unmatched [ or [^"); - test_error_code_message (REG_EPAREN, "Unmatched ( or \\("); - test_error_code_message (REG_EBRACE, "Unmatched \\{"); - test_error_code_message (REG_BADBR, "Invalid content of \\{\\}"); - test_error_code_message (REG_ERANGE, "Invalid range end"); - test_error_code_message (REG_ESPACE, "Memory exhausted"); - test_error_code_message (REG_BADRPT, "Invalid preceding regular expression"); - test_error_code_message (REG_EEND, "Premature end of regular expression"); - test_error_code_message (REG_ESIZE, "Regular expression too big"); - test_error_code_allocation (REG_ERPAREN, "Unmatched ) or \\)"); -} - - -void -test_posix_interface () -{ - printf ("\nStarting POSIX interface tests.\n"); - t = posix_interface_test; - - test_regcomp (); - test_regexec (); - test_regerror (); - - printf ("\nFinished POSIX interface tests.\n"); -} diff --git a/gnu/libregex/test/psx-interv.c b/gnu/libregex/test/psx-interv.c deleted file mode 100644 index 6725c38d00b8..000000000000 --- a/gnu/libregex/test/psx-interv.c +++ /dev/null @@ -1,140 +0,0 @@ -/* psx-interv.c: test POSIX intervals, both basic and extended. */ - -#include "test.h" - -void -test_intervals () -{ - printf ("\nStarting POSIX interval tests.\n"); - - test_should_match = true; - /* Valid intervals. 
*/ - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")), "abaab"); - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")), "a", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")), - "abaab", 0, 5, 2, 5, -1, -1); - - test_match (BRACES_TO_OPS ("a{0}"), ""); - test_fastmap (BRACES_TO_OPS ("a{0}"), "", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS ("a{0}"), "", 0, 0, -1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS ("a{0}"), "x", 0, 0, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{0,}"), ""); - test_match (BRACES_TO_OPS ("a{0,}"), "a"); - test_fastmap (BRACES_TO_OPS ("a{0,}"), "a", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS ("a{0,}"), "a", 0, 1, -1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS ("a{0,}"), "xax", 0, 0, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{1}"), "a"); - test_match (BRACES_TO_OPS ("a{1,}"), "a"); - test_match (BRACES_TO_OPS ("a{1,}"), "aa"); - test_match (BRACES_TO_OPS ("a{0,0}"), ""); - test_match (BRACES_TO_OPS ("a{0,1}"), ""); - test_match (BRACES_TO_OPS ("a{0,1}"), "a"); - test_match (BRACES_TO_OPS ("a{1,3}"), "a"); - test_match (BRACES_TO_OPS ("a{1,3}"), "aa"); - test_match (BRACES_TO_OPS ("a{1,3}"), "aaa"); - TEST_REGISTERS (BRACES_TO_OPS ("a{1,3}"), "aaa", 0, 3, -1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS ("a{1,3}"), "xaaax", 1, 4, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{0,3}b"), "b"); - test_match (BRACES_TO_OPS ("a{0,3}b"), "aaab"); - test_fastmap (BRACES_TO_OPS ("a{0,3}b"), "ab", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS ("a{0,3}b"), "b", 0, 1, -1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS ("a{0,3}b"), "xbx", 1, 2, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{1,3}b"), "ab"); - test_match (BRACES_TO_OPS ("a{1,3}b"), "aaab"); - test_match (BRACES_TO_OPS ("ab{1,3}c"), "abbbc"); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "b"); - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "ab", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "b", 0, 1, 
-1, -1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "ab", 0, 2, 0, 1, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "xabx", 1, 3, 1, 2, -1, -1); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "ab"); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaab"); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaab", 0, 4, 2, 3, -1, -1); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "xaaabx", 1, 5, 3, 4, -1, -1); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "aaaab"); - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "ab", 0, 0); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "aaaab", 0, 5, 4, 4, -1, -1); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "b"); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "aaab"); - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "ab", 0, 0); - - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,1}ab")), "aaaab"); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,1}ab")), "aaaab", 0, 5, 0, 3, -1, -1); - - test_match (BRACES_TO_OPS (".{0,3}b"), "b"); - test_match (BRACES_TO_OPS (".{0,3}b"), "ab"); - - test_match (BRACES_TO_OPS ("[a]{0,3}b"), "b"); - test_match (BRACES_TO_OPS ("[a]{0,3}b"), "aaab"); - test_fastmap (BRACES_TO_OPS ("[a]{0,3}b"), "ab", 0, 0); - test_match (BRACES_TO_OPS ("[^a]{0,3}b"), "bcdb"); - test_match (BRACES_TO_OPS ("ab{0,3}c"), "abbbc"); - test_match (BRACES_TO_OPS ("[[:digit:]]{0,3}d"), "123d"); - test_fastmap (BRACES_TO_OPS ("[[:digit:]]{0,3}d"), "0123456789d", 0, 0); - - test_match (BRACES_TO_OPS ("\\*{0,3}a"), "***a"); - test_match (BRACES_TO_OPS (".{0,3}b"), "aaab"); - test_match (BRACES_TO_OPS ("a{0,3}a"), "aaa"); - /* Backtracking. 
*/ - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a", 0, 0); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a"); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a", 0, 1, -1, -1, -1, -1); - - test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa", 0, 0); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa"); - TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa", 0, 2, -1, -1, -1, -1); - - test_match (BRACES_TO_OPS ("a{2}*"), ""); - test_match (BRACES_TO_OPS ("a{2}*"), "aa"); - - test_match (BRACES_TO_OPS ("a{1}*"), ""); - test_match (BRACES_TO_OPS ("a{1}*"), "a"); - test_match (BRACES_TO_OPS ("a{1}*"), "aa"); - - test_match (BRACES_TO_OPS ("a{1}{1}"), "a"); - - test_match (BRACES_TO_OPS ("a{1}{1}{1}"), "a"); - test_match (BRACES_TO_OPS ("a{1}{1}{2}"), "aa"); - - test_match (BRACES_TO_OPS ("a{1}{1}*"), ""); - test_match (BRACES_TO_OPS ("a{1}{1}*"), "a"); - test_match (BRACES_TO_OPS ("a{1}{1}*"), "aa"); - test_match (BRACES_TO_OPS ("a{1}{1}*"), "aaa"); - - test_match (BRACES_TO_OPS ("a{1}{2}"), "aa"); - test_match (BRACES_TO_OPS ("a{2}{1}"), "aa"); - - - test_should_match = false; - - test_match (BRACES_TO_OPS ("a{0}"), "a"); - test_match (BRACES_TO_OPS ("a{0,}"), "b"); - test_match (BRACES_TO_OPS ("a{1}"), ""); - test_match (BRACES_TO_OPS ("a{1}"), "aa"); - test_match (BRACES_TO_OPS ("a{1,}"), ""); - test_match (BRACES_TO_OPS ("a{1,}"), "b"); - test_match (BRACES_TO_OPS ("a{0,0}"), "a"); - test_match (BRACES_TO_OPS ("a{0,1}"), "aa"); - test_match (BRACES_TO_OPS ("a{0,1}"), "b"); - test_match (BRACES_TO_OPS ("a{1,3}"), ""); - test_match (BRACES_TO_OPS ("a{1,3}"), "aaaa"); - test_match (BRACES_TO_OPS ("a{1,3}"), "b"); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaaab"); - test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "bb"); - test_match (BRACES_TO_OPS ("[a]{0,3}"), "aaaa"); - test_match (BRACES_TO_OPS ("[^a]{0,3}b"), "ab"); - test_match (BRACES_TO_OPS 
("ab{0,3}c"), "abababc"); - test_match (BRACES_TO_OPS ("[:alpha:]{0,3}d"), "123d"); - test_match (BRACES_TO_OPS ("\\^{1,3}a"), "a"); - test_match (BRACES_TO_OPS (".{0,3}b"), "aaaab"); - - printf ("\nFinished POSIX interval tests.\n"); -} diff --git a/gnu/libregex/test/regexcpp.sed b/gnu/libregex/test/regexcpp.sed deleted file mode 100644 index 082c1360814f..000000000000 --- a/gnu/libregex/test/regexcpp.sed +++ /dev/null @@ -1,8 +0,0 @@ -/;..*$/s/;/;\ -/g -/{ .*$/s/{/{\ -/g -/ \?[^'] /s/?/?\ -/g -/ : /s/:/:\ -/g diff --git a/gnu/libregex/test/syntax.skel b/gnu/libregex/test/syntax.skel deleted file mode 100644 index a3fbf64c5983..000000000000 --- a/gnu/libregex/test/syntax.skel +++ /dev/null @@ -1,74 +0,0 @@ -/* Print which syntax bits are set. */ - -#include <sys/types.h> -#include <stdio.h> -#include "regex.h" - -/* It's coincidental that these two are currently the same. */ -#define LONGEST_BIT_NAME "RE_UNMATCHED_RIGHT_PAREN_ORD" -#define LAST_BIT RE_UNMATCHED_RIGHT_PAREN_ORD - -/* Sum of above, when printed. Assigned in main. */ -static unsigned longest; - - -static void -test_bit (syntax, bit, name) - reg_syntax_t syntax; - unsigned bit; - char *name; -{ - char padding[100], test_str[100]; - int padding_count; - - sprintf (test_str, "%s (%d=0x%x)", name, bit, bit); - padding_count = longest - strlen (test_str); - - padding[padding_count] = 0; - while (padding_count--) - { - padding[padding_count] = ' '; - } - - printf ("%s%s (%d=0x%x): %c\n", - name, padding, bit, bit, syntax & bit ? 'y' : 'n'); -} - - -/* Macro to abbreviate the constant arguments. */ -#define TEST_BIT(bit) test_bit (syntax, bit, #bit) - -int -main (argc, argv) - int argc; - char *argv[]; -{ - reg_syntax_t syntax; - char syntax_str[1000], test_str[100]; - - switch (argc) - { - case 1: - printf ("Syntax? 
"); - scanf ("%s", syntax_str); - break; - - case 2: - strcpy (syntax_str, argv[1]); - break; - - default: - fprintf (stderr, "Usage: syntax [syntax].\n"); - exit (1); - } - - sscanf (syntax_str, "%i", &syntax); - - /* Figure out the longest name, so we can align the output nicely. */ - sprintf (test_str, "%s (%d=0x%x)", LONGEST_BIT_NAME, LAST_BIT, LAST_BIT); - longest = strlen (test_str); - - /* [[[replace with bit tests]]] */ - - return 0; -} diff --git a/gnu/libregex/test/test.c b/gnu/libregex/test/test.c deleted file mode 100644 index a8de23ef7429..000000000000 --- a/gnu/libregex/test/test.c +++ /dev/null @@ -1,782 +0,0 @@ -/* test.c: testing routines for regex.c. */ - -#include <assert.h> - -#ifdef STDC_HEADERS -#include <stdlib.h> -#else -char *malloc (); -char *realloc (); -#endif - -/* Just to be complete, we make both the system V/ANSI and the BSD - versions of the string functions available. */ -#if USG || STDC_HEADERS -#include <string.h> -#define index strchr -#define rindex strrchr -#define bcmp(s1, s2, len) memcmp ((s1), (s2), (len)) -#define bcopy(from, to, len) memcpy ((to), (from), (len)) -#define bzero(s, len) memset ((s), 0, (len)) -#else -#include <strings.h> -#define strchr index -#define strrchr rindex -#ifndef NEED_MEMORY_H -#define memcmp(s1, s2, n) bcmp ((s1), (s2), (n)) -#define memcpy(to, from, len) bcopy ((from), (to), (len)) -#endif -extern char *strtok (); -extern char *strstr (); -#endif /* not USG or STDC_HEADERS */ - -/* SunOS 4.1 declares memchr in <memory.h>, not <string.h>. I don't - understand why. */ -#if NEED_MEMORY_H -#include <memory.h> -#endif - -#include "test.h" - -#define BYTEWIDTH 8 - -extern void print_partial_compiled_pattern (); -extern void print_compiled_pattern (); -extern void print_double_string (); - -/* If nonzero, the results of every test are displayed. */ -boolean verbose = false; - -/* If nonzero, don't do register testing. 
*/ -boolean omit_register_tests = true; - -/* Says whether the current test should match or fail to match. */ -boolean test_should_match; - - -static void -set_all_registers (start0, end0, start1, end1, - start2, end2, start3, end3, - start4, end4, start5, end5, - start6, end6, start7, end7, - start8, end8, start9, end9, regs) - - int start0; int end0; int start1; int end1; - int start2; int end2; int start3; int end3; - int start4; int end4; int start5; int end5; - int start6; int end6; int start7; int end7; - int start8; int end8; int start9; int end9; - struct re_registers *regs; - - { - unsigned r; - - regs->start[0] = start0; regs->end[0] = end0; - regs->start[1] = start1; regs->end[1] = end1; - regs->start[2] = start2; regs->end[2] = end2; - regs->start[3] = start3; regs->end[3] = end3; - regs->start[4] = start4; regs->end[4] = end4; - regs->start[5] = start5; regs->end[5] = end5; - regs->start[6] = start6; regs->end[6] = end6; - regs->start[7] = start7; regs->end[7] = end7; - regs->start[8] = start8; regs->end[8] = end8; - regs->start[9] = start9; regs->end[9] = end9; - for (r = 10; r < regs->num_regs; r++) - { - regs->start[r] = -1; - regs->end[r] = -1; - } - } - - - -/* Return the concatenation of S1 and S2. This would be a prime place - to use varargs. */ - -char * -concat (s1, s2) - char *s1; - char *s2; -{ - char *answer = xmalloc (strlen (s1) + strlen (s2) + 1); - - strcpy (answer, s1); - strcat (answer, s2); - - return answer; -} - - -#define OK_TO_SEARCH (nonconst_buf.fastmap_accurate && (str1 || str2)) - -/* We ignore the `can_be_null' argument. Should just be removed. 
*/ - -void -general_test (pattern_should_be_valid, match_whole_string, - pat, str1, str2, start, range, end, correct_fastmap, - correct_regs, can_be_null) - unsigned pattern_should_be_valid; - unsigned match_whole_string; - const char *pat; - char *str1, *str2; - int start, range, end; - char *correct_fastmap; - struct re_registers *correct_regs; - int can_be_null; -{ - struct re_pattern_buffer nonconst_buf; - struct re_pattern_buffer old_buf; - struct re_registers regs; - const char *r; - char fastmap[1 << BYTEWIDTH]; - unsigned *regs_correct = NULL; - unsigned all_regs_correct = 1; - boolean fastmap_internal_error = false; - unsigned match = 0; - unsigned match_1 = 0; - unsigned match_2 = 0; - unsigned invalid_pattern = 0; - boolean internal_error_1 = false; - boolean internal_error_2 = false; - - - nonconst_buf.allocated = 8; - nonconst_buf.buffer = xmalloc (nonconst_buf.allocated); - nonconst_buf.fastmap = fastmap; - nonconst_buf.translate = 0; - - assert (pat != NULL); - r = re_compile_pattern (pat, strlen (pat), &nonconst_buf); - - /* Kludge: if we are doing POSIX testing, we really should have - called regcomp, not re_compile_pattern. As it happens, the only - way in which it matters is that re_compile_pattern sets the - newline/anchor field for matching (part of what happens when - REG_NEWLINE is given to regcomp). We have to undo that for POSIX - matching. 
*/ - if (t == posix_basic_test || t == posix_extended_test) - nonconst_buf.newline_anchor = 0; - - invalid_pattern = r != NULL; - - if (!r) - { - int r; - - if (!pattern_should_be_valid) - printf ("\nShould have been an invalid pattern but wasn't:\n"); - else - { - fastmap_internal_error = (re_compile_fastmap (&nonconst_buf) == -2); - - if (correct_fastmap) - nonconst_buf.fastmap_accurate = - memcmp (nonconst_buf.fastmap, correct_fastmap, 1 << BYTEWIDTH) - == 0; - - if (OK_TO_SEARCH) - { - old_buf = nonconst_buf; - old_buf.buffer = (unsigned char *) xmalloc (nonconst_buf.used); - memcpy (old_buf.buffer, nonconst_buf.buffer, nonconst_buf.used); - - /* If only one string is null, call re_match or re_search, - which is what the user would probably do. */ - if (str1 == NULL && str2 != NULL - || str2 == NULL && str1 != NULL) - { - char *the_str = str1 == NULL ? str2 : str1; - - match_1 - = match_whole_string - ? (r = re_match (&nonconst_buf, the_str, - strlen (the_str), start, ®s)) - == strlen (the_str) - : (r = re_search (&nonconst_buf, - the_str, strlen (the_str), - start, range, ®s)) - >= 0; - - if (r == -2) - internal_error_1 = true; - } - else - match_1 = 1; - - /* Also call with re_match_2 or re_search_2, as they might - do this. (Also can check calling with either string1 - or string2 or both null.) 
*/ - if (match_whole_string) - { - r = re_match_2 (&nonconst_buf, - str1, SAFE_STRLEN (str1), - str2, SAFE_STRLEN (str2), - start, ®s, end); - match_2 = r == SAFE_STRLEN (str1) + SAFE_STRLEN (str2); - } - else - { - r = re_search_2 (&nonconst_buf, - str1, SAFE_STRLEN (str1), - str2, SAFE_STRLEN (str2), - start, range, ®s, end); - match_2 = r >= 0; - } - - if (r == -2) - internal_error_2 = true; - - match = match_1 & match_2; - - if (correct_regs) - { - unsigned reg; - if (regs_correct != NULL) - free (regs_correct); - - regs_correct - = (unsigned *) xmalloc (regs.num_regs * sizeof (unsigned)); - - for (reg = 0; - reg < regs.num_regs && reg < correct_regs->num_regs; - reg++) - { - regs_correct[reg] - = (regs.start[reg] == correct_regs->start[reg] - && regs.end[reg] == correct_regs->end[reg]) -#ifdef EMPTY_REGS_CONFUSED - /* There is confusion in the standard about - the registers in some patterns which can - match either the empty string or not match. - For example, in `((a*))*' against the empty - string, the two registers can either match - the empty string (be 0/0), or not match - (because of the outer *) (be -1/-1). (Or - one can do one and one can do the other.) 
*/ - || (regs.start[reg] == -1 && regs.end[reg] == -1 - && correct_regs->start[reg] - == correct_regs->end[reg]) -#endif - ; - - all_regs_correct &= regs_correct[reg]; - } - } - } /* OK_TO_SEARCH */ - } - } - - if (fastmap_internal_error) - printf ("\n\nInternal error in re_compile_fastmap:"); - - if (internal_error_1) - { - if (!fastmap_internal_error) - printf ("\n"); - - printf ("\nInternal error in re_match or re_search:"); - } - - if (internal_error_2) - { - if (!internal_error_1) - printf ("\n"); - - printf ("\nInternal error in re_match_2 or re_search_2:"); - } - - if ((OK_TO_SEARCH && ((match && !test_should_match) - || (!match && test_should_match)) - || (correct_regs && !all_regs_correct)) - || !nonconst_buf.fastmap_accurate - || invalid_pattern - || !pattern_should_be_valid - || internal_error_1 || internal_error_2 - || verbose) - { - if (OK_TO_SEARCH && match && !test_should_match) - { - printf ("\n\nMatched but shouldn't have:\n"); - if (match_1) - printf ("The single match/search succeeded.\n"); - - if (match_2) - printf ("The double match/search succeeded.\n"); - } - else if (OK_TO_SEARCH && !match && test_should_match) - { - printf ("\n\nDidn't match but should have:\n"); - if (!match_1) - printf ("The single match/search failed.\n"); - - if (!match_2) - printf ("The double match/search failed.\n"); - } - else if (invalid_pattern && pattern_should_be_valid) - printf ("\n\nInvalid pattern (%s):\n", r); - else if (!nonconst_buf.fastmap_accurate && pattern_should_be_valid) - printf ("\n\nIncorrect fastmap:\n"); - else if (OK_TO_SEARCH && correct_regs && !all_regs_correct) - printf ("\n\nNot all registers were correct:\n"); - else if (verbose) - printf ("\n\nTest was OK:\n"); - - - if ((!(invalid_pattern && !pattern_should_be_valid)) || verbose) - printf (" Pattern: `%s'.\n", pat); - - if (pattern_should_be_valid || verbose - || internal_error_1 || internal_error_2) - { - printf(" Strings: "); - printf ("`%s' and ", str1 == NULL ? 
"NULL" : str1); - printf ("`%s'.\n", str2 == NULL ? "NULL" : str2); - - if ((OK_TO_SEARCH || verbose || internal_error_1 || internal_error_2) - && !invalid_pattern) - { - if (memcmp (old_buf.buffer, nonconst_buf.buffer, - nonconst_buf.used) != 0 - && !invalid_pattern) - { - printf(" (%s)\n", r ? r : "Valid regular expression"); - printf ("\n Compiled pattern before matching: "); - print_compiled_pattern (&old_buf); - printf ("\n Compiled pattern after matching: "); - } - else - printf ("\n Compiled pattern: "); - - print_compiled_pattern (&nonconst_buf); - } - - if (correct_fastmap && (!nonconst_buf.fastmap_accurate || verbose)) - { - printf ("\n The fastmap should have been: "); - print_fastmap (correct_fastmap); - - printf ("\n Fastmap: "); - print_fastmap (fastmap); - - printf ("\n Compiled pattern before matching: "); - print_compiled_pattern (&nonconst_buf); - } - - if ((!all_regs_correct || verbose) && correct_regs) - { - unsigned this_reg; - printf ("\n Incorrect registers:"); - - for (this_reg = 0; this_reg < regs.num_regs; this_reg++) - { - if (!regs_correct[this_reg]) - { - printf ("\n Register %d's start was %2d. ", this_reg, - regs.start[this_reg]); - printf ("\tIt should have been %d.\n", - correct_regs->start[this_reg]); - printf (" Register %d's end was %2d. 
", this_reg, - regs.end[this_reg]); - printf ("\tIt should have been %d.\n", - correct_regs->end[this_reg]); - } - } - } - } - } - - if (nonconst_buf.buffer != NULL) - free (nonconst_buf.buffer); - - if (OK_TO_SEARCH) - { - free (old_buf.buffer); - - if (correct_regs) - free (regs_correct); - - } - - nonconst_buf.buffer = old_buf.buffer = NULL; - regs_correct = NULL; - regs.start = regs.end = NULL; - -} /* general_test */ - - -void -test_search_return (match_start_wanted, pattern, string) - int match_start_wanted; - const char *pattern; - char *string; -{ - struct re_pattern_buffer buf; - char fastmap[1 << BYTEWIDTH]; - const char *compile_return; - int match_start; - static num_times_called = 0; - - num_times_called++; - buf.allocated = 1; - buf.buffer = xmalloc (buf.allocated); - - assert (pattern != NULL); - buf.translate = 0; - compile_return = re_compile_pattern (pattern, strlen (pattern), &buf); - - if (compile_return) - { - printf ("\n\nInvalid pattern in test_match_start:\n"); - printf ("%s\n", compile_return); - } - else - { - buf.fastmap = fastmap; - match_start = re_search (&buf, string, strlen (string), - 0, strlen (string), 0); - - if (match_start != match_start_wanted) - printf ("\nWanted search to start at %d but started at %d.\n", - match_start, match_start_wanted); - } - free (buf.buffer); - buf.buffer = NULL; -} - - -#define SET_FASTMAP() \ - { \ - unsigned this_char; \ - \ - memset (correct_fastmap, invert, (1 << BYTEWIDTH)); \ - \ - for (this_char = 0; this_char < strlen (fastmap_string); this_char++)\ - correct_fastmap[fastmap_string[this_char]] = !invert; \ - correct_fastmap['\n'] = match_newline; \ - } - - -void -test_fastmap (pat, fastmap_string, invert, match_newline) - const char *pat; - char *fastmap_string; - unsigned invert; - unsigned match_newline; -{ - char correct_fastmap[(1 << BYTEWIDTH)]; - - SET_FASTMAP (); - general_test (1, 0, pat, NULL, NULL, -1, 0, -1, correct_fastmap, 0, -1); -} - - -void -test_fastmap_search (pat, str, 
fastmap_string, invert, match_newline, - can_be_null, start0, end0) - const char *pat; - char *str; - char *fastmap_string; - unsigned invert; - unsigned match_newline; - int can_be_null; - int start0; - int end0; -{ - char correct_fastmap[(1 << BYTEWIDTH)]; - struct re_registers correct_regs; - - correct_regs.num_regs = RE_NREGS; - correct_regs.start = (int *) xmalloc (RE_NREGS * sizeof (int)); - correct_regs.end = (int *) xmalloc (RE_NREGS * sizeof (int)); - - set_all_registers (start0, end0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, &correct_regs); - SET_FASTMAP (); - general_test (1, 0, pat, str, NULL, 0, SAFE_STRLEN (str), SAFE_STRLEN (str), - correct_fastmap, &correct_regs, can_be_null); - - free (correct_regs.start); - free (correct_regs.end); -} - - - - -void -test_all_registers (pat, str1, str2, - start0, end0, start1, end1, - start2, end2, start3, end3, - start4, end4, start5, end5, - start6, end6, start7, end7, - start8, end8, start9, end9) - char *pat; char *str1; char *str2; - int start0; int end0; int start1; int end1; - int start2; int end2; int start3; int end3; - int start4; int end4; int start5; int end5; - int start6; int end6; int start7; int end7; - int start8; int end8; int start9; int end9; -{ - struct re_registers correct_regs; - - if (omit_register_tests) return; - - correct_regs.num_regs = RE_NREGS; - correct_regs.start = (int *) xmalloc (RE_NREGS * sizeof (int)); - correct_regs.end = (int *) xmalloc (RE_NREGS * sizeof (int)); - - set_all_registers (start0, end0, start1, end1, start2, end2, start3, end3, - start4, end4, start5, end5, start6, end6, start7, end7, - start8, end8, start9, end9, &correct_regs); - - general_test (1, 0, pat, str1, str2, 0, - SAFE_STRLEN (str1) + SAFE_STRLEN (str2), - SAFE_STRLEN (str1) + SAFE_STRLEN (str2), - NULL, &correct_regs, -1); - - free (correct_regs.start); - free (correct_regs.end); -} - - -void -invalid_pattern (error_code_expected, pattern) - int error_code_expected; - 
char *pattern; -{ - regex_t pattern_buffer; - int cflags - = re_syntax_options == RE_SYNTAX_POSIX_EXTENDED - || re_syntax_options == RE_SYNTAX_POSIX_MINIMAL_EXTENDED - ? REG_EXTENDED : 0; - - test_compile (0, error_code_expected, pattern, &pattern_buffer, cflags); -} - - -void -valid_pattern (pattern) - char *pattern; -{ - regex_t pattern_buffer; - int cflags - = re_syntax_options == RE_SYNTAX_POSIX_EXTENDED - || re_syntax_options == RE_SYNTAX_POSIX_MINIMAL_EXTENDED - ? REG_EXTENDED : 0; - - test_compile (1, 0, pattern, &pattern_buffer, cflags); -} - - -char * -delimiters_to_ops (source, left_delimiter, right_delimiter) - char *source; - char left_delimiter; - char right_delimiter; -{ - static char *answer = NULL; - char *tmp = NULL; - boolean double_size = false; - unsigned source_char; - unsigned answer_char = 0; - - assert (source != NULL); - - switch (left_delimiter) - { - case '(': if (!(re_syntax_options & RE_NO_BK_PARENS)) - double_size = true; - break; - case '{': if (!(re_syntax_options & RE_NO_BK_BRACES)) - double_size = true; - break; - default: printf ("Found strange delimiter %c in delimiter_to_ops.\n", - left_delimiter); - printf ("The source was `%s'\n", source); - exit (0); - } - - if (answer == source) - { - tmp = (char *) xmalloc (strlen (source) + 1); - strcpy (tmp, source); - source = tmp; - } - - if (answer) - { - free (answer); - answer = NULL; - } - - answer = (char *) xmalloc ((double_size - ? 
strlen (source) << 1 - : strlen (source)) - + 1); - if (!double_size) - strcpy (answer, source); - else - { - for (source_char = 0; source_char < strlen (source); source_char++) - { - if (source[source_char] == left_delimiter - || source[source_char] == right_delimiter) - answer[answer_char++] = '\\'; - - answer[answer_char++] = source[source_char]; - } - answer[answer_char] = 0; - } - - return answer; -} - - -void -print_pattern_info (pattern, pattern_buffer_ptr) - const char *pattern; - regex_t *pattern_buffer_ptr; -{ - printf (" Pattern: `%s'.\n", pattern); - printf (" Compiled pattern: "); - print_compiled_pattern (pattern_buffer_ptr); -} - - -void -valid_nonposix_pattern (pattern) - char *pattern; -{ - struct re_pattern_buffer nonconst_buf; - - nonconst_buf.allocated = 0; - nonconst_buf.buffer = NULL; - nonconst_buf.translate = NULL; - - assert (pattern != NULL); - - if (re_compile_pattern (pattern, strlen (pattern), &nonconst_buf)) - { - printf ("Couldn't compile the pattern.\n"); - print_pattern_info (pattern, &nonconst_buf); - } -} - - -void -compile_and_print_pattern (pattern) - char *pattern; -{ - struct re_pattern_buffer nonconst_buf; - - nonconst_buf.allocated = 0; - nonconst_buf.buffer = NULL; - - if (re_compile_pattern (pattern, strlen (pattern), &nonconst_buf)) - printf ("Couldn't compile the pattern.\n"); - - print_pattern_info (pattern, &nonconst_buf); -} - - -void -test_case_fold (pattern, string) - const char *pattern; - char* string; -{ - struct re_pattern_buffer nonconst_buf; - const char *ret; - - init_pattern_buffer (&nonconst_buf); - nonconst_buf.translate = upcase; - - assert (pattern != NULL); - ret = re_compile_pattern (pattern, strlen (pattern), &nonconst_buf); - - if (ret) - { - printf ("\nShould have been a valid pattern but wasn't.\n"); - print_pattern_info (pattern, &nonconst_buf); - } - else - { - if (test_should_match - && re_match (&nonconst_buf, string, strlen (string), 0, 0) - != strlen (string)) - { - printf ("Match failed for 
case fold.\n"); - printf (" Pattern: `%s'.\n", pattern); - printf (" String: `%s'.\n", string == NULL ? "NULL" : string); - } - } -} - - -void -test_match_n_times (n, pattern, string) - unsigned n; - char* pattern; - char* string; -{ - struct re_pattern_buffer buf; - const char *r; - unsigned match = 0; - unsigned this_match; - - buf.allocated = 0; - buf.buffer = NULL; - buf.translate = 0; - - assert (pattern != NULL); - - r = re_compile_pattern (pattern, strlen (pattern), &buf); - if (r) - { - printf ("Didn't compile.\n"); - printf (" Pattern: %s.\n", pattern); - } - else - { - for (this_match = 1; this_match <= n; this_match++) - match = (re_match (&buf, string, strlen (string), - 0, 0) - == strlen (string)); - - if (match && !test_should_match) - printf ("\n\nMatched but shouldn't have:\n"); - else if (!match && test_should_match) - printf ("\n\nDidn't match but should have:\n"); - - if ((match && !test_should_match) || (!match && test_should_match)) - { - printf(" The string to match was: "); - if (string) - printf ("`%s' and ", string); - else - printf ("`'"); - - printf (" Pattern: %s.\n", pattern); - printf (" Compiled pattern: %s.\n", pattern); - print_compiled_pattern (&buf); - } - } -} - - -void -test_match_2 (pat, str1, str2) - const char *pat; - char *str1; - char *str2; -{ - general_test (1, 1, pat, str1, str2, 0, 1, - SAFE_STRLEN (str1) + SAFE_STRLEN (str2), NULL, 0, -1); -} - -void -test_match (pat, str) - const char *pat; - char *str; -{ - test_match_2 (pat, str, NULL); - test_match_2 (pat, NULL, str); -} diff --git a/gnu/libregex/test/test.h b/gnu/libregex/test/test.h deleted file mode 100644 index fb67126547cc..000000000000 --- a/gnu/libregex/test/test.h +++ /dev/null @@ -1,141 +0,0 @@ -/* test.h: for Regex testing. */ - -#ifndef TEST_H -#define TEST_H - -#include <stdio.h> -#include <assert.h> - -#include <sys/types.h> -#include "regex.h" - - -/* A strlen that works even on a null pointer. */ -#define SAFE_STRLEN(s) (s == NULL ? 
0 : strlen (s)) - -typedef enum { false = 0, true = 1 } boolean; - -extern boolean test_should_match; -extern boolean omit_register_tests; -extern void *xmalloc (); - -/* Defined in upcase.c. */ -extern char upcase[]; - -typedef enum -{ - all_test, - other_test, - posix_basic_test, - posix_extended_test, - posix_interface_test, - regress_test -} test_type; - -extern test_type t; - - -#if __STDC__ - -extern char *concat (char *, char *); - -extern void general_test (unsigned pattern_should_be_valid, - unsigned match_whole_string, - const char *pat, char *str1, char *str2, - int start, int range, int end, - char *correct_fastmap, - struct re_registers *correct_regs, int can_be_null); - - -extern void init_pattern_buffer (regex_t *pattern_buffer_ptr); - -extern void test_compile (unsigned valid_pattern, int error_code_expected, - const char *pattern, regex_t *pattern_buffer_ptr, - int cflags); - -extern char *delimiter_to_ops (char *source, char left_delimiter, - char right_delimiter); - - -extern void test_search_return (int, const char *, char *); - -extern void test_berk_search (const char *pattern, char *string); - -extern void test_fastmap (const char *pat, char *fastmap_string, unsigned invert, - unsigned match_newline); - -extern void test_fastmap_search (const char *pat, char *str, char *fastmap_string, - unsigned invert, unsigned match_newline, - int can_be_null, int start0, int end0); - -extern void test_all_registers (char *pat, char *str1, char *str2, - int start0, int end0, int start1, int end1, - int start2, int end2, int start3, int end3, - int start4, int end4, int start5, int end5, - int start6, int end6, int start7, int end7, - int start8, int end8, int start9, int end9); - -extern void print_pattern_info (const char *pattern, regex_t *pattern_buffer_ptr); -extern void compile_and_print_pattern (char *pattern); - -extern void test_case_fold (const char *pattern, char* string); - -extern void test_posix_generic (); - -extern void test_grouping (); - 
-extern void invalid_pattern (int error_code_expected, char *pattern); -extern void valid_nonposix_pattern (char *pattern); -extern void valid_pattern (char *pattern); - -extern void test_match_2 (const char *pat, char *str1, char *str2); -extern void test_match (const char *pat, char *str); - -#endif /* __STDC__ */ - - -#define TEST_REGISTERS_2(pat, str1, str2, start0, end0, start1, end1, start2, end2)\ - if (!omit_register_tests) \ - test_all_registers (pat, str1, str2, start0, end0, start1, end1, \ - start2, end2, -1, -1, -1, -1, -1, -1, -1, -1,\ - -1, -1, -1, -1, -1, -1) \ - - -#define TEST_REGISTERS(pat, str, start0, end0, start1, end1, start2, end2) \ - TEST_REGISTERS_2 (pat, str, NULL, start0, end0, start1, end1, start2, end2)\ - -#define BRACES_TO_OPS(string) ((char *) delimiters_to_ops (string, '{', '}')) -#define PARENS_TO_OPS(string) ((char *) delimiters_to_ops (string, '(', ')')) - -#define INVALID_PATTERN(pat) \ - general_test (0, 0, pat, NULL, NULL, -1, 0, -1, NULL, 0, -1) - - -#define MATCH_SELF(p) test_match (p, p) - -#define TEST_POSITIONED_MATCH(pat, str, start) \ - general_test (1, 0, pat, str, NULL, start, 1, SAFE_STRLEN (str), \ - NULL, 0, -1) - -#define TEST_TRUNCATED_MATCH(pat, str, end) \ - general_test (1, 0, pat, str, NULL, 0, 1, end, NULL, 0, -1) - -#define TEST_SEARCH_2(pat, str1, str2, start, range, one_past_end) \ - general_test (1, 0, pat, str1, str2, start, range, one_past_end, \ - NULL, 0, -1) - -#define TEST_SEARCH(pat, str, start, range) \ - { \ - TEST_SEARCH_2 (pat, str, NULL, start, range, SAFE_STRLEN (str)); \ - TEST_SEARCH_2 (pat, NULL, str, start, range, SAFE_STRLEN (str)); \ - } - -#endif /* TEST_H */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/libregex/test/tregress.c b/gnu/libregex/test/tregress.c deleted file mode 100644 index 7858cac1502b..000000000000 --- a/gnu/libregex/test/tregress.c +++ /dev/null @@ -1,464 +0,0 @@ -/* tregress.c: 
reported bugs. The `t' just makes the filename not have - a common prefix with `regex.c', so completion works better. */ - -#include "test.h" - - -boolean pause_at_error = true; - -char * -itoa (i) - int i; -{ - char *a = xmalloc (21); /* sign + 19 digits (enough for 64 bits) + null */ - - sprintf (a, "%d", i); - return a; -} - - -static void -simple_fail (routine, pat, buf, str, ret) - const char *routine; - const char *pat; - struct re_pattern_buffer *buf; - const char *str; - char *ret; -{ - fprintf (stderr, "Failed %s (return = %s).\n", routine, ret); - if (str && *str) fprintf (stderr, " String = %s\n", str); - fprintf (stderr, " Pattern = %s\n", pat); - print_compiled_pattern (buf); - - if (pause_at_error) - { - fprintf (stderr, "RET to continue: "); - (void) getchar (); - } -} - - -/* Abbreviate the most common calls. */ - -static void -simple_compile (pat, buf) - const char *pat; - struct re_pattern_buffer *buf; -{ - const char *ret = re_compile_pattern (pat, strlen (pat), buf); - - if (ret != NULL) simple_fail ("compile", pat, buf, NULL, ret); -} - - -static void -simple_fastmap (pat) - const char *pat; -{ - struct re_pattern_buffer buf; - char fastmap[256]; - int ret; - - buf.allocated = 0; - buf.buffer = buf.translate = NULL; - buf.fastmap = fastmap; - - simple_compile (pat, &buf); - - ret = re_compile_fastmap (&buf); - - if (ret != 0) simple_fail ("fastmap compile", pat, &buf, NULL, itoa (ret)); -} - - -#define SIMPLE_MATCH(pat, str) do_match (pat, str, strlen (str)) -#define SIMPLE_NONMATCH(pat, str) do_match (pat, str, -1) - -static void -do_match (pat, str, expected) - const char *pat, *str; - int expected; -{ - int ret; - unsigned len; - struct re_pattern_buffer buf; - - buf.allocated = 0; - buf.buffer = buf.translate = buf.fastmap = NULL; - - simple_compile (pat, &buf); - - len = strlen (str); - - ret = re_match_2 (&buf, NULL, 0, str, len, 0, NULL, len); - - if (ret != expected) simple_fail ("match", pat, &buf, str, itoa (ret)); -} - - -static void 
-simple_search (pat, str, correct_startpos) - const char *pat, *str; - int correct_startpos; -{ - int ret; - unsigned len; - struct re_pattern_buffer buf; - - buf.allocated = 0; - buf.buffer = buf.translate = buf.fastmap = NULL; - - simple_compile (pat, &buf); - - len = strlen (str); - - ret = re_search_2 (&buf, NULL, 0, str, len, 0, len, NULL, len); - - if (ret != correct_startpos) - simple_fail ("match", pat, &buf, str, itoa (ret)); -} - -/* Past bugs people have reported. */ - -void -test_regress () -{ - extern char upcase[]; - struct re_pattern_buffer buf; - unsigned len; - struct re_registers regs; - int ret; - char *fastmap = xmalloc (256); - - buf.translate = NULL; - buf.fastmap = NULL; - buf.allocated = 0; - buf.buffer = NULL; - - printf ("\nStarting regression tests.\n"); - t = regress_test; - - test_should_match = true; - re_set_syntax (RE_SYNTAX_EMACS); - - /* enami@sys.ptg.sony.co.jp 10 Nov 92 15:19:02 JST */ - buf.translate = upcase; - SIMPLE_MATCH ("[A-[]", "A"); - buf.translate = NULL; - - /* meyering@cs.utexas.edu Nov 6 22:34:41 1992 */ - simple_search ("\\w+", "a", 0); - - /* jimb@occs.cs.oberlin.edu 10 Sep 92 00:42:33 */ - buf.translate = upcase; - SIMPLE_MATCH ("[\001-\377]", "\001"); - SIMPLE_MATCH ("[\001-\377]", "a"); - SIMPLE_MATCH ("[\001-\377]", "\377"); - buf.translate = NULL; - - /* mike@skinner.cs.uoregon.edu 1 Sep 92 01:45:22 */ - SIMPLE_MATCH ("^^$", "^"); - - /* pclink@qld.tne.oz.au Sep 7 22:42:36 1992 */ - re_set_syntax (RE_INTERVALS); - SIMPLE_MATCH ("^a\\{3\\}$", "aaa"); - SIMPLE_NONMATCH ("^a\\{3\\}$", "aa"); - re_set_syntax (RE_SYNTAX_EMACS); - - /* pclink@qld.tne.oz.au, 31 Aug 92. (conjecture) */ - re_set_syntax (RE_INTERVALS); - simple_search ("a\\{1,3\\}b", "aaab", 0); - simple_search ("a\\{1,3\\}b", "aaaab", 1); - re_set_syntax (RE_SYNTAX_EMACS); - - /* trq@dionysos.thphys.ox.ac.uk, 31 Aug 92. (simplified) */ - simple_fastmap ("^.*\n[ ]*"); - - /* wind!greg@plains.NoDak.edu, 25 Aug 92. 
(simplified) */ - re_set_syntax (RE_INTERVALS); - SIMPLE_MATCH ("[a-zA-Z]*.\\{5\\}", "xN0000"); - SIMPLE_MATCH ("[a-zA-Z]*.\\{5\\}$", "systemxN0000"); - SIMPLE_MATCH ("\\([a-zA-Z]*\\).\\{5\\}$", "systemxN0000"); - re_set_syntax (RE_SYNTAX_EMACS); - - /* jimb, 18 Aug 92. Don't use \000, so `strlen' (in our testing - routines) will work. (This still tickles the bug jimb reported.) */ - SIMPLE_MATCH ("[\001-\377]", "\001"); - SIMPLE_MATCH ("[\001-\377]", "a"); - SIMPLE_MATCH ("[\001-\377]", "\377"); - - /* jimb, 13 Aug 92. */ - SIMPLE_MATCH ("[\001-\177]", "\177"); - - /* Tests based on bwoelfel's below. */ - SIMPLE_MATCH ("\\(a\\|ab\\)*", "aab"); - SIMPLE_MATCH ("\\(a\\|ab\\)+", "aab"); - SIMPLE_MATCH ("\\(a*\\|ab\\)+", "aab"); - SIMPLE_MATCH ("\\(a+\\|ab\\)+", "aab"); - SIMPLE_MATCH ("\\(a?\\|ab\\)+", "aab"); - - /* bwoelfel@widget.seas.upenn.edu, 25 Jul 92. */ - SIMPLE_MATCH ("^\\([ab]+\\|bc\\)+", "abc"); - - /* jla, 3 Jul 92. Core dump in re_search_2. */ - buf.fastmap = fastmap; - buf.translate = upcase; -#define DATEDUMP_PATTERN " *[0-9]*:" - if (re_compile_pattern (DATEDUMP_PATTERN, strlen (DATEDUMP_PATTERN), &buf) - != NULL) - printf ("date dump compile failed.\n"); - regs.num_regs = 0; - regs.start = regs.end = NULL; - if (re_search_2 (&buf, NULL, 0, "Thu Jul 2 18:34:18 1992", - 24, 3, 21, ®s, 24) != 10) - printf ("date dump search failed.\n"); - buf.fastmap = 0; - buf.translate = 0; - - - /* rms, 4 Jul 1992. Pattern is much slower in Emacs 19. Fastmap - should be only a backslash. */ -#define BEGINEND_PATTERN "\\(\\\\begin\\s *{\\)\\|\\(\\\\end\\s *{\\)" - test_fastmap (BEGINEND_PATTERN, "\\", false, 0); - - - /* kaoru@is.s.u-tokyo.ac.jp, 27 Jun 1992. Code for [a-z] (in regex.c) - should translate the whole set. 
*/ - buf.translate = upcase; -#define CASE_SET_PATTERN "[ -`]" - if (re_compile_pattern (CASE_SET_PATTERN, strlen (CASE_SET_PATTERN), &buf) - != NULL) - printf ("case set compile failed.\n"); - if (re_match_2 (&buf, "K", 1, "", 0, 0, NULL, 1) != 1) - printf ("case set match failed.\n"); - -#define CASE_SET_PATTERN2 "[`-|]" - if (re_compile_pattern (CASE_SET_PATTERN2, strlen (CASE_SET_PATTERN2), &buf) - != NULL) - printf ("case set2 compile failed.\n"); - if (re_match_2 (&buf, "K", 1, "", 0, 0, NULL, 1) != 1) - printf ("case set2 match failed.\n"); - - buf.translate = NULL; - - - /* jimb, 27 Jun 92. Problems with gaps in the string. */ -#define GAP_PATTERN "x.*y.*z" - if (re_compile_pattern (GAP_PATTERN, strlen (GAP_PATTERN), &buf) != NULL) - printf ("gap didn't compile.\n"); - if (re_match_2 (&buf, "x-", 2, "y-z-", 4, 0, NULL, 6) != 5) - printf ("gap match failed.\n"); - - - /* jimb, 19 Jun 92. Since `beginning of word' matches at the - beginning of the string, then searching ought to find it there. - If `re_compile_fastmap' is not called, then it works ok. */ - buf.fastmap = fastmap; -#define BOW_BEG_PATTERN "\\<" - if (re_compile_pattern (BOW_BEG_PATTERN, strlen (BOW_BEG_PATTERN), &buf) - != NULL) - printf ("begword-begstring didn't compile.\n"); - if (re_search (&buf, "foo", 3, 0, 3, NULL) != 0) - printf ("begword-begstring search failed.\n"); - - /* Same bug report, different null-matching pattern. */ -#define EMPTY_ANCHOR_PATTERN "^$" - if (re_compile_pattern (EMPTY_ANCHOR_PATTERN, strlen (EMPTY_ANCHOR_PATTERN), - &buf) != NULL) - printf ("empty anchor didn't compile.\n"); - if (re_search (&buf, "foo\n\nbar", 8, 0, 8, NULL) != 4) - printf ("empty anchor search failed.\n"); - - /* jimb@occs.cs.oberlin.edu, 21 Apr 92. After we first allocate - registers for a particular re_pattern_buffer, we might have to - reallocate more registers on subsequent calls -- and we should be - reusing the same memory. 
*/ -#define ALLOC_REG_PATTERN "\\(abc\\)" - free (buf.fastmap); - buf.fastmap = 0; - if (re_compile_pattern (ALLOC_REG_PATTERN, strlen (ALLOC_REG_PATTERN), &buf) - != NULL) - printf ("register allocation didn't compile.\n"); - if (re_match (&buf, "abc", 3, 0, &regs) != 3) - printf ("register allocation didn't match.\n"); - if (regs.start[1] != 0 || regs.end[1] != 3) - printf ("register allocation reg #1 wrong.\n"); - - { - int *old_regstart = regs.start; - int *old_regend = regs.end; - - if (re_match (&buf, "abc", 3, 0, &regs) != 3) - printf ("register reallocation didn't match.\n"); - if (regs.start[1] != 0 || regs.end[1] != 3 - || old_regstart[1] != 0 || old_regend[1] != 3 - || regs.start != old_regstart || regs.end != old_regend) - printf ("register reallocation registers wrong.\n"); - } - - /* jskudlarek@std.MENTORG.COM, 21 Apr 92 (string-match). */ -#define JSKUD_PATTERN "[^/]+\\(/[^/.]+\\)?/[0-9]+$" - if (re_compile_pattern (JSKUD_PATTERN, strlen (JSKUD_PATTERN), &buf) != NULL) - printf ("jskud test didn't compile.\n"); - if (re_search (&buf, "a/1", 3, 0, 3, &regs) != 0) - printf ("jskud test didn't match.\n"); - if (regs.start[1] != -1 || regs.end[1] != -1) - printf ("jskud test, reg #1 wrong.\n"); - - /* jla's bug (with string-match), 5 Feb 92. */ - TEST_SEARCH ("\\`[ \t\n]*", "jla@challenger (Joseph Arceneaux)", 0, 100); - - /* jwz@lucid.com, 8 March 1992 (re-search-forward). (His is the - second.) These are not supposed to match. */ -#if 0 - /* This one fails quickly, because we can change the maybe_pop_jump - from the + to a pop_failure_pop, because of the c's. 
*/ - TEST_SEARCH ("^\\(To\\|CC\\):\\([^c]*\\)+co", -"To: hbs%titanic@lucid.com (Harlan Sexton)\n\ -Cc: eb@thalidomide, jlm@thalidomide\n\ -Subject: Re: so is this really as horrible an idea as it seems to me?\n\ -In-Reply-To: Harlan Sexton's message of Sun 8-Mar-92 11:00:06 PST <9203081900.AA24794@titanic.lucid>\n\ -References: <9203080736.AA05869@thalidomide.lucid>\n\ - <9203081900.AA24794@titanic.lucid>", 0, 5000); - - /* This one takes a long, long time to complete, because we have to - keep the failure points around because we might backtrack. */ - TEST_SEARCH ("^\\(To\\|CC\\):\\(.*\n.*\\)+co", - /* "X-Windows: The joke that kills.\n\ -FCC: /u/jwz/VM/inbox\n\ -From: Jamie Zawinski <jwz@lucid.com>\n\ */ -"To: hbs%titanic@lucid.com (Harlan Sexton)\n\ -Cc: eb@thalidomide, jlm@thalidomide\n\ -Subject: Re: so is this really as horrible an idea as it seems to me?\n\ -In-Reply-To: Harlan Sexton's message of Sun 8-Mar-92 11:00:06 PST <9203081900.AA24794@titanic.lucid>\n\ -References: <9203080736.AA05869@thalidomide.lucid>\n\ - <9203081900.AA24794@titanic.lucid>", 0, 5000); -#endif /* 0 [failed searches] */ - - - /* macrakis' bugs. 
*/ - buf.translate = upcase; /* message of 24 Jan 91 */ - if (re_compile_pattern ("[!-`]", 5, &buf) != NULL) - printf ("Range test didn't compile.\n"); - if (re_match (&buf, "A", 1, 0, NULL) != 1) - printf ("Range test #1 didn't match.\n"); - if (re_match (&buf, "a", 1, 0, NULL) != 1) - printf ("Range test #2 didn't match.\n"); - - buf.translate = 0; -#define FAO_PATTERN "\\(f\\(.\\)o\\)+" - if (re_compile_pattern (FAO_PATTERN, strlen (FAO_PATTERN), &buf) != NULL) - printf ("faofdx test didn't compile.\n"); - if (re_search (&buf, "faofdx", 6, 0, 6, ®s) != 0) - printf ("faofdx test didn't match.\n"); - if (regs.start[1] != 0 || regs.end[1] != 3) - printf ("faofdx test, reg #1 wrong.\n"); - if (regs.start[2] != 1 || regs.end[2] != 2) - printf ("faofdx test, reg #2 wrong.\n"); - - TEST_REGISTERS ("\\(a\\)*a", "aaa", 0, 3, 1, 2, -1, -1); - test_fastmap ("^\\([^ \n]+:\n\\)+\\([^ \n]+:\\)", " \n", 1, 0); - - /* 40 lines, 48 a's in each line. */ - test_match ("^\\([^ \n]+:\n\\)+\\([^ \n]+:\\)", - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ 
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:"); - - /* 640 a's followed by one b, twice. 
*/ - test_match ("\\(.*\\)\\1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"); - - /* 640 a's followed by two b's, twice. 
*/ - test_match ("\\(.*\\)\\1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb"); - - - /* Dave G. bug: Reference to a subexpression which didn't match. - Should fail. 
*/ - re_set_syntax (RE_NO_BK_PARENS | RE_NO_BK_VBAR); - test_match ("(ooooooooooone())-annnnnnnnnnnd-(twooooooooooo\\2)", - "ooooooooooone-annnnnnnnnnnd-twooooooooooo"); - test_match ("(o|t)", "o"); - test_match ("(o()|t)", "o"); - test_match ("(o|t)", "o"); - test_match ("(ooooooooooooooo|tttttttttttttttt())", "ooooooooooooooo"); - test_match ("(o|t())", "o"); - test_match ("(o()|t())", "o"); - test_match ("(ooooooooooooooooooooooooone()|twooooooooooooooooooooooooo())", "ooooooooooooooooooooooooone"); - test_match ("(o()|t())-a-(t\\2|f\\3)", "o-a-t"); - test_match ("(o()|t())-a-(t\\2|f\\3)", "t-a-f"); - - test_should_match = 0; - test_match ("(foo(bar)|second)\\2", "second"); - test_match ("(o()|t())-a-(t\\2|f\\3)", "t-a-t"); - test_match ("(o()|t())-a-(t\\2|f\\3)", "o-a-f"); - - re_set_syntax (RE_SYNTAX_EMACS); - test_match ("\\(foo\\(bar\\)\\|second\\)\\2", "secondbar"); - test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)", - "one-and-four"); - test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)", - "two-and-three"); - - test_should_match = 1; - re_set_syntax (RE_SYNTAX_EMACS); - test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)", - "one-and-three"); - test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)", - "two-and-four"); - - TEST_REGISTERS (":\\(.*\\)", ":/", 0, 2, 1, 2, -1, -1); - - /* Bug with `upcase' translation table, from Nico Josuttis - <nico@bredex.de> */ - test_should_match = 1; - test_case_fold ("[a-a]", "a"); - - printf ("\nFinished regression tests.\n"); -} - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/libregex/test/upcase.c b/gnu/libregex/test/upcase.c deleted file mode 100644 index 5147b812aaae..000000000000 --- a/gnu/libregex/test/upcase.c +++ /dev/null @@ -1,39 +0,0 @@ -/* Indexed by a character, gives the upper case equivalent of the - character. 
*/ - -char upcase[0400] = - { 000, 001, 002, 003, 004, 005, 006, 007, - 010, 011, 012, 013, 014, 015, 016, 017, - 020, 021, 022, 023, 024, 025, 026, 027, - 030, 031, 032, 033, 034, 035, 036, 037, - 040, 041, 042, 043, 044, 045, 046, 047, - 050, 051, 052, 053, 054, 055, 056, 057, - 060, 061, 062, 063, 064, 065, 066, 067, - 070, 071, 072, 073, 074, 075, 076, 077, - 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, - 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, - 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, - 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, - 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, - 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, - 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, - 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, - 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, - 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, - 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, - 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, - 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, - 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, - 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, - 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, - 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, - 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, - 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, - 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, - 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, - 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, - 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, - 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 - }; - - diff --git a/gnu/libregex/test/xmalloc.c b/gnu/libregex/test/xmalloc.c deleted file mode 100644 index 88be1a668bd3..000000000000 --- a/gnu/libregex/test/xmalloc.c +++ /dev/null @@ -1,21 +0,0 @@ -#include <stdio.h> -extern char *malloc (); - -#ifndef NULL -#define NULL 0 -#endif - -void * -xmalloc (size) - unsigned size; -{ - char *new_mem = malloc (size); - - if (new_mem == NULL) - { - fprintf (stderr, "xmalloc: request for 
%u bytes failed.\n", size); - abort (); - } - - return new_mem; -} |
