about | summary | refs | log | tree | commit | diff
path: root/gnu
diff options
context:
space:
mode:
author: Jordan K. Hubbard <jkh@FreeBSD.org> 1993-06-18 04:22:21 +0000
committer: Jordan K. Hubbard <jkh@FreeBSD.org> 1993-06-18 04:22:21 +0000
commit: b76095a4307cc94ec7cd722853f9b032e45e6ea4 (patch)
tree: 890f91d43eec35dc2f71a54410491f6503ca5b38 /gnu
parent: 7c434002a4e47486e9a2d7b2f32b1ddf42d37e2a (diff)
Notes
Diffstat (limited to 'gnu')
-rw-r--r-- gnu/COPYING 339
-rw-r--r-- gnu/Makefile 5
-rw-r--r-- gnu/usr.bin/awk/ACKNOWLEDGMENT 21
-rw-r--r-- gnu/usr.bin/awk/COPYING 340
-rw-r--r-- gnu/usr.bin/awk/FUTURES 120
-rw-r--r-- gnu/usr.bin/awk/LIMITATIONS 14
-rw-r--r-- gnu/usr.bin/awk/Makefile 13
-rw-r--r-- gnu/usr.bin/awk/NEWS 1295
-rw-r--r-- gnu/usr.bin/awk/PORTS 32
-rw-r--r-- gnu/usr.bin/awk/POSIX 95
-rw-r--r-- gnu/usr.bin/awk/PROBLEMS 6
-rw-r--r-- gnu/usr.bin/awk/README 116
-rw-r--r-- gnu/usr.bin/awk/array.c 293
-rw-r--r-- gnu/usr.bin/awk/awk.1 1873
-rw-r--r-- gnu/usr.bin/awk/awk.h 763
-rw-r--r-- gnu/usr.bin/awk/awk.y 1804
-rw-r--r-- gnu/usr.bin/awk/builtin.c 1133
-rw-r--r-- gnu/usr.bin/awk/config.h 272
-rw-r--r-- gnu/usr.bin/awk/dfa.c 2291
-rw-r--r-- gnu/usr.bin/awk/dfa.h 543
-rw-r--r-- gnu/usr.bin/awk/eval.c 1225
-rw-r--r-- gnu/usr.bin/awk/field.c 645
-rw-r--r-- gnu/usr.bin/awk/gawk.texi 11270
-rw-r--r-- gnu/usr.bin/awk/getopt.c 662
-rw-r--r-- gnu/usr.bin/awk/getopt.h 128
-rw-r--r-- gnu/usr.bin/awk/getopt1.c 160
-rw-r--r-- gnu/usr.bin/awk/io.c 1207
-rw-r--r-- gnu/usr.bin/awk/iop.c 318
-rw-r--r-- gnu/usr.bin/awk/main.c 731
-rw-r--r-- gnu/usr.bin/awk/msg.c 106
-rw-r--r-- gnu/usr.bin/awk/node.c 429
-rw-r--r-- gnu/usr.bin/awk/patchlevel.h 1
-rw-r--r-- gnu/usr.bin/awk/protos.h 115
-rw-r--r-- gnu/usr.bin/awk/re.c 208
-rw-r--r-- gnu/usr.bin/awk/regex.c 2854
-rw-r--r-- gnu/usr.bin/awk/regex.h 260
-rw-r--r-- gnu/usr.bin/awk/version.c 46
-rw-r--r-- gnu/usr.bin/rcs/Makefile 3
-rw-r--r-- gnu/usr.bin/rcs/Makefile.inc 3
-rw-r--r-- gnu/usr.bin/rcs/ci/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/ci/ci.1 772
-rw-r--r-- gnu/usr.bin/rcs/ci/ci.c 1165
-rw-r--r-- gnu/usr.bin/rcs/co/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/co/co.1 569
-rw-r--r-- gnu/usr.bin/rcs/co/co.c 769
-rw-r--r-- gnu/usr.bin/rcs/doc/rcs.ms 1524
-rw-r--r-- gnu/usr.bin/rcs/doc/rcs_func.ms 95
-rw-r--r-- gnu/usr.bin/rcs/ident/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/ident/ident.1 76
-rw-r--r-- gnu/usr.bin/rcs/ident/ident.c 214
-rw-r--r-- gnu/usr.bin/rcs/lib/Makefile 5
-rw-r--r-- gnu/usr.bin/rcs/lib/conf.h 495
-rw-r--r-- gnu/usr.bin/rcs/lib/maketime.c 344
-rw-r--r-- gnu/usr.bin/rcs/lib/merger.c 139
-rw-r--r-- gnu/usr.bin/rcs/lib/partime.c 639
-rw-r--r-- gnu/usr.bin/rcs/lib/rcsbase.h 677
-rw-r--r-- gnu/usr.bin/rcs/lib/rcsedit.c 1656
-rw-r--r-- gnu/usr.bin/rcs/lib/rcsfcmp.c 321
-rw-r--r-- gnu/usr.bin/rcs/lib/rcsfnms.c 1088
-rw-r--r-- gnu/usr.bin/rcs/lib/rcsgen.c 432
-rw-r--r-- gnu/usr.bin/rcs/lib/rcskeep.c 422
-rw-r--r-- gnu/usr.bin/rcs/lib/rcskeys.c 102
-rw-r--r-- gnu/usr.bin/rcs/lib/rcslex.c 1241
-rw-r--r-- gnu/usr.bin/rcs/lib/rcsmap.c 68
-rw-r--r-- gnu/usr.bin/rcs/lib/rcsrev.c 790
-rw-r--r-- gnu/usr.bin/rcs/lib/rcssyn.c 857
-rw-r--r-- gnu/usr.bin/rcs/lib/rcsutil.c 994
-rw-r--r-- gnu/usr.bin/rcs/merge/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/merge/merge.1 102
-rw-r--r-- gnu/usr.bin/rcs/merge/merge.c 97
-rw-r--r-- gnu/usr.bin/rcs/rcs/Makefile 10
-rw-r--r-- gnu/usr.bin/rcs/rcs/rcs.1 397
-rw-r--r-- gnu/usr.bin/rcs/rcs/rcs.c 1554
-rw-r--r-- gnu/usr.bin/rcs/rcs/rcsfile.5 224
-rw-r--r-- gnu/usr.bin/rcs/rcs/rcsintro.1 292
-rw-r--r-- gnu/usr.bin/rcs/rcsclean/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/rcsclean/rcsclean.1 177
-rw-r--r-- gnu/usr.bin/rcs/rcsclean/rcsclean.c 297
-rw-r--r-- gnu/usr.bin/rcs/rcsdiff/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/rcsdiff/rcsdiff.1 152
-rw-r--r-- gnu/usr.bin/rcs/rcsdiff/rcsdiff.c 422
-rw-r--r-- gnu/usr.bin/rcs/rcsfreeze/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/rcsfreeze/rcsfreeze.1 68
-rw-r--r-- gnu/usr.bin/rcs/rcsfreeze/rcsfreeze.sh 100
-rw-r--r-- gnu/usr.bin/rcs/rcsmerge/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/rcsmerge/rcsmerge.1 140
-rw-r--r-- gnu/usr.bin/rcs/rcsmerge/rcsmerge.c 252
-rwxr-xr-x gnu/usr.bin/rcs/rcstest 397
-rw-r--r-- gnu/usr.bin/rcs/rlog/Makefile 7
-rw-r--r-- gnu/usr.bin/rcs/rlog/rlog.1 260
-rw-r--r-- gnu/usr.bin/rcs/rlog/rlog.c 1204
-rw-r--r-- gnu/usr.bin/tar/COPYING 339
-rw-r--r-- gnu/usr.bin/tar/ChangeLog 1732
-rw-r--r-- gnu/usr.bin/tar/Makefile 14
-rw-r--r-- gnu/usr.bin/tar/Makefile.gnu 185
-rw-r--r-- gnu/usr.bin/tar/README 40
-rw-r--r-- gnu/usr.bin/tar/buffer.c 1584
-rw-r--r-- gnu/usr.bin/tar/create.c 1454
-rw-r--r-- gnu/usr.bin/tar/diffarch.c 759
-rw-r--r-- gnu/usr.bin/tar/extract.c 907
-rw-r--r-- gnu/usr.bin/tar/fnmatch.c 173
-rw-r--r-- gnu/usr.bin/tar/fnmatch.h 62
-rw-r--r-- gnu/usr.bin/tar/getdate.y 969
-rw-r--r-- gnu/usr.bin/tar/getoldopt.c 96
-rw-r--r-- gnu/usr.bin/tar/getopt.c 712
-rw-r--r-- gnu/usr.bin/tar/getopt.h 125
-rw-r--r-- gnu/usr.bin/tar/getopt1.c 161
-rw-r--r-- gnu/usr.bin/tar/getpagesize.h 38
-rw-r--r-- gnu/usr.bin/tar/gnu.c 677
-rw-r--r-- gnu/usr.bin/tar/list.c 881
-rw-r--r-- gnu/usr.bin/tar/mangle.c 270
-rw-r--r-- gnu/usr.bin/tar/msd_dir.h 44
-rw-r--r-- gnu/usr.bin/tar/names.c 149
-rw-r--r-- gnu/usr.bin/tar/open3.h 67
-rw-r--r-- gnu/usr.bin/tar/pathmax.h 53
-rw-r--r-- gnu/usr.bin/tar/port.c 1256
-rw-r--r-- gnu/usr.bin/tar/port.h 215
-rw-r--r-- gnu/usr.bin/tar/regex.c 4932
-rw-r--r-- gnu/usr.bin/tar/regex.h 490
-rw-r--r-- gnu/usr.bin/tar/rmt.h 98
-rw-r--r-- gnu/usr.bin/tar/rtapelib.c 582
-rw-r--r-- gnu/usr.bin/tar/tar.c 1504
-rw-r--r-- gnu/usr.bin/tar/tar.h 291
-rw-r--r-- gnu/usr.bin/tar/update.c 585
-rw-r--r-- gnu/usr.bin/tar/version.c 1
-rw-r--r-- gnu/usr.bin/tar/y.tab.h 18
126 files changed, 74864 insertions, 0 deletions
diff --git a/gnu/COPYING b/gnu/COPYING
new file mode 100644
index 000000000000..a43ea2126fb6
--- /dev/null
+++ b/gnu/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/gnu/Makefile b/gnu/Makefile
new file mode 100644
index 000000000000..7da2c3237654
--- /dev/null
+++ b/gnu/Makefile
@@ -0,0 +1,5 @@
+# @(#)Makefile 5.33.1.1 (Berkeley) 5/6/91
+
+SUBDIR= gawk groff tar
+
+.include <bsd.subdir.mk>
diff --git a/gnu/usr.bin/awk/ACKNOWLEDGMENT b/gnu/usr.bin/awk/ACKNOWLEDGMENT
new file mode 100644
index 000000000000..b6c3b0b0c692
--- /dev/null
+++ b/gnu/usr.bin/awk/ACKNOWLEDGMENT
@@ -0,0 +1,21 @@
+The current developers of Gawk would like to thank and acknowledge the
+many people who have contributed to the development through bug reports
+and fixes and suggestions. Unfortunately, we have not been organized
+enough to keep track of all the names -- for that we apologize.
+
+Another group of people have assisted even more by porting Gawk to new
+platforms and providing a great deal of feedback. They are:
+
+ Hal Peterson <hrp@pecan.cray.com> (Cray)
+ Pat Rankin <gawk.rankin@EQL.Caltech.Edu> (VMS)
+ Michal Jaegermann <NTOMCZAK@vm.ucs.UAlberta.CA> (Atari, NeXT, DEC 3100)
+ Mike Lijewski <mjlx@eagle.cnsf.cornell.edu> (IBM RS6000)
+ Scott Deifik <scottd@amgen.com> (MSDOS 2.14)
+ Kent Williams (MSDOS 2.11)
+ Conrad Kwok (MSDOS earlier versions)
+ Scott Garfinkle (MSDOS earlier versions)
+
+Last, but far from least, we would like to thank Brian Kernighan who
+has helped to clear up many dark corners of the language and provided a
+restraining touch when we have been overly tempted by "feeping
+creaturism".
diff --git a/gnu/usr.bin/awk/COPYING b/gnu/usr.bin/awk/COPYING
new file mode 100644
index 000000000000..3358a7be862a
--- /dev/null
+++ b/gnu/usr.bin/awk/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
diff --git a/gnu/usr.bin/awk/FUTURES b/gnu/usr.bin/awk/FUTURES
new file mode 100644
index 000000000000..b09656046b27
--- /dev/null
+++ b/gnu/usr.bin/awk/FUTURES
@@ -0,0 +1,120 @@
+This file lists future projects and enhancements for gawk. Items are listed
+in roughly the order they will be done for a given release. This file is
+mainly for use by the developers to help keep themselves on track; please
+don't bug us too much about schedules or what all this really means.
+
+For 2.16
+========
+David:
+ Move to autoconf-based configure system.
+
+ Allow RS to be a regexp.
+
+ RT variable to hold text of record terminator
+
+ RECLEN variable for fixed length records
+
+ Feedback alloca.s changes to FSF
+
+ Extensible hashing in memory of awk arrays
+
+ Split() with null string as third arg to split up strings
+
+ Analogously, setting FS="" would split the input record into individual
+ characters.
+
+Arnold:
+ Generalize IGNORECASE
+ any value makes it work, not just numeric non-zero
+ make it apply to *all* string comparisons
+
+ Fix FILENAME to have an initial value of "", not "-"
+
+ Clean up code by isolating system-specific functions in separate files.
+
+ Undertake significant directory reorganization.
+
+ Extensive manual cleanup:
+ Use of texinfo 2.0 features
+ Lots more examples
+ Document all of the above.
+
+In 2.17
+=======
+David:
+
+ Incorporate newer dfa.c and regex.c (go to POSIX regexps)
+
+ Make regex + dfa less dependent on gawk header file includes
+
+ General sub functions:
+ edit(line, pat, sub) and gedit(line, pat, sub)
+ that return the substituted strings and allow \1 etc. in the sub string.
+
+Arnold:
+ DBM storage of awk arrays. Try to allow multiple dbm packages
+
+ ? Have strftime() pay attention to the value of ENVIRON["TZ"]
+
+ Additional manual features:
+ Document posix regexps
+ Document use of dbm arrays
+ ? Add an error messages section to the manual
+ ? A section on where gawk is bounded
+ regex
+ i/o
+ sun fp conversions
+
+For 2.18
+========
+
+Arnold:
+ Add chdir and stat built-in functions.
+
+ Add function pointers as valid variable types.
+
+ Add an `ftw' built-in function that takes a function pointer.
+
+David:
+
+ Do an optimization pass over parse tree?
+
+For 2.19 or later:
+==================
+Add variables similar to C's __FILE__ and __LINE__ for better diagnostics
+from within awk programs.
+
+Add an explicit concatenation operator and assignment version.
+
+? Add a switch statement
+
+Add the ability to seek on an open file and retrieve the current file position.
+
+Add lint checking everywhere, including check for use of builtin vars
+only in new awk.
+
+"restart" keyword
+
+Add |&
+
+Make awk '/foo/' files... run at egrep speeds
+
+Do a reference card
+
+Allow OFMT to be other than a floating point format.
+
+Allow redefining of builtin functions?
+
+Make it faster and smaller.
+
+For 3.x:
+========
+
+Create a gawk compiler?
+
+Create a gawk-to-C translator? (or C++??)
+
+Provide awk profiling and debugging.
+
+
+
diff --git a/gnu/usr.bin/awk/LIMITATIONS b/gnu/usr.bin/awk/LIMITATIONS
new file mode 100644
index 000000000000..5877197aeb55
--- /dev/null
+++ b/gnu/usr.bin/awk/LIMITATIONS
@@ -0,0 +1,14 @@
+This file describes limits of gawk on a Unix system (although it
+is variable even then). Non-Unix systems may have other limits.
+
+# of fields in a record: MAX_INT
+Length of input record: MAX_INT
+Length of output record: unlimited
+Size of a field: MAX_INT
+Size of a printf string: MAX_INT
+Size of a literal string: MAX_INT
+Characters in a character class: 2^(# of bits per byte)
+# of file redirections: unlimited
+# of pipe redirections: min(# of processes per user, # of open files)
+double-precision floating point
+Length of source line: unlimited
diff --git a/gnu/usr.bin/awk/Makefile b/gnu/usr.bin/awk/Makefile
new file mode 100644
index 000000000000..fdca82c4482e
--- /dev/null
+++ b/gnu/usr.bin/awk/Makefile
@@ -0,0 +1,13 @@
+PROG= awk
+SRCS= main.c eval.c builtin.c msg.c iop.c io.c field.c array.c \
+ node.c version.c re.c awk.c regex.c dfa.c \
+ getopt.c getopt1.c
+CFLAGS+= -DGAWK
+LDADD= -lm
+DPADD= ${LIBM}
+CLEANFILES+= awk.c y.tab.h
+
+MAN1= awk.0
+
+.include <bsd.prog.mk>
+.include "../../usr.bin/Makefile.inc"
diff --git a/gnu/usr.bin/awk/NEWS b/gnu/usr.bin/awk/NEWS
new file mode 100644
index 000000000000..6711373d6ea5
--- /dev/null
+++ b/gnu/usr.bin/awk/NEWS
@@ -0,0 +1,1295 @@
+Changes from 2.15.1 to 2.15.2
+---------------------------
+
+Additions to the FUTURES file.
+
+Document undefined order of output when using both standard output
+ and /dev/stdout or any of the /dev output files that gawk emulates in
+ the absence of OS support.
+
+Clean up the distribution generation in Makefile.in: the info files are
+ now included, the distributed files are marked read-only and patched
+ distributions are now unpacked in a directory named with the patch level.
+
+
+Changes from 2.15 to 2.15.1
+---------------------------
+
+Close stdout and stderr before all redirections on program exit. This allows
+ detection of write errors and also fixes the messages test on Solaris 2.x.
+
+Removed YYMAXDEPTH define in awk.y which was limiting the parser stack depth.
+
+Changes to config/bsd44, Makefile.bsd44 and configure to bring it into line
+ with the BSD4.4 release.
+
+Changed Makefile to use prefix, exec_prefix, bindir etc.
+
+make install now installs info files.
+
+make install now sets permissions on installed files.
+
+Make targets added: uninstall, distclean, mostlyclean and realclean.
+
+Added config.h to cleaner and clobber make targets.
+
+Changes to config/{hpux8x,sysv3,sysv4,ultrix41} to deal with alloca().
+
+Change to getopt.h for portability.
+
+Added more special cases to the getpgrp() call.
+
+Added README.ibmrt-aos and config/ibmrt-aos.
+
+Changes from 2.14 to 2.15
+---------------------------
+
+Command-line source can now be mixed with library functions.
+
+ARGIND variable tracks index in ARGV of FILENAME.
+
+GNU style long options in addition to short options.
+
+Plan 9 style special files interpreted by gawk:
+ /dev/pid
+ /dev/ppid
+ /dev/pgrpid
+ /dev/user
+ $1 = getuid
+ $2 = geteuid
+ $3 = getgid
+ $4 = getegid
+ $5 ... $NF = getgroups if supported
+
+ERRNO variable contains error string if getline or close fails.
+
+Very old options -a and -e have gone away.
+
+Inftest has been removed from the default target in test/Makefile -- the
+ results were too machine specific and resulted in too many false alarms.
+
+A README.amiga has been added.
+
+The "too many arguments supplied for format string" warning message is only
+ in effect under the lint option.
+
+Code improvements in dfa.c.
+
+Fixed all reported bugs:
+
+ Writes are checked for failure (such as full filesystem).
+
+ Stopped (at least some) runaway error messages.
+
+ gsub(/^/, "x") does the right thing for $0 of 0, 1, or more length.
+
+ close() on a command being piped to a getline now works properly.
+
+ The input record will no longer be freed upon an explicit close()
+ of the input file.
+
+ A NUL character in FS now works.
+
+ In a substitute, \\& now means a literal backslash followed by what
+ was matched.
+
+ Integer overflow of substring length in substr() is caught.
+
+ An input record without a newline termination is handled properly.
+
+ In io.c, check is against only EMFILE so that system file table
+ is not filled.
+
+ Renamed all files with names longer than 14 characters.
+
+ Escaped characters in regular expressions were being lost when
+ IGNORECASE was used.
+
+ Long source lines were not being handled properly.
+
+ Sourcefiles that ended in a tab but no newline were bombing.
+
+ Patterns that could match zero characters in split() were not working
+ properly.
+
+ The parsedebug option was not working.
+
+ The grammar was being a bit too lenient, allowing some very dubious
+ programs to pass.
+
+ Compilation with DEBUG defined now works.
+
+ A variable read in with getline was not being treated as a potential
+ number.
+
+ Array subscripts were not always of string type.
+
+
+Changes from 2.13.2 to 2.14
+---------------------------
+
+Updated manual!
+
+Added "next file" to skip efficiently to the next input file.
+
+Fixed potential of overflowing buffer in do_sprintf().
+
+Plugged small memory leak in sub_common().
+
+EOF on a redirect is now "sticky" -- it can only be cleared by close()ing
+ the pipe or file.
+
+Now works if used via a #! /bin/gawk line at the top of an executable file
+ when that line ends with whitespace.
+
+Added some checks to the grammar to catch redefinition of builtin functions.
+ This could eventually be the basis for an extension to allow redefining
+ functions, but in the mean time it's a good error catching facility.
+
+Negative integer exponents now work.
+
+Modified do_system() to make sure it had a non-null string to be passed
+ to system(3). Thus, system("") will flush any pending output but not go
+ through the overhead of forking an un-needed shell.
+
+A fix to floating point comparisons so that NaNs compare right on IEEE systems.
+
+Added code to make sure we're not opening directories for reading and such.
+
+Added code to do better diagnoses of weird or null file names.
+
+Allow continue outside of a loop, unless in strict posix mode. Lint option
+ will issue warning.
+
+New missing/strftime.c. There has been one change that affects gawk. Posix
+ now defines a %V conversion so the vms conversion has been changed to %v.
+ If this version is used with gawk -Wlint and they use %V in a call to
+ strftime, they'll get a warning.
+
+Error messages now conform to GNU standard (I hope).
+
+Changed comparisons to conform to the description found in the file POSIX.
+ This is inconsistent with the current POSIX draft, but that is broken.
+ Hopefully the final POSIX standard will conform to this version.
+ (Alas, this will have to wait for 1003.2b, which will be a revision to
+ the 1003.2 standard. That standard has been frozen with the broken
+ comparison rules.)
+
+The length of a string was a short and now is a size_t.
+
+Updated VMS help.
+
+Added quite a few new tests to the test suite and deleted many due to lack of
+ written releases. Test output is only removed if it is identical to the
+ "good" output.
+
+Fixed a couple of bugs for reference to $0 when $0 is "" -- particularly in
+ a BEGIN block.
+
+Fixed premature freeing in construct "$0 = $0".
+
+Removed the call to wait_any() in gawk_popen(), since on at least some systems,
+ if gawk's input was from a pipe, the predecessor process in the pipe was a
+ child of gawk and this caused a deadlock.
+
+Regexp can (once again) match a newline, if given explicitly.
+
+nextopen() makes sure file name is null terminated.
+
+Fixed VMS pipe simulation. Improved VMS I/O performance.
+
+Catch . used in variable names.
+
+Fixed bug in getline without redirect from a file -- it was quitting after the
+ first EOF, rather than trying the next file.
+
+Fixed bug in treatment of backslash at the end of a string -- it was bombing
+ rather than doing something sensible. It is not clear what this should mean,
+ but for now I issue a warning and take it as a literal backslash.
+
+Moved setting of regexp syntax to before the option parsing in main(), to
+ handle things like -v FS='[.,;]'
+
+Fixed bug when NF is set by user -- fields_arr must be expanded if necessary
+ and "new" fields must be initialized.
+
+Fixed several bugs in [g]sub() for no match found or the match is 0-length.
+
+Fixed bug where in gsub() a pattern anchored at the beginning would still
+ substitute throughout the string.
+
+make test does not assume the . is in PATH.
+
+Fixed bug when a field beyond the end of the record was requested after
+ $0 was altered (directly or indirectly).
+
+Fixed bug for assignment to field beyond end of record -- the assigned value
+ was not found on subsequent reference to that field.
+
+Fixed bug for FS a regexp and it matches at the end of a record.
+
+Fixed memory leak for an array local to a function.
+
+Fixed hanging of pipe redirection to getline
+
+Fixed coredump on access to $0 inside BEGIN block.
+
+Fixed treatment of RS = "". It now parses the fields correctly and strips
+ leading whitespace from a record if FS is a space.
+
+Fixed faking of /dev/stdin.
+
+Fixed problem with x += x
+
+Use of scalar as array and vice versa is now detected.
+
+IGNORECASE now obeyed for FS (even if FS is a single alphabetic character).
+
+Switch to GPL version 2.
+
+Renamed awk.tab.c to awktab.c for MSDOS and VMS tar programs.
+
+Renamed this file (CHANGES) to NEWS.
+
+Use fmod() instead of modf() and provide FMOD_MISSING #define to undo
+ this change.
+
+Correct the volatile declarations in eval.c.
+
+Avoid errant closing of the file descriptors for stdin, stdout and stderr.
+
+Be more flexible about where semi-colons can occur in programs.
+
+Check for write errors on all output, not just on close().
+
+Eliminate the need for missing/{strtol.c,vprintf.c}.
+
+Use GNU getopt and eliminate missing/getopt.c.
+
+More "lint" checking.
+
+
+Changes from 2.13.1 to 2.13.2
+-----------------------------
+
+Toward conformity with GNU standards, configure is a link to mkconf, the latter
+ to disappear in the next major release.
+
+Update to config/bsd43.
+
+Added config/apollo, config/msc60, config/cray2-50, config/interactive2.2
+
+sgi33.cc added for compilation using cc rather than gcc.
+
+Ultrix41 now propagates to config.h properly -- as part of a general
+ mechanism in configure for kludges -- #define anything from a config file
+ just gets tacked onto the end of config.h -- to be used sparingly.
+
+Got rid of an unnecessary and troublesome declaration of vprintf().
+
+Small improvement in locality of error messages.
+
+Try to diagnose use of array as scalar and vice versa -- to be improved in
+ the future.
+
+Fix for last bug fix for Cray division code--sigh.
+
+More changes to test suite to explicitly use sh. Also get rid of
+ a few generated files.
+
+Fixed off-by-one bug in string concatenation code.
+
+Fix for use of array that is passed in from a previous function parameter.
+ Addition to test suite for above.
+
+A number of changes associated with changing NF and access to fields
+ beyond the end of the current record.
+
+Change to missing/memcmp.c to avoid seg. fault on zero length input.
+
+Updates to test suite (including some inadvertently left out of the last patch)
+ to invoke sh explicitly (rather than rely on #!/bin/sh) and remove some
+ junk files. test/chem/good updated to correspond to bug fixes.
+
+Changes from 2.13.0 to 2.13.1
+-----------------------------
+
+More configs and PORTS.
+
+Fixed bug wherein a simple division produced an erroneous FPE, caused by
+ the Cray division workaround -- that code is now #ifdef'd only for
+ Cray *and* fixed.
+
+Fixed bug in modulus implementation -- it was very close to the above
+ code, so I noticed it.
+
+Fixed portability problem with limits.h in missing.c
+
+Fixed portability problem with tzname and daylight -- define TZNAME_MISSING
+ if strftime() is missing and tzname is also.
+
+Better support for Latin-1 character set.
+
+Fixed portability problem in test Makefile.
+
+Updated PROBLEMS file.
+
+=============================== gawk-2.13 released =========================
+Changes from 2.12.42 to 2.12.43
+-------------------------------
+
+Typo in awk.y
+
+Fixed up strftime.3 and added doc. for %V.
+
+Changes from 2.12.41 to 2.12.42
+-------------------------------
+
+Fixed bug in devopen() -- if you had write permission in /dev,
+ it would just create /dev/stdout etc.!!
+
+Final (?) VMS update.
+
+Make NeXT use GFMT_WORKAROUND
+
+Fixed bug in sub_common() for substitute on zero-length match. Improved the
+ code a bit while I was at it.
+
+Fixed grammar so that $i++ parses as ($i)++
+
+Put support/* back in the distribution (didn't I already do this?!)
+
+Changes from 2.12.40 to 2.12.41
+-------------------------------
+
+VMS workaround for broken %g format.
+
+Changes from 2.12.39 to 2.12.40
+-------------------------------
+
+Minor man page update.
+
+Fixed latent bug in redirect().
+
+Changes from 2.12.38 to 2.12.39
+-------------------------------
+
+Updates to test suite -- remove dependence on changing gawk.1 man page.
+
+Changes from 2.12.37 to 2.12.38
+-------------------------------
+
+Fixed bug in use of *= without whitespace following.
+
+VMS update.
+
+Updates to man page.
+
+Option handling updates in main.c
+
+test/manyfiles redone and added to bigtest.
+
+Fixed latent (on Sun) bug in handling of save_fs.
+
+Changes from 2.12.36 to 2.12.37
+-------------------------------
+
+Update REL in Makefile-dist. Incorporate test suite into main distribution.
+
+Minor fix in regtest.
+
+Changes from 2.12.35 to 2.12.36
+-------------------------------
+
+Release takes on dual personality -- 2.12.36 and 2.13.0 -- any further
+ patches before public release won't count for 2.13, although they will for
+ 2.12 -- be careful to avoid confusion! patchlevel.h will be the last thing
+ to change.
+
+Cray updates to deal with arithmetic problems.
+
+Minor test suite updates.
+
+Fixed latent bug in parser (freeing memory).
+
+Changes from 2.12.34 to 2.12.35
+-------------------------------
+
+VMS updates.
+
+Flush stdout at top of err() and stderr at bottom.
+
+Fixed bug in eval_condition() -- it wasn't testing for MAYBE_NUM and
+ doing the force_number().
+
+Included the missing manyfiles.awk and a new test to catch the above bug which
+ I am amazed wasn't already caught by the test suite -- it's pretty basic.
+
+Changes from 2.12.33 to 2.12.34
+-------------------------------
+
+Atari updates -- including bug fix.
+
+More VMS updates -- also nuke vms/version.com.
+
+Fixed bug in handling of large numbers of redirections -- it was probably never
+ tested before (blush!).
+
+Minor rearrangement of code in r_force_number().
+
+Made chem and regtest tests a bit more portable (Ultrix again).
+
+Added another test -- manyfiles -- not invoked under any other test -- very Unix
+ specific.
+
+Rough beginning of LIMITATIONS file -- need my AWK book to complete it.
+
+Changes from 2.12.32 to 2.12.33
+-------------------------------
+
+Expunge debug.? from various files.
+
+Remove vestiges of Floor and Ceil kludge.
+
+Special case integer division -- mainly for Cray, but maybe someone else
+ will benefit.
+
+Workaround for iop_close closing an output pipe descriptor on Cray --
+ not conditional since I think it may fix a bug on SGI as well and I don't
+ think it can hurt elsewhere.
+
+Fixed memory leak in assoc_lookup().
+
+Small cleanup in test suite.
+
+Changes from 2.12.31 to 2.12.32
+-------------------------------
+
+Nuked debug.c and debugging flag -- there are better ways.
+
+Nuked version.sh and version.c in subdirectories.
+
+Fixed bug in handling of IGNORECASE.
+
+Fixed bug when FIELDWIDTHS was set via -v option.
+
+Fixed (obscure) bug when $0 is assigned a numerical value.
+
+Fixed so that escape sequences in command-line assignments work (as it already
+ said in the comment).
+
+Added a few cases to test suite.
+
+Moved support/* back into distribution.
+
+VMS updates.
+
+Changes from 2.12.30 to 2.12.31
+-------------------------------
+
+Cosmetic manual page changes.
+
+Updated sunos3 config.
+
+Small changes in test suite including renaming files over 14 chars. in length.
+
+Changes from 2.12.29 to 2.12.30
+-------------------------------
+
+Bug fix for many string concatenations in a row.
+
+Changes from 2.12.28 to 2.12.29
+-------------------------------
+
+Minor cleanup in awk.y
+
+Minor VMS update.
+
+Minor atari update.
+
+Changes from 2.12.27 to 2.12.28
+-------------------------------
+
+Got rid of the debugging goop in eval.c -- there are better ways.
+
+Sequent port.
+
+VMS changes left out of the last patch -- sigh! config/vms.h renamed
+ to config/vms-conf.h.
+
+Fixed missing/tzset.c
+
+Removed use of gcvt() and GCVT_MISSING -- turns out it was no faster than
+ sprintf("%g") and caused all sorts of portability headaches.
+
+Tuned get_field() -- it was unnecessarily parsing the whole record on reference
+ to $0.
+
+Tuned interpret() a bit in the rule_node loop.
+
+In r_force_number(), worked around bug in Uglix strtod() and got rid of
+ ugly do{}while(0) at Michal's urging.
+
+Replaced do_deref() and deref with unref(node) -- much cleaner and a bit faster.
+
+Got rid of assign_number() -- contrary to comment, it was no faster than
+ just making a new node and freeing the old one.
+
+Replaced make_number() and tmp_number() with macros that call mk_number().
+
+Changed freenode() and newnode() into macros -- the latter is getnode()
+ which calls more_nodes() as necessary.
+
+Changes from 2.12.26 to 2.12.27
+-------------------------------
+
+Completion of Cray 2 port (includes a kludge for floor() and ceil()
+ that may go or be changed -- I think that it may just be working around
+ a bug in chem that is being tweaked on the Cray).
+
+More VMS updates.
+
+Moved kludge over yacc's insertion of malloc and realloc declarations
+ from protos.h to the Makefile.
+
+Added a lisp interpreter in awk to the test suite. (Invoked under
+ bigtest.)
+
+Cleanup in r_force_number() -- I had never gotten around to a thorough
+ profile of the cache code and it turns out to be not worth it.
+
+Performance boost -- do lazy force_number()'ing for fields etc. i.e.
+ flag them (MAYBE_NUM) and call force_number only as necessary.
+
+Changes from 2.12.25 to 2.12.26
+-------------------------------
+
+Rework of regexp stuff so that dynamic regexps have reasonable
+ performance -- string used for compiled regexp is stored and
+ compared to new string -- if same, no recompilation is necessary.
+ Also, very dynamic regexps cause dfa-based searching to be turned
+ off.
+
+Code in dev_open() is back to returning fileno(std*) rather than
+ dup()ing it. This will be documented. Sorry for the run-around
+ on this.
+
+Minor atari updates.
+
+Minor vms update.
+
+Missing file from MSDOS port.
+
+Added warning (under lint) if third arg. of [g]sub is a constant and
+ handle it properly in the code (i.e. return how many matches).
+
+Changes from 2.12.24 to 2.12.25
+-------------------------------
+
+MSDOS port.
+
+Non-consequential changes to regexp variables in preparation for
+ a more serious change to fix a serious performance problem.
+
+Changes from 2.12.23 to 2.12.24
+-------------------------------
+
+Fixed bug in output flushing introduced a few patches back. This caused
+ serious performance losses.
+
+Changes from 2.12.22 to 2.12.23
+-------------------------------
+
+Accidentally left config/cray2-60 out of last patch.
+
+Added some missing dependencies to Makefile.
+
+Cleaned up mkconf a bit; made yacc the default parser (no alloca needed,
+ right?); added rs6000 hook for signed characters.
+
+Made regex.c with NO_ALLOCA undefined work.
+
+Fixed bug in dfa.c for systems where free(NULL) bombs.
+
+Deleted a few cant_happen()'s that *really* can't happen.
+
+Changes from 2.12.21 to 2.12.22
+-------------------------------
+
+Added to config stuff the ability to choose YACC rather than bison.
+
+Fixed CHAR_UNSIGNED in config.h-dist.
+
+Second arg. of strtod() is char ** rather than const char **.
+
+stackb is now initially malloc()'ed since it may be realloc()'ed.
+
+VMS updates.
+
+Added SIZE_T_MISSING to config stuff and a default typedef to awk.h.
+ (Maybe it is not needed on any current systems??)
+
+re_compile_pattern()'s size is now size_t unconditionally.
+
+Changes from 2.12.20 to 2.12.21
+-------------------------------
+
+Corrected missing/gcvt.c.
+
+Got rid of use of dup2() and thus DUP_MISSING.
+
+Updated config/sgi33.
+
+Turned on (and fixed) in cmp_nodes() the behaviour that I *hope* will be in
+ POSIX 1003.2 for relational comparisons.
+
+Small updates to test suite.
+
+Changes from 2.12.19 to 2.12.20
+-------------------------------
+
+Sloppy, sloppy, sloppy!! I didn't even try to compile the last two
+ patches. This one fixes goofs in regex.c.
+
+Changes from 2.12.18 to 2.12.19
+-------------------------------
+
+Cleanup of last patch.
+
+Changes from 2.12.17 to 2.12.18
+-------------------------------
+
+Makefile renamed to Makefile-dist.
+
+Added alloca() configuration to mkconf. (A bit kludgey.) Just
+ add a single line containing ALLOCA_PW, ALLOCA_S or ALLOCA_C
+ to the appropriate config file to have Makefile-dist edited
+ accordingly.
+
+Reorganized output flushing to correspond with new semantics of
+ devopen() on "/dev/std*" etc.
+
+Fixed rest of last goof!!
+
+Save and restore errno in do_pathopen().
+
+Miscellaneous atari updates.
+
+Get rid of the trailing comma in the NODETYPE definition (Cray
+ compiler won't take it).
+
+Try to make the use of `const' consistent since Cray compiler is
+ fussy about that. See the changes to `basename' and `myname'.
+
+It turns out that, according to section 3.8.3 (Macro Replacement)
+ of the ANSI Standard: ``If there are sequences of preprocessing
+ tokens within the list of arguments that would otherwise act as
+ preprocessing directives, the behavior is undefined.'' That means
+ that you cannot count on the behavior of the declaration of
+ re_compile_pattern in awk.h, and indeed the Cray compiler chokes on it.
+
+Replaced alloca with malloc/realloc/free in regex.c. It was much simpler
+ than expected. (Inside NO_ALLOCA for now -- by default no alloca.)
+
+Added a configuration file, config/cray60, for Unicos-6.0.
+
+Changes from 2.12.16 to 2.12.17
+-------------------------------
+
+Ooops. Goofed signal use in last patch.
+
+Changes from 2.12.15 to 2.12.16
+-------------------------------
+
+RENAMED *_dir to just * (e.g. missing_dir).
+
+Numerous VMS changes.
+
+Proper inclusion of atari and vms files.
+
+Added experimental (ifdef'd out) RELAXED_CONTINUATION and DEFAULT_FILETYPE
+ -- please comment on these!
+
+Moved pathopen() to io.c (sigh).
+
+Put local directory ahead in default AWKPATH.
+
+Added facility in mkconf to echo comments on stdout: lines beginning
+ with "#echo " will have the remainder of the line echoed when mkconf is run.
+ Any lines starting with "#" will otherwise be treated as comments. The
+ intent is to be able to say:
+ "#echo Make sure you uncomment alloca.c in the Makefile"
+ or the like.
+
+Prototype fix for V.4
+
+Fixed version_string to not print leading @(#).
+
+Fixed FIELDWIDTHS to work with strict (turned out to be easy).
+
+Fixed conf for V.2.
+
+Changed semantics of /dev/fd/n to be like on real /dev/fd.
+
+Several configuration and updates in the makefile.
+
+Updated manpage.
+
+Include tzset.c and system.c from missing_dir that were accidentally left out of
+ the last patch.
+
+Fixed bug in cmdline variable assignment -- arg was getting freed(!) in
+ call to variable.
+
+Backed out of parse-time constant folding for now, until I can figure out
+ how to do it right.
+
+Fixed devopen() so that getline <"-" works.
+
+Changes from 2.12.14 to 2.12.15
+-------------------------------
+
+Changed config/* to a condensed form that can be used with mkconf to generate
+ a config.h from config.h-dist -- much easier to maintain. Please check
+ carefully against what you had before for a particular system and report
+ any problems. vms.h remains separate since the stuff at the bottom
+ didn't quite fit the mkconf model -- hopefully cleared up later.
+
+Fixed bug in grammar -- didn't allow function definition to be separated from
+ other rules by a semi-colon.
+
+VMS fix to #includes in missing.c -- should we just be including awk.h?
+
+Updated README for texinfo.tex version.
+
+Updating of copyright in all .[chy] files.
+
+Added but commented out Michal's fix to strftime.
+
+Added tzset() emulation based on Rick Adams' code. Added TZSET_MISSING to
+ config.h-dist.
+
+Added strftime.3 man page for missing_dir
+
+More posix: func, **, **= don't work in -W posix
+
+More lint: ^, ^= not in old awk
+
+gawk.1: removed ref to -DNO_DEV_FD, other minor updating.
+
+Style change: pushbak becomes pushback() in yylex().
+
+Changes from 2.12.13 to 2.12.14
+-------------------------------
+
+Better (?) organization of awk.h -- attempt to keep all system dependencies
+ near the top and move some of the non-general things out of the config.h
+ files.
+
+Change to handling of SYSTEM_MISSING.
+
+Small change to ultrix config.
+
+Do "/dev/fd/*" etc. checking at runtime.
+
+First pass at VMS port.
+
+Improvements to error handling (when lexeme spans buffers).
+
+Fixed backslash handling -- why didn't I notice this sooner?
+
+Added programs from book to test suite and new target "bigtest" to Makefile.
+
+Changes from 2.12.12 to 2.12.13
+-------------------------------
+
+Recognize OFS and ORS specially so that OFS = 9 works without efficiency hit.
+ Took advantage of opportunity to tune do_print*() for about 10% win on a
+ print with 5 args (i.e. small but significant).
+
+Somewhat pervasive changes to reconcile CONVFMT vs. OFMT.
+
+Better initialization of builtin vars.
+
+Make config/* consistent wrt STRTOL_MISSING.
+
+Small portability improvement to alloca.s
+
+Improvements to lint code in awk.y
+
+Replaced strtol() with a better one by Chris Torek.
+
+Changes from 2.12.11 to 2.12.12
+-------------------------------
+
+Added PORTS file to record successful ports.
+
+Added #define const to nothing if not STDC and added const to strtod() header.
+
+Added * to printf capabilities and partially implemented ' ' and '+' (has an
+ effect for %d only, silently ignored for other formats). I'm afraid that's
+ as far as I want to go before I look at a complete replacement for
+ do_sprintf().
+
+Added warning for /regexp/ on LHS of MATCHOP.
+
+Changes from 2.12.10 to 2.12.11
+-------------------------------
+
+Small Makefile improvements.
+
+Some remaining nits from the NeXT port.
+
+Got rid of bcopy() define in awk.h -- not needed anymore (??)
+
+Changed private in builtin.c -- it is special on Sequent.
+
+Added subset implementation of strtol() and STRTOL_MISSING.
+
+A little bit of cleanup in debug.c, dfa.c.
+
+Changes from 2.12.9 to 2.12.10
+------------------------------
+
+Redid compatibility checking and checking for # of args.
+
+Removed all references to variables[] from outside awk.y, in preparation
+ for a more abstract interface to the symbol table.
+
+Got rid of a remaining use of bcopy() in regex.c.
+
+Changes from 2.12.8 to 2.12.9
+-----------------------------
+
+Portability improvements for atari, next and decstation.
+
+Bug fix in substr() -- wasn't handling 3rd arg. of -1 properly.
+
+Manpage updates.
+
+Moved support from src release to doc release.
+
+Updated FUTURES file.
+
+Added some "lint" warnings.
+
+Changes from 2.12.7 to 2.12.8
+-----------------------------
+
+Changed time() to systime().
+
+Changed warning() in snode() to fatal().
+
+strftime() now defaults second arg. to current time.
+
+Changes from 2.12.6 to 2.12.7
+-----------------------------
+
+Fixed bug in sub_common() involving inadequate allocation of a buffer.
+
+Added some missing files to the Makefile.
+
+Changes from 2.12.5 to 2.12.6
+-----------------------------
+
+Fixed bug wherein non-redirected getline could call iop_close() just
+ prior to a call from do_input().
+
+Fixed bug in handling of /dev/stdout and /dev/stderr.
+
+Changes from 2.12.4 to 2.12.5
+-----------------------------
+
+Updated README and support directory.
+
+Changes from 2.12.3 to 2.12.4
+-----------------------------
+
+Updated CHANGES and TODO (should have been done in previous 2 patches).
+
+Changes from 2.12.2 to 2.12.3
+-----------------------------
+
+Brought regex.c and alloca.s into line with current FSF versions.
+
+Changes from 2.12.1 to 2.12.2
+-----------------------------
+
+Portability improvements; mostly moving system prototypes out of awk.h
+
+Introduction of strftime.
+
+Use of CONVFMT.
+
+Changes from 2.12 to 2.12.1
+-----------------------------
+
+Consolidated treatment of command-line assignments (thus correcting the
+-v treatment).
+
+Rationalized builtin-variable handling into a table-driven process, thus
+simplifying variable() and eliminating spc_var().
+
+Fixed bug in handling of command-line source that ended in a newline.
+
+Simplified install() and lookup().
+
+Did away with double-mallocing of identifiers and now free second and later
+instances of a name, after the first gets installed into the symbol table.
+
+Treat IGNORECASE specially, simplifying a lot of code, and allowing
+checking against strict conformance only on setting it, rather than on each
+pattern match.
+
+Fixed regexp matching when IGNORECASE is non-zero (broken when dfa.c was
+added).
+
+Fixed bug where $0 was not being marked as valid, even after it was rebuilt.
+This caused mangling of $0.
+
+
+Changes from 2.11.1 to 2.12
+-----------------------------
+
+Makefile:
+
+Portability improvements in Makefile.
+Move configuration stuff into config.h
+
+FSF files:
+
+Synchronized alloca.[cs] and regex.[ch] with FSF.
+
+array.c:
+
+Rationalized hash routines into one with a different algorithm.
+delete() now works if the array is a local variable.
+Changed interface of assoc_next() and avoided dereferencing past the end of the
+ array.
+
+awk.h:
+
+Merged non-prototype and prototype declarations in awk.h.
+Expanded tree_eval #define to short-circuit more calls of r_tree_eval().
+
+awk.y:
+
+Delinted some of the code in the grammar.
+Fixed and improved some of the error message printing.
+Changed to accommodate unlimited length source lines.
+Line continuation now works as advertised.
+Source lines can be arbitrarily long.
+Refined grammar hacks so that /= assignment works. Regular expressions
+ starting with /= are recognized at the beginning of a line, after && or ||
+ and after ~ or !~. More contexts can be added if necessary.
+Fixed IGNORECASE (multiple scans for backslash).
+Condensed expression_lists in array references.
+Detect and warn for correct # args in builtin functions -- call most of them
+ with a fixed number (i.e. fill in defaults at parse-time rather than at
+ run-time).
+Load ENVIRON only if it is referenced (detected at parse-time).
+Treat NF, FS, RS, NR, FNR specially at parse time, to improve run time.
+Fold constant expressions at parse time.
+Do make_regexp() on third arg. of split() at parse time if it is a constant.
+
+builtin.c:
+
+srand() returns 0 the first time called.
+Replaced alloca() with malloc() in do_sprintf().
+Fixed setting of RSTART and RLENGTH in do_match().
+Got rid of get_{one,two,three} and allowance for variable # of args. at
+ run-time -- this is now done at parse-time.
+Fixed latent bug in [g]sub whereby changes to $0 would never get made.
+Rewrote much of sub_common() for simplicity and performance.
+Added ctime() and time() builtin functions (unless -DSTRICT). ctime() returns
+ a time string like the C function, given the number of seconds since the epoch
+ and time() returns the current time in seconds.
+do_sprintf() now checks for mismatch between format string and number of
+ arguments supplied.
+
+dfa.c
+
+This is borrowed (almost unmodified) from GNU grep to provide faster searches.
+
+eval.c
+
+Node_var, Node_var_array and Node_param_list handled from macro rather
+ than in r_tree_eval().
+Changed cmp_nodes() to not do a force_number() -- this, combined with a
+ force_number() on ARGV[] and ENVIRON[] brings it into line with other awks
+Greatly simplified cmp_nodes().
+Separated out Node_NF, Node_FS, Node_RS, Node_NR and Node_FNR in get_lhs().
+All adjacent string concatenations now done at once.
+
+field.c
+
+Added support for FIELDWIDTHS.
+Fixed bug in get_field() whereby changes to a field were not always
+ properly reflected in $0.
+Reordered tests in parse_field() so that reference off the end of the buffer
+ doesn't happen.
+set_FS() now sets *parse_field i.e. routine to call depending on type of FS.
+It also does make_regexp() for FS if needed. get_field() passes FS_regexp
+ to re_parse_field(), as does do_split().
+Changes to set_field() and set_record() to avoid malloc'ing and free'ing the
+ field nodes repeatedly. The fields now just point into $0 unless they are
+ assigned to another variable or changed. force_number() on the field is
+ *only* done when the field is needed.
+
+gawk.1
+
+Fixed troff formatting problem on .TP lines.
+
+io.c
+
+Moved some code out into iop.c.
+Output from pipes and system() calls is properly synchronized.
+Status from pipe close properly returned.
+Bug in getline with no redirect fixed.
+
+iop.c
+
+This file contains a totally revamped get_a_record and associated code.
+
+main.c
+
+Command line programs no longer use a temporary file.
+Therefore, tmpnam() no longer required.
+Deprecated -a and -e options -- they will go away in the next release,
+ but for now they cause a warning.
+Moved -C, -V, -c options to -W ala posix.
+Added -W posix option: throw out \x
+Added -W lint option.
+
+
+node.c
+
+force_number() now allows pure numerics to have leading whitespace.
+Added make_string facility to optimize case of adding an already malloc'd
+ string.
+Cleaned up and simplified do_deref().
+Fixed bug in handling of stref==255 in do_deref().
+
+re.c
+
+contains the interface to regexp code
+
+Changes from 2.11.1 to FSF version of same
+------------------------------------------
+Thu Jan 4 14:19:30 1990 Jim Kingdon (kingdon at albert)
+
+ * Makefile (YACC): Add -y to bison part.
+
+ * missing.c: Add #include <stdio.h>.
+
+Sun Dec 24 16:16:05 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * Makefile: Add (commented out) default defines for Sony News.
+
+ * awk.h: Move declaration of vprintf so it will compile when
+ -DVPRINTF_MISSING is defined.
+
+Mon Nov 13 18:54:08 1989 Robert J. Chassell (bob at apple-gunkies.ai.mit.edu)
+
+ * gawk.texinfo: changed @-commands that are not part of the
+ standard, currently released texinfmt.el to those that are.
+ Otherwise, only people with the as-yet unreleased makeinfo.c can
+ format this file.
+
+Changes from 2.11beta to 2.11.1 (production)
+--------------------------------------------
+
+Went from "beta" to production status!!!
+
+Now flushes stdout before closing pipes or redirected files to
+synchronize output.
+
+MS-DOS changes added in.
+
+Signal handler return type parameterized in Makefile and awk.h and
+some lint removed. debug.c cleaned up.
+
+Fixed FS splitting to never match null strings, per book.
+
+Correction to the manual's description of FS.
+
+Some compilers break on char *foo = "string" + 4 so fixed version.sh and
+main.c.
+
+Changes from 2.10beta to 2.11beta
+---------------------------------
+
+This release fixes all reported bugs that we could reproduce. Probably
+some of the changes are not documented here.
+
+The next release will probably not be a beta release!
+
+The most important change is the addition of the -nostalgia option. :-)
+
+The documentation has been improved and brought up-to-date.
+
+There has been a lot of general cleaning up of the code that is not otherwise
+documented here. There has been a movement toward using standard-conforming
+library routines and providing them (in missing.d) for systems lacking them.
+Improved (hopefully) configuration through Makefile modifications and missing.c.
+In particular, straightened out confusion over vprintf #defines, declarations
+etc.
+
+Deleted RCS log comments from source, to reduce source size by about one third.
+Most of them were horribly out-of-date, anyway.
+
+Renamed source files to reflect (for the most part) their contents.
+
+More and improved error messages. Cleanup and fixes to yyerror().
+String constants are not altered in input buffer, so error messages come out
+better. Fixed usage message. Make use of ANSI C strerror() function
+(provided).
+
+Plugged many more memory leaks. The memory consumption is now quite
+reasonable over a wide range of programs.
+
+Uses volatile declaration if STDC > 0 to avoid problems due to longjmp.
+
+New -a and -e options to use awk or egrep style regexps, respectively,
+since POSIX says awk should use egrep regexps. Default is -a.
+
+Added -v option for setting variables before the first file is encountered.
+Version information now uses -V and copyleft uses -C.
+
+Added a patchlevel.h file and its use for -V and -C.
+
+Append_right() optimized for major improvement to programs with a *lot*
+of statements.
+
+Operator precedence has been corrected to match draft Posix.
+
+Tightened up grammar for builtin functions so that only length
+may be called without arguments or parentheses.
+
+/regex/ is now a normal expression that can appear in any expression
+context.
+
+Allow /= to begin a regexp. Allow ..[../..].. in a regexp.
+
+Allow empty compound statements ({}).
+
+Made return and next illegal outside a function and in BEGIN/END respectively.
+
+Division by zero is now illegal and causes a fatal error.
+
+Fixed exponentiation so that x ^ 0 and x ^= 0 both return 1.
+
+Fixed do_sqrt, do_log, and do_exp to do argument/return checking and
+print an error message, per the manual.
+
+Fixed main to catch SIGSEGV to get source and data file line numbers.
+
+Fixed yyerror to print the ^ at the beginning of the bad token, not the end.
+
+Fix to substr() builtin: it was failing if the arguments
+weren't already strings.
+
+Added new node value flag NUMERIC to indicate that a variable is
+purely a number as opposed to type NUM which indicates that
+the node's numeric value is valid. This is set in make_number(),
+tmp_number and r_force_number() when appropriate and used in
+cmp_nodes(). This fixed a bug in comparison of variables that had
+numeric prefixes. The new code uses strtod() and eliminates is_a_number().
+A simple strtod() is provided for systems lacking one. It does no
+overflow checking, so could be improved.
+
+Simplification and efficiency improvement in force_string.
+
+Added performance tweak in r_force_number().
+
+Fixed a bug with nested loops and break/continue in functions.
+
+Fixed inconsistency in handling of empty fields when $0 has to be rebuilt.
+Happens to simplify rebuild_record().
+
+Cleaned up the code associated with opening a pipe for reading. Gawk
+now has its own popen routine (gawk_popen) that allocates an IOBUF
+and keeps track of the pid of the child process. gawk_pclose
+marks the appropriate child as defunct in the right struct redirect.
+
+Cleaned up and fixed close_redir().
+
+Fixed an obscure bug to do with redirection. Intermingled ">" and ">>"
+redirects did not output in a predictable order.
+
+Improved handling of output buffering: now all print[f]s redirected to a tty
+or pipe are flushed immediately and non-redirected output to a tty is flushed
+before the next input record is read.
+
+Fixed a bug in get_a_record() where bcopy() could have copied over
+a random pointer.
+
+Fixed a bug when RS="" and records separated by multiple blank lines.
+
+Got rid of SLOWIO code which was out-of-date anyway.
+
+Fix in get_field() for case where $0 is changed and then $(n) are
+changed and then $0 is used.
+
+Fixed infinite loop on failure to open file for reading from getline.
+Now handles redirect file open failures properly.
+
+Filenames such as /dev/stdin now allowed on the command line as well as
+in redirects.
+
+Fixed so that gawk '$1' where $1 is a zero tests false.
+
+Fixed parsing so that `RLENGTH -1' parses the same as `RLENGTH - 1',
+for example.
+
+The return from a user-defined function now defaults to the Null node.
+This fixes a core-dump-causing bug when the return value of a function
+is used and that function returns no value.
+
+Now catches floating point exceptions to avoid core dumps.
+
+Bug fix for deleting elements of an array -- under some conditions, it was
+deleting more than one element at a time.
+
+Fix in AWKPATH code for running off the end of the string.
+
+Fixed handling of precision in *printf calls. %0.2d now works properly,
+as does %c. [s]printf now recognizes %i and %X.
+
+Fixed a bug in printing of very large (>240) strings.
+
+Cleaned up erroneous behaviour for RS == "".
+
+Added IGNORECASE support to index().
+
+Simplified and fixed newnode/freenode.
+
+Fixed reference to $(anything) in a BEGIN block.
+
+Eliminated use of USG rand48().
+
+Bug fix in force_string for machines with 16-bit ints.
+
+Replaced use of mktemp() with tmpnam() and provided a partial implementation of
+the latter for systems that don't have it.
+
+Added a portability check for includes in io.c.
+
+Minor portability fix in alloc.c plus addition of xmalloc().
+
+Portability fix: on UMAX4.2, st_blksize is zero for a pipe, thus breaking
+iop_alloc() -- fixed.
+
+Workaround for compiler bug on Sun386i in do_sprintf.
+
+More and improved prototypes in awk.h.
+
+Consolidated C escape parsing code into one place.
+
+strict flag is now turned on only when invoked with compatibility option.
+It now applies to fewer things.
+
+Changed cast of f._ptr in vprintf.c from (unsigned char *) to (char *).
+Hopefully this is right for the systems that use this code (I don't).
+
+Support for pipes under MSDOS added.
diff --git a/gnu/usr.bin/awk/PORTS b/gnu/usr.bin/awk/PORTS
new file mode 100644
index 000000000000..95e133f9dd03
--- /dev/null
+++ b/gnu/usr.bin/awk/PORTS
@@ -0,0 +1,32 @@
+A recent version of gawk has been successfully compiled and run "make test"
+on the following:
+
+Sun 4/490 running 4.1
+NeXT running 2.0
+DECstation 3100 running Ultrix 4.0 or Ultrix 3.1 (different config)
+AtariST (16-bit ints, gcc compiler, byacc, running under TOS)
+ESIX V.3.2 Rev D (== System V Release 3.2), the 386. compiler was gcc + bison
+IBM RS/6000 (see README.rs6000)
+486 running SVR4, using cc and bison
+SGI running IRIX 3.3 using gcc (fails with cc)
+Sequent Balance running Dynix V3.1
+Cray Y-MP8 running Unicos 6.0.11
+Cray 2 running Unicos 6.1 (modulo trailing zeroes in chem)
+VAX/VMS V5.x (should also work on 4.6 and 4.7)
+VMS POSIX V1.0, V1.1
+OpenVMS AXP V1.0
+MSDOS - Microsoft C 5.1, compiles and runs very simple testing
+BSD 4.4alpha
+
+From: ghazi@caip.rutgers.edu (Kaveh R. Ghazi):
+
+arch configured as:
+---- --------------
+Hpux 9.0 hpux8x
+NeXTStep 2.0 next20
+Sgi Irix 4.0.5 (/bin/cc) sgi405.cc (new file)
+Stardent Titan 1500 OSv2.5 sysv3
+Stardent Vistra (i860) SVR4 sysv4
+SunOS 4.1.2 sunos41
+Tektronix XD88 (UTekV 3.2e) sysv3
+Ultrix 4.2 ultrix41
diff --git a/gnu/usr.bin/awk/POSIX b/gnu/usr.bin/awk/POSIX
new file mode 100644
index 000000000000..f2405420aedf
--- /dev/null
+++ b/gnu/usr.bin/awk/POSIX
@@ -0,0 +1,95 @@
+Right now, the numeric vs. string comparisons are screwed up in draft
+11.2. What prompted me to check it out was the note in gnu.bug.utils
+which observed that gawk was doing the comparison $1 == "000"
+numerically. I think that we can agree that intuitively, this should
+be done as a string comparison. Version 2.13.2 of gawk follows the
+current POSIX draft. Following is how I (now) think this
+stuff should be done.
+
+1. A numeric literal or the result of a numeric operation has the NUMERIC
+ attribute.
+
+2. A string literal or the result of a string operation has the STRING
+ attribute.
+
+3. Fields, getline input, FILENAME, ARGV elements, ENVIRON elements and the
+ elements of an array created by split() that are numeric strings
+ have the STRNUM attribute. Otherwise, they have the STRING attribute.
+ Uninitialized variables also have the STRNUM attribute.
+
+4. Attributes propagate across assignments, but are not changed by
+ any use. (Although a use may cause the entity to acquire an additional
+ value such that it has both a numeric and string value -- this leaves the
+ attribute unchanged.)
+
+When two operands are compared, either string comparison or numeric comparison
+may be used, depending on the attributes of the operands, according to the
+following (symmetric) matrix:
+
+ +----------------------------------------------
+ | STRING NUMERIC STRNUM
+--------+----------------------------------------------
+ |
+STRING | string string string
+ |
+NUMERIC | string numeric numeric
+ |
+STRNUM | string numeric numeric
+--------+----------------------------------------------
+
+So, the following program should print all OKs.
+
+echo '0e2 0a 0 0b
+0e2 0a 0 0b' |
+$AWK '
+NR == 1 { # first record only: tests 1-15
+ num = 0
+ str = "0e2"
+
+ print ++test ": " ( (str == "0e2") ? "OK" : "OOPS" )
+ print ++test ": " ( ("0e2" != 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ("0" != $2) ? "OK" : "OOPS" )
+ print ++test ": " ( ("0e2" == $1) ? "OK" : "OOPS" )
+
+ print ++test ": " ( (0 == "0") ? "OK" : "OOPS" )
+ print ++test ": " ( (0 == num) ? "OK" : "OOPS" )
+ print ++test ": " ( (0 != $2) ? "OK" : "OOPS" )
+ print ++test ": " ( (0 == $1) ? "OK" : "OOPS" )
+
+ print ++test ": " ( ($1 != "0") ? "OK" : "OOPS" )
+ print ++test ": " ( ($1 == num) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != $1) ? "OK" : "OOPS" )
+ print ++test ": " ( ($3 == 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ($3 == $1) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != $4) ? "OK" : "OOPS" ) # 15
+}
+{ # every record: tests 16-21 on record 1, 22-28 on record 2
+ a = "+2"
+ b = 2
+ if (NR % 2) # odd records only: a numeric use of a, which should not change its attribute (rule 4)
+ c = a + b
+ print ++test ": " ( (a != b) ? "OK" : "OOPS" ) # 16 and 22
+
+ d = "2a"
+ b = 2
+ if (NR % 2) # odd records only: same check for a string that is not a numeric string
+ c = d + b
+ print ++test ": " ( (d != b) ? "OK" : "OOPS" )
+
+ print ++test ": " ( (d + 0 == b) ? "OK" : "OOPS" ) # d + 0 is a numeric result
+
+ e = "2"
+ print ++test ": " ( (e == b "") ? "OK" : "OOPS" ) # b "" is a string result
+
+ a = "2.13"
+ print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" )
+
+ a = "2.130000"
+ print ++test ": " ( (a != 2.13) ? "OK" : "OOPS" )
+
+ if (NR == 2) {
+ CONVFMT = "%.6f" # expected: 2.13 now converts to "2.130000", so the string comparison matches
+ print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" ) # 28, second record only
+ }
+}'
diff --git a/gnu/usr.bin/awk/PROBLEMS b/gnu/usr.bin/awk/PROBLEMS
new file mode 100644
index 000000000000..3b7c5148bd8e
--- /dev/null
+++ b/gnu/usr.bin/awk/PROBLEMS
@@ -0,0 +1,6 @@
+This is a list of known problems in gawk 2.15.
+Hopefully they will all be fixed in the next major release of gawk.
+
+Please keep in mind that the code is still undergoing significant evolution.
+
+1. Gawk's printf is probably still not POSIX compliant.
diff --git a/gnu/usr.bin/awk/README b/gnu/usr.bin/awk/README
new file mode 100644
index 000000000000..f4bd3df806c8
--- /dev/null
+++ b/gnu/usr.bin/awk/README
@@ -0,0 +1,116 @@
+README:
+
+This is GNU Awk 2.15. It should be upwardly compatible with the
+System V Release 4 awk. It is almost completely compliant with draft 11.3
+of POSIX 1003.2.
+
+This release adds new features -- see NEWS for details.
+
+See the installation instructions, below.
+
+Known problems are given in the PROBLEMS file. Work to be done is
+described briefly in the FUTURES file. Verified ports are listed in
+the PORTS file. Changes in this version are summarized in the CHANGES file.
+Please read the LIMITATIONS and ACKNOWLEDGMENT files.
+
+Read the file POSIX for a discussion of how the standard says comparisons
+should be done vs. how they really should be done and how gawk does them.
+
+To format the documentation with TeX, you must use texinfo.tex 2.53
+or later. Otherwise footnotes look unacceptable.
+
+If you wish to remake the Info files, you should use makeinfo. The 2.15
+version of makeinfo works with no errors.
+
+The man page is up to date.
+
+INSTALLATION:
+
+Check whether there is a system-specific README file for your system.
+
+Makefile.in may need some tailoring. The only changes necessary should
+be to change installation targets or to change compiler flags.
+The changes to make in Makefile.in are commented and should be obvious.
+
+All other changes should be made in a config file. Samples for
+various systems are included in the config directory. Starting with
+2.11, our intent has been to make the code conform to standards (ANSI,
+POSIX, SVID, in that order) whenever possible, and to not penalize
+standard conforming systems. We have included substitute versions of
+routines not universally available. Simply add the appropriate define
+for the missing feature(s) on your system.
+
+If you have neither bison nor yacc, use the awktab.c file here. It was
+generated with bison, and should have no AT&T code in it. (Note that
+modifying awk.y without bison or yacc will be difficult, at best. You might
+want to get a copy of bison from the FSF too.)
+
+If no config file is included for your system, start by copying one
+for a similar system. One way of determining the defines needed is to
+try to load gawk with nothing defined and see what routines are
+unresolved by the loader. This should give you a good idea of how to
+proceed.
+
+The next release will use the FSF autoconfig program, so we are no longer
+soliciting new config files.
+
+If you have an MS-DOS system, use the stuff in the pc directory.
+For an Atari there is an atari directory and similarly one for VMS.
+
+Chapter 16 of The GAWK Manual discusses configuration in detail.
+
+After successful compilation, do 'make test' to run a small test
+suite. There should be no output from the 'cmp' invocations except in
+the cases where there are small differences in floating point values.
+If there are other differences, please investigate and report the
+problem.
+
+PRINTING THE MANUAL
+
+The 'support' directory contains texinfo.tex 2.65, which will be necessary
+for printing the manual, and the texindex.c program from the texinfo
+distribution which is also necessary. See the makefile for the steps needed
+to get a DVI file from the manual.
+
+CAVEATS
+
+The existence of a patchlevel.h file does *N*O*T* imply a commitment on
+our part to issue bug fixes or patches. It is there in case we should
+decide to do so.
+
+BUG REPORTS AND FIXES (Un*x systems):
+
+Please coordinate changes through David Trueman and/or Arnold Robbins.
+
+David Trueman
+Department of Mathematics, Statistics and Computing Science,
+Dalhousie University, Halifax, Nova Scotia, Canada
+
+UUCP: {uunet utai watmath}!dalcs!david
+INTERNET: david@cs.dal.ca
+
+Arnold Robbins
+1736 Reindeer Drive
+Atlanta, GA, 30329, USA
+
+INTERNET: arnold@skeeve.atl.ga.us
+UUCP: { gatech, emory, emoryu1 }!skeeve!arnold
+
+BUG REPORTS AND FIXES (non-Unix ports):
+
+MS-DOS:
+ Scott Deifik
+ AMGEN Inc.
+ Amgen Center, Bldg.17-Dept.393
+ Thousand Oaks, CA 91320-1789
+ Tel-805-499-5725 ext.4677
+ Fax-805-498-0358
+ scottd@amgen.com
+
+VMS:
+ Pat Rankin
+ rankin@eql.caltech.edu (e-mail only)
+
+Atari ST:
+ Michal Jaegermann
+ NTOMCZAK@vm.ucs.UAlberta.CA (e-mail only)
diff --git a/gnu/usr.bin/awk/array.c b/gnu/usr.bin/awk/array.c
new file mode 100644
index 000000000000..59be340c04df
--- /dev/null
+++ b/gnu/usr.bin/awk/array.c
@@ -0,0 +1,293 @@
+/*
+ * array.c - routines for associative arrays.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1));
+
+NODE * /* string node: the subscript expression(s) in tree, joined by SUBSEP */
+concat_exp(tree)
+register NODE *tree;
+{
+ register NODE *r; /* value of the current list element; finally the result node */
+ char *str; /* start of the buffer being built */
+ char *s; /* current write position within str */
+ unsigned len; /* bytes currently allocated for str */
+ int offset; /* s - str, saved across erealloc */
+ int subseplen;
+ char *subsep;
+
+ if (tree->type != Node_expression_list) /* not a list: a single subscript */
+ return force_string(tree_eval(tree));
+ r = force_string(tree_eval(tree->lnode));
+ if (tree->rnode == NULL) /* one-element list: nothing to join */
+ return r;
+ subseplen = SUBSEP_node->lnode->stlen;
+ subsep = SUBSEP_node->lnode->stptr;
+ len = r->stlen + subseplen + 2; /* first string + one separator + slack for a terminator */
+ emalloc(str, char *, len, "concat_exp");
+ memcpy(str, r->stptr, r->stlen+1); /* +1 also copies the byte after the text (presumably its NUL) */
+ s = str + r->stlen;
+ free_temp(r);
+ tree = tree->rnode;
+ while (tree) { /* append SUBSEP and the next subscript for each remaining element */
+ if (subseplen == 1) /* fast path for a one-character SUBSEP */
+ *s++ = *subsep;
+ else {
+ memcpy(s, subsep, subseplen+1);
+ s += subseplen;
+ }
+ r = force_string(tree_eval(tree->lnode));
+ len += r->stlen + subseplen; /* room for this string and the separator that may follow */
+ offset = s - str; /* str may move in erealloc; remember where s was */
+ erealloc(str, char *, len, "concat_exp");
+ s = str + offset;
+ memcpy(s, r->stptr, r->stlen+1);
+ s += r->stlen;
+ free_temp(r);
+ tree = tree->rnode;
+ }
+ r = make_str_node(str, s - str, ALREADY_MALLOCED); /* presumably adopts str without copying -- confirm in node.c */
+ r->flags |= TEMP; /* result is a temporary; callers in this file release it with free_temp() */
+ return r;
+}
+
+/* Flush all the elements of symbol[] (names, values and buckets) before doing a split() */
+void
+assoc_clear(symbol)
+NODE *symbol;
+{
+ int i;
+ NODE *bucket, *next;
+
+ if (symbol->var_array == 0) /* no hash table allocated: nothing to free */
+ return;
+ for (i = 0; i < HASHSIZE; i++) {
+ for (bucket = symbol->var_array[i]; bucket; bucket = next) {
+ next = bucket->ahnext; /* save the link before the bucket is freed */
+ unref(bucket->ahname);
+ unref(bucket->ahvalue);
+ freenode(bucket);
+ }
+ symbol->var_array[i] = 0; /* chain is now empty; the table itself is kept */
+ }
+}
+
+/*
+ * calculate the hash of the len bytes at s; the result is always < HASHSIZE
+ */
+unsigned int
+hash(s, len)
+register char *s;
+register int len;
+{
+ register unsigned long h = 0, g;
+
+ while (len--) {
+ h = (h << 4) + *s++; /* shift in the next byte */
+ g = (h & 0xf0000000); /* bits 28-31 of the running hash */
+ if (g) {
+ h = h ^ (g >> 24); /* fold those bits back into the low end ... */
+ h = h ^ g; /* ... and clear them from the top */
+ }
+ }
+ if (h < HASHSIZE) /* already in range: skip the modulo */
+ return h;
+ else
+ return h%HASHSIZE;
+}
+
+/*
+ * locate symbol[subs] in chain hash1; a hit is moved to the front of its chain
+ */
+static NODE * /* NULL if not found */
+assoc_find(symbol, subs, hash1)
+NODE *symbol; /* symbol->var_array is dereferenced unchecked; callers in this file test it first */
+register NODE *subs;
+int hash1; /* chain index; callers in this file pass hash() of subs */
+{
+ register NODE *bucket, *prev = 0;
+
+ for (bucket = symbol->var_array[hash1]; bucket; bucket = bucket->ahnext) {
+ if (cmp_nodes(bucket->ahname, subs) == 0) {
+ if (prev) { /* move found to front of chain */
+ prev->ahnext = bucket->ahnext;
+ bucket->ahnext = symbol->var_array[hash1];
+ symbol->var_array[hash1] = bucket;
+ }
+ return bucket;
+ } else
+ prev = bucket; /* save previous list entry */
+ }
+ return NULL;
+}
+
+/*
+ * test whether the array element symbol[subs] exists: returns 1 if so, else 0
+ */
+int
+in_array(symbol, subs)
+NODE *symbol, *subs;
+{
+ register int hash1;
+
+ if (symbol->type == Node_param_list) /* parameter: use the node held in stack_ptr[] instead */
+ symbol = stack_ptr[symbol->param_cnt];
+ if (symbol->var_array == 0) /* no hash table, so no elements */
+ return 0;
+ subs = concat_exp(subs); /* concat_exp returns a string node */
+ hash1 = hash(subs->stptr, subs->stlen);
+ if (assoc_find(symbol, subs, hash1) == NULL) {
+ free_temp(subs); /* done with the subscript string on either path */
+ return 0;
+ } else {
+ free_temp(subs);
+ return 1;
+ }
+}
+
+/*
+ * SYMBOL is the address of the node (or other pointer) being dereferenced.
+ * SUBS is a number or string used as the subscript.
+ *
+ * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it
+ * isn't there. Returns a pointer ala get_lhs to where its value is stored
+ */
+NODE **
+assoc_lookup(symbol, subs)
+NODE *symbol, *subs;
+{
+ register int hash1;
+ register NODE *bucket;
+
+ (void) force_string(subs); /* subs must have a string form before it is hashed */
+ hash1 = hash(subs->stptr, subs->stlen);
+
+ if (symbol->var_array == 0) { /* this table really should grow
+ * dynamically */
+ unsigned size;
+
+ size = sizeof(NODE *) * HASHSIZE;
+ emalloc(symbol->var_array, NODE **, size, "assoc_lookup");
+ memset((char *)symbol->var_array, 0, size); /* every chain starts empty */
+ symbol->type = Node_var_array; /* first subscripted use turns the variable into an array */
+ } else {
+ bucket = assoc_find(symbol, subs, hash1);
+ if (bucket != NULL) { /* already present: hand back its value slot */
+ free_temp(subs);
+ return &(bucket->ahvalue);
+ }
+ }
+
+ /* It's not there, install it. */
+ if (do_lint && subs->stlen == 0)
+ warning("subscript of array `%s' is null string",
+ symbol->vname);
+ getnode(bucket);
+ bucket->type = Node_ahash;
+ if (subs->flags & TEMP)
+ bucket->ahname = dupnode(subs);
+ else {
+ unsigned int saveflags = subs->flags;
+
+ subs->flags &= ~MALLOC; /* NOTE(review): presumably makes dupnode() take a private copy -- confirm in node.c */
+ bucket->ahname = dupnode(subs);
+ subs->flags = saveflags; /* restore the caller's flags */
+ }
+ free_temp(subs);
+
+ /* array subscripts are strings */
+ bucket->ahname->flags &= ~NUMBER;
+ bucket->ahname->flags |= STRING;
+ bucket->ahvalue = Nnull_string; /* new elements start out as the null string */
+ bucket->ahnext = symbol->var_array[hash1]; /* push onto the front of the chain */
+ symbol->var_array[hash1] = bucket;
+ return &(bucket->ahvalue);
+}
+
+void /* delete symbol[tree] if it exists; a missing element is silently ignored */
+do_delete(symbol, tree)
+NODE *symbol, *tree;
+{
+ register int hash1;
+ register NODE *bucket, *last; /* last trails bucket by one chain entry */
+ NODE *subs;
+
+ if (symbol->type == Node_param_list) /* parameter: use the node held in stack_ptr[] instead */
+ symbol = stack_ptr[symbol->param_cnt];
+ if (symbol->var_array == 0) /* no hash table, so nothing to delete */
+ return;
+ subs = concat_exp(tree); /* concat_exp returns string node */
+ hash1 = hash(subs->stptr, subs->stlen);
+
+ last = NULL;
+ for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext)
+ if (cmp_nodes(bucket->ahname, subs) == 0)
+ break;
+ free_temp(subs);
+ if (bucket == NULL) /* subscript not present */
+ return;
+ if (last) /* unlink from the middle or end of the chain */
+ last->ahnext = bucket->ahnext;
+ else /* bucket was the chain head */
+ symbol->var_array[hash1] = bucket->ahnext;
+ unref(bucket->ahname);
+ unref(bucket->ahvalue);
+ freenode(bucket);
+}
+
+void /* begin a scan of symbol: first subscript (or NULL if none) is left in lookat->retval */
+assoc_scan(symbol, lookat)
+NODE *symbol;
+struct search *lookat;
+{
+ if (!symbol->var_array) { /* no hash table: nothing to iterate over */
+ lookat->retval = NULL;
+ return;
+ }
+ lookat->arr_ptr = symbol->var_array;
+ lookat->arr_end = lookat->arr_ptr + HASHSIZE; /* one past the last chain head */
+ lookat->bucket = symbol->var_array[0];
+ assoc_next(lookat); /* fetch the first element, if any */
+}
+
+void /* advance the scan: next subscript goes in lookat->retval, NULL once all chains are done */
+assoc_next(lookat)
+struct search *lookat;
+{
+ while (lookat->arr_ptr < lookat->arr_end) {
+ if (lookat->bucket != 0) {
+ lookat->retval = lookat->bucket->ahname;
+ lookat->bucket = lookat->bucket->ahnext; /* resume from here on the next call */
+ return;
+ }
+ lookat->arr_ptr++; /* current chain exhausted: step to the next one */
+ if (lookat->arr_ptr < lookat->arr_end)
+ lookat->bucket = *(lookat->arr_ptr);
+ else /* ran past the last chain: no more elements */
+ lookat->retval = NULL;
+ }
+ return;
+}
diff --git a/gnu/usr.bin/awk/awk.1 b/gnu/usr.bin/awk/awk.1
new file mode 100644
index 000000000000..0338485e8db8
--- /dev/null
+++ b/gnu/usr.bin/awk/awk.1
@@ -0,0 +1,1873 @@
+.ds PX \s-1POSIX\s+1
+.ds UX \s-1UNIX\s+1
+.ds AN \s-1ANSI\s+1
+.TH GAWK 1 "Apr 15 1993" "Free Software Foundation" "Utility Commands"
+.SH NAME
+gawk \- pattern scanning and processing language
+.SH SYNOPSIS
+.B gawk
+[ POSIX or GNU style options ]
+.B \-f
+.I program-file
+[
+.B \-\^\-
+] file .\^.\^.
+.br
+.B gawk
+[ POSIX or GNU style options ]
+[
+.B \-\^\-
+]
+.I program-text
+file .\^.\^.
+.SH DESCRIPTION
+.I Gawk
+is the GNU Project's implementation of the AWK programming language.
+It conforms to the definition of the language in
+the \*(PX 1003.2 Command Language And Utilities Standard.
+This version in turn is based on the description in
+.IR "The AWK Programming Language" ,
+by Aho, Kernighan, and Weinberger,
+with the additional features defined in the System V Release 4 version
+of \*(UX
+.IR awk .
+.I Gawk
+also provides some GNU-specific extensions.
+.PP
+The command line consists of options to
+.I gawk
+itself, the AWK program text (if not supplied via the
+.B \-f
+or
+.B \-\^\-file
+options), and values to be made
+available in the
+.B ARGC
+and
+.B ARGV
+pre-defined AWK variables.
+.SH OPTIONS
+.PP
+.I Gawk
+options may be either the traditional \*(PX one letter options,
+or the GNU style long options. \*(PX style options start with a single ``\-'',
+while GNU long options start with ``\-\^\-''.
+GNU style long options are provided for both GNU-specific features and
+for \*(PX mandated features. Other implementations of the AWK language
+are likely to only accept the traditional one letter options.
+.PP
+Following the \*(PX standard,
+.IR gawk -specific
+options are supplied via arguments to the
+.B \-W
+option. Multiple
+.B \-W
+options may be supplied, or multiple arguments may be supplied together
+if they are separated by commas, or enclosed in quotes and separated
+by white space.
+Case is ignored in arguments to the
+.B \-W
+option.
+Each
+.B \-W
+option has a corresponding GNU style long option, as detailed below.
+.PP
+.I Gawk
+accepts the following options.
+.TP
+.PD 0
+.BI \-F " fs"
+.TP
+.PD
+.BI \-\^\-field-separator= fs
+Use
+.I fs
+for the input field separator (the value of the
+.B FS
+predefined
+variable).
+.TP
+.PD 0
+\fB\-v\fI var\fB\^=\^\fIval\fR
+.TP
+.PD
+\fB\-\^\-assign=\fIvar\fB\^=\^\fIval\fR
+Assign the value
+.I val
+to the variable
+.IR var ,
+before execution of the program begins.
+Such variable values are available to the
+.B BEGIN
+block of an AWK program.
+.TP
+.PD 0
+.BI \-f " program-file"
+.TP
+.PD
+.BI \-\^\-file= program-file
+Read the AWK program source from the file
+.IR program-file ,
+instead of from the first command line argument.
+Multiple
+.B \-f
+(or
+.BR \-\^\-file )
+options may be used.
+.TP \w'\fB\-\^\-copyright\fR'u+1n
+.PD 0
+.B "\-W compat"
+.TP
+.PD
+.B \-\^\-compat
+Run in
+.I compatibility
+mode. In compatibility mode,
+.I gawk
+behaves identically to \*(UX
+.IR awk ;
+none of the GNU-specific extensions are recognized.
+See
+.BR "GNU EXTENSIONS" ,
+below, for more information.
+.TP
+.PD 0
+.B "\-W copyleft"
+.TP
+.PD 0
+.B "\-W copyright"
+.TP
+.PD 0
+.B \-\^\-copyleft
+.TP
+.PD
+.B \-\^\-copyright
+Print the short version of the GNU copyright information message on
+the error output.
+.TP
+.PD 0
+.B "\-W help"
+.TP
+.PD 0
+.B "\-W usage"
+.TP
+.PD 0
+.B \-\^\-help
+.TP
+.PD
+.B \-\^\-usage
+Print a relatively short summary of the available options on
+the error output.
+.TP
+.PD 0
+.B "\-W lint"
+.TP
+.PD
+.B \-\^\-lint
+Provide warnings about constructs that are
+dubious or non-portable to other AWK implementations.
+.ig
+.\" This option is left undocumented, on purpose.
+.TP
+.PD 0
+.B "\-W nostalgia"
+.TP
+.PD
+.B \-\^\-nostalgia
+Provide a moment of nostalgia for long time
+.I awk
+users.
+..
+.TP
+.PD 0
+.B "\-W posix"
+.TP
+.PD
+.B \-\^\-posix
+This turns on
+.I compatibility
+mode, with the following additional restrictions:
+.RS
+.TP \w'\(bu'u+1n
+\(bu
+.B \ex
+escape sequences are not recognized.
+.TP
+\(bu
+The synonym
+.B func
+for the keyword
+.B function
+is not recognized.
+.TP
+\(bu
+The operators
+.B **
+and
+.B **=
+cannot be used in place of
+.B ^
+and
+.BR ^= .
+.RE
+.TP
+.PD 0
+.BI "\-W source=" program-text
+.TP
+.PD
+.BI \-\^\-source= program-text
+Use
+.I program-text
+as AWK program source code.
+This option allows the easy intermixing of library functions (used via the
+.B \-f
+and
+.B \-\^\-file
+options) with source code entered on the command line.
+It is intended primarily for medium to large size AWK programs used
+in shell scripts.
+.sp .5
+The
+.B "\-W source="
+form of this option uses the rest of the command line argument for
+.IR program-text ;
+no other options to
+.B \-W
+will be recognized in the same argument.
+.TP
+.PD 0
+.B "\-W version"
+.TP
+.PD
+.B \-\^\-version
+Print version information for this particular copy of
+.I gawk
+on the error output.
+This is useful mainly for knowing if the current copy of
+.I gawk
+on your system
+is up to date with respect to whatever the Free Software Foundation
+is distributing.
+.TP
+.B \-\^\-
+Signal the end of options. This is useful to allow further arguments to the
+AWK program itself to start with a ``\-''.
+This is mainly for consistency with the argument parsing convention used
+by most other \*(PX programs.
+.PP
+Any other options are flagged as illegal, but are otherwise ignored.
+.SH AWK PROGRAM EXECUTION
+.PP
+An AWK program consists of a sequence of pattern-action statements
+and optional function definitions.
+.RS
+.PP
+\fIpattern\fB { \fIaction statements\fB }\fR
+.br
+\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR
+.RE
+.PP
+.I Gawk
+first reads the program source from the
+.IR program-file (s)
+if specified, or from the first non-option argument on the command line.
+The
+.B \-f
+option may be used multiple times on the command line.
+.I Gawk
+will read the program text as if all the
+.IR program-file s
+had been concatenated together. This is useful for building libraries
+of AWK functions, without having to include them in each new AWK
+program that uses them. To use a library function in a file from a
+program typed in on the command line, specify
+.B /dev/tty
+as one of the
+.IR program-file s,
+type your program, and end it with a
+.B ^D
+(control-d).
+.PP
+The environment variable
+.B AWKPATH
+specifies a search path to use when finding source files named with
+the
+.B \-f
+option. If this variable does not exist, the default path is
+\fB".:/usr/lib/awk:/usr/local/lib/awk"\fR.
+If a file name given to the
+.B \-f
+option contains a ``/'' character, no path search is performed.
+.PP
+.I Gawk
+executes AWK programs in the following order.
+First,
+.I gawk
+compiles the program into an internal form.
+Next, all variable assignments specified via the
+.B \-v
+option are performed. Then,
+.I gawk
+executes the code in the
+.B BEGIN
+block(s) (if any),
+and then proceeds to read
+each file named in the
+.B ARGV
+array.
+If there are no files named on the command line,
+.I gawk
+reads the standard input.
+.PP
+If a filename on the command line has the form
+.IB var = val
+it is treated as a variable assignment. The variable
+.I var
+will be assigned the value
+.IR val .
+(This happens after any
+.B BEGIN
+block(s) have been run.)
+Command line variable assignment
+is most useful for dynamically assigning values to the variables
+AWK uses to control how input is broken into fields and records. It
+is also useful for controlling state if multiple passes are needed over
+a single data file.
+.PP
+If the value of a particular element of
+.B ARGV
+is empty (\fB""\fR),
+.I gawk
+skips over it.
+.PP
+For each line in the input,
+.I gawk
+tests to see if it matches any
+.I pattern
+in the AWK program.
+For each pattern that the line matches, the associated
+.I action
+is executed.
+The patterns are tested in the order they occur in the program.
+.PP
+Finally, after all the input is exhausted,
+.I gawk
+executes the code in the
+.B END
+block(s) (if any).
+.SH VARIABLES AND FIELDS
+AWK variables are dynamic; they come into existence when they are
+first used. Their values are either floating-point numbers or strings,
+or both,
+depending upon how they are used. AWK also has one-dimensional
+arrays; multiply dimensioned arrays may be simulated.
+Several pre-defined variables are set as a program
+runs; these will be described as needed and summarized below.
+.SS Fields
+.PP
+As each input line is read,
+.I gawk
+splits the line into
+.IR fields ,
+using the value of the
+.B FS
+variable as the field separator.
+If
+.B FS
+is a single character, fields are separated by that character.
+Otherwise,
+.B FS
+is expected to be a full regular expression.
+In the special case that
+.B FS
+is a single blank, fields are separated
+by runs of blanks and/or tabs.
+Note that the value of
+.B IGNORECASE
+(see below) will also affect how fields are split when
+.B FS
+is a regular expression.
+.PP
+If the
+.B FIELDWIDTHS
+variable is set to a space separated list of numbers, each field is
+expected to have fixed width, and
+.I gawk
+will split up the record using the specified widths. The value of
+.B FS
+is ignored.
+Assigning a new value to
+.B FS
+overrides the use of
+.BR FIELDWIDTHS ,
+and restores the default behavior.
+.PP
+Each field in the input line may be referenced by its position,
+.BR $1 ,
+.BR $2 ,
+and so on.
+.B $0
+is the whole line. The value of a field may be assigned to as well.
+Fields need not be referenced by constants:
+.RS
+.PP
+.ft B
+n = 5
+.br
+print $n
+.ft R
+.RE
+.PP
+prints the fifth field in the input line.
+The variable
+.B NF
+is set to the total number of fields in the input line.
+.PP
+References to non-existent fields (i.e. fields after
+.BR $NF )
+produce the null-string. However, assigning to a non-existent field
+(e.g.,
+.BR "$(NF+2) = 5" )
+will increase the value of
+.BR NF ,
+create any intervening fields with the null string as their value, and
+cause the value of
+.B $0
+to be recomputed, with the fields being separated by the value of
+.BR OFS .
+.SS Built-in Variables
+.PP
+AWK's built-in variables are:
+.PP
+.TP \w'\fBFIELDWIDTHS\fR'u+1n
+.B ARGC
+The number of command line arguments (does not include options to
+.IR gawk ,
+or the program source).
+.TP
+.B ARGIND
+The index in
+.B ARGV
+of the current file being processed.
+.TP
+.B ARGV
+Array of command line arguments. The array is indexed from
+0 to
+.B ARGC
+\- 1.
+Dynamically changing the contents of
+.B ARGV
+can control the files used for data.
+.TP
+.B CONVFMT
+The conversion format for numbers, \fB"%.6g"\fR, by default.
+.TP
+.B ENVIRON
+An array containing the values of the current environment.
+The array is indexed by the environment variables, each element being
+the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be
+.BR /u/arnold ).
+Changing this array does not affect the environment seen by programs which
+.I gawk
+spawns via redirection or the
+.B system()
+function.
+(This may change in a future version of
+.IR gawk .)
+.\" but don't hold your breath...
+.TP
+.B ERRNO
+If a system error occurs either doing a redirection for
+.BR getline ,
+during a read for
+.BR getline ,
+or during a
+.BR close ,
+then
+.B ERRNO
+will contain
+a string describing the error.
+.TP
+.B FIELDWIDTHS
+A white-space separated list of fieldwidths. When set,
+.I gawk
+parses the input into fields of fixed width, instead of using the
+value of the
+.B FS
+variable as the field separator.
+The fixed field width facility is still experimental; expect the
+semantics to change as
+.I gawk
+evolves over time.
+.TP
+.B FILENAME
+The name of the current input file.
+If no files are specified on the command line, the value of
+.B FILENAME
+is ``\-''.
+.TP
+.B FNR
+The input record number in the current input file.
+.TP
+.B FS
+The input field separator, a blank by default.
+.TP
+.B IGNORECASE
+Controls the case-sensitivity of all regular expression operations. If
+.B IGNORECASE
+has a non-zero value, then pattern matching in rules,
+field splitting with
+.BR FS ,
+regular expression
+matching with
+.B ~
+and
+.BR !~ ,
+and the
+.BR gsub() ,
+.BR index() ,
+.BR match() ,
+.BR split() ,
+and
+.B sub()
+pre-defined functions will all ignore case when doing regular expression
+operations. Thus, if
+.B IGNORECASE
+is not equal to zero,
+.B /aB/
+matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP,
+and \fB"AB"\fP.
+As with all AWK variables, the initial value of
+.B IGNORECASE
+is zero, so all regular expression operations are normally case-sensitive.
+.TP
+.B NF
+The number of fields in the current input record.
+.TP
+.B NR
+The total number of input records seen so far.
+.TP
+.B OFMT
+The output format for numbers, \fB"%.6g"\fR, by default.
+.TP
+.B OFS
+The output field separator, a blank by default.
+.TP
+.B ORS
+The output record separator, by default a newline.
+.TP
+.B RS
+The input record separator, by default a newline.
+.B RS
+is exceptional in that only the first character of its string
+value is used for separating records.
+(This will probably change in a future release of
+.IR gawk .)
+If
+.B RS
+is set to the null string, then records are separated by
+blank lines.
+When
+.B RS
+is set to the null string, then the newline character always acts as
+a field separator, in addition to whatever value
+.B FS
+may have.
+.TP
+.B RSTART
+The index of the first character matched by
+.BR match() ;
+0 if no match.
+.TP
+.B RLENGTH
+The length of the string matched by
+.BR match() ;
+\-1 if no match.
+.TP
+.B SUBSEP
+The character used to separate multiple subscripts in array
+elements, by default \fB"\e034"\fR.
+.SS Arrays
+.PP
+Arrays are subscripted with an expression between square brackets
+.RB ( [ " and " ] ).
+If the expression is an expression list
+.RI ( expr ", " expr " ...)"
+then the array subscript is a string consisting of the
+concatenation of the (string) value of each expression,
+separated by the value of the
+.B SUBSEP
+variable.
+This facility is used to simulate multiply dimensioned
+arrays. For example:
+.PP
+.RS
+.ft B
+i = "A" ;\^ j = "B" ;\^ k = "C"
+.br
+x[i, j, k] = "hello, world\en"
+.ft R
+.RE
+.PP
+assigns the string \fB"hello, world\en"\fR to the element of the array
+.B x
+which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in AWK
+are associative, i.e. indexed by string values.
+.PP
+The special operator
+.B in
+may be used in an
+.B if
+or
+.B while
+statement to see if an array has an index consisting of a particular
+value.
+.PP
+.RS
+.ft B
+.nf
+if (val in array)
+ print array[val]
+.fi
+.ft
+.RE
+.PP
+If the array has multiple subscripts, use
+.BR "(i, j) in array" .
+.PP
+The
+.B in
+construct may also be used in a
+.B for
+loop to iterate over all the elements of an array.
+.PP
+An element may be deleted from an array using the
+.B delete
+statement.
+.SS Variable Typing And Conversion
+.PP
+Variables and fields
+may be (floating point) numbers, or strings, or both. How the
+value of a variable is interpreted depends upon its context. If used in
+a numeric expression, it will be treated as a number, if used as a string
+it will be treated as a string.
+.PP
+To force a variable to be treated as a number, add 0 to it; to force it
+to be treated as a string, concatenate it with the null string.
+.PP
+When a string must be converted to a number, the conversion is accomplished
+using
+.IR atof (3).
+A number is converted to a string by using the value of
+.B CONVFMT
+as a format string for
+.IR sprintf (3),
+with the numeric value of the variable as the argument.
+However, even though all numbers in AWK are floating-point,
+integral values are
+.I always
+converted as integers. Thus, given
+.PP
+.RS
+.ft B
+.nf
+CONVFMT = "%2.2f"
+a = 12
+b = a ""
+.fi
+.ft R
+.RE
+.PP
+the variable
+.B b
+has a value of \fB"12"\fR and not \fB"12.00"\fR.
+.PP
+.I Gawk
+performs comparisons as follows:
+If two variables are numeric, they are compared numerically.
+If one value is numeric and the other has a string value that is a
+``numeric string,'' then comparisons are also done numerically.
+Otherwise, the numeric value is converted to a string and a string
+comparison is performed.
+Two strings are compared, of course, as strings.
+According to the \*(PX standard, even if two strings are
+numeric strings, a numeric comparison is performed. However, this is
+clearly incorrect, and
+.I gawk
+does not do this.
+.PP
+Uninitialized variables have the numeric value 0 and the string value ""
+(the null, or empty, string).
+.SH PATTERNS AND ACTIONS
+AWK is a line oriented language. The pattern comes first, and then the
+action. Action statements are enclosed in
+.B {
+and
+.BR } .
+Either the pattern may be missing, or the action may be missing, but,
+of course, not both. If the pattern is missing, the action will be
+executed for every single line of input.
+A missing action is equivalent to
+.RS
+.PP
+.B "{ print }"
+.RE
+.PP
+which prints the entire line.
+.PP
+Comments begin with the ``#'' character, and continue until the
+end of the line.
+Blank lines may be used to separate statements.
+Normally, a statement ends with a newline; however, this is not the
+case for lines ending in
+a ``,'', ``{'', ``?'', ``:'', ``&&'', or ``||''.
+Lines ending in
+.B do
+or
+.B else
+also have their statements automatically continued on the following line.
+In other cases, a line can be continued by ending it with a ``\e'',
+in which case the newline will be ignored.
+.PP
+Multiple statements may
+be put on one line by separating them with a ``;''.
+This applies to both the statements within the action part of a
+pattern-action pair (the usual case),
+and to the pattern-action statements themselves.
+.SS Patterns
+AWK patterns may be one of the following:
+.PP
+.RS
+.nf
+.B BEGIN
+.B END
+.BI / "regular expression" /
+.I "relational expression"
+.IB pattern " && " pattern
+.IB pattern " || " pattern
+.IB pattern " ? " pattern " : " pattern
+.BI ( pattern )
+.BI ! " pattern"
+.IB pattern1 ", " pattern2
+.fi
+.RE
+.PP
+.B BEGIN
+and
+.B END
+are two special kinds of patterns which are not tested against
+the input.
+The action parts of all
+.B BEGIN
+patterns are merged as if all the statements had
+been written in a single
+.B BEGIN
+block. They are executed before any
+of the input is read. Similarly, all the
+.B END
+blocks are merged,
+and executed when all the input is exhausted (or when an
+.B exit
+statement is executed).
+.B BEGIN
+and
+.B END
+patterns cannot be combined with other patterns in pattern expressions.
+.B BEGIN
+and
+.B END
+patterns cannot have missing action parts.
+.PP
+For
+.BI / "regular expression" /
+patterns, the associated statement is executed for each input line that matches
+the regular expression.
+Regular expressions are the same as those in
+.IR egrep (1),
+and are summarized below.
+.PP
+A
+.I "relational expression"
+may use any of the operators defined below in the section on actions.
+These generally test whether certain fields match certain regular expressions.
+.PP
+The
+.BR && ,
+.BR || ,
+and
+.B !
+operators are logical AND, logical OR, and logical NOT, respectively, as in C.
+They do short-circuit evaluation, also as in C, and are used for combining
+more primitive pattern expressions. As in most languages, parentheses
+may be used to change the order of evaluation.
+.PP
+The
+.B ?\^:
+operator is like the same operator in C. If the first pattern is true
+then the pattern used for testing is the second pattern, otherwise it is
+the third. Only one of the second and third patterns is evaluated.
+.PP
+The
+.IB pattern1 ", " pattern2
+form of an expression is called a range pattern.
+It matches all input records starting with a line that matches
+.IR pattern1 ,
+and continuing until a record that matches
+.IR pattern2 ,
+inclusive. It does not combine with any other sort of pattern expression.
+.SS Regular Expressions
+Regular expressions are the extended kind found in
+.IR egrep .
+They are composed of characters as follows:
+.TP \w'\fB[^\fIabc...\fB]\fR'u+2n
+.I c
+matches the non-metacharacter
+.IR c .
+.TP
+.I \ec
+matches the literal character
+.IR c .
+.TP
+.B .
+matches any character except newline.
+.TP
+.B ^
+matches the beginning of a line or a string.
+.TP
+.B $
+matches the end of a line or a string.
+.TP
+.BI [ abc... ]
+character class, matches any of the characters
+.IR abc... .
+.TP
+.BI [^ abc... ]
+negated character class, matches any character except
+.I abc...
+and newline.
+.TP
+.IB r1 | r2
+alternation: matches either
+.I r1
+or
+.IR r2 .
+.TP
+.I r1r2
+concatenation: matches
+.IR r1 ,
+and then
+.IR r2 .
+.TP
+.IB r +
+matches one or more
+.IR r 's.
+.TP
+.IB r *
+matches zero or more
+.IR r 's.
+.TP
+.IB r ?
+matches zero or one
+.IR r 's.
+.TP
+.BI ( r )
+grouping: matches
+.IR r .
+.PP
+The escape sequences that are valid in string constants (see below)
+are also legal in regular expressions.
+.SS Actions
+Action statements are enclosed in braces,
+.B {
+and
+.BR } .
+Action statements consist of the usual assignment, conditional, and looping
+statements found in most languages. The operators, control statements,
+and input/output statements
+available are patterned after those in C.
+.SS Operators
+.PP
+The operators in AWK, in order of increasing precedence, are
+.PP
+.TP "\w'\fB*= /= %= ^=\fR'u+1n"
+.PD 0
+.B "= += \-="
+.TP
+.PD
+.B "*= /= %= ^="
+Assignment. Both absolute assignment
+.BI ( var " = " value )
+and operator-assignment (the other forms) are supported.
+.TP
+.B ?:
+The C conditional expression. This has the form
+.IB expr1 " ? " expr2 " : " expr3\c
+\&. If
+.I expr1
+is true, the value of the expression is
+.IR expr2 ,
+otherwise it is
+.IR expr3 .
+Only one of
+.I expr2
+and
+.I expr3
+is evaluated.
+.TP
+.B ||
+Logical OR.
+.TP
+.B &&
+Logical AND.
+.TP
+.B "~ !~"
+Regular expression match, negated match.
+.B NOTE:
+Do not use a constant regular expression
+.RB ( /foo/ )
+on the left-hand side of a
+.B ~
+or
+.BR !~ .
+Only use one on the right-hand side. The expression
+.BI "/foo/ ~ " exp
+has the same meaning as \fB(($0 ~ /foo/) ~ \fIexp\fB)\fR.
+This is usually
+.I not
+what was intended.
+.TP
+.PD 0
+.B "< >"
+.TP
+.PD 0
+.B "<= >="
+.TP
+.PD
+.B "!= =="
+The regular relational operators.
+.TP
+.I blank
+String concatenation.
+.TP
+.B "+ \-"
+Addition and subtraction.
+.TP
+.B "* / %"
+Multiplication, division, and modulus.
+.TP
+.B "+ \- !"
+Unary plus, unary minus, and logical negation.
+.TP
+.B ^
+Exponentiation (\fB**\fR may also be used, and \fB**=\fR for
+the assignment operator).
+.TP
+.B "++ \-\^\-"
+Increment and decrement, both prefix and postfix.
+.TP
+.B $
+Field reference.
+.SS Control Statements
+.PP
+The control statements are
+as follows:
+.PP
+.RS
+.nf
+\fBif (\fIcondition\fB) \fIstatement\fR [ \fBelse\fI statement \fR]
+\fBwhile (\fIcondition\fB) \fIstatement \fR
+\fBdo \fIstatement \fBwhile (\fIcondition\fB)\fR
+\fBfor (\fIexpr1\fB; \fIexpr2\fB; \fIexpr3\fB) \fIstatement\fR
+\fBfor (\fIvar \fBin\fI array\fB) \fIstatement\fR
+\fBbreak\fR
+\fBcontinue\fR
+\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR
+\fBexit\fR [ \fIexpression\fR ]
+\fB{ \fIstatements \fB}\fR
+.fi
+.RE
+.SS "I/O Statements"
+.PP
+The input/output statements are as follows:
+.PP
+.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n"
+.BI close( filename )
+Close file (or pipe, see below).
+.TP
+.B getline
+Set
+.B $0
+from next input record; set
+.BR NF ,
+.BR NR ,
+.BR FNR .
+.TP
+.BI "getline <" file
+Set
+.B $0
+from next record of
+.IR file ;
+set
+.BR NF .
+.TP
+.BI getline " var"
+Set
+.I var
+from next input record; set
+.BR NR ,
+.BR FNR .
+.TP
+.BI getline " var" " <" file
+Set
+.I var
+from next record of
+.IR file .
+.TP
+.B next
+Stop processing the current input record. The next input record
+is read and processing starts over with the first pattern in the
+AWK program. If the end of the input data is reached, the
+.B END
+block(s), if any, are executed.
+.TP
+.B "next file"
+Stop processing the current input file. The next input record read
+comes from the next input file.
+.B FILENAME
+is updated,
+.B FNR
+is reset to 1, and processing starts over with the first pattern in the
+AWK program. If the end of the input data is reached, the
+.B END
+block(s), if any, are executed.
+.TP
+.B print
+Prints the current record.
+.TP
+.BI print " expr-list"
+Prints expressions.
+.TP
+.BI print " expr-list" " >" file
+Prints expressions on
+.IR file .
+.TP
+.BI printf " fmt, expr-list"
+Format and print.
+.TP
+.BI printf " fmt, expr-list" " >" file
+Format and print on
+.IR file .
+.TP
+.BI system( cmd-line )
+Execute the command
+.IR cmd-line ,
+and return the exit status.
+(This may not be available on non-\*(PX systems.)
+.PP
+Other input/output redirections are also allowed. For
+.B print
+and
+.BR printf ,
+.BI >> file
+appends output to the
+.IR file ,
+while
+.BI | " command"
+writes on a pipe.
+In a similar fashion,
+.IB command " | getline"
+pipes into
+.BR getline .
+.BR Getline
+will return 0 on end of file, and \-1 on an error.
+.SS The \fIprintf\fP\^ Statement
+.PP
+The AWK versions of the
+.B printf
+statement and
+.B sprintf()
+function
+(see below)
+accept the following conversion specification formats:
+.TP
+.B %c
+An \s-1ASCII\s+1 character.
+If the argument used for
+.B %c
+is numeric, it is treated as a character and printed.
+Otherwise, the argument is assumed to be a string, and only the first
+character of that string is printed.
+.TP
+.B %d
+A decimal number (the integer part).
+.TP
+.B %i
+Just like
+.BR %d .
+.TP
+.B %e
+A floating point number of the form
+.BR [\-]d.ddddddE[+\^\-]dd .
+.TP
+.B %f
+A floating point number of the form
+.BR [\-]ddd.dddddd .
+.TP
+.B %g
+Use
+.B e
+or
+.B f
+conversion, whichever is shorter, with nonsignificant zeros suppressed.
+.TP
+.B %o
+An unsigned octal number (again, an integer).
+.TP
+.B %s
+A character string.
+.TP
+.B %x
+An unsigned hexadecimal number (an integer).
+.TP
+.B %X
+Like
+.BR %x ,
+but using
+.B ABCDEF
+instead of
+.BR abcdef .
+.TP
+.B %%
+A single
+.B %
+character; no argument is converted.
+.PP
+There are optional, additional parameters that may lie between the
+.B %
+and the control letter:
+.TP
+.B \-
+The expression should be left-justified within its field.
+.TP
+.I width
+The field should be padded to this width. If the number has a leading
+zero, then the field will be padded with zeros.
+Otherwise it is padded with blanks.
+.TP
+.BI . prec
+A number indicating the maximum width of strings or digits to the right
+of the decimal point.
+.PP
+The dynamic
+.I width
+and
+.I prec
+capabilities of the \*(AN C
+.B printf()
+routines are supported.
+A
+.B *
+in place of either the
+.B width
+or
+.B prec
+specifications will cause their values to be taken from
+the argument list to
+.B printf
+or
+.BR sprintf() .
+.SS Special File Names
+.PP
+When doing I/O redirection from either
+.B print
+or
+.B printf
+into a file,
+or via
+.B getline
+from a file,
+.I gawk
+recognizes certain special filenames internally. These filenames
+allow access to open file descriptors inherited from
+.IR gawk 's
+parent process (usually the shell).
+Other special filenames provide access to information about the running
+.B gawk
+process.
+The filenames are:
+.TP \w'\fB/dev/stdout\fR'u+1n
+.B /dev/pid
+Reading this file returns the process ID of the current process,
+in decimal, terminated with a newline.
+.TP
+.B /dev/ppid
+Reading this file returns the parent process ID of the current process,
+in decimal, terminated with a newline.
+.TP
+.B /dev/pgrpid
+Reading this file returns the process group ID of the current process,
+in decimal, terminated with a newline.
+.TP
+.B /dev/user
+Reading this file returns a single record terminated with a newline.
+The fields are separated with blanks.
+.B $1
+is the value of the
+.IR getuid (2)
+system call,
+.B $2
+is the value of the
+.IR geteuid (2)
+system call,
+.B $3
+is the value of the
+.IR getgid (2)
+system call, and
+.B $4
+is the value of the
+.IR getegid (2)
+system call.
+If there are any additional fields, they are the group IDs returned by
+.IR getgroups (2).
+(Multiple groups may not be supported on all systems.)
+.TP
+.B /dev/stdin
+The standard input.
+.TP
+.B /dev/stdout
+The standard output.
+.TP
+.B /dev/stderr
+The standard error output.
+.TP
+.BI /dev/fd/\^ n
+The file associated with the open file descriptor
+.IR n .
+.PP
+These are particularly useful for error messages. For example:
+.PP
+.RS
+.ft B
+print "You blew it!" > "/dev/stderr"
+.ft R
+.RE
+.PP
+whereas you would otherwise have to use
+.PP
+.RS
+.ft B
+print "You blew it!" | "cat 1>&2"
+.ft R
+.RE
+.PP
+These file names may also be used on the command line to name data files.
+.SS Numeric Functions
+.PP
+AWK has the following pre-defined arithmetic functions:
+.PP
+.TP \w'\fBsrand(\^\fIexpr\^\fB)\fR'u+1n
+.BI atan2( y , " x" )
+returns the arctangent of
+.I y/x
+in radians.
+.TP
+.BI cos( expr )
+returns the cosine of \fIexpr\fR, which is in radians.
+.TP
+.BI exp( expr )
+the exponential function.
+.TP
+.BI int( expr )
+truncates to integer.
+.TP
+.BI log( expr )
+the natural logarithm function.
+.TP
+.B rand()
+returns a random number between 0 and 1.
+.TP
+.BI sin( expr )
+returns the sine of \fIexpr\fR, which is in radians.
+.TP
+.BI sqrt( expr )
+the square root function.
+.TP
+.BI srand( expr )
+use
+.I expr
+as a new seed for the random number generator. If no
+.I expr
+is provided, the time of day will be used.
+The return value is the previous seed for the random
+number generator.
+.SS String Functions
+.PP
+AWK has the following pre-defined string functions:
+.PP
+.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
+\fBgsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR
+for each substring matching the regular expression
+.I r
+in the string
+.IR t ,
+substitute the string
+.IR s ,
+and return the number of substitutions.
+If
+.I t
+is not supplied, use
+.BR $0 .
+.TP
+.BI index( s , " t" )
+returns the index of the string
+.I t
+in the string
+.IR s ,
+or 0 if
+.I t
+is not present.
+.TP
+.BI length( s )
+returns the length of the string
+.IR s ,
+or the length of
+.B $0
+if
+.I s
+is not supplied.
+.TP
+.BI match( s , " r" )
+returns the position in
+.I s
+where the regular expression
+.I r
+occurs, or 0 if
+.I r
+is not present, and sets the values of
+.B RSTART
+and
+.BR RLENGTH .
+.TP
+\fBsplit(\fIs\fB, \fIa\fB, \fIr\fB)\fR
+splits the string
+.I s
+into the array
+.I a
+on the regular expression
+.IR r ,
+and returns the number of fields. If
+.I r
+is omitted,
+.B FS
+is used instead.
+.TP
+.BI sprintf( fmt , " expr-list" )
+prints
+.I expr-list
+according to
+.IR fmt ,
+and returns the resulting string.
+.TP
+\fBsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR
+just like
+.BR gsub() ,
+but only the first matching substring is replaced.
+.TP
+\fBsubstr(\fIs\fB, \fIi\fB, \fIn\fB)\fR
+returns the
+.IR n -character
+substring of
+.I s
+starting at
+.IR i .
+If
+.I n
+is omitted, the rest of
+.I s
+is used.
+.TP
+.BI tolower( str )
+returns a copy of the string
+.IR str ,
+with all the upper-case characters in
+.I str
+translated to their corresponding lower-case counterparts.
+Non-alphabetic characters are left unchanged.
+.TP
+.BI toupper( str )
+returns a copy of the string
+.IR str ,
+with all the lower-case characters in
+.I str
+translated to their corresponding upper-case counterparts.
+Non-alphabetic characters are left unchanged.
+.SS Time Functions
+.PP
+Since one of the primary uses of AWK programs is processing log files
+that contain time stamp information,
+.I gawk
+provides the following two functions for obtaining time stamps and
+formatting them.
+.PP
+.TP "\w'\fBsystime()\fR'u+1n"
+.B systime()
+returns the current time of day as the number of seconds since the Epoch
+(Midnight UTC, January 1, 1970 on \*(PX systems).
+.TP
+\fBstrftime(\fIformat\fB, \fItimestamp\fB)\fR
+formats
+.I timestamp
+according to the specification in
+.IR format .
+The
+.I timestamp
+should be of the same form as returned by
+.BR systime() .
+If
+.I timestamp
+is missing, the current time of day is used.
+See the specification for the
+.B strftime()
+function in \*(AN C for the format conversions that are
+guaranteed to be available.
+A public-domain version of
+.IR strftime (3)
+and a man page for it are shipped with
+.IR gawk ;
+if that version was used to build
+.IR gawk ,
+then all of the conversions described in that man page are available to
+.IR gawk .
+.SS String Constants
+.PP
+String constants in AWK are sequences of characters enclosed
+between double quotes (\fB"\fR). Within strings, certain
+.I "escape sequences"
+are recognized, as in C. These are:
+.PP
+.TP \w'\fB\e\^\fIddd\fR'u+1n
+.B \e\e
+A literal backslash.
+.TP
+.B \ea
+The ``alert'' character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character.
+.TP
+.B \eb
+backspace.
+.TP
+.B \ef
+form-feed.
+.TP
+.B \en
+new line.
+.TP
+.B \er
+carriage return.
+.TP
+.B \et
+horizontal tab.
+.TP
+.B \ev
+vertical tab.
+.TP
+.BI \ex "\^hex digits"
+The character represented by the string of hexadecimal digits following
+the
+.BR \ex .
+As in \*(AN C, all following hexadecimal digits are considered part of
+the escape sequence.
+(This feature should tell us something about language design by committee.)
+E.g., "\ex1B" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+.TP
+.BI \e ddd
+The character represented by the 1-, 2-, or 3-digit sequence of octal
+digits. E.g. "\e033" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+.TP
+.BI \e c
+The literal character
+.IR c\^ .
+.PP
+The escape sequences may also be used inside constant regular expressions
+(e.g.,
+.B "/[\ \et\ef\en\er\ev]/"
+matches whitespace characters).
+.SH FUNCTIONS
+Functions in AWK are defined as follows:
+.PP
+.RS
+\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR
+.RE
+.PP
+Functions are executed when called from within the action parts of regular
+pattern-action statements. Actual parameters supplied in the function
+call are used to instantiate the formal parameters declared in the function.
+Arrays are passed by reference, other variables are passed by value.
+.PP
+Since functions were not originally part of the AWK language, the provision
+for local variables is rather clumsy: They are declared as extra parameters
+in the parameter list. The convention is to separate local variables from
+real parameters by extra spaces in the parameter list. For example:
+.PP
+.RS
+.ft B
+.nf
+function f(p, q, a, b) { # a & b are local
+ ..... }
+
+/abc/ { ... ; f(1, 2) ; ... }
+.fi
+.ft R
+.RE
+.PP
+The left parenthesis in a function call is required
+to immediately follow the function name,
+without any intervening white space.
+This is to avoid a syntactic ambiguity with the concatenation operator.
+This restriction does not apply to the built-in functions listed above.
+.PP
+Functions may call each other and may be recursive.
+Function parameters used as local variables are initialized
+to the null string and the number zero upon function invocation.
+.PP
+The word
+.B func
+may be used in place of
+.BR function .
+.SH EXAMPLES
+.nf
+Print and sort the login names of all users:
+
+.ft B
+ BEGIN { FS = ":" }
+ { print $1 | "sort" }
+
+.ft R
+Count lines in a file:
+
+.ft B
+ { nlines++ }
+ END { print nlines }
+
+.ft R
+Precede each line by its number in the file:
+
+.ft B
+ { print FNR, $0 }
+
+.ft R
+Concatenate and line number (a variation on a theme):
+
+.ft B
+ { print NR, $0 }
+.ft R
+.fi
+.SH SEE ALSO
+.IR egrep (1)
+.PP
+.IR "The AWK Programming Language" ,
+Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
+Addison-Wesley, 1988. ISBN 0-201-07981-X.
+.PP
+.IR "The GAWK Manual" ,
+Edition 0.15, published by the Free Software Foundation, 1993.
+.SH POSIX COMPATIBILITY
+A primary goal for
+.I gawk
+is compatibility with the \*(PX standard, as well as with the
+latest version of \*(UX
+.IR awk .
+To this end,
+.I gawk
+incorporates the following user visible
+features which are not described in the AWK book,
+but are part of
+.I awk
+in System V Release 4, and are in the \*(PX standard.
+.PP
+The
+.B \-v
+option for assigning variables before program execution starts is new.
+The book indicates that command line variable assignment happens when
+.I awk
+would otherwise open the argument as a file, which is after the
+.B BEGIN
+block is executed. However, in earlier implementations, when such an
+assignment appeared before any file names, the assignment would happen
+.I before
+the
+.B BEGIN
+block was run. Applications came to depend on this ``feature.''
+When
+.I awk
+was changed to match its documentation, this option was added to
+accommodate applications that depended upon the old behavior.
+(This feature was agreed upon by both the AT&T and GNU developers.)
+.PP
+The
+.B \-W
+option for implementation specific features is from the \*(PX standard.
+.PP
+When processing arguments,
+.I gawk
+uses the special option ``\fB\-\^\-\fP'' to signal the end of
+arguments, and warns about, but otherwise ignores, undefined options.
+.PP
+The AWK book does not define the return value of
+.BR srand() .
+The System V Release 4 version of \*(UX
+.I awk
+(and the \*(PX standard)
+has it return the seed it was using, to allow keeping track
+of random number sequences. Therefore
+.B srand()
+in
+.I gawk
+also returns its current seed.
+.PP
+Other new features are:
+The use of multiple
+.B \-f
+options (from MKS
+.IR awk );
+the
+.B ENVIRON
+array; the
+.BR \ea ,
+and
+.BR \ev
+escape sequences (done originally in
+.I gawk
+and fed back into AT&T's); the
+.B tolower()
+and
+.B toupper()
+built-in functions (from AT&T); and the \*(AN C conversion specifications in
+.B printf
+(done first in AT&T's version).
+.SH GNU EXTENSIONS
+.I Gawk
+has some extensions to \*(PX
+.IR awk .
+They are described in this section. All the extensions described here
+can be disabled by
+invoking
+.I gawk
+with the
+.B "\-W compat"
+option.
+.PP
+The following features of
+.I gawk
+are not available in
+\*(PX
+.IR awk .
+.RS
+.TP \w'\(bu'u+1n
+\(bu
+The
+.B \ex
+escape sequence.
+.TP
+\(bu
+The
+.B systime()
+and
+.B strftime()
+functions.
+.TP
+\(bu
+The special file names available for I/O redirection are not recognized.
+.TP
+\(bu
+The
+.B ARGIND
+and
+.B ERRNO
+variables are not special.
+.TP
+\(bu
+The
+.B IGNORECASE
+variable and its side-effects are not available.
+.TP
+\(bu
+The
+.B FIELDWIDTHS
+variable and fixed width field splitting.
+.TP
+\(bu
+No path search is performed for files named via the
+.B \-f
+option. Therefore the
+.B AWKPATH
+environment variable is not special.
+.TP
+\(bu
+The use of
+.B "next file"
+to abandon processing of the current input file.
+.RE
+.PP
+The AWK book does not define the return value of the
+.B close()
+function.
+.IR Gawk\^ 's
+.B close()
+returns the value from
+.IR fclose (3),
+or
+.IR pclose (3),
+when closing a file or pipe, respectively.
+.PP
+When
+.I gawk
+is invoked with the
+.B "\-W compat"
+option,
+if the
+.I fs
+argument to the
+.B \-F
+option is ``t'', then
+.B FS
+will be set to the tab character.
+Since this is a rather ugly special case, it is not the default behavior.
+This behavior also does not occur if
+.B \-Wposix
+has been specified.
+.ig
+.PP
+If
+.I gawk
+was compiled for debugging, it will
+accept the following additional options:
+.TP
+.PD 0
+.B \-Wparsedebug
+.TP
+.PD
+.B \-\^\-parsedebug
+Turn on
+.IR yacc (1)
+or
+.IR bison (1)
+debugging output during program parsing.
+This option should only be of interest to the
+.I gawk
+maintainers, and may not even be compiled into
+.IR gawk .
+..
+.SH HISTORICAL FEATURES
+There are two features of historical AWK implementations that
+.I gawk
+supports.
+First, it is possible to call the
+.B length()
+built-in function not only with no argument, but even without parentheses!
+Thus,
+.RS
+.PP
+.ft B
+a = length
+.ft R
+.RE
+.PP
+is the same as either of
+.RS
+.PP
+.ft B
+a = length()
+.br
+a = length($0)
+.ft R
+.RE
+.PP
+This feature is marked as ``deprecated'' in the \*(PX standard, and
+.I gawk
+will issue a warning about its use if
+.B \-Wlint
+is specified on the command line.
+.PP
+The other feature is the use of the
+.B continue
+statement outside the body of a
+.BR while ,
+.BR for ,
+or
+.B do
+loop. Traditional AWK implementations have treated such usage as
+equivalent to the
+.B next
+statement.
+.I Gawk
+will support this usage if
+.B \-Wposix
+has not been specified.
+.SH BUGS
+The
+.B \-F
+option is not necessary given the command line variable assignment feature;
+it remains only for backwards compatibility.
+.PP
+If your system actually has support for
+.B /dev/fd
+and the associated
+.BR /dev/stdin ,
+.BR /dev/stdout ,
+and
+.B /dev/stderr
+files, you may get different output from
+.I gawk
+than you would get on a system without those files. When
+.I gawk
+interprets these files internally, it synchronizes output to the standard
+output with output to
+.BR /dev/stdout ,
+while on a system with those files, the output is actually to different
+open files.
+Caveat Emptor.
+.SH VERSION INFORMATION
+This man page documents
+.IR gawk ,
+version 2.15.
+.PP
+Starting with the 2.15 version of
+.IR gawk ,
+the
+.BR \-c ,
+.BR \-V ,
+.BR \-C ,
+.ig
+.BR \-D ,
+..
+.BR \-a ,
+and
+.B \-e
+options of the 2.11 version are no longer recognized.
+.SH AUTHORS
+The original version of \*(UX
+.I awk
+was designed and implemented by Alfred Aho,
+Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan
+continues to maintain and enhance it.
+.PP
+Paul Rubin and Jay Fenlason,
+of the Free Software Foundation, wrote
+.IR gawk ,
+to be compatible with the original version of
+.I awk
+distributed in Seventh Edition \*(UX.
+John Woods contributed a number of bug fixes.
+David Trueman, with contributions
+from Arnold Robbins, made
+.I gawk
+compatible with the new version of \*(UX
+.IR awk .
+.PP
+The initial DOS port was done by Conrad Kwok and Scott Garfinkle.
+Scott Deifik is the current DOS maintainer. Pat Rankin did the
+port to VMS, and Michal Jaegermann did the port to the Atari ST.
+.SH ACKNOWLEDGEMENTS
+Brian Kernighan of Bell Labs
+provided valuable assistance during testing and debugging.
+We thank him.
diff --git a/gnu/usr.bin/awk/awk.h b/gnu/usr.bin/awk/awk.h
new file mode 100644
index 000000000000..ca3997f11d4b
--- /dev/null
+++ b/gnu/usr.bin/awk/awk.h
@@ -0,0 +1,763 @@
+/*
+ * awk.h -- Definitions for gawk.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* ------------------------------ Includes ------------------------------ */
+#include <stdio.h>
+#include <limits.h>
+#include <ctype.h>
+#include <setjmp.h>
+#include <varargs.h>
+#include <time.h>
+#include <errno.h>
+#if !defined(errno) && !defined(MSDOS)
+extern int errno;
+#endif
+#ifdef __GNU_LIBRARY__
+#ifndef linux
+#include <signum.h>
+#endif
+#endif
+
+/* ----------------- System dependencies (with more includes) -----------*/
+
+#if !defined(VMS) || (!defined(VAXC) && !defined(__DECC))
+#include <sys/types.h>
+#include <sys/stat.h>
+#else /* VMS w/ VAXC or DECC */
+#include <types.h>
+#include <stat.h>
+#include <file.h> /* avoid <fcntl.h> in io.c */
+#endif
+
+#include <signal.h>
+
+#include "config.h"
+
+#ifdef __STDC__
+#define P(s) s
+#define MALLOC_ARG_T size_t
+#else
+#define P(s) ()
+#define MALLOC_ARG_T unsigned
+#define volatile
+#define const
+#endif
+
+#ifndef SIGTYPE
+#define SIGTYPE void
+#endif
+
+#ifdef SIZE_T_MISSING
+typedef unsigned int size_t;
+#endif
+
+#ifndef SZTC
+#define SZTC
+#define INTC
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#include <string.h>
+#ifdef NeXT
+#include <libc.h>
+#undef atof
+#else
+#if defined(atarist) || defined(VMS)
+#include <unixlib.h>
+#else /* atarist || VMS */
+#ifndef MSDOS
+#include <unistd.h>
+#endif /* MSDOS */
+#endif /* atarist || VMS */
+#endif /* Next */
+#else /* STDC_HEADERS */
+#include "protos.h"
+#endif /* STDC_HEADERS */
+
+#if defined(ultrix) && !defined(Ultrix41)
+extern char * getenv P((char *name));
+extern double atof P((char *s));
+#endif
+
+#ifndef __GNUC__
+#ifdef sparc
+/* nasty nasty SunOS-ism */
+#include <alloca.h>
+#ifdef lint
+extern char *alloca();
+#endif
+#else /* not sparc */
+#if !defined(alloca) && !defined(ALLOCA_PROTO)
+extern char *alloca();
+#endif
+#endif /* sparc */
+#endif /* __GNUC__ */
+
+#ifdef HAVE_UNDERSCORE_SETJMP
+/* nasty nasty Berkeley-ism */
+#define setjmp _setjmp
+#define longjmp _longjmp
+#endif
+
+/*
+ * if you don't have vprintf, try this and cross your fingers.
+ */
+#if defined(VPRINTF_MISSING)
+#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp))
+#endif
+
+#ifdef VMS
+/* some macros to redirect to code in vms/vms_misc.c */
+#define exit vms_exit
+#define open vms_open
+#define strerror vms_strerror
+#define strdup vms_strdup
+extern void exit P((int));
+extern int open P((const char *,int,...));
+extern char *strerror P((int));
+extern char *strdup P((const char *str));
+extern int vms_devopen P((const char *,int));
+# ifndef NO_TTY_FWRITE
+#define fwrite tty_fwrite
+#define fclose tty_fclose
+extern size_t fwrite P((const void *,size_t,size_t,FILE *));
+extern int fclose P((FILE *));
+# endif
+extern FILE *popen P((const char *,const char *));
+extern int pclose P((FILE *));
+extern void vms_arg_fixup P((int *,char ***));
+/* some things not in STDC_HEADERS */
+extern int gnu_strftime P((char *,size_t,const char *,const struct tm *));
+extern int unlink P((const char *));
+extern int getopt P((int,char **,char *));
+extern int isatty P((int));
+#ifndef fileno
+extern int fileno P((FILE *));
+#endif
+extern int close(), dup(), dup2(), fstat(), read(), stat();
+#endif /*VMS*/
+
+#ifdef MSDOS
+#include <io.h>
+extern FILE *popen P((char *, char *));
+extern int pclose P((FILE *));
+#endif
+
+#define GNU_REGEX
+#ifdef GNU_REGEX
+#include "regex.h"
+#include "dfa.h"
+typedef struct Regexp {
+ struct re_pattern_buffer pat;
+ struct re_registers regs;
+ struct regexp dfareg;
+ int dfa;
+} Regexp;
+#define RESTART(rp,s) (rp)->regs.start[0]
+#define REEND(rp,s) (rp)->regs.end[0]
+#else /* GNU_REGEX */
+#endif /* GNU_REGEX */
+
+#ifdef atarist
+#define read _text_read /* we do not want all these CR's to mess our input */
+extern int _text_read (int, char *, int);
+#endif
+
+#ifndef DEFPATH
+#define DEFPATH ".:/usr/local/lib/awk:/usr/lib/awk"
+#endif
+
+#ifndef ENVSEP
+#define ENVSEP ':'
+#endif
+
+/* ------------------ Constants, Structures, Typedefs ------------------ */
+#define AWKNUM double
+
+typedef enum {
+ /* illegal entry == 0 */
+ Node_illegal,
+
+ /* binary operators lnode and rnode are the expressions to work on */
+ Node_times,
+ Node_quotient,
+ Node_mod,
+ Node_plus,
+ Node_minus,
+ Node_cond_pair, /* conditional pair (see Node_line_range) */
+ Node_subscript,
+ Node_concat,
+ Node_exp,
+
+ /* unary operators subnode is the expression to work on */
+/*10*/ Node_preincrement,
+ Node_predecrement,
+ Node_postincrement,
+ Node_postdecrement,
+ Node_unary_minus,
+ Node_field_spec,
+
+ /* assignments lnode is the var to assign to, rnode is the exp */
+ Node_assign,
+ Node_assign_times,
+ Node_assign_quotient,
+ Node_assign_mod,
+/*20*/ Node_assign_plus,
+ Node_assign_minus,
+ Node_assign_exp,
+
+ /* boolean binaries lnode and rnode are expressions */
+ Node_and,
+ Node_or,
+
+ /* binary relationals compares lnode and rnode */
+ Node_equal,
+ Node_notequal,
+ Node_less,
+ Node_greater,
+ Node_leq,
+/*30*/ Node_geq,
+ Node_match,
+ Node_nomatch,
+
+ /* unary relationals works on subnode */
+ Node_not,
+
+ /* program structures */
+ Node_rule_list, /* lnode is a rule, rnode is rest of list */
+ Node_rule_node, /* lnode is pattern, rnode is statement */
+ Node_statement_list, /* lnode is statement, rnode is more list */
+ Node_if_branches, /* lnode is to run on true, rnode on false */
+ Node_expression_list, /* lnode is an exp, rnode is more list */
+ Node_param_list, /* lnode is a variable, rnode is more list */
+
+ /* keywords */
+/*40*/ Node_K_if, /* lnode is conditional, rnode is if_branches */
+ Node_K_while, /* lnode is conditional, rnode is stuff to run */
+ Node_K_for, /* lnode is for_struct, rnode is stuff to run */
+ Node_K_arrayfor, /* lnode is for_struct, rnode is stuff to run */
+ Node_K_break, /* no subs */
+ Node_K_continue, /* no subs */
+ Node_K_print, /* lnode is exp_list, rnode is redirect */
+ Node_K_printf, /* lnode is exp_list, rnode is redirect */
+ Node_K_next, /* no subs */
+ Node_K_exit, /* subnode is return value, or NULL */
+/*50*/ Node_K_do, /* lnode is conditional, rnode stuff to run */
+ Node_K_return,
+ Node_K_delete,
+ Node_K_getline,
+ Node_K_function, /* lnode is statement list, rnode is params */
+
+ /* I/O redirection for print statements */
+ Node_redirect_output, /* subnode is where to redirect */
+ Node_redirect_append, /* subnode is where to redirect */
+ Node_redirect_pipe, /* subnode is where to redirect */
+ Node_redirect_pipein, /* subnode is where to redirect */
+ Node_redirect_input, /* subnode is where to redirect */
+
+ /* Variables */
+/*60*/ Node_var, /* rnode is value, lnode is array stuff */
+ Node_var_array, /* array is ptr to elements, asize num of
+ * eles */
+ Node_val, /* node is a value - type in flags */
+
+ /* Builtins subnode is explist to work on, proc is func to call */
+ Node_builtin,
+
+ /*
+ * pattern: conditional ',' conditional ; lnode of Node_line_range
+ * is the two conditionals (Node_cond_pair), other word (rnode place)
+ * is a flag indicating whether or not this range has been entered.
+ */
+ Node_line_range,
+
+ /*
+ * boolean test of membership in array lnode is string-valued
+ * expression rnode is array name
+ */
+ Node_in_array,
+
+ Node_func, /* lnode is param. list, rnode is body */
+ Node_func_call, /* lnode is name, rnode is argument list */
+
+ Node_cond_exp, /* lnode is conditional, rnode is if_branches */
+ Node_regex,
+/*70*/ Node_hashnode,
+ Node_ahash,
+ Node_NF,
+ Node_NR,
+ Node_FNR,
+ Node_FS,
+ Node_RS,
+ Node_FIELDWIDTHS,
+ Node_IGNORECASE,
+ Node_OFS,
+ Node_ORS,
+ Node_OFMT,
+ Node_CONVFMT,
+ Node_K_nextfile /* built by the parser for `next file'; no subs */
+} NODETYPE;
+
+/*
+ * NOTE - this struct is rather kludgey -- it is packed to minimize
+ * space usage, at the expense of cleanliness. Alter at own risk.
+ */
+typedef struct exp_node {
+ union {
+ struct {
+ union {
+ struct exp_node *lptr;
+ char *param_name;
+ } l;
+ union {
+ struct exp_node *rptr;
+ struct exp_node *(*pptr) ();
+ Regexp *preg;
+ struct for_loop_header *hd;
+ struct exp_node **av;
+ int r_ent; /* range entered */
+ } r;
+ union {
+ char *name;
+ struct exp_node *extra;
+ } x;
+ short number;
+ unsigned char reflags;
+# define CASE 1
+# define CONST 2
+# define FS_DFLT 4
+ } nodep;
+ struct {
+ AWKNUM fltnum; /* this is here for optimal packing of
+ * the structure on many machines
+ */
+ char *sp;
+ size_t slen;
+ unsigned char sref;
+ char idx;
+ } val;
+ struct {
+ struct exp_node *next;
+ char *name;
+ int length;
+ struct exp_node *value;
+ } hash;
+#define hnext sub.hash.next
+#define hname sub.hash.name
+#define hlength sub.hash.length
+#define hvalue sub.hash.value
+ struct {
+ struct exp_node *next;
+ struct exp_node *name;
+ struct exp_node *value;
+ } ahash;
+#define ahnext sub.ahash.next
+#define ahname sub.ahash.name
+#define ahvalue sub.ahash.value
+ } sub;
+ NODETYPE type;
+ unsigned short flags;
+# define MALLOC 1 /* can be free'd */
+# define TEMP 2 /* should be free'd */
+# define PERM 4 /* can't be free'd */
+# define STRING 8 /* assigned as string */
+# define STR 16 /* string value is current */
+# define NUM 32 /* numeric value is current */
+# define NUMBER 64 /* assigned as number */
+# define MAYBE_NUM 128 /* user input: if NUMERIC then
+ * a NUMBER
+ */
+ char *vname; /* variable's name */
+} NODE;
+
+#define lnode sub.nodep.l.lptr
+#define nextp sub.nodep.l.lptr
+#define rnode sub.nodep.r.rptr
+#define source_file sub.nodep.x.name
+#define source_line sub.nodep.number
+#define param_cnt sub.nodep.number
+#define param sub.nodep.l.param_name
+
+#define subnode lnode
+#define proc sub.nodep.r.pptr
+
+#define re_reg sub.nodep.r.preg
+#define re_flags sub.nodep.reflags
+#define re_text lnode
+#define re_exp sub.nodep.x.extra
+#define re_cnt sub.nodep.number
+
+#define forsub lnode
+#define forloop rnode->sub.nodep.r.hd
+
+#define stptr sub.val.sp
+#define stlen sub.val.slen
+#define stref sub.val.sref
+#define stfmt sub.val.idx
+
+#define numbr sub.val.fltnum
+
+#define var_value lnode
+#define var_array sub.nodep.r.av
+
+#define condpair lnode
+#define triggered sub.nodep.r.r_ent
+
+#ifdef DONTDEF
+int primes[] = {31, 61, 127, 257, 509, 1021, 2053, 4099, 8191, 16381};
+#endif
+/* a quick profile suggests that the following is a good value */
+#define HASHSIZE 127
+
+typedef struct for_loop_header {
+ NODE *init;
+ NODE *cond;
+ NODE *incr;
+} FOR_LOOP_HEADER;
+
+/* for "for(iggy in foo) {" */
+struct search {
+ NODE **arr_ptr;
+ NODE **arr_end;
+ NODE *bucket;
+ NODE *retval;
+};
+
+/* for faster input, bypass stdio */
+typedef struct iobuf {
+ int fd;
+ char *buf;
+ char *off;
+ char *end;
+ size_t size; /* this will be determined by an fstat() call */
+ int cnt;
+ long secsiz;
+ int flag;
+# define IOP_IS_TTY 1
+# define IOP_IS_INTERNAL 2
+# define IOP_NO_FREE 4
+} IOBUF;
+
+typedef void (*Func_ptr)();
+
+/*
+ * structure used to dynamically maintain a linked-list of open files/pipes
+ */
+struct redirect {
+ unsigned int flag;
+# define RED_FILE 1
+# define RED_PIPE 2
+# define RED_READ 4
+# define RED_WRITE 8
+# define RED_APPEND 16
+# define RED_NOBUF 32
+# define RED_USED 64
+# define RED_EOF 128
+ char *value;
+ FILE *fp;
+ IOBUF *iop;
+ int pid;
+ int status;
+ struct redirect *prev;
+ struct redirect *next;
+};
+
+/* structure for our source, either a command line string or a source file */
+struct src {
+ enum srctype { CMDLINE = 1, SOURCEFILE } stype;
+ char *val;
+};
+
+/* longjmp return codes, must be nonzero */
+/* Continue means either for loop/while continue, or next input record */
+#define TAG_CONTINUE 1
+/* Break means either for/while break, or stop reading input */
+#define TAG_BREAK 2
+/* Return means return from a function call; leave value in ret_node */
+#define TAG_RETURN 3
+
+#define HUGE INT_MAX
+
+/* -------------------------- External variables -------------------------- */
+/* gawk builtin variables */
+extern int NF;
+extern int NR;
+extern int FNR;
+extern int IGNORECASE;
+extern char *RS;
+extern char *OFS;
+extern int OFSlen;
+extern char *ORS;
+extern int ORSlen;
+extern char *OFMT;
+extern char *CONVFMT;
+extern int CONVFMTidx;
+extern int OFMTidx;
+extern NODE *FS_node, *NF_node, *RS_node, *NR_node;
+extern NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node;
+extern NODE *CONVFMT_node;
+extern NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node;
+extern NODE *IGNORECASE_node;
+extern NODE *FIELDWIDTHS_node;
+
+extern NODE **stack_ptr;
+extern NODE *Nnull_string;
+extern NODE **fields_arr;
+extern int sourceline;
+extern char *source;
+extern NODE *expression_value;
+
+extern NODE *_t; /* used as temporary in tree_eval */
+
+extern const char *myname;
+
+extern NODE *nextfree;
+extern int field0_valid;
+extern int do_unix;
+extern int do_posix;
+extern int do_lint;
+extern int in_begin_rule;
+extern int in_end_rule;
+
+/* ------------------------- Pseudo-functions ------------------------- */
+
+#define is_identchar(c) (isalnum(c) || (c) == '_')
+
+
+#ifndef MPROF
+#define getnode(n) if (nextfree) n = nextfree, nextfree = nextfree->nextp;\
+ else n = more_nodes()
+#define freenode(n) ((n)->nextp = nextfree, nextfree = (n))
+#else
+#define getnode(n) emalloc(n, NODE *, sizeof(NODE), "getnode")
+#define freenode(n) free(n)
+#endif
+
+#ifdef DEBUG
+#define tree_eval(t) r_tree_eval(t)
+#else
+#define tree_eval(t) (_t = (t),(_t) == NULL ? Nnull_string : \
+ ((_t)->type == Node_val ? (_t) : \
+ ((_t)->type == Node_var ? (_t)->var_value : \
+ ((_t)->type == Node_param_list ? \
+ (stack_ptr[(_t)->param_cnt])->var_value : \
+ r_tree_eval((_t))))))
+#endif
+
+#define make_number(x) mk_number((x), (MALLOC|NUM|NUMBER))
+#define tmp_number(x) mk_number((x), (MALLOC|TEMP|NUM|NUMBER))
+
+#define free_temp(n) do {if ((n)->flags&TEMP) { unref(n); }} while (0)
+#define make_string(s,l) make_str_node((s), SZTC (l),0)
+#define SCAN 1
+#define ALREADY_MALLOCED 2
+
+#define cant_happen() fatal("internal error line %d, file: %s", \
+ __LINE__, __FILE__) /* no ';' here: the caller's own ';' ends it */
+
+#if defined(__STDC__) && !defined(NO_TOKEN_PASTING)
+#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), #var, strerror(errno)),0))
+#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\
+ (MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), #var, strerror(errno)),0))
+#else /* __STDC__ */
+#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), "var", strerror(errno)),0))
+#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\
+ (MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), "var", strerror(errno)),0))
+#endif /* __STDC__ */
+
+#ifdef DEBUG
+#define force_number r_force_number
+#define force_string r_force_string
+#else /* not DEBUG */
+#ifdef lint
+extern AWKNUM force_number();
+#endif
+#ifdef MSDOS
+extern double _msc51bug;
+#define force_number(n) (_msc51bug=(_t = (n),(_t->flags & NUM) ? _t->numbr : r_force_number(_t)))
+#else /* not MSDOS */
+#define force_number(n) (_t = (n),(_t->flags & NUM) ? _t->numbr : r_force_number(_t))
+#endif /* MSDOS */
+#define force_string(s) (_t = (s),(_t->flags & STR) ? _t : r_force_string(_t))
+#endif /* not DEBUG */
+
+#define STREQ(a,b) (*(a) == *(b) && strcmp((a), (b)) == 0)
+#define STREQN(a,b,n) ((n)&& *(a)== *(b) && strncmp((a), (b), SZTC (n)) == 0)
+
+/* ------------- Function prototypes or defs (as appropriate) ------------- */
+
+/* array.c */
+extern NODE *concat_exp P((NODE *tree));
+extern void assoc_clear P((NODE *symbol));
+extern unsigned int hash P((char *s, int len));
+extern int in_array P((NODE *symbol, NODE *subs));
+extern NODE **assoc_lookup P((NODE *symbol, NODE *subs));
+extern void do_delete P((NODE *symbol, NODE *tree));
+extern void assoc_scan P((NODE *symbol, struct search *lookat));
+extern void assoc_next P((struct search *lookat));
+/* awk.tab.c */
+extern char *tokexpand P((void));
+extern char nextc P((void));
+extern NODE *node P((NODE *left, NODETYPE op, NODE *right));
+extern NODE *install P((char *name, NODE *value));
+extern NODE *lookup P((char *name));
+extern NODE *variable P((char *name, int can_free));
+extern int yyparse P((void));
+/* builtin.c */
+extern NODE *do_exp P((NODE *tree));
+extern NODE *do_index P((NODE *tree));
+extern NODE *do_int P((NODE *tree));
+extern NODE *do_length P((NODE *tree));
+extern NODE *do_log P((NODE *tree));
+extern NODE *do_sprintf P((NODE *tree));
+extern void do_printf P((NODE *tree));
+extern void print_simple P((NODE *tree, FILE *fp));
+extern NODE *do_sqrt P((NODE *tree));
+extern NODE *do_substr P((NODE *tree));
+extern NODE *do_strftime P((NODE *tree));
+extern NODE *do_systime P((NODE *tree));
+extern NODE *do_system P((NODE *tree));
+extern void do_print P((NODE *tree));
+extern NODE *do_tolower P((NODE *tree));
+extern NODE *do_toupper P((NODE *tree));
+extern NODE *do_atan2 P((NODE *tree));
+extern NODE *do_sin P((NODE *tree));
+extern NODE *do_cos P((NODE *tree));
+extern NODE *do_rand P((NODE *tree));
+extern NODE *do_srand P((NODE *tree));
+extern NODE *do_match P((NODE *tree));
+extern NODE *do_gsub P((NODE *tree));
+extern NODE *do_sub P((NODE *tree));
+/* eval.c */
+extern int interpret P((NODE *volatile tree));
+extern NODE *r_tree_eval P((NODE *tree));
+extern int cmp_nodes P((NODE *t1, NODE *t2));
+extern NODE **get_lhs P((NODE *ptr, Func_ptr *assign));
+extern void set_IGNORECASE P((void));
+void set_OFS P((void));
+void set_ORS P((void));
+void set_OFMT P((void));
+void set_CONVFMT P((void));
+/* field.c */
+extern void init_fields P((void));
+extern void set_record P((char *buf, int cnt, int freeold));
+extern void reset_record P((void));
+extern void set_NF P((void));
+extern NODE **get_field P((int num, Func_ptr *assign));
+extern NODE *do_split P((NODE *tree));
+extern void set_FS P((void));
+extern void set_RS P((void));
+extern void set_FIELDWIDTHS P((void));
+/* io.c */
+extern void set_FNR P((void));
+extern void set_NR P((void));
+extern void do_input P((void));
+extern struct redirect *redirect P((NODE *tree, int *errflg));
+extern NODE *do_close P((NODE *tree));
+extern int flush_io P((void));
+extern int close_io P((void));
+extern int devopen P((char *name, char *mode));
+extern int pathopen P((char *file));
+extern NODE *do_getline P((NODE *tree));
+extern void do_nextfile P((void));
+/* iop.c */
+extern int optimal_bufsize P((int fd));
+extern IOBUF *iop_alloc P((int fd));
+extern int get_a_record P((char **out, IOBUF *iop, int rs, int *errcode));
+/* main.c */
+extern int main P((int argc, char **argv));
+extern Regexp *mk_re_parse P((char *s, int ignorecase));
+extern void load_environ P((void));
+extern char *arg_assign P((char *arg));
+extern SIGTYPE catchsig P((int sig, int code));
+/* msg.c */
+#ifdef MSDOS
+extern void err P((char *s, char *emsg, char *va_list, ...));
+extern void msg P((char *va_alist, ...));
+extern void warning P((char *va_alist, ...));
+extern void fatal P((char *va_alist, ...));
+#else
+extern void err ();
+extern void msg ();
+extern void warning ();
+extern void fatal ();
+#endif
+/* node.c */
+extern AWKNUM r_force_number P((NODE *n));
+extern NODE *r_force_string P((NODE *s));
+extern NODE *dupnode P((NODE *n));
+extern NODE *mk_number P((AWKNUM x, unsigned int flags));
+extern NODE *make_str_node P((char *s, size_t len, int scan ));
+extern NODE *tmp_string P((char *s, size_t len ));
+extern NODE *more_nodes P((void));
+#ifdef DEBUG
+extern void freenode P((NODE *it));
+#endif
+extern void unref P((NODE *tmp));
+extern int parse_escape P((char **string_ptr));
+/* re.c */
+extern Regexp *make_regexp P((char *s, int len, int ignorecase, int dfa));
+extern int research P((Regexp *rp, char *str, int start, int len, int need_start));
+extern void refree P((Regexp *rp));
+extern void reg_error P((const char *s));
+extern Regexp *re_update P((NODE *t));
+extern void resyntax P((int syntax));
+extern void resetup P((void));
+
+/* strcase.c */
+extern int strcasecmp P((const char *s1, const char *s2));
+extern int strncasecmp P((const char *s1, const char *s2, register size_t n));
+
+#ifdef atarist
+/* atari/tmpnam.c */
+extern char *tmpnam P((char *buf));
+extern char *tempnam P((const char *path, const char *base));
+#endif
+
+/* Figure out what '\a' really is. */
+#ifdef __STDC__
+#define BELL '\a' /* sure makes life easy, don't it? */
+#else
+# if 'z' - 'a' == 25 /* ascii */
+# if 'a' != 97 /* machine is dumb enough to use mark parity */
+# define BELL '\207'
+# else
+# define BELL '\07'
+# endif
+# else
+# define BELL '\057'
+# endif
+#endif
+
+extern char casetable[]; /* for case-independent regexp matching */
diff --git a/gnu/usr.bin/awk/awk.y b/gnu/usr.bin/awk/awk.y
new file mode 100644
index 000000000000..6e87f1c449cc
--- /dev/null
+++ b/gnu/usr.bin/awk/awk.y
@@ -0,0 +1,1804 @@
+/*
+ * awk.y --- yacc/bison parser
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+%{
+#ifdef DEBUG
+#define YYDEBUG 12
+#endif
+
+#include "awk.h"
+
+static void yyerror (); /* va_alist */
+static char *get_src_buf P((void));
+static int yylex P((void));
+static NODE *node_common P((NODETYPE op));
+static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
+static NODE *mkrangenode P((NODE *cpair));
+static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
+static NODE *append_right P((NODE *list, NODE *new));
+static void func_install P((NODE *params, NODE *def));
+static void pop_var P((NODE *np, int freeit));
+static void pop_params P((NODE *params));
+static NODE *make_param P((char *name));
+static NODE *mk_rexp P((NODE *exp));
+
+static int want_assign; /* lexical scanning kludge */
+static int want_regexp; /* lexical scanning kludge */
+static int can_return; /* lexical scanning kludge */
+static int io_allowed = 1; /* lexical scanning kludge */
+static char *lexptr; /* pointer to next char during parsing */
+static char *lexend;
+static char *lexptr_begin; /* keep track of where we were for error msgs */
+static char *lexeme; /* beginning of lexeme for debugging */
+static char *thisline = NULL;
+#define YYDEBUG_LEXER_TEXT (lexeme)
+static int param_counter;
+static char *tokstart = NULL;
+static char *token = NULL;
+static char *tokend;
+
+NODE *variables[HASHSIZE];
+
+extern char *source;
+extern int sourceline;
+extern struct src *srcfiles;
+extern int numfiles;
+extern int errcount;
+extern NODE *begin_block;
+extern NODE *end_block;
+%}
+
+%union {
+ long lval;
+ AWKNUM fval;
+ NODE *nodeval;
+ NODETYPE nodetypeval;
+ char *sval;
+ NODE *(*ptrval)();
+}
+
+%type <nodeval> function_prologue function_body
+%type <nodeval> rexp exp start program rule simp_exp
+%type <nodeval> non_post_simp_exp
+%type <nodeval> pattern
+%type <nodeval> action variable param_list
+%type <nodeval> rexpression_list opt_rexpression_list
+%type <nodeval> expression_list opt_expression_list
+%type <nodeval> statements statement if_statement opt_param_list
+%type <nodeval> opt_exp opt_variable regexp
+%type <nodeval> input_redir output_redir
+%type <nodetypeval> print
+%type <sval> func_name
+%type <lval> lex_builtin
+
+%token <sval> FUNC_CALL NAME REGEXP
+%token <lval> ERROR
+%token <nodeval> YNUMBER YSTRING
+%token <nodetypeval> RELOP APPEND_OP
+%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP
+%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
+%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
+%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
+%token <nodetypeval> LEX_GETLINE
+%token <nodetypeval> LEX_IN
+%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
+%token <lval> LEX_BUILTIN LEX_LENGTH
+
+/* these are just yylval numbers */
+
+/* Lowest to highest */
+%right ASSIGNOP
+%right '?' ':'
+%left LEX_OR
+%left LEX_AND
+%left LEX_GETLINE
+%nonassoc LEX_IN
+%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
+%nonassoc MATCHOP
+%nonassoc RELOP '<' '>' '|' APPEND_OP
+%left CONCAT_OP
+%left YSTRING YNUMBER
+%left '+' '-'
+%left '*' '/' '%'
+%right '!' UNARY
+%right '^'
+%left INCREMENT DECREMENT
+%left '$'
+%left '(' ')'
+%%
+
+start
+ : opt_nls program opt_nls
+ { expression_value = $2; }
+ ;
+
+program
+ : rule
+ {
+ if ($1 != NULL)
+ $$ = $1;
+ else
+ $$ = NULL;
+ yyerrok;
+ }
+ | program rule
+ /* add the rule to the tail of list */
+ {
+ if ($2 == NULL)
+ $$ = $1;
+ else if ($1 == NULL)
+ $$ = $2;
+ else {
+ if ($1->type != Node_rule_list)
+ $1 = node($1, Node_rule_list,
+ (NODE*)NULL);
+ $$ = append_right ($1,
+ node($2, Node_rule_list,(NODE *) NULL));
+ }
+ yyerrok;
+ }
+ | error { $$ = NULL; }
+ | program error { $$ = NULL; }
+ ;
+
+rule
+ : LEX_BEGIN { io_allowed = 0; }
+ action
+ {
+ if (begin_block) {
+ if (begin_block->type != Node_rule_list)
+ begin_block = node(begin_block, Node_rule_list,
+ (NODE *)NULL);
+ (void) append_right (begin_block, node(
+ node((NODE *)NULL, Node_rule_node, $3),
+ Node_rule_list, (NODE *)NULL) );
+ } else
+ begin_block = node((NODE *)NULL, Node_rule_node, $3);
+ $$ = NULL;
+ io_allowed = 1;
+ yyerrok;
+ }
+ | LEX_END { io_allowed = 0; }
+ action
+ {
+ if (end_block) {
+ if (end_block->type != Node_rule_list)
+ end_block = node(end_block, Node_rule_list,
+ (NODE *)NULL);
+ (void) append_right (end_block, node(
+ node((NODE *)NULL, Node_rule_node, $3),
+ Node_rule_list, (NODE *)NULL));
+ } else
+ end_block = node((NODE *)NULL, Node_rule_node, $3);
+ $$ = NULL;
+ io_allowed = 1;
+ yyerrok;
+ }
+ | LEX_BEGIN statement_term
+ {
+ warning("BEGIN blocks must have an action part");
+ errcount++;
+ yyerrok;
+ }
+ | LEX_END statement_term
+ {
+ warning("END blocks must have an action part");
+ errcount++;
+ yyerrok;
+ }
+ | pattern action
+ { $$ = node ($1, Node_rule_node, $2); yyerrok; }
+ | action
+ { $$ = node ((NODE *)NULL, Node_rule_node, $1); yyerrok; }
+ | pattern statement_term
+ {
+ $$ = node ($1,
+ Node_rule_node,
+ node(node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL),
+ Node_K_print,
+ (NODE *) NULL));
+ yyerrok;
+ }
+ | function_prologue function_body
+ {
+ func_install($1, $2);
+ $$ = NULL;
+ yyerrok;
+ }
+ ;
+
+func_name
+ : NAME
+ { $$ = $1; }
+ | FUNC_CALL
+ { $$ = $1; }
+ | lex_builtin
+ {
+ yyerror("%s() is a built-in function, it cannot be redefined",
+ tokstart);
+ errcount++;
+ /* yyerrok; */
+ }
+ ;
+
+lex_builtin
+ : LEX_BUILTIN
+ | LEX_LENGTH
+ ;
+
+function_prologue
+ : LEX_FUNCTION
+ {
+ param_counter = 0;
+ }
+ func_name '(' opt_param_list r_paren opt_nls
+ {
+ $$ = append_right(make_param($3), $5);
+ can_return = 1;
+ }
+ ;
+
+function_body
+ : l_brace statements r_brace opt_semi
+ {
+ $$ = $2;
+ can_return = 0;
+ }
+ ;
+
+
+pattern
+ : exp
+ { $$ = $1; }
+ | exp comma exp
+ { $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); }
+ ;
+
+regexp
+ /*
+ * In this rule, want_regexp tells yylex that the next thing
+ * is a regexp so it should read up to the closing slash.
+ */
+ : '/'
+ { ++want_regexp; }
+ REGEXP '/'
+ {
+ NODE *n;
+ int len;
+
+ getnode(n);
+ n->type = Node_regex;
+ len = strlen($3);
+ n->re_exp = make_string($3, len);
+ n->re_reg = make_regexp($3, len, 0, 1);
+ n->re_text = NULL;
+ n->re_flags = CONST;
+ n->re_cnt = 1;
+ $$ = n;
+ }
+ ;
+
+action
+ : l_brace statements r_brace opt_semi opt_nls
+ { $$ = $2 ; }
+ | l_brace r_brace opt_semi opt_nls
+ { $$ = NULL; }
+ ;
+
+statements
+ : statement
+ { $$ = $1; }
+ | statements statement
+ {
+ if ($1 == NULL || $1->type != Node_statement_list)
+ $1 = node($1, Node_statement_list,(NODE *)NULL);
+ $$ = append_right($1,
+ node( $2, Node_statement_list, (NODE *)NULL));
+ yyerrok;
+ }
+ | error
+ { $$ = NULL; }
+ | statements error
+ { $$ = NULL; }
+ ;
+
+statement_term
+ : nls
+ | semi opt_nls
+ ;
+
+statement
+ : semi opt_nls
+ { $$ = NULL; }
+ | l_brace r_brace
+ { $$ = NULL; }
+ | l_brace statements r_brace
+ { $$ = $2; }
+ | if_statement
+ { $$ = $1; }
+ | LEX_WHILE '(' exp r_paren opt_nls statement
+ { $$ = node ($3, Node_K_while, $6); }
+ | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
+ { $$ = node ($6, Node_K_do, $3); }
+ | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
+ {
+ $$ = node ($8, Node_K_arrayfor, make_for_loop(variable($3,1),
+ (NODE *)NULL, variable($5,1)));
+ }
+ | LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement
+ {
+ $$ = node($10, Node_K_for, (NODE *)make_for_loop($3, $5, $7));
+ }
+ | LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement
+ {
+ $$ = node ($9, Node_K_for,
+ (NODE *)make_for_loop($3, (NODE *)NULL, $6));
+ }
+ | LEX_BREAK statement_term
+ /* for break, maybe we'll have to remember where to break to */
+ { $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); }
+ | LEX_CONTINUE statement_term
+ /* similarly */
+ { $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); }
+ | print '(' expression_list r_paren output_redir statement_term
+ { $$ = node ($3, $1, $5); }
+ | print opt_rexpression_list output_redir statement_term
+ {
+ if ($1 == Node_K_print && $2 == NULL)
+ $2 = node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL);
+
+ $$ = node ($2, $1, $3);
+ }
+ | LEX_NEXT opt_exp statement_term
+ { NODETYPE type;
+
+ if ($2 && $2 == lookup("file")) {
+ if (do_lint)
+ warning("`next file' is a gawk extension");
+ else if (do_unix || do_posix)
+ yyerror("`next file' is a gawk extension");
+ else if (! io_allowed)
+ yyerror("`next file' used in BEGIN or END action");
+ type = Node_K_nextfile;
+ } else {
+ if (! io_allowed)
+ yyerror("next used in BEGIN or END action");
+ type = Node_K_next;
+ }
+ $$ = node ((NODE *)NULL, type, (NODE *)NULL);
+ }
+ | LEX_EXIT opt_exp statement_term
+ { $$ = node ($2, Node_K_exit, (NODE *)NULL); }
+ | LEX_RETURN
+ { if (! can_return) yyerror("return used outside function context"); }
+ opt_exp statement_term
+ { $$ = node ($3, Node_K_return, (NODE *)NULL); }
+ | LEX_DELETE NAME '[' expression_list ']' statement_term
+ { $$ = node (variable($2,1), Node_K_delete, $4); }
+ | exp statement_term
+ { $$ = $1; }
+ ;
+
+print
+ : LEX_PRINT
+ { $$ = $1; }
+ | LEX_PRINTF
+ { $$ = $1; }
+ ;
+
+/*
+ * if_statement: builds a Node_K_if whose left child is the condition and
+ * whose right child is a Node_if_branches node holding the "then"
+ * statement on the left and the "else" statement (NULL when there is no
+ * else part) on the right.
+ */
+if_statement
+ : LEX_IF '(' exp r_paren opt_nls statement
+ {
+ $$ = node($3, Node_K_if,
+ node($6, Node_if_branches, (NODE *)NULL));
+ }
+ | LEX_IF '(' exp r_paren opt_nls statement
+ LEX_ELSE opt_nls statement
+ { $$ = node ($3, Node_K_if,
+ node ($6, Node_if_branches, $9)); }
+ ;
+
+/*
+ * nls: one or more NEWLINE tokens.  The first NEWLINE of a run clears
+ * want_assign; the recursive alternative has no action of its own.
+ */
+nls
+ : NEWLINE
+ { want_assign = 0; }
+ | nls NEWLINE
+ ;
+
+/* opt_nls: zero or more NEWLINE tokens; carries no semantic value. */
+opt_nls
+ : /* empty */
+ | nls
+ ;
+
+/*
+ * input_redir: optional "< expr" after getline.  Yields NULL when absent,
+ * otherwise a Node_redirect_input node whose left child is the source
+ * expression.
+ */
+input_redir
+ : /* empty */
+ { $$ = NULL; }
+ | '<' simp_exp
+ { $$ = node ($2, Node_redirect_input, (NODE *)NULL); }
+ ;
+
+/*
+ * output_redir: optional redirection after print/printf.  Yields NULL when
+ * absent; otherwise a node typed Node_redirect_output ('>'),
+ * Node_redirect_append (APPEND_OP) or Node_redirect_pipe ('|') whose left
+ * child is the target expression.
+ */
+output_redir
+ : /* empty */
+ { $$ = NULL; }
+ | '>' exp
+ { $$ = node ($2, Node_redirect_output, (NODE *)NULL); }
+ | APPEND_OP exp
+ { $$ = node ($2, Node_redirect_append, (NODE *)NULL); }
+ | '|' exp
+ { $$ = node ($2, Node_redirect_pipe, (NODE *)NULL); }
+ ;
+
+/* opt_param_list: a possibly empty formal parameter list (NULL if empty). */
+opt_param_list
+ : /* empty */
+ { $$ = NULL; }
+ | param_list
+ { $$ = $1; }
+ ;
+
+/*
+ * param_list: comma separated NAMEs.  Each name is turned into a parameter
+ * node by make_param() and chained onto the list with append_right().
+ * The three error alternatives yield NULL.
+ */
+param_list
+ : NAME
+ { $$ = make_param($1); }
+ | param_list comma NAME
+ { $$ = append_right($1, make_param($3)); yyerrok; }
+ | error
+ { $$ = NULL; }
+ | param_list error
+ { $$ = NULL; }
+ | param_list comma error
+ { $$ = NULL; }
+ ;
+
+/*
+ * opt_exp: optional expression, as in the clauses of a for loop or after
+ * exit/return.  Yields NULL when the expression is omitted.
+ */
+opt_exp
+ : /* empty */
+ { $$ = NULL; }
+ | exp
+ { $$ = $1; }
+ ;
+
+/* opt_rexpression_list: possibly empty list of rexp (NULL if empty). */
+opt_rexpression_list
+ : /* empty */
+ { $$ = NULL; }
+ | rexpression_list
+ { $$ = $1; }
+ ;
+
+/*
+ * rexpression_list: comma separated rexp items.  Every item is wrapped in
+ * its own Node_expression_list node (item on the left) and the wrappers
+ * are chained through rnode by append_right().  The error alternatives
+ * yield NULL.
+ */
+rexpression_list
+ : rexp
+ { $$ = node ($1, Node_expression_list, (NODE *)NULL); }
+ | rexpression_list comma rexp
+ {
+ $$ = append_right($1,
+ node( $3, Node_expression_list, (NODE *)NULL));
+ yyerrok;
+ }
+ | error
+ { $$ = NULL; }
+ | rexpression_list error
+ { $$ = NULL; }
+ | rexpression_list error rexp
+ { $$ = NULL; }
+ | rexpression_list comma error
+ { $$ = NULL; }
+ ;
+
+/* opt_expression_list: possibly empty list of exp (NULL if empty). */
+opt_expression_list
+ : /* empty */
+ { $$ = NULL; }
+ | expression_list
+ { $$ = $1; }
+ ;
+
+/*
+ * expression_list: comma separated exp items.  Every item is wrapped in
+ * its own Node_expression_list node (item on the left) and the wrappers
+ * are chained through rnode by append_right().  The error alternatives
+ * yield NULL.
+ */
+expression_list
+ : exp
+ { $$ = node ($1, Node_expression_list, (NODE *)NULL); }
+ | expression_list comma exp
+ {
+ $$ = append_right($1,
+ node( $3, Node_expression_list, (NODE *)NULL));
+ yyerrok;
+ }
+ | error
+ { $$ = NULL; }
+ | expression_list error
+ { $$ = NULL; }
+ | expression_list error exp
+ { $$ = NULL; }
+ | expression_list comma error
+ { $$ = NULL; }
+ ;
+
+/*
+ * exp: expressions, not including the comma operator.
+ *
+ * Every alternative yields a parse tree node.  Binary operators put the
+ * left operand in lnode and the right operand in rnode; for ASSIGNOP,
+ * MATCHOP and RELOP the node type is the token's own semantic value ($2).
+ * The lint checks for assignment and comparison inspect the RIGHT-hand
+ * operand ($4 resp. $3), and the warning texts say so.
+ */
+exp	: variable ASSIGNOP
+		{ want_assign = 0; }
+	  exp
+		{
+		  /* $4 is the value being assigned, i.e. the right side */
+		  if (do_lint && $4->type == Node_regex)
+			warning("Regular expression on right of assignment.");
+		  $$ = node ($1, $2, $4);
+		}
+	| '(' expression_list r_paren LEX_IN NAME
+		{ $$ = node (variable($5,1), Node_in_array, $2); }
+	| exp '|' LEX_GETLINE opt_variable
+		{
+		  $$ = node ($4, Node_K_getline,
+			 node ($1, Node_redirect_pipein, (NODE *)NULL));
+		}
+	| LEX_GETLINE opt_variable input_redir
+		{
+		  if (do_lint && ! io_allowed && $3 == NULL)
+			warning("non-redirected getline undefined inside BEGIN or END action");
+		  $$ = node ($2, Node_K_getline, $3);
+		}
+	| exp LEX_AND exp
+		{ $$ = node ($1, Node_and, $3); }
+	| exp LEX_OR exp
+		{ $$ = node ($1, Node_or, $3); }
+	| exp MATCHOP exp
+		{
+		  /* here it really is the left operand ($1) that is tested */
+		  if ($1->type == Node_regex)
+			warning("Regular expression on left of MATCH operator.");
+		  $$ = node ($1, $2, mk_rexp($3));
+		}
+	| regexp
+		{ $$ = $1; }
+	| '!' regexp %prec UNARY
+		{
+		  /* negated bare regexp: build "$0 !~ regexp" */
+		  $$ = node(node(make_number(0.0),
+				 Node_field_spec,
+				 (NODE *) NULL),
+			    Node_nomatch,
+			    $2);
+		}
+	| exp LEX_IN NAME
+		{ $$ = node (variable($3,1), Node_in_array, $1); }
+	| exp RELOP exp
+		{
+		  /* $3 is the right-hand operand of the comparison */
+		  if (do_lint && $3->type == Node_regex)
+			warning("Regular expression on right of comparison.");
+		  $$ = node ($1, $2, $3);
+		}
+	| exp '<' exp
+		{ $$ = node ($1, Node_less, $3); }
+	| exp '>' exp
+		{ $$ = node ($1, Node_greater, $3); }
+	| exp '?' exp ':' exp
+		{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
+	| simp_exp
+		{ $$ = $1; }
+	| exp simp_exp %prec CONCAT_OP
+		{ $$ = node ($1, Node_concat, $2); }
+	;
+
+/*
+ * rexp: the restricted expression form used for unparenthesized
+ * print/printf arguments.  It mirrors exp but has no '<', '>' or
+ * "expr | getline" alternatives (those characters introduce output
+ * redirection in that context).
+ *
+ * A negated bare regexp is built as "$0 !~ regexp", exactly as in the
+ * corresponding alternative of exp, so that both rules hand the
+ * evaluator the same tree for "!/re/".
+ */
+rexp
+	: variable ASSIGNOP
+		{ want_assign = 0; }
+	  rexp
+		{ $$ = node ($1, $2, $4); }
+	| rexp LEX_AND rexp
+		{ $$ = node ($1, Node_and, $3); }
+	| rexp LEX_OR rexp
+		{ $$ = node ($1, Node_or, $3); }
+	| LEX_GETLINE opt_variable input_redir
+		{
+		  if (do_lint && ! io_allowed && $3 == NULL)
+			warning("non-redirected getline undefined inside BEGIN or END action");
+		  $$ = node ($2, Node_K_getline, $3);
+		}
+	| regexp
+		{ $$ = $1; }
+	| '!' regexp %prec UNARY
+		{
+		  /* supply $0 as the string to match, as exp does */
+		  $$ = node(node(make_number(0.0),
+				 Node_field_spec,
+				 (NODE *) NULL),
+			    Node_nomatch,
+			    $2);
+		}
+	| rexp MATCHOP rexp
+		{ $$ = node ($1, $2, mk_rexp($3)); }
+	| rexp LEX_IN NAME
+		{ $$ = node (variable($3,1), Node_in_array, $1); }
+	| rexp RELOP rexp
+		{ $$ = node ($1, $2, $3); }
+	| rexp '?' rexp ':' rexp
+		{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
+	| simp_exp
+		{ $$ = $1; }
+	| rexp simp_exp %prec CONCAT_OP
+		{ $$ = node ($1, Node_concat, $2); }
+	;
+
+/*
+ * simp_exp: arithmetic expressions and post-increment/decrement.  Each
+ * binary alternative builds a node with the left operand in lnode and the
+ * right operand in rnode; the postfix forms take the variable as their
+ * only (left) child.
+ */
+simp_exp
+ : non_post_simp_exp
+ /* Binary operators in order of decreasing precedence. */
+ | simp_exp '^' simp_exp
+ { $$ = node ($1, Node_exp, $3); }
+ | simp_exp '*' simp_exp
+ { $$ = node ($1, Node_times, $3); }
+ | simp_exp '/' simp_exp
+ { $$ = node ($1, Node_quotient, $3); }
+ | simp_exp '%' simp_exp
+ { $$ = node ($1, Node_mod, $3); }
+ | simp_exp '+' simp_exp
+ { $$ = node ($1, Node_plus, $3); }
+ | simp_exp '-' simp_exp
+ { $$ = node ($1, Node_minus, $3); }
+ | variable INCREMENT
+ { $$ = node ($1, Node_postincrement, (NODE *)NULL); }
+ | variable DECREMENT
+ { $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
+ ;
+
+/*
+ * non_post_simp_exp: primary expressions and prefix operators -- logical
+ * not, parenthesized expressions, builtin and user function calls,
+ * variables, pre-increment/decrement, constants and unary sign.
+ *
+ * For LEX_BUILTIN and LEX_LENGTH the token value ($1) is the index of the
+ * keyword in tokentab[]; snode() uses it to check the argument count and
+ * to find the implementing function.
+ */
+non_post_simp_exp
+ : '!' simp_exp %prec UNARY
+ { $$ = node ($2, Node_not,(NODE *) NULL); }
+ | '(' exp r_paren
+ { $$ = $2; }
+ | LEX_BUILTIN
+ '(' opt_expression_list r_paren
+ { $$ = snode ($3, Node_builtin, (int) $1); }
+ | LEX_LENGTH '(' opt_expression_list r_paren
+ { $$ = snode ($3, Node_builtin, (int) $1); }
+ /* bare `length': snode() is called with no argument list */
+ | LEX_LENGTH
+ {
+ if (do_lint)
+ warning("call of `length' without parentheses is not portable");
+ $$ = snode ((NODE *)NULL, Node_builtin, (int) $1);
+ if (do_posix)
+ warning( "call of `length' without parentheses is deprecated by POSIX");
+ }
+ /* user function call: the name is stored as a string node on the right */
+ | FUNC_CALL '(' opt_expression_list r_paren
+ {
+ $$ = node ($3, Node_func_call, make_string($1, strlen($1)));
+ }
+ | variable
+ | INCREMENT variable
+ { $$ = node ($2, Node_preincrement, (NODE *)NULL); }
+ | DECREMENT variable
+ { $$ = node ($2, Node_predecrement, (NODE *)NULL); }
+ | YNUMBER
+ { $$ = $1; }
+ | YSTRING
+ { $$ = $1; }
+
+ /*
+ * Unary minus: a constant operand (Node_val) is negated in place at
+ * parse time; anything else gets a Node_unary_minus node.
+ * NOTE(review): the in-place branch is taken for every Node_val,
+ * string constants included -- confirm that updating numbr alone is
+ * sufficient for those.
+ */
+ | '-' simp_exp %prec UNARY
+ { if ($2->type == Node_val) {
+ $2->numbr = -(force_number($2));
+ $$ = $2;
+ } else
+ $$ = node ($2, Node_unary_minus, (NODE *)NULL);
+ }
+ /* unary plus adds no node; the operand is passed through */
+ | '+' simp_exp %prec UNARY
+ { $$ = $2; }
+ ;
+
+/* opt_variable: optional lvalue, e.g. after getline (NULL if omitted). */
+opt_variable
+ : /* empty */
+ { $$ = NULL; }
+ | variable
+ { $$ = $1; }
+ ;
+
+/*
+ * variable: an lvalue -- a plain name, an array element, or a field
+ * reference.  Names are looked up (and entered if necessary) with
+ * variable(name, 1).
+ */
+variable
+ : NAME
+ { $$ = variable($1,1); }
+ | NAME '[' expression_list ']'
+ {
+ /*
+ * Single subscript: use the expression itself as the subscript and
+ * release the list wrapper.  Several subscripts: keep the whole list.
+ */
+ if ($3->rnode == NULL) {
+ $$ = node (variable($1,1), Node_subscript, $3->lnode);
+ freenode($3);
+ } else
+ $$ = node (variable($1,1), Node_subscript, $3);
+ }
+ | '$' non_post_simp_exp
+ { $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+ ;
+
+/*
+ * Punctuation helpers.  None of them carries a semantic value; several
+ * call yyerrok when the token is seen, and semi also clears want_assign.
+ */
+
+/* l_brace: '{' followed by optional newlines */
+l_brace
+ : '{' opt_nls
+ ;
+
+/* r_brace: '}' followed by optional newlines */
+r_brace
+ : '}' opt_nls { yyerrok; }
+ ;
+
+/* r_paren: closing parenthesis */
+r_paren
+ : ')' { yyerrok; }
+ ;
+
+/* opt_semi: optional semicolon */
+opt_semi
+ : /* empty */
+ | semi
+ ;
+
+/* semi: semicolon; also resets want_assign */
+semi
+ : ';' { yyerrok; want_assign = 0; }
+ ;
+
+/* comma: ',' followed by optional newlines */
+comma : ',' opt_nls { yyerrok; }
+ ;
+
+%%
+
+/*
+ * One entry of the keyword table (tokentab[]): a reserved word or builtin
+ * function name together with what the scanner and snode() need to know
+ * about it.
+ */
+struct token {
+ char *operator; /* text to match */
+ NODETYPE value; /* node type */
+ int class; /* lexical class */
+ unsigned flags; /* # of args. allowed and compatibility */
+# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination) */
+# define A(n) (1<<(n)) /* flag bit meaning "n arguments are allowed" */
+# define VERSION 0xFF00 /* old awk is zero */
+# define NOT_OLD 0x0100 /* feature not in old awk */
+# define NOT_POSIX 0x0200 /* feature not in POSIX */
+# define GAWKX 0x0400 /* gawk extension */
+ NODE *(*ptr) (); /* function that implements this keyword */
+};
+
+extern NODE
+ *do_exp(), *do_getline(), *do_index(), *do_length(),
+ *do_sqrt(), *do_log(), *do_sprintf(), *do_substr(),
+ *do_split(), *do_system(), *do_int(), *do_close(),
+ *do_atan2(), *do_sin(), *do_cos(), *do_rand(),
+ *do_srand(), *do_match(), *do_tolower(), *do_toupper(),
+ *do_sub(), *do_gsub(), *do_strftime(), *do_systime();
+
+/*
+ * Keyword table.  The entries are sorted in ascending ASCII order of the
+ * keyword text because yylex() binary-searches the table; keep it sorted
+ * when adding entries.
+ *
+ * Columns: keyword text, node type, token class returned to the parser,
+ * flags (allowed argument counts built with A(n), plus version bits), and
+ * the implementing function (0 for keywords that are not builtins).  A
+ * builtin whose flags carry no A(n) bit (sprintf) is not checked for its
+ * argument count by snode().
+ */
+
+static struct token tokentab[] = {
+{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
+{"END", Node_illegal, LEX_END, 0, 0},
+{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
+{"break", Node_K_break, LEX_BREAK, 0, 0},
+{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close},
+{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
+{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
+{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
+{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
+{"else", Node_illegal, LEX_ELSE, 0, 0},
+{"exit", Node_K_exit, LEX_EXIT, 0, 0},
+{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
+{"for", Node_K_for, LEX_FOR, 0, 0},
+{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
+{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
+{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
+{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"if", Node_K_if, LEX_IF, 0, 0},
+{"in", Node_illegal, LEX_IN, 0, 0},
+{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
+{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
+{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
+{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
+{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match},
+{"next", Node_K_next, LEX_NEXT, 0, 0},
+{"print", Node_K_print, LEX_PRINT, 0, 0},
+{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
+{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
+{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
+{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
+{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
+{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
+{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
+{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
+{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_strftime},
+{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
+{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
+{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
+{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
+{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
+{"while", Node_K_while, LEX_WHILE, 0, 0},
+};
+
+/*
+ * yyerror --- report a syntax error and terminate the program.
+ *
+ * Called with a printf-style format followed by its arguments.  The
+ * offending source line is printed first (through msg()), followed by a
+ * second line (through err()) holding a '^' under the start of the
+ * current lexeme and then the message.  Increments errcount and finishes
+ * with exit(2), so it never returns.
+ *
+ * When the lexeme is a newline the caller's format is replaced by
+ * "unexpected newline".
+ *
+ * The marker line is assembled in a fixed-size buffer.  The amount of
+ * caret padding is limited by the length of the actual message, and the
+ * message copy itself is bounded, so the buffer cannot be overrun no
+ * matter how long the message or how far right the error column is.
+ */
+/* VARARGS0 */
+static void
+yyerror(va_alist)
+va_dcl
+{
+	va_list args;
+	char *mesg = NULL;
+	register char *bp, *cp;
+	char *scan;
+	char buf[120];
+	int mlen;
+
+	errcount++;
+	/* Find the current line in the input file */
+	if (lexptr) {
+		if (!thisline) {
+			cp = lexeme;
+			if (*cp == '\n') {
+				cp--;
+				mesg = "unexpected newline";
+			}
+			for ( ; cp != lexptr_begin && *cp != '\n'; --cp)
+				;
+			if (*cp == '\n')
+				cp++;
+			thisline = cp;
+		}
+		/* NL isn't guaranteed */
+		bp = lexeme;
+		while (bp < lexend && *bp && *bp != '\n')
+			bp++;
+	} else {
+		thisline = "(END OF FILE)";
+		bp = thisline + 13;
+	}
+	msg("%.*s", (int) (bp - thisline), thisline);
+
+	/* the message is needed now, to know how much room it takes */
+	va_start(args);
+	if (mesg == NULL)
+		mesg = va_arg(args, char *);
+	mlen = strlen(mesg);
+
+	/*
+	 * Reserve space for "^ ", the message and the terminating NUL;
+	 * whatever is left may be used for padding up to the caret.
+	 */
+	bp = buf;
+	if (mlen + 3 <= (int) sizeof(buf))
+		cp = buf + sizeof(buf) - (mlen + 3);
+	else
+		cp = buf;	/* oversized message: no padding at all */
+	if (lexptr) {
+		scan = thisline;
+		/* keep tabs so the caret lines up with the source text */
+		while (bp < cp && scan < lexeme)
+			if (*scan++ == '\t')
+				*bp++ = '\t';
+			else
+				*bp++ = ' ';
+		*bp++ = '^';
+		*bp++ = ' ';
+	}
+
+	/* bounded copy; only an oversized message is ever truncated */
+	cp = buf + sizeof(buf) - 1;
+	while (bp < cp && *mesg != '\0')
+		*bp++ = *mesg++;
+	*bp = '\0';
+
+	err("", buf, args);
+	va_end(args);
+	exit(2);
+}
+
+/*
+ * get_src_buf --- make the next piece of awk program text available to
+ * the scanner.
+ *
+ * Program text comes from the entries of srcfiles[]; an entry is either
+ * a command-line string (stype == CMDLINE) or the name of a file that is
+ * read in chunks.  On success lexptr/lexend delimit the new text and a
+ * non-NULL pointer is returned; NULL is returned when there is nothing
+ * left.  The function keeps its position (current entry, open file,
+ * buffer) in static variables between calls.
+ *
+ * In both cases the text of the line currently being scanned is kept
+ * reachable through thisline/lexeme so that yyerror() can still print it.
+ */
+static char *
+get_src_buf()
+{
+ static int samefile = 0; /* nonzero while a source file is open and being read */
+ static int nextfile = 0; /* index of the current entry in srcfiles[] */
+ static char *buf = NULL;
+ static int fd;
+ int n;
+ register char *scan;
+ static int len = 0;
+ static int did_newline = 0; /* NOTE(review): set once below and never cleared */
+# define SLOP 128 /* enough space to hold most source lines */
+
+ if (nextfile > numfiles)
+ return NULL;
+
+ if (srcfiles[nextfile].stype == CMDLINE) {
+ if (len == 0) {
+ /* first call for this string: scan it in place */
+ len = strlen(srcfiles[nextfile].val);
+ sourceline = 1;
+ lexptr = lexptr_begin = srcfiles[nextfile].val;
+ lexend = lexptr + len;
+ } else if (!did_newline && *(lexptr-1) != '\n') {
+ /*
+ * The following goop is to ensure that the source
+ * ends with a newline and that the entire current
+ * line is available for error messages.
+ */
+ int offset;
+
+ did_newline = 1;
+ offset = lexptr - lexeme;
+ /* back up to the start of the current line */
+ for (scan = lexeme; scan > lexptr_begin; scan--)
+ if (*scan == '\n') {
+ scan++;
+ break;
+ }
+ len = lexptr - scan;
+ /* copy the line and append the missing newline */
+ emalloc(buf, char *, len+1, "get_src_buf");
+ memcpy(buf, scan, len);
+ thisline = buf;
+ lexptr = buf + len;
+ *lexptr = '\n';
+ lexeme = lexptr - offset;
+ lexptr_begin = buf;
+ lexend = lexptr + 1;
+ } else {
+ /* this string is used up */
+ len = 0;
+ lexeme = lexptr = lexptr_begin = NULL;
+ }
+ if (lexptr == NULL && ++nextfile <= numfiles)
+ return get_src_buf();
+ return lexptr;
+ }
+ if (!samefile) {
+ /* open the next source file and allocate its buffer */
+ source = srcfiles[nextfile].val;
+ if (source == NULL) {
+ if (buf) {
+ free(buf);
+ buf = NULL;
+ }
+ len = 0;
+ return lexeme = lexptr = lexptr_begin = NULL;
+ }
+ fd = pathopen(source);
+ if (fd == -1)
+ fatal("can't open source file \"%s\" for reading (%s)",
+ source, strerror(errno));
+ len = optimal_bufsize(fd);
+ if (buf)
+ free(buf);
+ /* the first SLOP bytes are reserved for the retained line */
+ emalloc(buf, char *, len + SLOP, "get_src_buf");
+ lexptr_begin = buf + SLOP;
+ samefile = 1;
+ sourceline = 1;
+ } else {
+ /*
+ * Here, we retain the current source line (up to length SLOP)
+ * in the beginning of the buffer that was overallocated above
+ */
+ int offset;
+ int linelen;
+
+ offset = lexptr - lexeme;
+ for (scan = lexeme; scan > lexptr_begin; scan--)
+ if (*scan == '\n') {
+ scan++;
+ break;
+ }
+ linelen = lexptr - scan;
+ if (linelen > SLOP)
+ linelen = SLOP;
+ thisline = buf + SLOP - linelen;
+ memcpy(thisline, scan, linelen);
+ lexeme = buf + SLOP - offset;
+ lexptr_begin = thisline;
+ }
+ /* new text always lands just after the SLOP area */
+ n = read(fd, buf + SLOP, len);
+ if (n == -1)
+ fatal("can't read sourcefile \"%s\" (%s)",
+ source, strerror(errno));
+ if (n == 0) {
+ /* end of this file: move on to the next source */
+ samefile = 0;
+ nextfile++;
+ len = 0;
+ return get_src_buf();
+ }
+ lexptr = buf + SLOP;
+ lexend = lexptr + n;
+ return buf;
+}
+
+#define tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token)
+
+/*
+ * tokexpand --- enlarge the token buffer.
+ *
+ * The buffer size doubles on every call (the first call allocates 120
+ * bytes).  tokstart, tokend and token are updated to refer to the new
+ * storage; the text collected so far and the write position within it
+ * are preserved.  Returns the updated write position.
+ */
+char *
+tokexpand()
+{
+	static int capacity = 60;
+	int used = token - tokstart;	/* write position as an offset */
+
+	capacity *= 2;
+	if (tokstart == NULL) {
+		emalloc(tokstart, char *, capacity, "tokexpand");
+	} else {
+		erealloc(tokstart, char *, capacity, "tokexpand");
+	}
+	tokend = tokstart + capacity;
+	return token = tokstart + used;
+}
+
+/*
+ * nextc --- return the next character of program text and advance lexptr.
+ * When the current buffer is used up, get_src_buf() is asked for more;
+ * '\0' is returned once it has nothing left.  With DEBUG defined this is a
+ * real function, otherwise a macro with the same logic.
+ */
+#if DEBUG
+char
+nextc() {
+ if (lexptr && lexptr < lexend)
+ return *lexptr++;
+ else if (get_src_buf())
+ return *lexptr++;
+ else
+ return '\0';
+}
+#else
+#define nextc() ((lexptr && lexptr < lexend) ? \
+ *lexptr++ : \
+ (get_src_buf() ? *lexptr++ : '\0') \
+ )
+#endif
+/*
+ * pushback --- step lexptr back by one character, unless lexptr is NULL
+ * or already at lexptr_begin (then it is left alone).
+ */
+#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)
+
+/*
+ * yylex --- read the input and turn it into tokens.
+ *
+ * Returns the token code for the parser (0 at end of input) and leaves
+ * the token's semantic value in yylval:
+ *   - operators: the node type in yylval.nodetypeval;
+ *   - YSTRING / YNUMBER: a PERM constant node in yylval.nodeval;
+ *   - REGEXP: the regexp text in yylval.sval (points into the token
+ *     buffer);
+ *   - LEX_BUILTIN / LEX_LENGTH: the index into tokentab[] in yylval.lval;
+ *   - other keywords: the node type from tokentab[];
+ *   - NAME / FUNC_CALL: a freshly allocated copy of the identifier in
+ *     yylval.sval.
+ *
+ * want_regexp (set by the parser) makes the scanner read a regular
+ * expression up to the closing '/'.  want_assign decides whether "/="
+ * is the division-assignment operator or a '/' followed by '='.
+ *
+ * The copy of an identifier is made only after the keyword lookup has
+ * failed, so nothing is allocated (and lost) for reserved words.
+ */
+
+static int
+yylex()
+{
+	register int c;
+	int seen_e = 0;		/* These are for numbers */
+	int seen_point = 0;
+	int esc_seen;		/* for literal strings */
+	int low, mid, high;
+	static int did_newline = 0;
+	char *tokkey;
+
+	if (!nextc())
+		return 0;
+	pushback();
+	lexeme = lexptr;
+	thisline = NULL;
+	if (want_regexp) {
+		int in_brack = 0;
+
+		want_regexp = 0;
+		token = tokstart;
+		while ((c = nextc()) != 0) {
+			switch (c) {
+			case '[':
+				in_brack = 1;
+				break;
+			case ']':
+				in_brack = 0;
+				break;
+			case '\\':
+				if ((c = nextc()) == '\0') {
+					yyerror("unterminated regexp ends with \\ at end of file");
+				} else if (c == '\n') {
+					/* backslash-newline: line continuation */
+					sourceline++;
+					continue;
+				} else
+					tokadd('\\');
+				break;
+			case '/':	/* end of the regexp */
+				if (in_brack)
+					break;
+
+				pushback();
+				tokadd('\0');
+				yylval.sval = tokstart;
+				return REGEXP;
+			case '\n':
+				pushback();
+				yyerror("unterminated regexp");
+				/* yyerror() does not return */
+			case '\0':
+				yyerror("unterminated regexp at end of file");
+			}
+			tokadd(c);
+		}
+	}
+retry:
+	while ((c = nextc()) == ' ' || c == '\t')
+		;
+
+	lexeme = lexptr ? lexptr - 1 : lexptr;
+	thisline = NULL;
+	token = tokstart;
+	yylval.nodetypeval = Node_illegal;
+
+	switch (c) {
+	case 0:
+		return 0;
+
+	case '\n':
+		sourceline++;
+		return NEWLINE;
+
+	case '#':		/* it's a comment */
+		while ((c = nextc()) != '\n') {
+			if (c == '\0')
+				return 0;
+		}
+		sourceline++;
+		return NEWLINE;
+
+	case '\\':
+#ifdef RELAXED_CONTINUATION
+		if (!do_unix) {	/* strip trailing white-space and/or comment */
+			while ((c = nextc()) == ' ' || c == '\t') continue;
+			if (c == '#')
+				while ((c = nextc()) != '\n') if (!c) break;
+			pushback();
+		}
+#endif /*RELAXED_CONTINUATION*/
+		if (nextc() == '\n') {
+			sourceline++;
+			goto retry;
+		} else
+			yyerror("inappropriate use of backslash");
+		break;
+
+	case '$':
+		want_assign = 1;
+		return '$';
+
+	case ')':
+	case ']':
+	case '(':
+	case '[':
+	case ';':
+	case ':':
+	case '?':
+	case '{':
+	case ',':
+		return c;
+
+	case '*':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_assign_times;
+			return ASSIGNOP;
+		} else if (do_posix) {
+			pushback();
+			return '*';
+		} else if (c == '*') {
+			/* make ** and **= aliases for ^ and ^= */
+			static int did_warn_op = 0, did_warn_assgn = 0;
+
+			if (nextc() == '=') {
+				if (do_lint && ! did_warn_assgn) {
+					did_warn_assgn = 1;
+					warning("**= is not allowed by POSIX");
+				}
+				yylval.nodetypeval = Node_assign_exp;
+				return ASSIGNOP;
+			} else {
+				pushback();
+				if (do_lint && ! did_warn_op) {
+					did_warn_op = 1;
+					warning("** is not allowed by POSIX");
+				}
+				return '^';
+			}
+		}
+		pushback();
+		return '*';
+
+	case '/':
+		if (want_assign) {
+			if (nextc() == '=') {
+				yylval.nodetypeval = Node_assign_quotient;
+				return ASSIGNOP;
+			}
+			pushback();
+		}
+		return '/';
+
+	case '%':
+		if (nextc() == '=') {
+			yylval.nodetypeval = Node_assign_mod;
+			return ASSIGNOP;
+		}
+		pushback();
+		return '%';
+
+	case '^':
+	{
+		static int did_warn_op = 0, did_warn_assgn = 0;
+
+		if (nextc() == '=') {
+
+			if (do_lint && ! did_warn_assgn) {
+				did_warn_assgn = 1;
+				warning("operator `^=' is not supported in old awk");
+			}
+			yylval.nodetypeval = Node_assign_exp;
+			return ASSIGNOP;
+		}
+		pushback();
+		if (do_lint && ! did_warn_op) {
+			did_warn_op = 1;
+			warning("operator `^' is not supported in old awk");
+		}
+		return '^';
+	}
+
+	case '+':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_assign_plus;
+			return ASSIGNOP;
+		}
+		if (c == '+')
+			return INCREMENT;
+		pushback();
+		return '+';
+
+	case '!':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_notequal;
+			return RELOP;
+		}
+		if (c == '~') {
+			yylval.nodetypeval = Node_nomatch;
+			want_assign = 0;
+			return MATCHOP;
+		}
+		pushback();
+		return '!';
+
+	case '<':
+		if (nextc() == '=') {
+			yylval.nodetypeval = Node_leq;
+			return RELOP;
+		}
+		yylval.nodetypeval = Node_less;
+		pushback();
+		return '<';
+
+	case '=':
+		if (nextc() == '=') {
+			yylval.nodetypeval = Node_equal;
+			return RELOP;
+		}
+		yylval.nodetypeval = Node_assign;
+		pushback();
+		return ASSIGNOP;
+
+	case '>':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_geq;
+			return RELOP;
+		} else if (c == '>') {
+			yylval.nodetypeval = Node_redirect_append;
+			return APPEND_OP;
+		}
+		yylval.nodetypeval = Node_greater;
+		pushback();
+		return '>';
+
+	case '~':
+		yylval.nodetypeval = Node_match;
+		want_assign = 0;
+		return MATCHOP;
+
+	case '}':
+		/*
+		 * Added did newline stuff.  Easier than
+		 * hacking the grammar: the first time a '}' is seen a
+		 * NEWLINE is returned and the '}' is left in the input;
+		 * the second time the '}' itself is returned.
+		 */
+		if (did_newline) {
+			did_newline = 0;
+			return c;
+		}
+		did_newline++;
+		--lexptr;	/* pick up } next time */
+		return NEWLINE;
+
+	case '"':
+		esc_seen = 0;
+		while ((c = nextc()) != '"') {
+			if (c == '\n') {
+				pushback();
+				yyerror("unterminated string");
+			}
+			if (c == '\\') {
+				c = nextc();
+				if (c == '\n') {
+					/* backslash-newline inside a string */
+					sourceline++;
+					continue;
+				}
+				esc_seen = 1;
+				tokadd('\\');
+			}
+			if (c == '\0') {
+				pushback();
+				yyerror("unterminated string");
+			}
+			tokadd(c);
+		}
+		yylval.nodeval = make_str_node(tokstart,
+					token - tokstart, esc_seen ? SCAN : 0);
+		yylval.nodeval->flags |= PERM;
+		return YSTRING;
+
+	case '-':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_assign_minus;
+			return ASSIGNOP;
+		}
+		if (c == '-')
+			return DECREMENT;
+		pushback();
+		return '-';
+
+	case '.':
+		c = nextc();
+		pushback();
+		if (!isdigit(c))
+			return '.';
+		else
+			c = '.';	/* FALL THROUGH */
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+		/* It's a number */
+		for (;;) {
+			int gotnumber = 0;
+
+			/*
+			 * Every character is stored before it is examined,
+			 * so the character that ends the number is stored
+			 * too and stops the conversion below.
+			 */
+			tokadd(c);
+			switch (c) {
+			case '.':
+				if (seen_point) {
+					gotnumber++;
+					break;
+				}
+				++seen_point;
+				break;
+			case 'e':
+			case 'E':
+				if (seen_e) {
+					gotnumber++;
+					break;
+				}
+				++seen_e;
+				if ((c = nextc()) == '-' || c == '+')
+					tokadd(c);
+				else
+					pushback();
+				break;
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				break;
+			default:
+				gotnumber++;
+			}
+			if (gotnumber)
+				break;
+			c = nextc();
+		}
+		pushback();
+		yylval.nodeval = make_number(atof(tokstart));
+		yylval.nodeval->flags |= PERM;
+		return YNUMBER;
+
+	case '&':
+		if ((c = nextc()) == '&') {
+			yylval.nodetypeval = Node_and;
+			/* swallow white space, newlines and comments after && */
+			for (;;) {
+				c = nextc();
+				if (c == '\0')
+					break;
+				if (c == '#') {
+					while ((c = nextc()) != '\n' && c != '\0')
+						;
+					if (c == '\0')
+						break;
+				}
+				if (c == '\n')
+					sourceline++;
+				if (! isspace(c)) {
+					pushback();
+					break;
+				}
+			}
+			want_assign = 0;
+			return LEX_AND;
+		}
+		pushback();
+		return '&';
+
+	case '|':
+		if ((c = nextc()) == '|') {
+			yylval.nodetypeval = Node_or;
+			/* swallow white space, newlines and comments after || */
+			for (;;) {
+				c = nextc();
+				if (c == '\0')
+					break;
+				if (c == '#') {
+					while ((c = nextc()) != '\n' && c != '\0')
+						;
+					if (c == '\0')
+						break;
+				}
+				if (c == '\n')
+					sourceline++;
+				if (! isspace(c)) {
+					pushback();
+					break;
+				}
+			}
+			want_assign = 0;
+			return LEX_OR;
+		}
+		pushback();
+		return '|';
+	}
+
+	if (c != '_' && ! isalpha(c))
+		yyerror("Invalid char '%c' in expression\n", c);
+
+	/* it's some type of name-type-thing.  Find its length */
+	token = tokstart;
+	while (is_identchar(c)) {
+		tokadd(c);
+		c = nextc();
+	}
+	tokadd('\0');
+	pushback();
+
+	/* See if it is a special token. */
+	low = 0;
+	high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1;
+	while (low <= high) {
+		int i/* , c */;
+
+		mid = (low + high) / 2;
+		/* compare first characters before paying for strcmp() */
+		c = *tokstart - tokentab[mid].operator[0];
+		i = c ? c : strcmp (tokstart, tokentab[mid].operator);
+
+		if (i < 0) {		/* token < mid */
+			high = mid - 1;
+		} else if (i > 0) {	/* token > mid */
+			low = mid + 1;
+		} else {
+			if (do_lint) {
+				if (tokentab[mid].flags & GAWKX)
+					warning("%s() is a gawk extension",
+						tokentab[mid].operator);
+				if (tokentab[mid].flags & NOT_POSIX)
+					warning("POSIX does not allow %s",
+						tokentab[mid].operator);
+				if (tokentab[mid].flags & NOT_OLD)
+					warning("%s is not supported in old awk",
+						tokentab[mid].operator);
+			}
+			/* disabled keywords are treated as ordinary names */
+			if ((do_unix && (tokentab[mid].flags & GAWKX))
+			    || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
+				break;
+			if (tokentab[mid].class == LEX_BUILTIN
+			    || tokentab[mid].class == LEX_LENGTH
+			   )
+				yylval.lval = mid;
+			else
+				yylval.nodetypeval = tokentab[mid].value;
+
+			return tokentab[mid].class;
+		}
+	}
+
+	/*
+	 * An ordinary name: give the parser its own copy (including the
+	 * terminating NUL), since the token buffer is reused.
+	 */
+	emalloc(tokkey, char *, token - tokstart, "yylex");
+	memcpy(tokkey, tokstart, token - tokstart);
+	yylval.sval = tokkey;
+
+	/* lexptr is NULL once the input is exhausted */
+	if (lexptr != NULL && *lexptr == '(')
+		return FUNC_CALL;
+	else {
+		want_assign = 1;
+		return NAME;
+	}
+}
+
+/*
+ * node_common --- allocate a parse tree node of type op and fill in the
+ * fields shared by all node kinds: flags (MALLOC) and the source
+ * position (line number and file).
+ */
+static NODE *
+node_common(op)
+NODETYPE op;
+{
+	register NODE *np;
+
+	getnode(np);
+	np->type = op;
+	np->flags = MALLOC;
+	np->source_file = source;
+	/* if lookahead is NL, lineno is 1 too high */
+	np->source_line = (lexeme && *lexeme == '\n')
+				? sourceline - 1 : sourceline;
+	return np;
+}
+
+/*
+ * node --- allocate a node of type op with the given left and right
+ * children.
+ */
+NODE *
+node(left, op, right)
+NODE *left, *right;
+NODETYPE op;
+{
+	register NODE *np = node_common(op);
+
+	np->rnode = right;
+	np->lnode = left;
+	return np;
+}
+
+/*
+ * snode --- allocate a node for a call of a builtin function.
+ *
+ * subn is the argument list (chained Node_expression_list nodes, or
+ * NULL), op the node type and idx the builtin's index in tokentab[].
+ * The number of arguments is checked against the counts allowed for the
+ * builtin (no check when the table entry allows none explicitly), and
+ * defaults are supplied where possible:
+ *   - length with no argument gets $0;
+ *   - the pattern argument of match, sub, gsub and split is wrapped with
+ *     mk_rexp() unless it already is a regexp node;
+ *   - sub/gsub with two arguments get $0 as target;
+ *   - split with two arguments gets FS_node as separator, flagged
+ *     FS_DFLT.
+ * A wrong argument count is a fatal error.
+ */
+static NODE *
+snode(subn, op, idx)
+NODETYPE op;
+int idx;
+NODE *subn;
+{
+	register NODE *r;
+	register NODE *n;
+	int nexp = 0;
+	int args_allowed;
+
+	r = node_common(op);
+
+	/*
+	 * traverse expression list to see how many args. given; anything
+	 * above three is too many for every builtin, so stop counting there
+	 */
+	for (n = subn; n; n = n->rnode) {
+		nexp++;
+		if (nexp > 3)
+			break;
+	}
+
+	/* check against how many args. are allowed for this builtin */
+	args_allowed = tokentab[idx].flags & ARGS;
+	if (args_allowed && !(args_allowed & A(nexp)))
+		fatal("%s() cannot have %d argument%s",
+			tokentab[idx].operator, nexp, nexp == 1 ? "" : "s");
+
+	r->proc = tokentab[idx].ptr;
+
+	/* special case processing for a few builtins */
+	if (nexp == 0 && r->proc == do_length) {
+		subn = node(node(make_number(0.0),Node_field_spec,(NODE *)NULL),
+			    Node_expression_list,
+			    (NODE *) NULL);
+	} else if (r->proc == do_match) {
+		/* second argument is the pattern */
+		if (subn->rnode->lnode->type != Node_regex)
+			subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
+	} else if (r->proc == do_sub || r->proc == do_gsub) {
+		/* first argument is the pattern */
+		if (subn->lnode->type != Node_regex)
+			subn->lnode = mk_rexp(subn->lnode);
+		if (nexp == 2)
+			append_right(subn, node(node(make_number(0.0),
+						     Node_field_spec,
+						     (NODE *) NULL),
+					        Node_expression_list,
+						(NODE *) NULL));
+		else if (do_lint && subn->rnode->rnode->lnode->type == Node_val)
+			warning("string literal as last arg of substitute");
+	} else if (r->proc == do_split) {
+		if (nexp == 2)
+			append_right(subn,
+			    node(FS_node, Node_expression_list, (NODE *) NULL));
+		/* third argument is the separator */
+		n = subn->rnode->rnode->lnode;
+		if (n->type != Node_regex)
+			subn->rnode->rnode->lnode = mk_rexp(n);
+		if (nexp == 2)
+			subn->rnode->rnode->lnode->re_flags |= FS_DFLT;
+	}
+
+	r->subnode = subn;
+	return r;
+}
+
+/*
+ * mkrangenode --- allocate a Node_line_range node for the pattern pair
+ * cpair.  Unlike a node built with node(), the `triggered' word is
+ * explicitly cleared here, so nobody has to rely on
+ * 'node( foo, Node_line_range, 0)' initializing it.
+ */
+static NODE *
+mkrangenode(cpair)
+NODE *cpair;
+{
+	register NODE *range;
+
+	getnode(range);
+	range->type = Node_line_range;
+	range->triggered = 0;
+	range->condpair = cpair;
+	return range;
+}
+
+/*
+ * make_for_loop --- bundle the three clauses of a for statement.
+ *
+ * The clauses are stored in a freshly allocated FOR_LOOP_HEADER; the
+ * header is returned hung off a node of type Node_illegal.
+ */
+static NODE *
+make_for_loop(init, cond, incr)
+NODE *init, *cond, *incr;
+{
+	register FOR_LOOP_HEADER *hdr;
+	NODE *holder;
+
+	emalloc(hdr, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
+	hdr->init = init;
+	hdr->cond = cond;
+	hdr->incr = incr;
+
+	getnode(holder);
+	holder->type = Node_illegal;
+	holder->sub.nodep.r.hd = hdr;
+	return holder;
+}
+
+/*
+ * install --- enter name into the symbol table with the given value.
+ *
+ * A new hash node is always put at the front of the name's bucket, even
+ * when the name is already present; callers that care about
+ * redefinition have to check with lookup() first.  The name pointer is
+ * stored, not copied, and is also recorded in value->vname.  Returns
+ * value.
+ */
+NODE *
+install(name, value)
+char *name;
+NODE *value;
+{
+	register NODE *entry;
+	register int namelen, slot;
+
+	namelen = strlen(name);
+	slot = hash(name, namelen);
+
+	getnode(entry);
+	entry->type = Node_hashnode;
+	entry->hname = name;
+	entry->hlength = namelen;
+	entry->hvalue = value;
+	entry->hvalue->vname = name;
+
+	/* link in at the head of the chain */
+	entry->hnext = variables[slot];
+	variables[slot] = entry;
+	return entry->hvalue;
+}
+
+/*
+ * lookup --- find name in the symbol table.
+ *
+ * Returns the value of the most recently installed entry for name, or
+ * NULL when the name has not been installed.
+ */
+NODE *
+lookup(name)
+char *name;
+{
+	register NODE *entry;
+	register int namelen;
+
+	namelen = strlen(name);
+	for (entry = variables[hash(name, namelen)]; entry != NULL;
+	     entry = entry->hnext) {
+		if (entry->hlength != namelen)
+			continue;
+		if (STREQN(entry->hname, name, namelen))
+			return entry->hvalue;
+	}
+	return NULL;
+}
+
+/*
+ * append_right --- add new at the end of the rnode chain of list and
+ * return list.
+ *
+ * Walking to the end every time would take quadratic time for a long
+ * list, so the head and tail of the list appended to most recently are
+ * remembered; repeated appends to that same list go straight to the
+ * tail.
+ */
+static NODE *
+append_right(list, new)
+NODE *list, *new;
+{
+	static NODE *savefront = NULL, *savetail = NULL;
+	register NODE *tail;
+
+	if (savefront != list) {
+		/* a different list: remember it and search for its end */
+		savefront = list;
+		for (tail = list; tail->rnode != NULL; tail = tail->rnode)
+			;
+	} else
+		tail = savetail;
+
+	tail->rnode = new;
+	savetail = new;
+	return list;
+}
+
+/*
+ * func_install --- enter a function definition into the symbol table.
+ *
+ * params is the parameter list whose first element carries the function
+ * name; def is the function body.  The symbol table entries made for
+ * the formal parameters (and their names) are removed first, then the
+ * entry for the function name itself.  If the name is still defined
+ * after that, it is a redefinition and a fatal error; otherwise the name
+ * is installed with a Node_func node (params on the left, def on the
+ * right).
+ */
+static void
+func_install(params, def)
+NODE *params;
+NODE *def;
+{
+	/* formals first; their names are freed along with the entries */
+	pop_params(params->rnode);
+	/* then the function name, which stays in use */
+	pop_var(params, 0);
+
+	if (lookup(params->param) == NULL)
+		(void) install(params->param, node(params, Node_func, def));
+	else
+		fatal("function name `%s' previously defined", params->param);
+}
+
+/*
+ * pop_var --- remove the first symbol table entry for the name held in
+ * np->param.
+ *
+ * The hash node is unlinked from its bucket and released.  When freeit
+ * is nonzero the name string itself is freed as well.  Nothing happens
+ * if the name is not in the table.
+ */
+static void
+pop_var(np, freeit)
+NODE *np;
+int freeit;
+{
+	register NODE *entry, **link;
+	register int namelen;
+	char *name = np->param;
+
+	namelen = strlen(name);
+	/* link always points at the pointer that leads to entry */
+	for (link = &(variables[hash(name, namelen)]);
+	     (entry = *link) != NULL;
+	     link = &(entry->hnext)) {
+		if (namelen != entry->hlength
+		    || ! STREQN(entry->hname, name, namelen))
+			continue;
+		*link = entry->hnext;
+		freenode(entry);
+		if (freeit)
+			free(np->param);
+		return;
+	}
+}
+
+/*
+ * pop_params --- remove every parameter on the list from the symbol
+ * table, freeing the parameter names too.
+ */
+static void
+pop_params(params)
+NODE *params;
+{
+	register NODE *cur = params;
+
+	while (cur != NULL) {
+		pop_var(cur, 1);
+		cur = cur->rnode;
+	}
+}
+
+/*
+ * make_param --- create a Node_param_list node for the formal parameter
+ * name, number it with the next value of param_counter, and install it
+ * in the symbol table under that name.  Returns the new node.
+ */
+static NODE *
+make_param(name)
+char *name;
+{
+	NODE *parm;
+
+	getnode(parm);
+	parm->type = Node_param_list;
+	parm->param = name;
+	parm->param_cnt = param_counter++;
+	parm->rnode = NULL;	/* not linked to a list yet */
+	return install(name, parm);
+}
+
+/*
+ * variable --- return the symbol table entry for the variable name,
+ * creating it if it is not there yet.
+ *
+ * A new variable is installed as a Node_var whose value is Nnull_string;
+ * in that case the symbol table keeps the name pointer.  When the name
+ * already exists and can_free is nonzero, the caller's copy of the name
+ * is freed.  The first reference to ENVIRON triggers load_environ().
+ */
+NODE *
+variable(name, can_free)
+char *name;
+int can_free;
+{
+	register NODE *sym;
+	static int env_loaded = 0;
+
+	if (!env_loaded && STREQ(name, "ENVIRON")) {
+		load_environ();
+		env_loaded = 1;
+	}
+
+	sym = lookup(name);
+	if (sym != NULL) {
+		/* already known: the duplicate name is not needed */
+		if (can_free)
+			free(name);
+		return sym;
+	}
+	return install(name, node(Nnull_string, Node_var, (NODE *) NULL));
+}
+
+/*
+ * mk_rexp --- make sure exp is a regexp node.
+ *
+ * A node that already is a Node_regex is returned unchanged.  Anything
+ * else is wrapped in a new Node_regex node that refers to the
+ * expression through re_exp, with no text or compiled form attached.
+ */
+static NODE *
+mk_rexp(exp)
+NODE *exp;
+{
+	NODE *re;
+
+	if (exp->type == Node_regex)
+		return exp;
+
+	getnode(re);
+	re->type = Node_regex;
+	re->re_exp = exp;
+	re->re_text = NULL;
+	re->re_reg = NULL;
+	re->re_flags = 0;
+	re->re_cnt = 1;
+	return re;
+}
diff --git a/gnu/usr.bin/awk/builtin.c b/gnu/usr.bin/awk/builtin.c
new file mode 100644
index 000000000000..9d5e3b302fde
--- /dev/null
+++ b/gnu/usr.bin/awk/builtin.c
@@ -0,0 +1,1133 @@
+/*
+ * builtin.c - Builtin functions and various utility procedures
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include "awk.h"
+
+
+#ifndef SRANDOM_PROTO
+extern void srandom P((int seed));
+#endif
+#ifndef linux
+extern char *initstate P((unsigned seed, char *state, int n));
+extern char *setstate P((char *state));
+extern long random P((void));
+#endif
+
+extern NODE **fields_arr;
+extern int output_is_tty;
+
+static NODE *sub_common P((NODE *tree, int global));
+
+#ifdef GFMT_WORKAROUND
+char *gfmt P((double g, int prec, char *buf));
+#endif
+
+/*
+ * Floor() and Ceil() are the rounding primitives used by do_int().
+ * On every system except the Cray they are plain floor() and ceil().
+ */
+#ifdef _CRAY
+/* Work around a problem in conversion of doubles to exact integers. */
+#include <float.h>
+/* scale the argument up by one epsilon before rounding */
+#define Floor(n) floor((n) * (1.0 + DBL_EPSILON))
+#define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON))
+
+/* Force the standard C compiler to use the library math functions. */
+/* exp() and log() are routed through function pointers to achieve that */
+extern double exp(double);
+double (*Exp)() = exp;
+#define exp(x) (*Exp)(x)
+extern double log(double);
+double (*Log)() = log;
+#define log(x) (*Log)(x)
+#else
+#define Floor(n) floor(n)
+#define Ceil(n) ceil(n)
+#endif
+
+/*
+ * efwrite - fwrite() with error checking.
+ *
+ * Writes count items of size bytes from ptr to fp.  When flush is set and
+ * the destination is either stdout attached to a tty or a redirection
+ * marked RED_NOBUF, the stream is flushed as well.  A short write or a
+ * stream error after the flush is fatal; from names the statement doing
+ * the output and rp (may be NULL) the redirection, both for the message.
+ */
+static void
+efwrite(ptr, size, count, fp, from, rp, flush)
+void *ptr;
+unsigned size, count;
+FILE *fp;
+char *from;
+struct redirect *rp;
+int flush;
+{
+	int failed;
+
+	errno = 0;
+	failed = (fwrite(ptr, size, count, fp) != count);
+
+	if (!failed && flush) {
+		int unbuffered = (fp == stdout && output_is_tty)
+				|| (rp && (rp->flag & RED_NOBUF));
+
+		if (unbuffered) {
+			fflush(fp);
+			failed = (ferror(fp) != 0);
+		}
+	}
+
+	if (failed)
+		fatal("%s to \"%s\" failed (%s)", from,
+			rp ? rp->value : "standard output",
+			errno ? strerror(errno) : "reason unknown");
+}
+
+/* Builtin functions */
+/*
+ * do_exp - the exp() builtin.  Evaluates its single argument as a number
+ * and returns e raised to that power as a temporary number, warning when
+ * the library reports a range error.
+ */
+NODE *
+do_exp(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	double x, result;
+#ifndef exp
+	double exp P((double));
+#endif
+
+	argnode = tree_eval(tree->lnode);
+	x = force_number(argnode);
+	free_temp(argnode);
+
+	/* clear errno so that a range error can be attributed to exp() */
+	errno = 0;
+	result = exp(x);
+	if (errno == ERANGE)
+		warning("exp argument %g is out of range", x);
+
+	return tmp_number((AWKNUM) result);
+}
+
+/*
+ * do_index - the index(s1, s2) builtin.
+ *
+ * Returns, as a temporary number, the 1-based position of the first
+ * occurrence of s2 in s1, or 0 if there is none.  When IGNORECASE is set
+ * the comparison goes through casetable and strncasecmp().
+ */
+NODE *
+do_index(tree)
+NODE *tree;
+{
+	NODE *s1, *s2;
+	register char *p1, *p2;
+	register int l1, l2;
+	long ret;
+
+
+	s1 = tree_eval(tree->lnode);
+	s2 = tree_eval(tree->rnode->lnode);
+	force_string(s1);
+	force_string(s2);
+	p1 = s1->stptr;
+	p2 = s2->stptr;
+	l1 = s1->stlen;
+	l2 = s2->stlen;
+	ret = 0;
+	if (IGNORECASE) {
+		while (l1) {
+			if (l2 > l1)
+				break;
+			/*
+			 * Index the table through unsigned char: a plain
+			 * char may be negative for characters above 0x7f,
+			 * which would read in front of the table.
+			 */
+			if (casetable[(unsigned char) *p1] == casetable[(unsigned char) *p2]
+			    && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
+				ret = 1 + s1->stlen - l1;
+				break;
+			}
+			l1--;
+			p1++;
+		}
+	} else {
+		while (l1) {
+			if (l2 > l1)
+				break;
+			/* cheap first-character test before the full compare */
+			if (*p1 == *p2
+			    && (l2 == 1 || STREQN(p1, p2, l2))) {
+				ret = 1 + s1->stlen - l1;
+				break;
+			}
+			l1--;
+			p1++;
+		}
+	}
+	free_temp(s1);
+	free_temp(s2);
+	return tmp_number((AWKNUM) ret);
+}
+
+/*
+ * do_int - the int() builtin.  Truncates its numeric argument toward
+ * zero and returns the result as a temporary number.
+ */
+NODE *
+do_int(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	double floor P((double));
+	double ceil P((double));
+	double value;
+
+	argnode = tree_eval(tree->lnode);
+	value = force_number(argnode);
+	/* round down for non-negative values, up for negative ones */
+	value = (value >= 0) ? Floor(value) : Ceil(value);
+	free_temp(argnode);
+	return tmp_number((AWKNUM) value);
+}
+
+/*
+ * do_length - the length() builtin.  Returns the length of the string
+ * value of its argument as a temporary number.
+ */
+NODE *
+do_length(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	NODE *str;
+	int nchars;
+
+	argnode = tree_eval(tree->lnode);
+	str = force_string(argnode);
+	nchars = str->stlen;
+	free_temp(argnode);
+	return tmp_number((AWKNUM) nchars);
+}
+
+/*
+ * do_log - the log() builtin.  Returns the natural logarithm of its
+ * numeric argument as a temporary number, warning first when the
+ * argument is negative.
+ */
+NODE *
+do_log(tree)
+NODE *tree;
+{
+	NODE *argnode;
+#ifndef log
+	double log P((double));
+#endif
+	double x, result;
+
+	argnode = tree_eval(tree->lnode);
+	x = (double) force_number(argnode);
+	if (x < 0.0)
+		warning("log called with negative argument %g", x);
+	result = log(x);
+	free_temp(argnode);
+	return tmp_number((AWKNUM) result);
+}
+
+/* %e and %f formats are not properly implemented. Someone should fix them */
+/* Actually, this whole thing should be reimplemented. */
+
+/*
+ * do_sprintf - build the formatted string for sprintf() and printf().
+ *
+ * tree->lnode is evaluated as the format string; tree->rnode is the list
+ * of remaining arguments (one per node, linked through rnode), consumed
+ * left to right as the conversions need them.  The result is returned as
+ * a TEMP string node wrapping the malloc'ed output buffer.
+ *
+ * Running out of arguments is fatal; surplus arguments draw a warning
+ * when do_lint is set.  An unrecognized or malformed conversion is left
+ * in the output as literal text.
+ *
+ * NOTE(review): for %e, %f and %g the buffer is only guaranteed to hold
+ * fw + prec + 9 characters before sprintf() writes into it; a value with
+ * a very large magnitude printed with %f needs more -- still to be fixed.
+ */
+NODE *
+do_sprintf(tree)
+NODE *tree;
+{
+	/* Append l bytes starting at s to the output, growing it as needed */
+#define bchunk(s,l) if(l) {\
+	while((l)>ofre) {\
+		erealloc(obuf, char *, osiz*2, "do_sprintf");\
+		ofre+=osiz;\
+		osiz*=2;\
+	}\
+	memcpy(obuf+olen,s,(l));\
+	olen+=(l);\
+	ofre-=(l);\
+}
+
+	/*
+	 * Is there space for something L big in the buffer?  Keep doubling
+	 * until there is: a single doubling is not enough for a large
+	 * field width or precision.
+	 */
+#define chksize(l) while((l)>ofre) {\
+	erealloc(obuf, char *, osiz*2, "do_sprintf");\
+	ofre+=osiz;\
+	osiz*=2;\
+}
+
+	/*
+	 * Get the next arg to be formatted.  If we've run out of args,
+	 * set toofew and leave the enclosing switch; the caller of the
+	 * macro then reports the fatal error.
+	 */
+#define parse_next_arg() {\
+	if(!carg) { toofew = 1; break; }\
+	else {\
+		arg=tree_eval(carg->lnode);\
+		carg=carg->rnode;\
+	}\
+}
+
+	NODE *r;
+	int toofew = 0;
+	char *obuf;
+	int osiz, ofre, olen;	/* buffer size, bytes free, bytes used */
+	static char chbuf[] = "0123456789abcdef";
+	static char sp[] = " ";
+	char *s0, *s1;		/* start of pending literal text, scan position */
+	int n0;
+	NODE *sfmt, *arg;
+	register NODE *carg;
+	long fw, prec, lj, alt, big;
+	long *cur;		/* which of fw/prec digits and '*' currently set */
+	long val;
+#ifdef sun386	/* Can't cast unsigned (int/long) from ptr->value */
+	long tmp_uval;	/* on 386i 4.0.1 C compiler -- it just hangs */
+#endif
+	unsigned long uval;
+	int sgn;
+	int base;
+	char cpbuf[30];		/* if we have numbers bigger than 30 */
+	char *cend = &cpbuf[30];/* chars, we lose, but seems unlikely */
+	char *cp;
+	char *fill;
+	double tmpval;
+	char *pr_str;
+	int ucasehex = 0;
+	char signchar = 0;
+	int len;
+
+
+	emalloc(obuf, char *, 120, "do_sprintf");
+	osiz = 120;
+	ofre = osiz - 1;
+	olen = 0;
+	sfmt = tree_eval(tree->lnode);
+	sfmt = force_string(sfmt);
+	carg = tree->rnode;
+	for (s0 = s1 = sfmt->stptr, n0 = sfmt->stlen; n0-- > 0;) {
+		if (*s1 != '%') {
+			s1++;
+			continue;
+		}
+		/* flush the literal text that preceded this '%' */
+		bchunk(s0, s1 - s0);
+		s0 = s1;
+		cur = &fw;
+		fw = 0;
+		prec = 0;
+		lj = alt = big = 0;
+		/*
+		 * The sign and upper-case-hex flags belong to one conversion
+		 * only; without this reset a "%X" or "%+d" would also affect
+		 * every conversion that follows it in the format.
+		 */
+		ucasehex = 0;
+		signchar = 0;
+		fill = sp;
+		cp = cend;
+		s1++;
+
+retry:
+		--n0;
+		switch (*s1++) {
+		case '%':
+			bchunk("%", 1);
+			s0 = s1;
+			break;
+
+		case '0':
+			if (fill != sp || lj)
+				goto lose;
+			if (cur == &fw)
+				fill = "0";	/* FALL through */
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+			if (cur == 0)
+				goto lose;
+			*cur = s1[-1] - '0';
+			while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
+				--n0;
+				*cur = *cur * 10 + *s1++ - '0';
+			}
+			goto retry;
+		case '*':
+			if (cur == 0)
+				goto lose;
+			parse_next_arg();
+			*cur = force_number(arg);
+			free_temp(arg);
+			goto retry;
+		case ' ':		/* print ' ' or '-' */
+		case '+':		/* print '+' or '-' */
+			signchar = *(s1-1);
+			goto retry;
+		case '-':
+			if (lj || fill != sp)
+				goto lose;
+			lj++;
+			goto retry;
+		case '.':
+			if (cur != &fw)
+				goto lose;
+			cur = &prec;
+			goto retry;
+		case '#':
+			if (alt)
+				goto lose;
+			alt++;
+			goto retry;
+		case 'l':
+			if (big)
+				goto lose;
+			big++;
+			goto retry;
+		case 'c':
+			parse_next_arg();
+			if (arg->flags & NUMBER) {
+#ifdef sun386
+				tmp_uval = arg->numbr;
+				uval= (unsigned long) tmp_uval;
+#else
+				uval = (unsigned long) arg->numbr;
+#endif
+				cpbuf[0] = uval;
+				prec = 1;
+				pr_str = cpbuf;
+				goto dopr_string;
+			}
+			if (! prec)
+				prec = 1;
+			else if (prec > arg->stlen)
+				prec = arg->stlen;
+			pr_str = arg->stptr;
+			goto dopr_string;
+		case 's':
+			parse_next_arg();
+			arg = force_string(arg);
+			if (!prec || prec > arg->stlen)
+				prec = arg->stlen;
+			pr_str = arg->stptr;
+
+		dopr_string:
+			/* pad on the left unless left-justified */
+			if (fw > prec && !lj) {
+				while (fw > prec) {
+					bchunk(sp, 1);
+					fw--;
+				}
+			}
+			bchunk(pr_str, (int) prec);
+			/* any width still outstanding is right padding */
+			if (fw > prec) {
+				while (fw > prec) {
+					bchunk(sp, 1);
+					fw--;
+				}
+			}
+			s0 = s1;
+			free_temp(arg);
+			break;
+		case 'd':
+		case 'i':
+			parse_next_arg();
+			val = (long) force_number(arg);
+			free_temp(arg);
+			if (val < 0) {
+				sgn = 1;
+				val = -val;
+			} else
+				sgn = 0;
+			/* digits are generated backwards from the end of cpbuf */
+			do {
+				*--cp = '0' + val % 10;
+				val /= 10;
+			} while (val);
+			if (sgn)
+				*--cp = '-';
+			else if (signchar)
+				*--cp = signchar;
+			if (prec > fw)
+				fw = prec;
+			prec = cend - cp;
+			if (fw > prec && !lj) {
+				/* with zero fill the sign goes before the zeroes */
+				if (fill != sp && (*cp == '-' || signchar)) {
+					bchunk(cp, 1);
+					cp++;
+					prec--;
+					fw--;
+				}
+				while (fw > prec) {
+					bchunk(fill, 1);
+					fw--;
+				}
+			}
+			bchunk(cp, (int) prec);
+			if (fw > prec) {
+				while (fw > prec) {
+					bchunk(fill, 1);
+					fw--;
+				}
+			}
+			s0 = s1;
+			break;
+		case 'u':
+			base = 10;
+			goto pr_unsigned;
+		case 'o':
+			base = 8;
+			goto pr_unsigned;
+		case 'X':
+			ucasehex = 1;
+			/* FALL through */
+		case 'x':
+			base = 16;
+			goto pr_unsigned;
+		pr_unsigned:
+			parse_next_arg();
+			uval = (unsigned long) force_number(arg);
+			free_temp(arg);
+			do {
+				*--cp = chbuf[uval % base];
+				if (ucasehex && isalpha(*cp))
+					*cp = toupper(*cp);
+				uval /= base;
+			} while (uval);
+			if (alt && (base == 8 || base == 16)) {
+				if (base == 16) {
+					if (ucasehex)
+						*--cp = 'X';
+					else
+						*--cp = 'x';
+				}
+				*--cp = '0';
+			}
+			prec = cend - cp;
+			if (fw > prec && !lj) {
+				while (fw > prec) {
+					bchunk(fill, 1);
+					fw--;
+				}
+			}
+			bchunk(cp, (int) prec);
+			if (fw > prec) {
+				while (fw > prec) {
+					bchunk(fill, 1);
+					fw--;
+				}
+			}
+			s0 = s1;
+			break;
+		case 'g':
+			parse_next_arg();
+			tmpval = force_number(arg);
+			free_temp(arg);
+			chksize(fw + prec + 9);	/* 9==slop */
+
+			/* rebuild a format for the C library in cpbuf */
+			cp = cpbuf;
+			*cp++ = '%';
+			if (lj)
+				*cp++ = '-';
+			if (fill != sp)
+				*cp++ = '0';
+#ifndef GFMT_WORKAROUND
+			if (cur != &fw) {
+				(void) strcpy(cp, "*.*g");
+				(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
+			} else {
+				(void) strcpy(cp, "*g");
+				(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
+			}
+#else	/* GFMT_WORKAROUND */
+			{
+				char *gptr, gbuf[120];
+#define DEFAULT_G_PRECISION 6
+				if (fw + prec + 9 > sizeof gbuf) {	/* 9==slop */
+					emalloc(gptr, char *, fw+prec+9, "do_sprintf(gfmt)");
+				} else
+					gptr = gbuf;
+				(void) gfmt((double) tmpval, cur != &fw ?
+					(int) prec : DEFAULT_G_PRECISION, gptr);
+				*cp++ = '*', *cp++ = 's', *cp = '\0';
+				(void) sprintf(obuf + olen, cpbuf, (int) fw, gptr);
+				if (fill != sp && *gptr == ' ') {
+					char *p = gptr;
+					do { *p++ = '0'; } while (*p == ' ');
+				}
+				if (gptr != gbuf) free(gptr);
+			}
+#endif	/* GFMT_WORKAROUND */
+			len = strlen(obuf + olen);
+			ofre -= len;
+			olen += len;
+			s0 = s1;
+			break;
+
+		case 'f':
+			parse_next_arg();
+			tmpval = force_number(arg);
+			free_temp(arg);
+			chksize(fw + prec + 9);	/* 9==slop */
+
+			cp = cpbuf;
+			*cp++ = '%';
+			if (lj)
+				*cp++ = '-';
+			if (fill != sp)
+				*cp++ = '0';
+			if (cur != &fw) {
+				(void) strcpy(cp, "*.*f");
+				(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
+			} else {
+				(void) strcpy(cp, "*f");
+				(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
+			}
+			len = strlen(obuf + olen);
+			ofre -= len;
+			olen += len;
+			s0 = s1;
+			break;
+		case 'e':
+			parse_next_arg();
+			tmpval = force_number(arg);
+			free_temp(arg);
+			chksize(fw + prec + 9);	/* 9==slop */
+			cp = cpbuf;
+			*cp++ = '%';
+			if (lj)
+				*cp++ = '-';
+			if (fill != sp)
+				*cp++ = '0';
+			if (cur != &fw) {
+				(void) strcpy(cp, "*.*e");
+				(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
+			} else {
+				(void) strcpy(cp, "*e");
+				(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
+			}
+			len = strlen(obuf + olen);
+			ofre -= len;
+			olen += len;
+			s0 = s1;
+			break;
+
+		default:
+	lose:
+			/* s0 still points at the '%', so the text is kept literally */
+			break;
+		}
+		if (toofew)
+			fatal("%s\n\t%s\n\t%*s%s",
+			      "not enough arguments to satisfy format string",
+			      sfmt->stptr, s1 - sfmt->stptr - 2, "",
+			      "^ ran out for this one"
+			     );
+	}
+	if (do_lint && carg != NULL)
+		warning("too many arguments supplied for format string");
+	/* copy whatever literal text follows the last conversion */
+	bchunk(s0, s1 - s0);
+	free_temp(sfmt);
+	r = make_str_node(obuf, olen, ALREADY_MALLOCED);
+	r->flags |= TEMP;
+	return r;
+}
+
+/*
+ * do_printf - the printf statement.  tree->lnode holds the format and
+ * arguments handed to do_sprintf(); tree->rnode, when present, is the
+ * output redirection.  Nothing is printed if the redirection cannot be
+ * set up or has no open stream.
+ */
+void
+do_printf(tree)
+register NODE *tree;
+{
+	struct redirect *rp = NULL;
+	register FILE *fp;
+
+	if (tree->rnode == NULL)
+		fp = stdout;
+	else {
+		int errflg;	/* not used, sigh */
+
+		rp = redirect(tree->rnode, &errflg);
+		if (rp == NULL)
+			return;
+		fp = rp->fp;
+		if (fp == NULL)
+			return;
+	}
+
+	/* from here on tree is the formatted result string */
+	tree = do_sprintf(tree->lnode);
+	efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp , 1);
+	free_temp(tree);
+}
+
+/*
+ * do_sqrt - the sqrt() builtin.  Returns the square root of its numeric
+ * argument as a temporary number, warning first when the argument is
+ * negative.
+ */
+NODE *
+do_sqrt(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	double x;
+	double root;
+	extern double sqrt P((double));
+
+	argnode = tree_eval(tree->lnode);
+	x = (double) force_number(argnode);
+	free_temp(argnode);
+	if (x < 0.0)
+		warning("sqrt called with negative argument %g", x);
+	root = sqrt(x);
+	return tmp_number((AWKNUM) root);
+}
+
+/*
+ * do_substr - the substr(s, start [, length]) builtin.
+ *
+ * start is 1-based; values below 1 are treated as 1.  Without a third
+ * argument the rest of the string is returned.  The null string is
+ * returned when start lies beyond the end of s or when the requested
+ * length is less than one character (including negative lengths).
+ */
+NODE *
+do_substr(tree)
+NODE *tree;
+{
+	NODE *t1, *t2, *t3;
+	NODE *r;
+	register int indx;
+	size_t length;
+	size_t remaining;
+	int have_length = 0;	/* was a third argument supplied? */
+	double d_length = 0.0;
+
+	t1 = tree_eval(tree->lnode);
+	t2 = tree_eval(tree->rnode->lnode);
+	if (tree->rnode->rnode != NULL) {
+		t3 = tree_eval(tree->rnode->rnode->lnode);
+		/*
+		 * Keep the length as a double for now: converting a
+		 * negative value straight to size_t would turn it into
+		 * a huge positive count.
+		 */
+		d_length = (double) force_number(t3);
+		free_temp(t3);
+		have_length = 1;
+	}
+	indx = (int) force_number(t2) - 1;
+	free_temp(t2);
+	/* stlen is only meaningful once the string value exists */
+	t1 = force_string(t1);
+	if (indx < 0)
+		indx = 0;
+	/* written as !(x >= 1.0) so that a NaN length is rejected too */
+	if (indx >= t1->stlen || (have_length && !(d_length >= 1.0))) {
+		free_temp(t1);
+		return Nnull_string;
+	}
+	remaining = t1->stlen - indx;
+	if (!have_length || d_length > (double) remaining)
+		length = remaining;
+	else
+		length = (size_t) d_length;
+	r = tmp_string(t1->stptr + indx, length);
+	free_temp(t1);
+	return r;
+}
+
+/*
+ * do_strftime - the strftime(format [, timestamp]) builtin.
+ *
+ * Formats the given time stamp, or the current time when the second
+ * argument is missing, in local time according to format and returns
+ * the text as a temporary string.  The result is built in a fixed
+ * 100-byte buffer.
+ */
+NODE *
+do_strftime(tree)
+NODE *tree;
+{
+	NODE *t1, *t2;
+	struct tm *tm;
+	time_t fclock;
+	char buf[100];
+	int ret;
+
+	t1 = force_string(tree_eval(tree->lnode));
+
+	if (tree->rnode == NULL)	/* second arg. missing, default */
+		(void) time(&fclock);
+	else {
+		t2 = tree_eval(tree->rnode->lnode);
+		fclock = (time_t) force_number(t2);
+		free_temp(t2);
+	}
+	tm = localtime(&fclock);
+
+	ret = strftime(buf, sizeof buf, t1->stptr, tm);
+	/* the format string is no longer needed; do not leak the temporary */
+	free_temp(t1);
+
+	return tmp_string(buf, ret);
+}
+
+/*
+ * do_systime - the systime() builtin.  Returns the current time, as
+ * reported by time(), as a temporary number.  The argument is unused.
+ */
+NODE *
+do_systime(tree)
+NODE *tree;
+{
+	time_t now;
+	AWKNUM stamp;
+
+	(void) time(&now);
+	stamp = (AWKNUM) now;
+	return tmp_number(stamp);
+}
+
+/*
+ * do_system - the system() builtin.  Flushes pending output, runs the
+ * argument as a command through system(), and returns bits 8-15 of the
+ * value system() reported as a temporary number.  An empty command is
+ * not run and yields 0.
+ */
+NODE *
+do_system(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	int status = 0;
+	char *command;
+
+	(void) flush_io ();	/* so output is synchronous with gawk's */
+	argnode = tree_eval(tree->lnode);
+	command = force_string(argnode)->stptr;
+	if (command != NULL && *command != '\0')
+		status = (system(command) >> 8) & 0xff;
+	free_temp(argnode);
+	return tmp_number((AWKNUM) status);
+}
+
+/*
+ * do_print - the print statement.
+ *
+ * tree->lnode is the list of expressions to print and tree->rnode, when
+ * present, the output redirection.  The values are written separated by
+ * OFS and followed by ORS.  Numbers are converted with OFMT unless OFMT
+ * and CONVFMT are the same, in which case the ordinary string
+ * conversion is used.  Nothing is printed if the redirection cannot be
+ * set up or has no open stream.
+ *
+ * NOTE(review): the OFMT conversion goes through a fixed 100-byte
+ * buffer; an OFMT that produces longer text would overrun it.
+ */
+void
+do_print(tree)
+register NODE *tree;
+{
+	register NODE *t1;
+	struct redirect *rp = NULL;
+	register FILE *fp;
+	register char *s;
+
+	if (tree->rnode) {
+		int errflg;	/* not used, sigh */
+
+		rp = redirect(tree->rnode, &errflg);
+		if (rp) {
+			fp = rp->fp;
+			if (!fp)
+				return;
+		} else
+			return;
+	} else
+		fp = stdout;
+	tree = tree->lnode;
+	while (tree) {
+		t1 = tree_eval(tree->lnode);
+		if (t1->flags & NUMBER) {
+			if (OFMTidx == CONVFMTidx)
+				(void) force_string(t1);
+			else {
+				char buf[100];
+
+				sprintf(buf, OFMT, t1->numbr);
+				/*
+				 * Release the numeric value before t1 is
+				 * pointed at its string form; otherwise a
+				 * temporary is leaked for each number printed.
+				 */
+				free_temp(t1);
+				t1 = tmp_string(buf, strlen(buf));
+			}
+		}
+		efwrite(t1->stptr, sizeof(char), t1->stlen, fp, "print", rp, 0);
+		free_temp(t1);
+		tree = tree->rnode;
+		if (tree) {
+			/* more values follow: emit the field separator */
+			s = OFS;
+			if (OFSlen)
+				efwrite(s, sizeof(char), OFSlen, fp, "print", rp, 0);
+		}
+	}
+	s = ORS;
+	if (ORSlen)
+		efwrite(s, sizeof(char), ORSlen, fp, "print", rp, 1);
+}
+
+/*
+ * do_tolower - the tolower() builtin.  Returns a temporary copy of the
+ * argument's string value with every upper-case letter converted to
+ * lower case.
+ */
+NODE *
+do_tolower(tree)
+NODE *tree;
+{
+	NODE *t1, *t2;
+	register char *cp, *cp2;
+
+	t1 = tree_eval(tree->lnode);
+	t1 = force_string(t1);
+	t2 = tmp_string(t1->stptr, t1->stlen);
+	/*
+	 * The ctype functions are only defined for values representable
+	 * as unsigned char (or EOF); a plain char may be negative.
+	 */
+	for (cp = t2->stptr, cp2 = t2->stptr + t2->stlen; cp < cp2; cp++)
+		if (isupper((unsigned char) *cp))
+			*cp = tolower((unsigned char) *cp);
+	free_temp(t1);
+	return t2;
+}
+
+/*
+ * do_toupper - the toupper() builtin.  Returns a temporary copy of the
+ * argument's string value with every lower-case letter converted to
+ * upper case.
+ */
+NODE *
+do_toupper(tree)
+NODE *tree;
+{
+	NODE *t1, *t2;
+	register char *cp, *cp2;
+
+	t1 = tree_eval(tree->lnode);
+	t1 = force_string(t1);
+	t2 = tmp_string(t1->stptr, t1->stlen);
+	/*
+	 * The ctype functions are only defined for values representable
+	 * as unsigned char (or EOF); a plain char may be negative.
+	 */
+	for (cp = t2->stptr, cp2 = t2->stptr + t2->stlen; cp < cp2; cp++)
+		if (islower((unsigned char) *cp))
+			*cp = toupper((unsigned char) *cp);
+	free_temp(t1);
+	return t2;
+}
+
+/*
+ * do_atan2 - the atan2(y, x) builtin.  Both arguments are evaluated as
+ * numbers and the library's atan2() of them is returned as a temporary
+ * number.
+ */
+NODE *
+do_atan2(tree)
+NODE *tree;
+{
+	NODE *ynode, *xnode;
+	extern double atan2 P((double, double));
+	double y, x;
+	double angle;
+
+	ynode = tree_eval(tree->lnode);
+	xnode = tree_eval(tree->rnode->lnode);
+	y = force_number(ynode);
+	x = force_number(xnode);
+	free_temp(ynode);
+	free_temp(xnode);
+	angle = atan2(y, x);
+	return tmp_number((AWKNUM) angle);
+}
+
+/*
+ * do_sin - the sin() builtin.  Returns the sine of its numeric argument
+ * as a temporary number.
+ */
+NODE *
+do_sin(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	extern double sin P((double));
+	double x;
+	double result;
+
+	argnode = tree_eval(tree->lnode);
+	x = (double) force_number(argnode);
+	result = sin(x);
+	free_temp(argnode);
+	return tmp_number((AWKNUM) result);
+}
+
+/*
+ * do_cos - the cos() builtin.  Returns the cosine of its numeric
+ * argument as a temporary number.
+ */
+NODE *
+do_cos(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	extern double cos P((double));
+	double x;
+	double result;
+
+	argnode = tree_eval(tree->lnode);
+	x = (double) force_number(argnode);
+	result = cos(x);
+	free_temp(argnode);
+	return tmp_number((AWKNUM) result);
+}
+
+static int firstrand = 1;
+static char state[256];
+
+/*
+ * do_rand - the rand() builtin.  Returns a pseudo-random temporary
+ * number r with 0 <= r < 1.  The generator is set up with seed 1 on
+ * first use if srand() has not been called.
+ */
+/* ARGSUSED */
+NODE *
+do_rand(tree)
+NODE *tree;
+{
+	/*
+	 * random() yields values from 0 through 2^31-1 whatever the width
+	 * of long, so scale by that bound rather than by LONG_MAX (which
+	 * makes every result nearly 0 where long is 64 bits).  Taking the
+	 * remainder first keeps the result strictly below 1.
+	 */
+	long rmax = 0x7fffffffL;
+
+	if (firstrand) {
+		(void) initstate((unsigned) 1, state, sizeof state);
+		srandom(1);
+		firstrand = 0;
+	}
+	return tmp_number((AWKNUM) (random() % rmax) / rmax);
+}
+
+/*
+ * do_srand - the srand([seed]) builtin.  Reseeds the generator with the
+ * given value, or with the current time when called without an
+ * argument, and returns the seed that was in effect before the call.
+ */
+NODE *
+do_srand(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	static long save_seed = 0;
+	long previous = save_seed;	/* SVR4 awk srand returns previous seed */
+
+	/* select our private state array, creating it on first use */
+	if (firstrand)
+		(void) initstate((unsigned) 1, state, sizeof state);
+	else
+		(void) setstate(state);
+
+	if (tree != NULL) {
+		argnode = tree_eval(tree->lnode);
+		save_seed = (long) force_number(argnode);
+		srandom((int) save_seed);
+		free_temp(argnode);
+	} else {
+		save_seed = (long) time((time_t *) 0);
+		srandom((int) save_seed);
+	}
+	firstrand = 0;
+	return tmp_number((AWKNUM) previous);
+}
+
+/*
+ * do_match - the match(string, regexp) builtin.  Searches the string for
+ * the regular expression, stores the 1-based start and the length of
+ * the match in RSTART and RLENGTH (0 and -1 when there is no match),
+ * and returns the RSTART value as a temporary number.
+ */
+NODE *
+do_match(tree)
+NODE *tree;
+{
+	NODE *subject;
+	int start;
+	AWKNUM mlen;
+	Regexp *re;
+
+	subject = force_string(tree_eval(tree->lnode));
+	re = re_update(tree->rnode->lnode);
+	start = research(re, subject->stptr, 0, subject->stlen, 1);
+	if (start < 0) {		/* match failed */
+		start = 0;
+		mlen = -1.0;
+	} else {			/* match succeeded */
+		start++;		/* 1-based indexing */
+		mlen = REEND(re, subject->stptr) - RESTART(re, subject->stptr);
+	}
+	free_temp(subject);
+
+	unref(RSTART_node->var_value);
+	RSTART_node->var_value = make_number((AWKNUM) start);
+	unref(RLENGTH_node->var_value);
+	RLENGTH_node->var_value = make_number(mlen);
+	return tmp_number((AWKNUM) start);
+}
+
+/*
+ * sub_common - shared implementation of sub() and gsub().
+ *
+ * tree is the argument list: tree->lnode is the regular expression,
+ * the next list element the replacement text and the third the target
+ * to modify.  With global zero only the first match is replaced
+ * (sub); otherwise every match is (gsub).  The rewritten text replaces
+ * the target's string buffer, and the number of substitutions made is
+ * returned as a temporary number.
+ *
+ * In the replacement text an unescaped `&' stands for the matched text,
+ * while `\&' and `\\' produce a literal `&' and `\'.
+ */
+static NODE *
+sub_common(tree, global)
+NODE *tree;
+int global;
+{
+ register char *scan;
+ register char *bp, *cp;
+ char *buf;
+ int buflen;
+ register char *matchend;
+ register int len;
+ char *matchstart;
+ char *text;
+ int textlen;
+ char *repl;
+ char *replend;
+ int repllen;
+ int sofar;
+ int ampersands;
+ int matches = 0;
+ Regexp *rp;
+ NODE *s; /* subst. pattern */
+ NODE *t; /* string to make sub. in; $0 if none given */
+ NODE *tmp;
+ NODE **lhs = &tree; /* value not used -- just different from NULL */
+ int priv = 0;
+ Func_ptr after_assign = NULL;
+
+ /* pick the three arguments off the list */
+ tmp = tree->lnode;
+ rp = re_update(tmp);
+
+ tree = tree->rnode;
+ s = tree->lnode;
+
+ tree = tree->rnode;
+ tmp = tree->lnode;
+ t = force_string(tree_eval(tmp));
+
+ /* do the search early to avoid work on non-match */
+ /*
+ * NOTE(review): the second operand below sets matches to 1 as a side
+ * effect when the match starts beyond the end of the text, so 1 is
+ * returned without any substitution -- confirm that this is intended.
+ */
+ if (research(rp, t->stptr, 0, t->stlen, 1) == -1 ||
+ (RESTART(rp, t->stptr) > t->stlen) && (matches = 1)) {
+ free_temp(t);
+ return tmp_number((AWKNUM) matches);
+ }
+
+ /* a constant target has nothing to assign the result back to */
+ if (tmp->type == Node_val)
+ lhs = NULL;
+ else
+ lhs = get_lhs(tmp, &after_assign);
+ t->flags |= STRING;
+ /*
+ * create a private copy of the string
+ */
+ if (t->stref > 1 || (t->flags & PERM)) {
+ unsigned int saveflags;
+
+ saveflags = t->flags;
+ t->flags &= ~MALLOC;
+ tmp = dupnode(t);
+ t->flags = saveflags;
+ t = tmp;
+ priv = 1;
+ }
+ text = t->stptr;
+ textlen = t->stlen;
+ buflen = textlen + 2;
+
+ s = force_string(tree_eval(s));
+ repl = s->stptr;
+ replend = repl + s->stlen;
+ repllen = replend - repl;
+ emalloc(buf, char *, buflen, "do_sub");
+ /*
+ * Work out how long the replacement is once escapes are removed;
+ * each `&' is counted separately because its length depends on the
+ * match it stands for.
+ */
+ ampersands = 0;
+ for (scan = repl; scan < replend; scan++) {
+ if (*scan == '&') {
+ repllen--;
+ ampersands++;
+ } else if (*scan == '\\' && (*(scan+1) == '&' || *(scan+1) == '\\')) {
+ repllen--;
+ scan++;
+ }
+ }
+
+ bp = buf;
+ for (;;) {
+ matches++;
+ matchstart = t->stptr + RESTART(rp, t->stptr);
+ matchend = t->stptr + REEND(rp, t->stptr);
+
+ /*
+ * create the result, copying in parts of the original
+ * string
+ */
+ len = matchstart - text + repllen
+ + ampersands * (matchend - matchstart);
+ sofar = bp - buf;
+ /* grow the buffer until this piece fits; bp must follow buf */
+ while (buflen - sofar - len - 1 < 0) {
+ buflen *= 2;
+ erealloc(buf, char *, buflen, "do_sub");
+ bp = buf + sofar;
+ }
+ /* the unmatched text in front of the match */
+ for (scan = text; scan < matchstart; scan++)
+ *bp++ = *scan;
+ /* the replacement, expanding `&' and dropping escape backslashes */
+ for (scan = repl; scan < replend; scan++)
+ if (*scan == '&')
+ for (cp = matchstart; cp < matchend; cp++)
+ *bp++ = *cp;
+ else if (*scan == '\\' && (*(scan+1) == '&' || *(scan+1) == '\\')) {
+ scan++;
+ *bp++ = *scan;
+ } else
+ *bp++ = *scan;
+ /* after an empty match, copy one character so the scan advances */
+ if (global && matchstart == matchend && matchend < text + textlen) {
+ *bp++ = *matchend;
+ matchend++;
+ }
+ textlen = text + textlen - matchend;
+ text = matchend;
+ if (!global || textlen <= 0 ||
+ research(rp, t->stptr, text-t->stptr, textlen, 1) == -1)
+ break;
+ }
+ sofar = bp - buf;
+ /* resize the buffer unless it already fits the remaining text exactly */
+ if (buflen - sofar - textlen - 1) {
+ buflen = sofar + textlen + 2;
+ erealloc(buf, char *, buflen, "do_sub");
+ bp = buf + sofar;
+ }
+ /* the unmatched tail of the original text */
+ for (scan = matchend; scan < text + textlen; scan++)
+ *bp++ = *scan;
+ textlen = bp - buf;
+ /* hand the new buffer to the node in place of the old text */
+ free(t->stptr);
+ t->stptr = buf;
+ t->stlen = textlen;
+
+ free_temp(s);
+ if (matches > 0 && lhs) {
+ if (priv) {
+ unref(*lhs);
+ *lhs = t;
+ }
+ if (after_assign)
+ (*after_assign)();
+ /* the value is now a string only */
+ t->flags &= ~(NUM|NUMBER);
+ }
+ return tmp_number((AWKNUM) matches);
+}
+
+/*
+ * do_gsub - the gsub() builtin: replace every match.  Returns the
+ * substitution count produced by sub_common().
+ */
+NODE *
+do_gsub(tree)
+NODE *tree;
+{
+	NODE *count;
+
+	count = sub_common(tree, 1);
+	return count;
+}
+
+/*
+ * do_sub - the sub() builtin: replace the first match only.  Returns
+ * the substitution count produced by sub_common().
+ */
+NODE *
+do_sub(tree)
+NODE *tree;
+{
+	NODE *count;
+
+	count = sub_common(tree, 0);
+	return count;
+}
+
+#ifdef GFMT_WORKAROUND
+ /*
+ * printf's %g format [can't rely on gcvt()]
+ * caveat: don't use as argument to *printf()!
+ *
+ * Formats g into buf with prec significant digits (at least 1) and
+ * returns buf.  Fixed notation is used when the decimal exponent is
+ * at least -4 and less than prec, exponent notation otherwise; in
+ * both cases trailing zeroes of the fraction, and a decimal point
+ * left with nothing after it, are removed.
+ */
+char *
+gfmt(g, prec, buf)
+double g; /* value to format */
+int prec; /* indicates desired significant digits, not decimal places */
+char *buf; /* return buffer; assumed big enough to hold result */
+{
+ if (g == 0.0) {
+ (void) strcpy(buf, "0"); /* easy special case */
+ } else {
+ register char *d, *e, *p;
+
+ /* start with 'e' format (it'll provide nice exponent) */
+ if (prec < 1) prec = 1; /* at least 1 significant digit */
+ (void) sprintf(buf, "%.*e", prec - 1, g);
+ if ((e = strchr(buf, 'e')) != 0) { /* find exponent */
+ int exp = atoi(e+1); /* fetch exponent */
+ if (exp >= -4 && exp < prec) { /* per K&R2, B1.2 */
+ /* switch to 'f' format and re-do */
+ prec -= (exp + 1); /* decimal precision */
+ (void) sprintf(buf, "%.*f", prec, g);
+ /* no exponent part now: e marks the end of the digits */
+ e = buf + strlen(buf);
+ }
+ if ((d = strchr(buf, '.')) != 0) {
+ /* remove trailing zeroes and decimal point */
+ /* p backs up from e over zeroes, stopping at the point */
+ for (p = e; p > d && *--p == '0'; ) continue;
+ if (*p == '.') --p;
+ /* p is the last character to keep; close the gap if any */
+ if (++p < e) /* copy exponent and NUL */
+ while ((*p++ = *e++) != '\0') continue;
+ }
+ }
+ }
+ return buf;
+}
+#endif /* GFMT_WORKAROUND */
diff --git a/gnu/usr.bin/awk/config.h b/gnu/usr.bin/awk/config.h
new file mode 100644
index 000000000000..8c20953ed531
--- /dev/null
+++ b/gnu/usr.bin/awk/config.h
@@ -0,0 +1,272 @@
+/*
+ * config.h -- configuration definitions for gawk.
+ *
+ * For generic 4.4 alpha
+ */
+
+/*
+ * Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file isolates configuration dependencies for gnu awk.
+ * You should know something about your system, perhaps by having
+ * a manual handy, when you edit this file. You should copy config.h-dist
+ * to config.h, and edit config.h. Do not modify config.h-dist, so that
+ * it will be easy to apply any patches that may be distributed.
+ *
+ * The general idea is that systems conforming to the various standards
+ * should need to do the least amount of changing. Defining the various
+ * items in this file usually means that your system is missing that
+ * particular feature.
+ *
+ * The order of preference in standard conformance is ANSI C, POSIX,
+ * and the SVID.
+ *
+ * If you have no clue as to what's going on with your system, try
+ * compiling gawk without editing this file and see what shows up
+ * missing in the link stage. From there, you can probably figure out
+ * which defines to turn on.
+ */
+
+/**************************/
+/* Miscellaneous features */
+/**************************/
+
+/*
+ * BLKSIZE_MISSING
+ *
+ * Check your /usr/include/sys/stat.h file. If the stat structure
+ * does not have a member named st_blksize, define this. (This will
+ * most likely be the case on most System V systems prior to V.4.)
+ */
+/* #define BLKSIZE_MISSING 1 */
+
+/*
+ * SIGTYPE
+ *
+ * The return type of the routines passed to the signal function.
+ * Modern systems use `void', older systems use `int'.
+ * If left undefined, it will default to void.
+ */
+/* #define SIGTYPE int */
+
+/*
+ * SIZE_T_MISSING
+ *
+ * If your system has no typedef for size_t, define this to get a default
+ */
+/* #define SIZE_T_MISSING 1 */
+
+/*
+ * CHAR_UNSIGNED
+ *
+ * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
+ * then define this for use in regex.c
+ */
+/* #define CHAR_UNSIGNED 1 */
+
+/*
+ * HAVE_UNDERSCORE_SETJMP
+ *
+ * Check in your /usr/include/setjmp.h file. If there are routines
+ * there named _setjmp and _longjmp, then you should define this.
+ * Typically only systems derived from Berkeley Unix have this.
+ */
+#define HAVE_UNDERSCORE_SETJMP 1
+
+/***********************************************/
+/* Missing library subroutines or system calls */
+/***********************************************/
+
+/*
+ * MEMCMP_MISSING
+ * MEMCPY_MISSING
+ * MEMSET_MISSING
+ *
+ * These three routines are for manipulating blocks of memory. Most
+ * likely they will either all three be present or all three be missing,
+ * so they're grouped together.
+ */
+/* #define MEMCMP_MISSING 1 */
+/* #define MEMCPY_MISSING 1 */
+/* #define MEMSET_MISSING 1 */
+
+/*
+ * RANDOM_MISSING
+ *
+ * Your system does not have the random(3) suite of random number
+ * generating routines. These are different than the old rand(3)
+ * routines!
+ */
+/* #define RANDOM_MISSING 1 */
+
+/*
+ * STRCASE_MISSING
+ *
+ * Your system does not have the strcasecmp() and strncasecmp()
+ * routines that originated in Berkeley Unix.
+ */
+/* #define STRCASE_MISSING 1 */
+
+/*
+ * STRCHR_MISSING
+ *
+ * Your system does not have the strchr() and strrchr() functions.
+ */
+/* #define STRCHR_MISSING 1 */
+
+/*
+ * STRERROR_MISSING
+ *
+ * Your system lacks the ANSI C strerror() routine for returning the
+ * strings associated with errno values.
+ */
+/* #define STRERROR_MISSING 1 */
+
+/*
+ * STRTOD_MISSING
+ *
+ * Your system does not have the strtod() routine for converting
+ * strings to double precision floating point values.
+ */
+/* #define STRTOD_MISSING 1 */
+
+/*
+ * STRFTIME_MISSING
+ *
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+/* #define STRFTIME_MISSING 1 */
+
+/*
+ * TZSET_MISSING
+ *
+ * If you have a 4.2 BSD vintage system, then the strftime() routine
+ * supplied in the missing directory won't be enough, because it relies on the
+ * tzset() routine from System V / Posix. Fortunately, there is an
+ * emulation for tzset() too that should do the trick. If you don't
+ * have tzset(), define this.
+ */
+/* #define TZSET_MISSING 1 */
+
+/*
+ * TZNAME_MISSING
+ *
+ * Some systems do not support the external variables tzname and daylight.
+ * If this is the case *and* strftime() is missing, define this.
+ */
+/* #define TZNAME_MISSING 1 */
+
+/*
+ * STDC_HEADERS
+ *
+ * If your system does have ANSI compliant header files that
+ * provide prototypes for library routines, then define this.
+ */
+#define STDC_HEADERS 1
+
+/*
+ * NO_TOKEN_PASTING
+ *
+ * If your compiler defines __STDC__ but does not support token
+ * pasting (tok##tok), then define this.
+ */
+/* #define NO_TOKEN_PASTING 1 */
+
+/*****************************************************************/
+/* Stuff related to the Standard I/O Library. */
+/*****************************************************************/
+/* Much of this is (still, unfortunately) black magic in nature. */
+/* You may have to use some or all of these together to get gawk */
+/* to work correctly. */
+/*****************************************************************/
+
+/*
+ * NON_STD_SPRINTF
+ *
+ * Look in your /usr/include/stdio.h file. If the return type of the
+ * sprintf() function is NOT `int', define this.
+ */
+/* #define NON_STD_SPRINTF 1 */
+
+/*
+ * VPRINTF_MISSING
+ *
+ * Define this if your system lacks vprintf() and the other routines
+ * that go with it. This will trigger an attempt to use _doprnt().
+ * If you don't have that, this attempt will fail and you are on your own.
+ */
+/* #define VPRINTF_MISSING 1 */
+
+/*
+ * Casts from size_t to int and back. These will become unnecessary
+ * at some point in the future, but for now are required where the
+ * two types are a different representation.
+ */
+/* #define SZTC */
+/* #define INTC */
+
+/*
+ * SYSTEM_MISSING
+ *
+ * Define this if your library does not provide a system function
+ * or you are not entirely happy with it and would rather use
+ * a provided replacement (atari only).
+ */
+/* #define SYSTEM_MISSING 1 */
+
+/*
+ * FMOD_MISSING
+ *
+ * Define this if your system lacks the fmod() function and modf() will
+ * be used instead.
+ */
+/* #define FMOD_MISSING 1 */
+
+
+/*******************************/
+/* Gawk configuration options. */
+/*******************************/
+
+/*
+ * DEFPATH
+ *
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined. The default
+ * definition is provided here. Most likely you should not change
+ * this.
+ */
+
+/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
+/* #define ENVSEP ':' */
+
+/*
+ * alloca already has a prototype defined - don't redefine it
+ */
+#define ALLOCA_PROTO 1
+
+/*
+ * srandom already has a prototype defined - don't redefine it
+ */
+#define SRANDOM_PROTO 1
+
+/* anything that follows is for system-specific short-term kludges */
diff --git a/gnu/usr.bin/awk/dfa.c b/gnu/usr.bin/awk/dfa.c
new file mode 100644
index 000000000000..5293c755871d
--- /dev/null
+++ b/gnu/usr.bin/awk/dfa.c
@@ -0,0 +1,2291 @@
+/* dfa.c - deterministic extended regexp routines for GNU
+ Copyright (C) 1988 Free Software Foundation, Inc.
+ Written June, 1988 by Mike Haertel
+ Modified July, 1988 by Arthur David Olson
+ to assist BMG speedups
+
+ NO WARRANTY
+
+ BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
+NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
+WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
+RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
+WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
+AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
+CORRECTION.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
+STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
+WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
+LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
+OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
+DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
+A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
+PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
+
+ GENERAL PUBLIC LICENSE TO COPY
+
+ 1. You may copy and distribute verbatim copies of this source file
+as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy a valid copyright notice "Copyright
+ (C) 1988 Free Software Foundation, Inc."; and include following the
+copyright notice a verbatim copy of the above disclaimer of warranty
+and of this License. You may charge a distribution fee for the
+physical act of transferring a copy.
+
+ 2. You may modify your copy or copies of this source file or
+any portion of it, and copy and distribute such modifications under
+the terms of Paragraph 1 above, provided that you also do the following:
+
+ a) cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change; and
+
+ b) cause the whole of any work that you distribute or publish,
+ that in whole or in part contains or is a derivative of this
+ program or any part thereof, to be licensed at no charge to all
+ third parties on terms identical to those contained in this
+ License Agreement (except that you may choose to grant more extensive
+ warranty protection to some or all third parties, at your option).
+
+ c) You may charge a distribution fee for the physical act of
+ transferring a copy, and you may at your option offer warranty
+ protection in exchange for a fee.
+
+Mere aggregation of another unrelated program with this program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other program under the scope of these terms.
+
+ 3. You may copy and distribute this program or any portion of it in
+compiled, executable or object code form under the terms of Paragraphs
+1 and 2 above provided that you do the following:
+
+ a) accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ b) accompany it with a written offer, valid for at least three
+ years, to give any third party free (except for a nominal
+ shipping charge) a complete machine-readable copy of the
+ corresponding source code, to be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ c) accompany it with the information you received as to where the
+ corresponding source code may be obtained. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form alone.)
+
+For an executable file, complete source code means all the source code for
+all modules it contains; but, as a special exception, it need not include
+source code for modules which are standard libraries that accompany the
+operating system on which the executable file runs.
+
+ 4. You may not copy, sublicense, distribute or transfer this program
+except as expressly provided under this License Agreement. Any attempt
+otherwise to copy, sublicense, distribute or transfer this program is void and
+your rights to use the program under this License agreement shall be
+automatically terminated. However, parties who have received computer
+software programs from you with this License Agreement will not have
+their licenses terminated so long as such parties remain in full compliance.
+
+ 5. If you wish to incorporate parts of this program into other free
+programs whose distribution conditions are different, write to the Free
+Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
+worked out a simple rule that can be stated here, but we will often permit
+this. We will be guided by the two goals of preserving the free status of
+all derivatives our free software and of promoting the sharing and reuse of
+software.
+
+
+In other words, you are welcome to use, share and improve this program.
+You are forbidden to forbid anyone else to use, share and improve
+what you give them. Help stamp out software-hoarding! */
+
+#include "awk.h"
+#include <assert.h>
+
+#ifdef setbit /* surprise - setbit and clrbit are macros on NeXT */
+#undef setbit
+#endif
+#ifdef clrbit
+#undef clrbit
+#endif
+
+#ifdef __STDC__
+typedef void *ptr_t;
+#else
+typedef char *ptr_t;
+#endif
+
+/* Bundle of string pointers passed to resetmust() (see the prototype
+   below).  NOTE(review): the precise role of each member is not visible
+   in this part of the file; `in' is presumably a list of strings and the
+   other three single strings -- confirm against regmust().  */
+typedef struct {
+ char ** in;
+ char * left;
+ char * right;
+ char * is;
+} must;
+
+static ptr_t xcalloc P((int n, size_t s));
+static ptr_t xmalloc P((size_t n));
+static ptr_t xrealloc P((ptr_t p, size_t n));
+static int tstbit P((int b, _charset c));
+static void setbit P((int b, _charset c));
+static void clrbit P((int b, _charset c));
+static void copyset P((const _charset src, _charset dst));
+static void zeroset P((_charset s));
+static void notset P((_charset s));
+static int equal P((const _charset s1, const _charset s2));
+static int charset_index P((const _charset s));
+static _token lex P((void));
+static void addtok P((_token t));
+static void atom P((void));
+static void closure P((void));
+static void branch P((void));
+static void regexp P((void));
+static void copy P((const _position_set *src, _position_set *dst));
+static void insert P((_position p, _position_set *s));
+static void merge P((_position_set *s1, _position_set *s2, _position_set *m));
+static void delete P((_position p, _position_set *s));
+static int state_index P((struct regexp *r, _position_set *s,
+ int newline, int letter));
+static void epsclosure P((_position_set *s, struct regexp *r));
+static void build_state P((int s, struct regexp *r));
+static void build_state_zero P((struct regexp *r));
+static char *icatalloc P((char *old, const char *new));
+static char *icpyalloc P((const char *string));
+static char *istrstr P((char *lookin, char *lookfor));
+static void ifree P((char *cp));
+static void freelist P((char **cpp));
+static char **enlist P((char **cpp, char *new, size_t len));
+static char **comsubs P((char *left, char *right));
+static char **addlists P((char **old, char **new));
+static char **inboth P((char **left, char **right));
+static void resetmust P((must *mp));
+static void regmust P((struct regexp *r));
+
+#undef P
+
+/* Allocate zero-filled storage for N objects of S bytes each with calloc().
+   A failed allocation is reported through reg_error().  */
+static ptr_t
+xcalloc(n, s)
+     int n;
+     size_t s;
+{
+  ptr_t block;
+
+  block = calloc(n, s);
+  if (block != NULL)
+    return block;
+
+  reg_error("Memory exhausted");	/* reg_error does not return */
+  return block;
+}
+
+/* Allocate N bytes with malloc().  N must be nonzero (checked by assert);
+   a failed allocation is reported through reg_error().  */
+static ptr_t
+xmalloc(n)
+     size_t n;
+{
+  ptr_t block;
+
+  block = malloc(n);
+  assert(n != 0);
+  if (block == NULL)
+    reg_error("Memory exhausted");
+  return block;
+}
+
+/* Resize the block P to N bytes with realloc().  N must be nonzero (checked
+   by assert); a failed reallocation is reported through reg_error().  */
+static ptr_t
+xrealloc(p, n)
+     ptr_t p;
+     size_t n;
+{
+  ptr_t grown;
+
+  grown = realloc(p, n);
+  assert(n != 0);
+  if (grown == NULL)
+    reg_error("Memory exhausted");
+  return grown;
+}
+
+#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t)))
+#undef MALLOC
+#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
+#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
+
+/* Reallocate an array of type t if nalloc is too small for index.
+   NALLOC is doubled until it exceeds INDEX and P is then resized to hold
+   NALLOC elements of type T.  NALLOC must be nonzero on entry, since
+   doubling zero would never exceed INDEX.  The expansion is wrapped in
+   do { ... } while (0) so that an invocation followed by a semicolon is
+   exactly one statement and cannot capture a following `else'.  */
+#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
+  do \
+    { \
+      if ((index) >= (nalloc)) \
+        { \
+          while ((index) >= (nalloc)) \
+            (nalloc) *= 2; \
+          REALLOC(p, t, nalloc); \
+        } \
+    } \
+  while (0)
+
+/* Stuff pertaining to charsets. */
+
+/* Return nonzero if bit B is a member of charset C, zero otherwise.
+   The mask is built from an unsigned 1: shifting a signed 1 into the top
+   bit of a word is undefined behavior.  The result is normalized to 0/1 so
+   that no out-of-range value has to be converted back to int.  */
+static int
+tstbit(b, c)
+     int b;
+     _charset c;
+{
+  return (c[b / INTBITS] & (1U << b % INTBITS)) != 0;
+}
+
+/* Add bit B to charset C.  The mask is built from an unsigned 1 because
+   shifting a signed 1 into the top bit of a word is undefined behavior.  */
+static void
+setbit(b, c)
+     int b;
+     _charset c;
+{
+  c[b / INTBITS] |= 1U << b % INTBITS;
+}
+
+/* Remove bit B from charset C.  The mask is built from an unsigned 1
+   because shifting a signed 1 into the top bit of a word is undefined
+   behavior.  */
+static void
+clrbit(b, c)
+     int b;
+     _charset c;
+{
+  c[b / INTBITS] &= ~(1U << b % INTBITS);
+}
+
+/* Copy every word of charset SRC into charset DST.  */
+static void
+copyset(src, dst)
+     const _charset src;
+     _charset dst;
+{
+  int word = 0;
+
+  while (word < _CHARSET_INTS)
+    {
+      dst[word] = src[word];
+      ++word;
+    }
+}
+
+/* Clear every word of charset S, leaving the empty set.  */
+static void
+zeroset(s)
+     _charset s;
+{
+  int word = 0;
+
+  while (word < _CHARSET_INTS)
+    {
+      s[word] = 0;
+      ++word;
+    }
+}
+
+/* Complement charset S in place, word by word.  */
+static void
+notset(s)
+     _charset s;
+{
+  int word = 0;
+
+  while (word < _CHARSET_INTS)
+    {
+      s[word] = ~s[word];
+      ++word;
+    }
+}
+
+/* Return 1 if charsets S1 and S2 hold identical words, 0 otherwise.  */
+static int
+equal(s1, s2)
+     const _charset s1;
+     const _charset s2;
+{
+  int word = 0;
+
+  /* Stop at the first word that differs.  */
+  while (word < _CHARSET_INTS && s1[word] == s2[word])
+    ++word;
+
+  if (word < _CHARSET_INTS)
+    return 0;
+  return 1;
+}
+
+/* A pointer to the current regexp is kept here during parsing. */
+static struct regexp *reg;
+
+/* Return the index in reg->charsets of a charset equal to S.  When no
+   stored charset matches, S is copied into a new slot at the end of the
+   table (growing the table if needed) and that slot's index is returned.  */
+static int
+charset_index(s)
+     const _charset s;
+{
+  int slot = 0;
+
+  while (slot < reg->cindex)
+    {
+      if (equal(s, reg->charsets[slot]))
+        return slot;
+      ++slot;
+    }
+
+  /* No match: slot now names the first unused entry.  */
+  REALLOC_IF_NECESSARY(reg->charsets, _charset, reg->calloc, reg->cindex);
+  ++reg->cindex;
+  copyset(s, reg->charsets[slot]);
+  return slot;
+}
+
+/* Syntax bits controlling the behavior of the lexical analyzer.
+   syntax_bits_set becomes 1 once regsyntax() has been called; regparse()
+   refuses to run while it is still 0.  (Declared with an explicit `int':
+   implicit int is not valid C since C99.)  */
+static int syntax_bits, syntax_bits_set;
+
+/* Flag for case-folding letters into sets. */
+static int case_fold;
+
+/* Entry point to set syntax options: record the syntax BITS and the
+   case-folding flag FOLD, and note that a syntax has been chosen.  */
+void
+regsyntax(bits, fold)
+     long bits;
+     int fold;
+{
+  case_fold = fold;
+  syntax_bits = bits;
+  syntax_bits_set = 1;
+}
+
+/* Lexical analyzer.  The state below is initialized by regparse() and
+   consumed by FETCH/lex().  (The counters and flags are declared with an
+   explicit `int': implicit int is not valid C since C99.)  */
+static const char *lexstart; /* Pointer to beginning of input string. */
+static const char *lexptr; /* Pointer to next input character. */
+static int lexleft; /* Number of characters remaining. */
+static int caret_allowed; /* True if backward context allows ^
+ (meaningful only if RE_CONTEXT_INDEP_OPS
+ is turned off). */
+static int closure_allowed; /* True if backward context allows closures
+ (meaningful only if RE_CONTEXT_INDEP_OPS
+ is turned off). */
+
+/* Fetch the next input character into C and consume it.
+   Note that characters become unsigned here.
+   When no input is left: if EOFERR is non-null it is passed to reg_error(),
+   otherwise the macro executes `return _END' in the *enclosing function*,
+   so it may only be used inside a function returning a token.  The `else'
+   below binds to the inner `if (eoferr != NULL)'.  */
+#define FETCH(c, eoferr) \
+ { \
+ if (! lexleft) \
+ if (eoferr != NULL) \
+ reg_error(eoferr); \
+ else \
+ return _END; \
+ (c) = (unsigned char) *lexptr++; \
+ --lexleft; \
+ }
+
+/* Return the next token of the pattern being parsed.
+   Input is taken from lexptr/lexleft through FETCH; when the input is
+   exhausted at the start of a token, _END is returned.  Operators are
+   recognized according to syntax_bits; anything not recognized as an
+   operator falls to `normal_char' and is returned as the character itself.
+   Character classes (`.', `[...]', \w, \W, and case-folded letters) are
+   returned as _SET plus the index obtained from charset_index().
+   caret_allowed and closure_allowed are updated to describe the context
+   the *next* token will see.  */
+static _token
+lex()
+{
+ _token c, c2;
+ int invert;
+ _charset cset;
+
+ FETCH(c, (char *) 0);
+ switch (c)
+ {
+ case '^':
+ /* Without context-independent operators, `^' is an ordinary character
+ unless the preceding context allows it (and, under RE_TIGHT_VBAR,
+ unless it is the very first character of the pattern). */
+ if (! (syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && (!caret_allowed ||
+ ((syntax_bits & RE_TIGHT_VBAR) && lexptr - 1 != lexstart)))
+ goto normal_char;
+ caret_allowed = 0;
+ return syntax_bits & RE_TIGHT_VBAR ? _ALLBEGLINE : _BEGLINE;
+
+ case '$':
+ /* `$' is an anchor when operators are context independent, when it is
+ the last character, or (without RE_TIGHT_VBAR) when it is directly
+ followed by a close-group or alternation operator in the current
+ syntax; otherwise it is an ordinary character. */
+ if (syntax_bits & RE_CONTEXT_INDEP_OPS || !lexleft
+ || (! (syntax_bits & RE_TIGHT_VBAR)
+ && ((syntax_bits & RE_NO_BK_PARENS
+ ? lexleft > 0 && *lexptr == ')'
+ : lexleft > 1 && *lexptr == '\\' && lexptr[1] == ')')
+ || (syntax_bits & RE_NO_BK_VBAR
+ ? lexleft > 0 && *lexptr == '|'
+ : lexleft > 1 && *lexptr == '\\' && lexptr[1] == '|'))))
+ return syntax_bits & RE_TIGHT_VBAR ? _ALLENDLINE : _ENDLINE;
+ goto normal_char;
+
+ case '\\':
+ /* Backslash escape: the following character selects the meaning. */
+ FETCH(c, "Unfinished \\ quote");
+ switch (c)
+ {
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _BACKREF;
+
+ case '<':
+ caret_allowed = 0;
+ return _BEGWORD;
+
+ case '>':
+ caret_allowed = 0;
+ return _ENDWORD;
+
+ case 'b':
+ caret_allowed = 0;
+ return _LIMWORD;
+
+ case 'B':
+ caret_allowed = 0;
+ return _NOTLIMWORD;
+
+ case 'w':
+ case 'W':
+ /* \w is the set of characters accepted by ISALNUM; \W is its
+ complement. */
+ zeroset(cset);
+ for (c2 = 0; c2 < _NOTCHAR; ++c2)
+ if (ISALNUM(c2))
+ setbit(c2, cset);
+ if (c == 'W')
+ notset(cset);
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _SET + charset_index(cset);
+
+ /* The escaped forms below are operators only in syntaxes where the
+ operator is spelled with a backslash; otherwise the escaped
+ character is ordinary. */
+ case '?':
+ if (syntax_bits & RE_BK_PLUS_QM)
+ goto qmark;
+ goto normal_char;
+
+ case '+':
+ if (syntax_bits & RE_BK_PLUS_QM)
+ goto plus;
+ goto normal_char;
+
+ case '|':
+ if (! (syntax_bits & RE_NO_BK_VBAR))
+ goto or;
+ goto normal_char;
+
+ case '(':
+ if (! (syntax_bits & RE_NO_BK_PARENS))
+ goto lparen;
+ goto normal_char;
+
+ case ')':
+ if (! (syntax_bits & RE_NO_BK_PARENS))
+ goto rparen;
+ goto normal_char;
+
+ default:
+ goto normal_char;
+ }
+
+ case '?':
+ if (syntax_bits & RE_BK_PLUS_QM)
+ goto normal_char;
+ qmark:
+ if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
+ goto normal_char;
+ return _QMARK;
+
+ case '*':
+ if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
+ goto normal_char;
+ return _STAR;
+
+ case '+':
+ if (syntax_bits & RE_BK_PLUS_QM)
+ goto normal_char;
+ plus:
+ if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
+ goto normal_char;
+ return _PLUS;
+
+ case '|':
+ if (! (syntax_bits & RE_NO_BK_VBAR))
+ goto normal_char;
+ or:
+ caret_allowed = 1;
+ closure_allowed = 0;
+ return _OR;
+
+ case '\n':
+ /* With RE_NEWLINE_OR a newline acts as the alternation operator. */
+ if (! (syntax_bits & RE_NEWLINE_OR))
+ goto normal_char;
+ goto or;
+
+ case '(':
+ if (! (syntax_bits & RE_NO_BK_PARENS))
+ goto normal_char;
+ lparen:
+ caret_allowed = 1;
+ closure_allowed = 0;
+ return _LPAREN;
+
+ case ')':
+ if (! (syntax_bits & RE_NO_BK_PARENS))
+ goto normal_char;
+ rparen:
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _RPAREN;
+
+ case '.':
+ /* `.' is the set of every character except newline. */
+ zeroset(cset);
+ notset(cset);
+ clrbit('\n', cset);
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _SET + charset_index(cset);
+
+ case '[':
+ /* Bracket expression.  Inside the loop, c is the member about to be
+ added and c2 is the character after it (one character of
+ lookahead, needed to recognize ranges `c-c2'). */
+ zeroset(cset);
+ FETCH(c, "Unbalanced [");
+ if (c == '^')
+ {
+ FETCH(c, "Unbalanced [");
+ invert = 1;
+ }
+ else
+ invert = 0;
+ do
+ {
+ FETCH(c2, "Unbalanced [");
+ /* Under RE_AWK_CLASS_HACK a backslash quotes the next character. */
+ if ((syntax_bits & RE_AWK_CLASS_HACK) && c == '\\')
+ {
+ c = c2;
+ FETCH(c2, "Unbalanced [");
+ }
+ if (c2 == '-')
+ {
+ FETCH(c2, "Unbalanced [");
+ /* Under RE_AWK_CLASS_HACK, `c-]' means c and a literal `-',
+ and closes the bracket expression. */
+ if (c2 == ']' && (syntax_bits & RE_AWK_CLASS_HACK))
+ {
+ setbit(c, cset);
+ setbit('-', cset);
+ break;
+ }
+ while (c <= c2)
+ setbit(c++, cset);
+ FETCH(c, "Unbalanced [");
+ }
+ else
+ {
+ setbit(c, cset);
+ c = c2;
+ }
+ }
+ while (c != ']');
+ if (invert)
+ notset(cset);
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _SET + charset_index(cset);
+
+ default:
+ normal_char:
+ caret_allowed = 0;
+ closure_allowed = 1;
+ /* When folding case, a letter becomes a two-member set holding its
+ lower-case and upper-case forms. */
+ if (case_fold && ISALPHA(c))
+ {
+ zeroset(cset);
+ if (isupper(c))
+ c = tolower(c);
+ setbit(c, cset);
+ setbit(toupper(c), cset);
+ return _SET + charset_index(cset);
+ }
+ return c;
+ }
+}
+
+/* Recursive descent parser for regular expressions. */
+
+static _token tok; /* Lookahead token. */
+/* depth is declared with an explicit `int': implicit int is not valid C
+   since C99. */
+static int depth; /* Current depth of a hypothetical stack
+ holding deferred productions. This is
+ used to determine the depth that will be
+ required of the real stack later on in
+ reganalyze(). */
+
+/* Append token T to the postfix token array of the current regexp and keep
+   the stack-depth bookkeeping up to date: unary operators leave the depth
+   alone, binary operators pop one entry, and every operand (leaf or _EMPTY)
+   pushes one.  Leaves other than _EMPTY are also counted in reg->nleaves.
+   reg->depth records the largest depth seen so far.  */
+static void
+addtok(t)
+     _token t;
+{
+  REALLOC_IF_NECESSARY(reg->tokens, _token, reg->talloc, reg->tindex);
+  reg->tokens[reg->tindex] = t;
+  reg->tindex++;
+
+  if (t == _QMARK || t == _STAR || t == _PLUS)
+    {
+      /* Unary operator: replaces its operand, depth unchanged.  */
+    }
+  else if (t == _CAT || t == _OR)
+    {
+      --depth;
+    }
+  else
+    {
+      if (t != _EMPTY)
+        ++reg->nleaves;
+      ++depth;
+    }
+
+  if (reg->depth < depth)
+    reg->depth = depth;
+}
+
+/* The grammar understood by the parser is as follows.
+
+ start:
+ regexp
+ _ALLBEGLINE regexp
+ regexp _ALLENDLINE
+ _ALLBEGLINE regexp _ALLENDLINE
+
+ regexp:
+ regexp _OR branch
+ branch
+
+ branch:
+ branch closure
+ closure
+
+ closure:
+ closure _QMARK
+ closure _STAR
+ closure _PLUS
+ atom
+
+ atom:
+ <normal character>
+ _SET
+ _BACKREF
+ _BEGLINE
+ _ENDLINE
+ _BEGWORD
+ _ENDWORD
+ _LIMWORD
+ _NOTLIMWORD
+ _LPAREN regexp _RPAREN
+ <empty>
+
+ The parser builds a parse tree in postfix form in an array of tokens. */
+
+#ifdef __STDC__
+static void regexp(void);
+#else
+static void regexp();
+#endif
+
+/* Parse one atom.  A leaf token (ordinary character, set, backreference or
+   one of the anchor/word-boundary tokens) is added to the token array; a
+   parenthesized group is parsed recursively; anything else yields _EMPTY
+   without consuming the lookahead.  */
+static void
+atom()
+{
+  int is_leaf;
+
+  is_leaf = tok >= 0
+    && (tok < _NOTCHAR || tok >= _SET || tok == _BACKREF
+        || tok == _BEGLINE || tok == _ENDLINE || tok == _BEGWORD
+        || tok == _ENDWORD || tok == _LIMWORD || tok == _NOTLIMWORD);
+
+  if (is_leaf)
+    {
+      addtok(tok);
+      tok = lex();
+      return;
+    }
+
+  if (tok != _LPAREN)
+    {
+      addtok(_EMPTY);
+      return;
+    }
+
+  /* Parenthesized group.  */
+  tok = lex();
+  regexp();
+  if (tok != _RPAREN)
+    reg_error("Unbalanced (");
+  tok = lex();
+}
+
+/* Parse an atom followed by any number of postfix `?', `*' or `+'
+   operators, emitting each operator after its operand.  */
+static void
+closure()
+{
+  for (atom(); tok == _QMARK || tok == _STAR || tok == _PLUS; tok = lex())
+    addtok(tok);
+}
+
+/* Parse a concatenation of closures.  Closures keep being consumed until
+   the lookahead is a token that ends a branch (_RPAREN, _OR, _ALLENDLINE or
+   any negative token); each additional closure is joined with _CAT.  */
+static void
+branch()
+{
+  closure();
+  for (;;)
+    {
+      if (tok == _RPAREN || tok == _OR || tok == _ALLENDLINE || tok < 0)
+        break;
+      closure();
+      addtok(_CAT);
+    }
+}
+
+/* Parse one or more branches separated by _OR, emitting an _OR token after
+   each right-hand branch.  */
+static void
+regexp()
+{
+  for (branch(); tok == _OR; addtok(_OR))
+    {
+      tok = lex();
+      branch();
+    }
+}
+
+/* Main entry point for the parser.  S is the pattern text and LEN its
+   length (so S may contain NUL characters); R is the struct regexp that
+   receives the postfix token array.  Each call appends one more pattern to
+   R: the pattern is terminated by its own end marker (_END - r->nregexps)
+   and, from the second pattern on, joined to the earlier ones with _OR.
+   regsyntax() must have been called first.  */
+void
+regparse(s, len, r)
+     const char *s;
+     size_t len;
+     struct regexp *r;
+{
+  int anchored_at_start;
+
+  /* Set up the lexer and parser state.  */
+  reg = r;
+  lexptr = s;
+  lexstart = lexptr;
+  lexleft = len;
+  caret_allowed = 1;
+  closure_allowed = 0;
+
+  if (! syntax_bits_set)
+    reg_error("No syntax specified");
+
+  tok = lex();
+  depth = r->depth;
+
+  /* A leading _ALLBEGLINE anchors the whole pattern.  */
+  anchored_at_start = (tok == _ALLBEGLINE);
+  if (anchored_at_start)
+    {
+      addtok(_BEGLINE);
+      tok = lex();
+    }
+  regexp();
+  if (anchored_at_start)
+    addtok(_CAT);
+
+  /* Likewise a trailing _ALLENDLINE.  */
+  if (tok == _ALLENDLINE)
+    {
+      addtok(_ENDLINE);
+      addtok(_CAT);
+      tok = lex();
+    }
+
+  if (tok != _END)
+    reg_error("Unbalanced )");
+
+  /* Terminate this pattern with its own end marker.  */
+  addtok(_END - r->nregexps);
+  addtok(_CAT);
+
+  if (r->nregexps != 0)
+    addtok(_OR);
+
+  ++r->nregexps;
+}
+
+/* Some primitives for operating on sets of positions. */
+
+/* Copy the position set SRC into DST, element by element, and set DST's
+   element count.  DST->elems must already be large enough.  */
+static void
+copy(src, dst)
+     const _position_set *src;
+     _position_set *dst;
+{
+  int k = 0;
+
+  while (k < src->nelem)
+    {
+      dst->elems[k] = src->elems[k];
+      ++k;
+    }
+  dst->nelem = src->nelem;
+}
+
+/* Insert position P into set S.  Sets are kept ordered by decreasing
+   index.  If S already holds a position with P's index, the two
+   constraints are or'd together; otherwise P is placed at its sorted slot
+   and the later elements move up by one.  S->elems must have room for the
+   extra element.  */
+static void
+insert(p, s)
+     _position p;
+     _position_set *s;
+{
+  int at = 0;
+  _position carry, displaced;
+
+  /* Skip the elements whose index is greater than P's.  */
+  while (at < s->nelem && p.index < s->elems[at].index)
+    ++at;
+
+  if (at < s->nelem && p.index == s->elems[at].index)
+    {
+      s->elems[at].constraint |= p.constraint;
+      return;
+    }
+
+  /* Ripple the new element in, pushing each displaced one a slot along.  */
+  carry = p;
+  s->nelem++;
+  for (; at < s->nelem; ++at)
+    {
+      displaced = s->elems[at];
+      s->elems[at] = carry;
+      carry = displaced;
+    }
+}
+
+/* Merge the position sets S1 and S2 into M, giving the same result as
+   inserting every position of both into an initially empty set: elements
+   are ordered by decreasing index, and positions with equal index are
+   combined by or-ing their constraints.  */
+static void
+merge(s1, s2, m)
+     _position_set *s1;
+     _position_set *s2;
+     _position_set *m;
+{
+  int a = 0;
+  int b = 0;
+
+  m->nelem = 0;
+  for (;;)
+    {
+      if (!(a < s1->nelem && b < s2->nelem))
+        break;
+
+      if (s1->elems[a].index > s2->elems[b].index)
+        {
+          m->elems[m->nelem] = s1->elems[a];
+          ++m->nelem;
+          ++a;
+        }
+      else if (s1->elems[a].index < s2->elems[b].index)
+        {
+          m->elems[m->nelem] = s2->elems[b];
+          ++m->nelem;
+          ++b;
+        }
+      else
+        {
+          /* Same index in both sets: keep one copy, combine constraints.  */
+          m->elems[m->nelem] = s1->elems[a];
+          m->elems[m->nelem].constraint |= s2->elems[b].constraint;
+          ++m->nelem;
+          ++a;
+          ++b;
+        }
+    }
+
+  /* At most one of the inputs still has elements left.  */
+  for (; a < s1->nelem; ++a)
+    {
+      m->elems[m->nelem] = s1->elems[a];
+      ++m->nelem;
+    }
+  for (; b < s2->nelem; ++b)
+    {
+      m->elems[m->nelem] = s2->elems[b];
+      ++m->nelem;
+    }
+}
+
+/* Remove from set S the first element whose index equals P's index, closing
+   the gap.  S is left untouched when no element matches.  */
+static void
+delete(p, s)
+     _position p;
+     _position_set *s;
+{
+  int at = 0;
+
+  while (at < s->nelem && p.index != s->elems[at].index)
+    ++at;
+
+  if (at >= s->nelem)
+    return;
+
+  --s->nelem;
+  while (at < s->nelem)
+    {
+      s->elems[at] = s->elems[at + 1];
+      ++at;
+    }
+}
+
+/* Find the index of the state corresponding to the given position set with
+ the given preceding context, or create a new state if there is no such
+ state. Newline and letter tell whether we got here on a newline or
+ letter, respectively.
+
+ R is the regexp whose state table is searched/extended and S the position
+ set. Returns the index into r->states. A new state gets its own copy of
+ S; r->sindex is incremented and r->states may be reallocated. */
+static int
+state_index(r, s, newline, letter)
+ struct regexp *r;
+ _position_set *s;
+ int newline;
+ int letter;
+{
+ int lhash = 0;
+ int constraint;
+ int i, j;
+
+ /* Normalize the context flags to exactly 0 or 1 so they compare equal
+ to the values stored in existing states. */
+ newline = newline ? 1 : 0;
+ letter = letter ? 1 : 0;
+
+ /* Cheap hash of the set: xor of index + constraint of every element. */
+ for (i = 0; i < s->nelem; ++i)
+ lhash ^= s->elems[i].index + s->elems[i].constraint;
+
+ /* Try to find a state that exactly matches the proposed one. */
+ for (i = 0; i < r->sindex; ++i)
+ {
+ if (lhash != r->states[i].hash || s->nelem != r->states[i].elems.nelem
+ || newline != r->states[i].newline || letter != r->states[i].letter)
+ continue;
+ for (j = 0; j < s->nelem; ++j)
+ if (s->elems[j].constraint
+ != r->states[i].elems.elems[j].constraint
+ || s->elems[j].index != r->states[i].elems.elems[j].index)
+ break;
+ if (j == s->nelem)
+ return i;
+ }
+
+ /* We'll have to create a new state. (Here i == r->sindex, the first
+ unused slot.) */
+ REALLOC_IF_NECESSARY(r->states, _dfa_state, r->salloc, r->sindex);
+ r->states[i].hash = lhash;
+ MALLOC(r->states[i].elems.elems, _position, s->nelem);
+ copy(s, &r->states[i].elems);
+ r->states[i].newline = newline;
+ r->states[i].letter = letter;
+ r->states[i].backref = 0;
+ r->states[i].constraint = 0;
+ r->states[i].first_end = 0;
+ /* Scan the positions for negative tokens (presumably the end markers
+ `_END - n' appended by regparse -- confirm against dfa.h) and for
+ backreferences. */
+ for (j = 0; j < s->nelem; ++j)
+ if (r->tokens[s->elems[j].index] < 0)
+ {
+ /* Accumulate the constraint if it can succeed in at least one of
+ the four possible following contexts. */
+ constraint = s->elems[j].constraint;
+ if (_SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
+ || _SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
+ || _SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
+ || _SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))
+ r->states[i].constraint |= constraint;
+ /* Remember only the first such token encountered. */
+ if (! r->states[i].first_end)
+ r->states[i].first_end = r->tokens[s->elems[j].index];
+ }
+ else if (r->tokens[s->elems[j].index] == _BACKREF)
+ {
+ r->states[i].constraint = _NO_CONSTRAINT;
+ r->states[i].backref = 1;
+ }
+
+ ++r->sindex;
+
+ return i;
+}
+
+/* Find the epsilon closure of a set of positions. If any position of the set
+ contains a symbol that matches the empty string in some context, replace
+ that position with the elements of its follow labeled with an appropriate
+ constraint. Repeat exhaustively until no funny positions are left.
+ S->elems must be large enough to hold the result.
+
+ S is modified in place; R supplies the token array and the follow sets.
+ A temporary `visited' array (one int per token) ensures each special
+ position is expanded at most once; it is freed before returning. */
+static void
+epsclosure(s, r)
+ _position_set *s;
+ struct regexp *r;
+{
+ int i, j;
+ int *visited;
+ _position p, old;
+
+ MALLOC(visited, int, r->tindex);
+ for (i = 0; i < r->tindex; ++i)
+ visited[i] = 0;
+
+ for (i = 0; i < s->nelem; ++i)
+ /* Special symbols are the tokens in [_NOTCHAR, _SET) other than
+ _BACKREF. */
+ if (r->tokens[s->elems[i].index] >= _NOTCHAR
+ && r->tokens[s->elems[i].index] != _BACKREF
+ && r->tokens[s->elems[i].index] < _SET)
+ {
+ old = s->elems[i];
+ p.constraint = old.constraint;
+ delete(s->elems[i], s);
+ if (visited[old.index])
+ {
+ /* Already expanded: just drop it. The element that slid into
+ slot i must be examined next, hence the --i. */
+ --i;
+ continue;
+ }
+ visited[old.index] = 1;
+ /* Narrow the inherited constraint by the symbol's own constraint. */
+ switch (r->tokens[old.index])
+ {
+ case _BEGLINE:
+ p.constraint &= _BEGLINE_CONSTRAINT;
+ break;
+ case _ENDLINE:
+ p.constraint &= _ENDLINE_CONSTRAINT;
+ break;
+ case _BEGWORD:
+ p.constraint &= _BEGWORD_CONSTRAINT;
+ break;
+ case _ENDWORD:
+ p.constraint &= _ENDWORD_CONSTRAINT;
+ break;
+ case _LIMWORD:
+ /* NOTE(review): every other case uses the constraint named after
+ its own token, but this one applies _ENDWORD_CONSTRAINT. This
+ looks like a copy-and-paste slip for _LIMWORD_CONSTRAINT;
+ confirm that such a macro exists in dfa.h before changing. */
+ p.constraint &= _ENDWORD_CONSTRAINT;
+ break;
+ case _NOTLIMWORD:
+ p.constraint &= _NOTLIMWORD_CONSTRAINT;
+ break;
+ default:
+ break;
+ }
+ /* Replace the symbol by everything that can follow it. */
+ for (j = 0; j < r->follows[old.index].nelem; ++j)
+ {
+ p.index = r->follows[old.index].elems[j].index;
+ insert(p, s);
+ }
+ /* Force rescan to start at the beginning. */
+ i = -1;
+ }
+
+ free(visited);
+}
+
+/* Perform bottom-up analysis on the parse tree, computing various functions.
+ Note that at this point, we're pretending constructs like \< are real
+ characters rather than constraints on what can follow them.
+
+ Nullable: A node is nullable if it is at the root of a regexp that can
+ match the empty string.
+ * _EMPTY leaves are nullable.
+ * No other leaf is nullable.
+ * A _QMARK or _STAR node is nullable.
+ * A _PLUS node is nullable if its argument is nullable.
+ * A _CAT node is nullable if both its arguments are nullable.
+ * An _OR node is nullable if either argument is nullable.
+
+ Firstpos: The firstpos of a node is the set of positions (nonempty leaves)
+ that could correspond to the first character of a string matching the
+ regexp rooted at the given node.
+ * _EMPTY leaves have empty firstpos.
+ * The firstpos of a nonempty leaf is that leaf itself.
+ * The firstpos of a _QMARK, _STAR, or _PLUS node is the firstpos of its
+ argument.
+ * The firstpos of a _CAT node is the firstpos of the left argument, union
+ the firstpos of the right if the left argument is nullable.
+ * The firstpos of an _OR node is the union of firstpos of each argument.
+
+ Lastpos: The lastpos of a node is the set of positions that could
+ correspond to the last character of a string matching the regexp at
+ the given node.
+ * _EMPTY leaves have empty lastpos.
+ * The lastpos of a nonempty leaf is that leaf itself.
+ * The lastpos of a _QMARK, _STAR, or _PLUS node is the lastpos of its
+ argument.
+ * The lastpos of a _CAT node is the lastpos of its right argument, union
+ the lastpos of the left if the right argument is nullable.
+ * The lastpos of an _OR node is the union of the lastpos of each argument.
+
+ Follow: The follow of a position is the set of positions that could
+ correspond to the character following a character matching the node in
+ a string matching the regexp. At this point we consider special symbols
+ that match the empty string in some context to be just normal characters.
+ Later, if we find that a special symbol is in a follow set, we will
+ replace it with the elements of its follow, labeled with an appropriate
+ constraint.
+ * Every node in the firstpos of the argument of a _STAR or _PLUS node is in
+ the follow of every node in the lastpos.
+ * Every node in the firstpos of the second argument of a _CAT node is in
+ the follow of every node in the lastpos of the first argument.
+
+ Because of the postfix representation of the parse tree, the depth-first
+ analysis is conveniently done by a linear scan with the aid of a stack.
+ Sets are stored as arrays of the elements, obeying a stack-like allocation
+ scheme; the number of elements in each set deeper in the stack can be
+ used to determine the address of a particular set's array. */
+/* Analyze the postfix token array of R: compute the follow set of every
+ position (stored in r->follows), then build state 0 of the automaton from
+ the epsilon closure of the firstpos of the whole regexp. SEARCHFLAG is
+ simply recorded in r->searchflag. All work arrays allocated here are
+ freed before returning; r->follows and r->states remain allocated. */
+void
+reganalyze(r, searchflag)
+ struct regexp *r;
+ int searchflag;
+{
+ int *nullable; /* Nullable stack. */
+ int *nfirstpos; /* Element count stack for firstpos sets. */
+ _position *firstpos; /* Array where firstpos elements are stored. */
+ int *nlastpos; /* Element count stack for lastpos sets. */
+ _position *lastpos; /* Array where lastpos elements are stored. */
+ int *nalloc; /* Sizes of arrays allocated to follow sets. */
+ _position_set tmp; /* Temporary set for merging sets. */
+ _position_set merged; /* Result of merging sets. */
+ int wants_newline; /* True if some position wants newline info. */
+ int *o_nullable;
+ int *o_nfirst, *o_nlast;
+ _position *o_firstpos, *o_lastpos;
+ int i, j;
+ _position *pos;
+
+ r->searchflag = searchflag;
+
+ /* Allocate the work stacks. The o_* variables remember the original
+ allocations so they can be freed at the end, because the working
+ pointers are moved as stack pointers. firstpos and lastpos start at
+ the END of their arrays and grow downward. */
+ MALLOC(nullable, int, r->depth);
+ o_nullable = nullable;
+ MALLOC(nfirstpos, int, r->depth);
+ o_nfirst = nfirstpos;
+ MALLOC(firstpos, _position, r->nleaves);
+ o_firstpos = firstpos, firstpos += r->nleaves;
+ MALLOC(nlastpos, int, r->depth);
+ o_nlast = nlastpos;
+ MALLOC(lastpos, _position, r->nleaves);
+ o_lastpos = lastpos, lastpos += r->nleaves;
+ MALLOC(nalloc, int, r->tindex);
+ for (i = 0; i < r->tindex; ++i)
+ nalloc[i] = 0;
+ MALLOC(merged.elems, _position, r->nleaves);
+
+ CALLOC(r->follows, _position_set, r->tindex);
+
+ /* Linear scan over the postfix token array. */
+ for (i = 0; i < r->tindex; ++i)
+ switch (r->tokens[i])
+ {
+ case _EMPTY:
+ /* The empty set is nullable. */
+ *nullable++ = 1;
+
+ /* The firstpos and lastpos of the empty leaf are both empty. */
+ *nfirstpos++ = *nlastpos++ = 0;
+ break;
+
+ case _STAR:
+ case _PLUS:
+ /* Every element in the firstpos of the argument is in the follow
+ of every element in the lastpos. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos;
+ for (j = 0; j < nlastpos[-1]; ++j)
+ {
+ merge(&tmp, &r->follows[pos[j].index], &merged);
+ REALLOC_IF_NECESSARY(r->follows[pos[j].index].elems, _position,
+ nalloc[pos[j].index], merged.nelem - 1);
+ copy(&merged, &r->follows[pos[j].index]);
+ }
+
+ /* Deliberate fall through: _STAR and _PLUS share the nullable
+ handling below. */
+ case _QMARK:
+ /* A _QMARK or _STAR node is automatically nullable. */
+ if (r->tokens[i] != _PLUS)
+ nullable[-1] = 1;
+ break;
+
+ case _CAT:
+ /* Every element in the firstpos of the second argument is in the
+ follow of every element in the lastpos of the first argument. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos + nlastpos[-1];
+ for (j = 0; j < nlastpos[-2]; ++j)
+ {
+ merge(&tmp, &r->follows[pos[j].index], &merged);
+ REALLOC_IF_NECESSARY(r->follows[pos[j].index].elems, _position,
+ nalloc[pos[j].index], merged.nelem - 1);
+ copy(&merged, &r->follows[pos[j].index]);
+ }
+
+ /* The firstpos of a _CAT node is the firstpos of the first argument,
+ union that of the second argument if the first is nullable. */
+ if (nullable[-2])
+ nfirstpos[-2] += nfirstpos[-1];
+ else
+ firstpos += nfirstpos[-1];
+ --nfirstpos;
+
+ /* The lastpos of a _CAT node is the lastpos of the second argument,
+ union that of the first argument if the second is nullable. */
+ if (nullable[-1])
+ nlastpos[-2] += nlastpos[-1];
+ else
+ {
+ /* Discard the first argument's lastpos by sliding the second
+ argument's elements over it (copied from the top down). */
+ pos = lastpos + nlastpos[-2];
+ for (j = nlastpos[-1] - 1; j >= 0; --j)
+ pos[j] = lastpos[j];
+ lastpos += nlastpos[-2];
+ nlastpos[-2] = nlastpos[-1];
+ }
+ --nlastpos;
+
+ /* A _CAT node is nullable if both arguments are nullable. */
+ nullable[-2] = nullable[-1] && nullable[-2];
+ --nullable;
+ break;
+
+ case _OR:
+ /* The firstpos is the union of the firstpos of each argument. */
+ nfirstpos[-2] += nfirstpos[-1];
+ --nfirstpos;
+
+ /* The lastpos is the union of the lastpos of each argument. */
+ nlastpos[-2] += nlastpos[-1];
+ --nlastpos;
+
+ /* An _OR node is nullable if either argument is nullable. */
+ nullable[-2] = nullable[-1] || nullable[-2];
+ --nullable;
+ break;
+
+ default:
+ /* Anything else is a nonempty position. (Note that special
+ constructs like \< are treated as nonempty strings here;
+ an "epsilon closure" effectively makes them nullable later.
+ Backreferences have to get a real position so we can detect
+ transitions on them later. But they are nullable.) */
+ *nullable++ = r->tokens[i] == _BACKREF;
+
+ /* This position is in its own firstpos and lastpos. */
+ *nfirstpos++ = *nlastpos++ = 1;
+ --firstpos, --lastpos;
+ firstpos->index = lastpos->index = i;
+ firstpos->constraint = lastpos->constraint = _NO_CONSTRAINT;
+
+ /* Allocate the follow set for this position. */
+ nalloc[i] = 1;
+ MALLOC(r->follows[i].elems, _position, nalloc[i]);
+ break;
+ }
+
+ /* For each follow set that is the follow set of a real position, replace
+ it with its epsilon closure. */
+ for (i = 0; i < r->tindex; ++i)
+ if (r->tokens[i] < _NOTCHAR || r->tokens[i] == _BACKREF
+ || r->tokens[i] >= _SET)
+ {
+ copy(&r->follows[i], &merged);
+ epsclosure(&merged, r);
+ /* Grow the follow array only if the closure is larger. */
+ if (r->follows[i].nelem < merged.nelem)
+ REALLOC(r->follows[i].elems, _position, merged.nelem);
+ copy(&merged, &r->follows[i]);
+ }
+
+ /* Get the epsilon closure of the firstpos of the regexp. The result will
+ be the set of positions of state 0. */
+ merged.nelem = 0;
+ for (i = 0; i < nfirstpos[-1]; ++i)
+ insert(firstpos[i], &merged);
+ epsclosure(&merged, r);
+
+ /* Check if any of the positions of state 0 will want newline context. */
+ wants_newline = 0;
+ for (i = 0; i < merged.nelem; ++i)
+ if (_PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
+ wants_newline = 1;
+
+ /* Build the initial state. */
+ r->salloc = 1;
+ r->sindex = 0;
+ MALLOC(r->states, _dfa_state, r->salloc);
+ state_index(r, &merged, wants_newline, 0);
+
+ /* Release the work arrays through their original (unmoved) pointers. */
+ free(o_nullable);
+ free(o_nfirst);
+ free(o_firstpos);
+ free(o_nlast);
+ free(o_lastpos);
+ free(nalloc);
+ free(merged.elems);
+}
+
+/* Find, for each character, the transition out of state s of r, and store
+ it in the appropriate slot of trans.
+
+ We divide the positions of s into groups (positions can appear in more
+ than one group). Each group is labeled with a set of characters that
+ every position in the group matches (taking into account, if necessary,
+ preceding context information of s). For each group, find the union
+ of its elements' follows. This set is the set of positions of the
+ new state. For each character in the group's label, set the transition
+ on this character to be to a state corresponding to the set's positions,
+ and its associated backward context information, if necessary.
+
+ If we are building a searching matcher, we include the positions of state
+ 0 in every state.
+
+ The collection of groups is constructed by building an equivalence-class
+ partition of the positions of s.
+
+ For each position, find the set of characters C that it matches. Eliminate
+ any characters from C that fail on grounds of backward context.
+
+ Search through the groups, looking for a group whose label L has nonempty
+ intersection with C. If L - C is nonempty, create a new group labeled
+ L - C and having the same positions as the current group, and set L to
+ the intersection of L and C. Insert the position in this group, set
+ C = C - L, and resume scanning.
+
+ If after comparing with every group there are characters remaining in C,
+ create a new group labeled with the characters of C and insert this
+ position in that group. */
+ /* Compute the transition table of state s of r. On return, for every
+ character c below _NOTCHAR, trans[c] holds the index of the successor
+ state; when r->searchflag is clear and no position of s matches c, the
+ entry is -1. New states are created on demand through state_index().
+ The grouping algorithm is the one described in the comment preceding
+ this function. */
+ void
+ regstate(s, r, trans)
+ int s;
+ struct regexp *r;
+ int trans[];
+ {
+ _position_set grps[_NOTCHAR]; /* As many as will ever be needed. */
+ _charset labels[_NOTCHAR]; /* Labels corresponding to the groups. */
+ int ngrps = 0; /* Number of groups actually used. */
+ _position pos; /* Current position being considered. */
+ _charset matches; /* Set of matching characters. */
+ int matchesf; /* True if matches is nonempty. */
+ _charset intersect; /* Intersection with some label set. */
+ int intersectf; /* True if intersect is nonempty. */
+ _charset leftovers; /* Stuff in the label that didn't match. */
+ int leftoversf; /* True if leftovers is nonempty. */
+ static _charset letters; /* Set of characters considered letters. */
+ static _charset newline; /* Set containing only the newline character. */
+ _position_set follows; /* Union of the follows of some group. */
+ _position_set tmp; /* Allocated and freed below, but otherwise unused here. */
+ int state; /* New state. */
+ int wants_newline; /* New state wants to know newline context. */
+ int state_newline; /* New state on a newline transition. */
+ int wants_letter; /* New state wants to know letter context. */
+ int state_letter; /* New state on a letter transition. */
+ static initialized; /* Flag for static initialization. */
+ int i, j, k;
+
+ /* Initialize the set of letters, if necessary. */
+ if (! initialized)
+ {
+ initialized = 1;
+ for (i = 0; i < _NOTCHAR; ++i)
+ if (ISALNUM(i))
+ setbit(i, letters);
+ setbit('\n', newline);
+ }
+
+ zeroset(matches);
+
+ /* Pass 1: partition the positions of s into groups labelled with
+ disjoint character sets. */
+ for (i = 0; i < r->states[s].elems.nelem; ++i)
+ {
+ pos = r->states[s].elems.elems[i];
+ /* A plain character token matches just itself; a token at or above
+ _SET selects one of r->charsets; anything else matches no
+ character and is skipped. */
+ if (r->tokens[pos.index] >= 0 && r->tokens[pos.index] < _NOTCHAR)
+ setbit(r->tokens[pos.index], matches);
+ else if (r->tokens[pos.index] >= _SET)
+ copyset(r->charsets[r->tokens[pos.index] - _SET], matches);
+ else
+ continue;
+
+ /* Some characters may need to be eliminated from matches because
+ they fail in the current context. (0xff is presumably the value
+ of _NO_CONSTRAINT -- TODO confirm against dfa.h.) */
+ if (pos.constraint != 0xff)
+ {
+ if (! _MATCHES_NEWLINE_CONTEXT(pos.constraint,
+ r->states[s].newline, 1))
+ clrbit('\n', matches);
+ /* newline[] has only the '\n' bit set, so this keeps at most the
+ newline character in matches. */
+ if (! _MATCHES_NEWLINE_CONTEXT(pos.constraint,
+ r->states[s].newline, 0))
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ matches[j] &= newline[j];
+ if (! _MATCHES_LETTER_CONTEXT(pos.constraint,
+ r->states[s].letter, 1))
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ matches[j] &= ~letters[j];
+ if (! _MATCHES_LETTER_CONTEXT(pos.constraint,
+ r->states[s].letter, 0))
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ matches[j] &= letters[j];
+
+ /* If there are no characters left, there's no point in going on. */
+ for (j = 0; j < _CHARSET_INTS && !matches[j]; ++j)
+ ;
+ if (j == _CHARSET_INTS)
+ continue;
+ }
+
+ for (j = 0; j < ngrps; ++j)
+ {
+ /* If matches contains a single character only, and the current
+ group's label doesn't contain that character, go on to the
+ next group. */
+ if (r->tokens[pos.index] >= 0 && r->tokens[pos.index] < _NOTCHAR
+ && !tstbit(r->tokens[pos.index], labels[j]))
+ continue;
+
+ /* Check if this group's label has a nonempty intersection with
+ matches. */
+ intersectf = 0;
+ for (k = 0; k < _CHARSET_INTS; ++k)
+ (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0;
+ if (! intersectf)
+ continue;
+
+ /* It does; now find the set differences both ways. */
+ leftoversf = matchesf = 0;
+ for (k = 0; k < _CHARSET_INTS; ++k)
+ {
+ /* Even an optimizing compiler can't know this for sure. */
+ int match = matches[k], label = labels[j][k];
+
+ (leftovers[k] = ~match & label) ? leftoversf = 1 : 0;
+ (matches[k] = match & ~label) ? matchesf = 1 : 0;
+ }
+
+ /* If there were leftovers, create a new group labeled with them. */
+ if (leftoversf)
+ {
+ copyset(leftovers, labels[ngrps]);
+ copyset(intersect, labels[j]);
+ MALLOC(grps[ngrps].elems, _position, r->nleaves);
+ copy(&grps[j], &grps[ngrps]);
+ ++ngrps;
+ }
+
+ /* Put the position in the current group. Note that there is no
+ reason to call insert() here. */
+ grps[j].elems[grps[j].nelem++] = pos;
+
+ /* If every character matching the current position has been
+ accounted for, we're done. */
+ if (! matchesf)
+ break;
+ }
+
+ /* If we've passed the last group, and there are still characters
+ unaccounted for, then we'll have to create a new group. */
+ if (j == ngrps)
+ {
+ copyset(matches, labels[ngrps]);
+ zeroset(matches);
+ MALLOC(grps[ngrps].elems, _position, r->nleaves);
+ grps[ngrps].nelem = 1;
+ grps[ngrps].elems[0] = pos;
+ ++ngrps;
+ }
+ }
+
+ MALLOC(follows.elems, _position, r->nleaves);
+ MALLOC(tmp.elems, _position, r->nleaves);
+
+ /* If we are a searching matcher, the default transition is to a state
+ containing the positions of state 0, otherwise the default transition
+ is to fail miserably. */
+ if (r->searchflag)
+ {
+ wants_newline = 0;
+ wants_letter = 0;
+ for (i = 0; i < r->states[0].elems.nelem; ++i)
+ {
+ if (_PREV_NEWLINE_DEPENDENT(r->states[0].elems.elems[i].constraint))
+ wants_newline = 1;
+ if (_PREV_LETTER_DEPENDENT(r->states[0].elems.elems[i].constraint))
+ wants_letter = 1;
+ }
+ copy(&r->states[0].elems, &follows);
+ state = state_index(r, &follows, 0, 0);
+ if (wants_newline)
+ state_newline = state_index(r, &follows, 1, 0);
+ else
+ state_newline = state;
+ if (wants_letter)
+ state_letter = state_index(r, &follows, 0, 1);
+ else
+ state_letter = state;
+ for (i = 0; i < _NOTCHAR; ++i)
+ trans[i] = (ISALNUM(i)) ? state_letter : state ;
+ trans['\n'] = state_newline;
+ }
+ else
+ for (i = 0; i < _NOTCHAR; ++i)
+ trans[i] = -1;
+
+ /* Pass 2: turn each group into a successor state and record it for
+ every character of the group's label. */
+ for (i = 0; i < ngrps; ++i)
+ {
+ follows.nelem = 0;
+
+ /* Find the union of the follows of the positions of the group.
+ This is a hideously inefficient loop. Fix it someday. */
+ for (j = 0; j < grps[i].nelem; ++j)
+ for (k = 0; k < r->follows[grps[i].elems[j].index].nelem; ++k)
+ insert(r->follows[grps[i].elems[j].index].elems[k], &follows);
+
+ /* If we are building a searching matcher, throw in the positions
+ of state 0 as well. */
+ if (r->searchflag)
+ for (j = 0; j < r->states[0].elems.nelem; ++j)
+ insert(r->states[0].elems.elems[j], &follows);
+
+ /* Find out if the new state will want any context information. */
+ wants_newline = 0;
+ if (tstbit('\n', labels[i]))
+ for (j = 0; j < follows.nelem; ++j)
+ if (_PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
+ wants_newline = 1;
+
+ /* Letter context only matters if the label contains a letter. */
+ wants_letter = 0;
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ if (labels[i][j] & letters[j])
+ break;
+ if (j < _CHARSET_INTS)
+ for (j = 0; j < follows.nelem; ++j)
+ if (_PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
+ wants_letter = 1;
+
+ /* Find the state(s) corresponding to the union of the follows. */
+ state = state_index(r, &follows, 0, 0);
+ if (wants_newline)
+ state_newline = state_index(r, &follows, 1, 0);
+ else
+ state_newline = state;
+ if (wants_letter)
+ state_letter = state_index(r, &follows, 0, 1);
+ else
+ state_letter = state;
+
+ /* Set the transitions for each character in the current label. */
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ for (k = 0; k < INTBITS; ++k)
+ if (labels[i][j] & 1 << k)
+ {
+ int c = j * INTBITS + k;
+
+ if (c == '\n')
+ trans[c] = state_newline;
+ else if (ISALNUM(c))
+ trans[c] = state_letter;
+ else if (c < _NOTCHAR)
+ trans[c] = state;
+ }
+ }
+
+ for (i = 0; i < ngrps; ++i)
+ free(grps[i].elems);
+ free(follows.elems);
+ free(tmp.elems);
+ }
+
+/* Some routines for manipulating a compiled regexp's transition tables.
+ Each state may or may not have a transition table; if it does, and it
+ is a non-accepting state, then r->trans[state] points to its table.
+ If it is an accepting state then r->fails[state] points to its table.
+ If it has no table at all, then r->trans[state] is NULL.
+ TODO: Improve this comment, get rid of the unnecessary redundancy. */
+
+ /* Build the transition table for state s of r and install it in
+ r->fails[s] when s is accepting, or in r->trans[s] otherwise. Also
+ records r->success[s] and r->newlines[s], and grows the per-state
+ arrays when the new table mentions a state index at or beyond
+ r->tralloc. */
+ static void
+ build_state(s, r)
+ int s;
+ struct regexp *r;
+ {
+ int *trans; /* The new transition table. */
+ int i;
+
+ /* Set an upper limit on the number of transition tables that will ever
+ exist at once. 1024 is arbitrary. The idea is that the frequently
+ used transition tables will be quickly rebuilt, whereas the ones that
+ were only needed once or twice will be cleared away. */
+ if (r->trcount >= 1024)
+ {
+ for (i = 0; i < r->tralloc; ++i)
+ if (r->trans[i])
+ {
+ free((ptr_t) r->trans[i]);
+ r->trans[i] = NULL;
+ }
+ else if (r->fails[i])
+ {
+ free((ptr_t) r->fails[i]);
+ r->fails[i] = NULL;
+ }
+ r->trcount = 0;
+ }
+
+ ++r->trcount;
+
+ /* Set up the success bits for this state. The bit values line up with
+ the sbit[] table in regexecute(): 4 is tested when the next character
+ is a newline, 2 when it is alphanumeric, 1 otherwise. */
+ r->success[s] = 0;
+ if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 1, r->states[s].letter, 0,
+ s, *r))
+ r->success[s] |= 4;
+ if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 0, r->states[s].letter, 1,
+ s, *r))
+ r->success[s] |= 2;
+ if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 0, r->states[s].letter, 0,
+ s, *r))
+ r->success[s] |= 1;
+
+ MALLOC(trans, int, _NOTCHAR);
+ regstate(s, r, trans);
+
+ /* Now go through the new transition table, and make sure that the trans
+ and fail arrays are allocated large enough to hold a pointer for the
+ largest state mentioned in the table. */
+ for (i = 0; i < _NOTCHAR; ++i)
+ if (trans[i] >= r->tralloc)
+ {
+ int oldalloc = r->tralloc;
+
+ while (trans[i] >= r->tralloc)
+ r->tralloc *= 2;
+ /* realtrans has one slot in front of trans, so r->trans[-1] is a
+ valid element; regexecute() indexes trans[] with a state that
+ may be -1. */
+ REALLOC(r->realtrans, int *, r->tralloc + 1);
+ r->trans = r->realtrans + 1;
+ REALLOC(r->fails, int *, r->tralloc);
+ REALLOC(r->success, int, r->tralloc);
+ REALLOC(r->newlines, int, r->tralloc);
+ /* Newly added slots have no table yet. */
+ while (oldalloc < r->tralloc)
+ {
+ r->trans[oldalloc] = NULL;
+ r->fails[oldalloc++] = NULL;
+ }
+ }
+
+ /* Keep the newline transition in a special place so we can use it as
+ a sentinel. */
+ r->newlines[s] = trans['\n'];
+ trans['\n'] = -1;
+
+ /* Accepting states keep their table in fails[] so that trans[s] stays
+ NULL and the scanning loop in regexecute() stops on them. */
+ if (ACCEPTING(s, *r))
+ r->fails[s] = trans;
+ else
+ r->trans[s] = trans;
+ }
+
+ /* Allocate the per-state bookkeeping arrays with room for a single state
+ and build the transition table of state 0. r->trans is set one element
+ past the start of r->realtrans, leaving a zeroed slot in front of it. */
+ static void
+ build_state_zero(r)
+     struct regexp *r;
+ {
+     /* No tables built yet; capacity for exactly one state. */
+     r->trcount = 0;
+     r->tralloc = 1;
+
+     /* Pointer tables are zero-filled so every state starts without a table. */
+     CALLOC(r->realtrans, int *, r->tralloc + 1);
+     r->trans = r->realtrans + 1;
+     CALLOC(r->fails, int *, r->tralloc);
+
+     /* Plain integer side tables; build_state() fills in the entries. */
+     MALLOC(r->success, int, r->tralloc);
+     MALLOC(r->newlines, int, r->tralloc);
+
+     build_state(0, r);
+ }
+
+/* Search through a buffer looking for a match to the given struct regexp.
+ Find the first occurrence of a string matching the regexp in the buffer,
+ and the shortest possible version thereof. Return a pointer to the first
+ character after the match, or NULL if none is found. Begin points to
+ the beginning of the buffer, and end points to the first character after
+ its end. We store a newline in *end to act as a sentinel, so end had
+ better point somewhere valid. Newline is a flag indicating whether to
+ allow newlines to be in the matching string. If count is non-
+ NULL it points to a place we're supposed to increment every time we
+ see a newline. Finally, if backref is non-NULL it points to a place
+ where we're supposed to store a 1 if backreferencing happened and the
+ match needs to be verified by a backtracking matcher. Otherwise
+ we store a 0 in *backref. */
+ /* Run the DFA of r over [begin, end); see the comment above for the full
+ contract. Transition tables are built lazily through build_state(). Note
+ that *end is overwritten with a newline. */
+ char *
+ regexecute(r, begin, end, newline, count, backref)
+ struct regexp *r;
+ char *begin;
+ char *end;
+ int newline;
+ int *count;
+ int *backref;
+ {
+ register s, s1, tmp; /* Current state. */
+ register unsigned char *p; /* Current input character. */
+ register **trans, *t; /* Copy of r->trans so it can be optimized
+ into a register. */
+ static sbit[_NOTCHAR]; /* Table for anding with r->success. */
+ static sbit_init;
+
+ /* One-time setup: sbit[c] selects which r->success bit applies when c is
+ the next character (4 for newline, 2 for alphanumerics, 1 otherwise). */
+ if (! sbit_init)
+ {
+ int i;
+
+ sbit_init = 1;
+ for (i = 0; i < _NOTCHAR; ++i)
+ sbit[i] = (ISALNUM(i)) ? 2 : 1;
+ sbit['\n'] = 4;
+ }
+
+ /* First use of this regexp: allocate the tables and build state 0. */
+ if (! r->tralloc)
+ build_state_zero(r);
+
+ s = s1 = 0;
+ p = (unsigned char *) begin;
+ trans = r->trans;
+ /* Sentinel: build_state() stores -1 in the '\n' slot of every table, so
+ the scanning loop below always stops here at the latest. */
+ *end = '\n';
+
+ for (;;)
+ {
+ /* Two transitions per iteration. On exit s is the state just entered
+ (one without a trans[] table) and s1 the state before it. */
+ while ((t = trans[s]) != 0) { /* hand-optimized loop */
+ s1 = t[*p++];
+ if ((t = trans[s1]) == 0) {
+ tmp = s ; s = s1 ; s1 = tmp ; /* swap */
+ break;
+ }
+ s = t[*p++];
+ }
+
+ /* s has a table in fails[], i.e. build_state() found it accepting. */
+ if (s >= 0 && p <= (unsigned char *) end && r->fails[s])
+ {
+ if (r->success[s] & sbit[*p])
+ {
+ if (backref)
+ *backref = (r->states[s].backref != 0);
+ return (char *) p;
+ }
+
+ /* Not accepted in this context; keep going from the fails table. */
+ s1 = s;
+ s = r->fails[s][*p++];
+ continue;
+ }
+
+ /* If the previous character was a newline, count it. */
+ if (count && (char *) p <= end && p[-1] == '\n')
+ ++*count;
+
+ /* Check if we've run off the end of the buffer. */
+ if ((char *) p >= end)
+ return NULL;
+
+ /* A valid state that simply has no table yet: build it and retry.
+ build_state() may reallocate r->trans, so refresh the local copy. */
+ if (s >= 0)
+ {
+ build_state(s, r);
+ trans = r->trans;
+ continue;
+ }
+
+ /* s is negative here. If that came from the newline sentinel and
+ newlines may take part in a match, follow the real newline
+ transition that build_state() saved for the previous state. */
+ if (p[-1] == '\n' && newline)
+ {
+ s = r->newlines[s1];
+ continue;
+ }
+
+ /* Otherwise restart from the initial state. */
+ s = 0;
+ }
+ }
+
+/* Initialize the components of a regexp that the other routines don't
+ initialize for themselves. */
+ /* Put r into a known state before parsing: allocate one-element charset
+ and token arrays, zero the parser counters, and mark the transition
+ tables as not yet built. The table pointers are cleared as well so that
+ reg_free() can safely test them on a regexp that was compiled but never
+ executed. */
+ void
+ reginit(r)
+     struct regexp *r;
+ {
+     r->calloc = 1;
+     MALLOC(r->charsets, _charset, r->calloc);
+     r->cindex = 0;
+
+     r->talloc = 1;
+     MALLOC(r->tokens, _token, r->talloc);
+     r->tindex = r->depth = r->nleaves = r->nregexps = 0;
+
+     r->searchflag = 0;
+
+     /* tralloc == 0 tells regexecute() to call build_state_zero(), which
+        is where these tables are really allocated. */
+     r->tralloc = 0;
+     r->realtrans = NULL;
+     r->trans = NULL;
+     r->fails = NULL;
+     r->success = NULL;
+     r->newlines = NULL;
+ }
+
+/* Parse and analyze a single string of the given length. */
+ /* Parse and analyze the len-byte pattern s into r. searchflag is passed
+ through to reganalyze().
+
+ When case_fold is set, the pattern is first compiled from a lower-cased
+ copy with folding disabled, purely so that regmust() sees the folded
+ text; it is then compiled again from the original text with folding
+ enabled. */
+ void
+ regcompile(s, len, r, searchflag)
+     const char *s;
+     size_t len;
+     struct regexp *r;
+     int searchflag;
+ {
+     if (case_fold) /* dummy folding in service of regmust() */
+     {
+         char *regcopy;
+         size_t i;
+
+         /* Never ask for zero bytes: malloc(0) is allowed to return NULL,
+            which would be misreported as running out of memory. */
+         regcopy = malloc(len ? len : 1);
+         if (!regcopy)
+             reg_error("out of memory");
+
+         /* This is a complete kludge and could potentially break
+            \<letter> escapes . . . */
+         case_fold = 0;
+         for (i = 0; i < len; ++i)
+         {
+             /* <ctype.h> functions need a value in unsigned char range. */
+             unsigned char c = (unsigned char) s[i];
+
+             if (ISUPPER(c))
+                 regcopy[i] = tolower(c);
+             else
+                 regcopy[i] = s[i];
+         }
+
+         reginit(r);
+         r->mustn = 0;
+         r->must[0] = '\0';
+         regparse(regcopy, len, r);
+         free(regcopy);
+         regmust(r);
+         reganalyze(r, searchflag);
+         case_fold = 1;
+         /* NOTE(review): this second reginit() overwrites r->charsets and
+            r->tokens from the first pass without freeing them -- confirm
+            whether that leak is intended. */
+         reginit(r);
+         regparse(s, len, r);
+         reganalyze(r, searchflag);
+     }
+     else
+     {
+         reginit(r);
+         regparse(s, len, r);
+         regmust(r);
+         reganalyze(r, searchflag);
+     }
+ }
+
+/* Free the storage held by the components of a regexp. */
+ /* Release everything hanging off r: charsets, tokens, the position set of
+ every state, the follow sets, every built transition table, and the
+ per-state arrays allocated by build_state_zero(). The struct itself is
+ not freed. */
+ void
+ reg_free(r)
+     struct regexp *r;
+ {
+     int i;
+
+     free((ptr_t) r->charsets);
+     free((ptr_t) r->tokens);
+     for (i = 0; i < r->sindex; ++i)
+         free((ptr_t) r->states[i].elems.elems);
+     free((ptr_t) r->states);
+     for (i = 0; i < r->tindex; ++i)
+         if (r->follows[i].elems)
+             free((ptr_t) r->follows[i].elems);
+     free((ptr_t) r->follows);
+     /* A state owns at most one table: build_state() stores it either in
+        trans[] or in fails[], never both. */
+     for (i = 0; i < r->tralloc; ++i)
+         if (r->trans[i])
+             free((ptr_t) r->trans[i]);
+         else if (r->fails[i])
+             free((ptr_t) r->fails[i]);
+     if (r->realtrans)
+         free((ptr_t) r->realtrans);
+     if (r->fails)
+         free((ptr_t) r->fails);
+     if (r->newlines)
+         free((ptr_t) r->newlines);
+     /* success[] is allocated alongside newlines[] in build_state_zero();
+        it only exists once tralloc is nonzero. */
+     if (r->tralloc && r->success)
+         free((ptr_t) r->success);
+ }
+
+/*
+Having found the postfix representation of the regular expression,
+try to find a long sequence of characters that must appear in any line
+containing the r.e.
+Finding a "longest" sequence is beyond the scope here;
+we take an easy way out and hope for the best.
+(Take "(ab|a)b"--please.)
+
+We do a bottom-up calculation of sequences of characters that must appear
+in matches of r.e.'s represented by trees rooted at the nodes of the postfix
+representation:
+ sequences that must appear at the left of the match ("left")
+ sequences that must appear at the right of the match ("right")
+ lists of sequences that must appear somewhere in the match ("in")
+ sequences that must constitute the match ("is")
+When we get to the root of the tree, we use one of the longest of its
+calculated "in" sequences as our answer. The sequence we find is returned in
+r->must (where "r" is the single argument passed to "regmust");
+the length of the sequence is returned in r->mustn.
+
+The sequences calculated for the various types of node (in pseudo ANSI c)
+are shown below. "p" is the operand of unary operators (and the left-hand
+operand of binary operators); "q" is the right-hand operand of binary operators
+.
+"ZERO" means "a zero-length sequence" below.
+
+Type left right is in
+---- ---- ----- -- --
+char c # c # c # c # c
+
+SET ZERO ZERO ZERO ZERO
+
+STAR ZERO ZERO ZERO ZERO
+
+QMARK ZERO ZERO ZERO ZERO
+
+PLUS p->left p->right ZERO p->in
+
+CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus
+ p->left : q->right : q->is!=ZERO) ? q->in plus
+ p->is##q->left p->right##q->is p->is##q->is : p->right##q->left
+ ZERO
+
+OR longest common longest common (do p->is and substrings common to
+ leading trailing q->is have same p->in and q->in
+ (sub)sequence (sub)sequence length and
+ of p->left of p->right content) ?
+ and q->left and q->right p->is : NULL
+
+If there's anything else we recognize in the tree, all four sequences get set
+to zero-length sequences. If there's something we don't recognize in the tree,
+we just return a zero-length sequence.
+
+Break ties in favor of infrequent letters (choosing 'zzz' in preference to
+'aaa')?
+
+And. . .is it here or someplace that we might ponder "optimizations" such as
+ egrep 'psi|epsilon' -> egrep 'psi'
+ egrep 'pepsi|epsilon' -> egrep 'epsi'
+ (Yes, we now find "epsi" as a "string
+ that must occur", but we might also
+ simplify the *entire* r.e. being sought
+)
+ grep '[c]' -> grep 'c'
+ grep '(ab|a)b' -> grep 'ab'
+ grep 'ab*' -> grep 'a'
+ grep 'a*b' -> grep 'b'
+There are several issues:
+ Is optimization easy (enough)?
+
+ Does optimization actually accomplish anything,
+ or is the automaton you get from "psi|epsilon" (for example)
+ the same as the one you get from "psi" (for example)?
+
+ Are optimizable r.e.'s likely to be used in real-life situations
+ (something like 'ab*' is probably unlikely; something like
+ 'psi|epsilon' is likelier)?
+*/
+
+ /* Append the string new to the heap string old and return the result.
+ old == NULL: a fresh copy of new is allocated (an empty string when new
+ is NULL as well).
+ new NULL or empty, old non-NULL: old is returned unchanged.
+ Otherwise old is grown with realloc(), so the caller must use the
+ returned pointer from then on. Returns NULL when allocation fails. */
+ static char *
+ icatalloc(old, new)
+ char * old;
+ const char * new;
+ {
+     register char * result;
+     register int oldsize, newsize;
+
+     newsize = (new == NULL) ? 0 : strlen(new);
+     if (old == NULL)
+         oldsize = 0;
+     else if (newsize == 0)
+         return old;
+     else oldsize = strlen(old);
+     if (old == NULL)
+         result = (char *) malloc(newsize + 1);
+     else result = (char *) realloc((void *) old, oldsize + newsize + 1);
+     if (result != NULL) {
+         if (new != NULL)
+             (void) strcpy(result + oldsize, new);
+         else
+             /* Only reachable with old == NULL too: hand back a
+                properly terminated empty string. */
+             result[oldsize] = '\0';
+     }
+     return result;
+ }
+
+ /* Return a freshly allocated copy of string, obtained by "appending" it
+ to a NULL original via icatalloc(). The result is whatever icatalloc()
+ returns, including NULL on allocation failure. */
+ static char *
+ icpyalloc(string)
+ const char * string;
+ {
+     char * duplicate;
+
+     duplicate = icatalloc((char *) NULL, string);
+     return duplicate;
+ }
+
+ /* Return a pointer to the first occurrence of lookfor inside lookin, or
+ NULL if there is none. Only non-empty suffixes of lookin are tried, so
+ an empty lookin always yields NULL, even for an empty lookfor. */
+ static char *
+ istrstr(lookin, lookfor)
+ char * lookin;
+ register char * lookfor;
+ {
+     register char * scan;
+     register int wanted;
+
+     wanted = strlen(lookfor);
+     scan = lookin;
+     while (*scan != '\0') {
+         if (strncmp(scan, lookfor, wanted) == 0)
+             return scan;
+         ++scan;
+     }
+     return NULL;
+ }
+
+ /* free() wrapper that ignores a NULL pointer. */
+ static void
+ ifree(cp)
+ char * cp;
+ {
+     if (cp == NULL)
+         return;
+     free(cp);
+ }
+
+ /* Free every string of the NULL-terminated list cpp and overwrite each
+ slot with NULL. The array itself is left allocated; a NULL cpp is
+ ignored. */
+ static void
+ freelist(cpp)
+ register char ** cpp;
+ {
+     register char ** slot;
+
+     if (cpp == NULL)
+         return;
+     slot = cpp;
+     while (*slot != NULL) {
+         free(*slot);
+         *slot = NULL;
+         ++slot;
+     }
+ }
+
+ /* Add the first len characters of new to the NULL-terminated string list
+ cpp, keeping the list free of entries that are substrings of other
+ entries:
+ - if some entry already contains the candidate, the list is unchanged;
+ - otherwise every entry contained in the candidate is dropped and the
+ candidate is appended.
+ The list owns private copies of its strings. Returns the (possibly
+ reallocated) list, or NULL if cpp is NULL or memory runs out. */
+ static char **
+ enlist(cpp, new, len)
+ register char ** cpp;
+ register char * new;
+ #ifdef __STDC__
+ size_t len;
+ #else
+ int len;
+ #endif
+ {
+     register int i, j;
+     register char ** grown;
+
+     if (cpp == NULL)
+         return NULL;
+     if ((new = icpyalloc(new)) == NULL) {
+         freelist(cpp);
+         return NULL;
+     }
+     new[len] = '\0';
+     /*
+     ** Is there already something in the list that's new (or longer)?
+     ** This loop also leaves i equal to the number of entries.
+     */
+     for (i = 0; cpp[i] != NULL; ++i)
+         if (istrstr(cpp[i], new) != NULL) {
+             free(new);
+             return cpp;
+         }
+     /*
+     ** Eliminate any obsoleted strings. Invariant: the live entries are
+     ** cpp[0..i-1] and cpp[i] is NULL.
+     */
+     j = 0;
+     while (cpp[j] != NULL)
+         if (istrstr(new, cpp[j]) == NULL)
+             ++j;
+         else {
+             free(cpp[j]);
+             --i;
+             if (i != j)
+                 cpp[j] = cpp[i];    /* fill the hole with the last entry */
+             /* Clear the vacated slot so the scan can never reach a
+                pointer that was moved or freed. */
+             cpp[i] = NULL;
+         }
+     /*
+     ** Add the new string.
+     */
+     grown = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp);
+     if (grown == NULL) {
+         free(new);      /* nobody else holds the copy */
+         return NULL;
+     }
+     cpp = grown;
+     cpp[i] = new;
+     cpp[i + 1] = NULL;
+     return cpp;
+ }
+
+/*
+** Given pointers to two strings,
+** return a pointer to an allocated list of their distinct common substrings.
+** Return NULL if something seems wild.
+*/
+
+ /* Build the list of common substrings of left and right: for each start
+ position in left, the longest run that also occurs somewhere in right
+ is offered to enlist(). Returns NULL if either argument is NULL, if the
+ initial allocation fails, or if enlist() fails. */
+ static char **
+ comsubs(left, right)
+ char * left;
+ char * right;
+ {
+     register char ** list;
+     register char * lp;
+     register char * rp;
+     register int run, best;
+
+     if (left == NULL || right == NULL)
+         return NULL;
+     list = (char **) malloc(sizeof *list);
+     if (list == NULL)
+         return NULL;
+     list[0] = NULL;
+     for (lp = left; *lp != '\0'; ++lp) {
+         /* Longest match starting at lp over every occurrence of *lp
+            in right. */
+         best = 0;
+         for (rp = strchr(right, *lp); rp != NULL;
+              rp = strchr(rp + 1, *lp)) {
+             run = 1;
+             while (lp[run] != '\0' && lp[run] == rp[run])
+                 ++run;
+             if (run > best)
+                 best = run;
+         }
+         if (best == 0)
+             continue;
+ #ifdef __STDC__
+         list = enlist(list, lp, (size_t)best);
+ #else
+         list = enlist(list, lp, best);
+ #endif
+         if (list == NULL)
+             break;
+     }
+     return list;
+ }
+
+ /* Merge every string of the list new into the list old through enlist()
+ and return the resulting list. Returns NULL if either list is NULL or
+ as soon as enlist() fails. */
+ static char **
+ addlists(old, new)
+ char ** old;
+ char ** new;
+ {
+     register char ** item;
+
+     if (old == NULL || new == NULL)
+         return NULL;
+     /* Stop at the end of new, or as soon as enlist() reports failure. */
+     for (item = new; old != NULL && *item != NULL; ++item)
+         old = enlist(old, *item, strlen(*item));
+     return old;
+ }
+
+/*
+** Given two lists of substrings,
+** return a new list giving substrings common to both.
+*/
+
+ /* Return a newly allocated list of the substrings common to some string
+ of left and some string of right (each pair is compared with
+ comsubs()). Returns NULL if either list is NULL or if any allocation
+ fails; nothing allocated here is left behind in that case. */
+ static char **
+ inboth(left, right)
+ char ** left;
+ char ** right;
+ {
+     register char ** both;
+     register char ** temp;
+     register int lnum, rnum;
+
+     if (left == NULL || right == NULL)
+         return NULL;
+     both = (char **) malloc(sizeof *both);
+     if (both == NULL)
+         return NULL;
+     both[0] = NULL;
+     for (lnum = 0; left[lnum] != NULL; ++lnum) {
+         for (rnum = 0; right[rnum] != NULL; ++rnum) {
+             temp = comsubs(left[lnum], right[rnum]);
+             if (temp == NULL) {
+                 freelist(both);
+                 free((char *) both);    /* the array, not just its strings */
+                 return NULL;
+             }
+             both = addlists(both, temp);
+             /* addlists() copied what it needed; release the strings
+                and the array that comsubs() allocated. */
+             freelist(temp);
+             free((char *) temp);
+             if (both == NULL)
+                 return NULL;
+         }
+     }
+     return both;
+ }
+
+/*
+typedef struct {
+ char ** in;
+ char * left;
+ char * right;
+ char * is;
+} must;
+ */
+ /* Reset *mp to "nothing is known": truncate the left, right and is
+ strings to empty and free every string on the in list. The buffers and
+ the list array stay allocated. */
+ static void
+ resetmust(mp)
+ register must * mp;
+ {
+     mp->is[0] = '\0';
+     mp->right[0] = '\0';
+     mp->left[0] = '\0';
+     freelist(mp->in);
+ }
+
+ /* Walk the postfix token array of r and work out a string that must
+ appear in any text matching the regexp, following the left/right/is/in
+ rules tabulated in the comment above. One of the longest "in" strings
+ of the root is copied into r->must (truncated to MUST_MAX - 1
+ characters) and its length stored in r->mustn. On any allocation
+ failure or unexpected token the result is the empty string.
+
+ The function works on its argument r throughout. */
+ static void
+ regmust(r)
+ register struct regexp * r;
+ {
+     register must * musts;          /* operand stack, one slot per token */
+     register must * mp;             /* next free stack slot */
+     register char * result = "";
+     register int ri;
+     register int i;
+     register _token t;
+     static must must0;              /* all-zero template */
+
+     r->mustn = 0;
+     r->must[0] = '\0';
+     musts = (must *) malloc((r->tindex + 1) * sizeof *musts);
+     if (musts == NULL)
+         return;
+     mp = musts;
+     /* Zero every slot first so the cleanup at "done" is safe even when
+        the allocation loop below stops half way. */
+     for (i = 0; i <= r->tindex; ++i)
+         mp[i] = must0;
+     for (i = 0; i <= r->tindex; ++i) {
+         mp[i].in = (char **) malloc(sizeof *mp[i].in);
+         mp[i].left = malloc(2);
+         mp[i].right = malloc(2);
+         mp[i].is = malloc(2);
+         if (mp[i].in == NULL || mp[i].left == NULL ||
+             mp[i].right == NULL || mp[i].is == NULL)
+             goto done;
+         mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
+         mp[i].in[0] = NULL;
+     }
+     for (ri = 0; ri < r->tindex; ++ri) {
+         switch (t = r->tokens[ri]) {
+         case _ALLBEGLINE:
+         case _ALLENDLINE:
+         case _LPAREN:
+         case _RPAREN:
+             goto done;      /* "cannot happen" */
+         case _EMPTY:
+         case _BEGLINE:
+         case _ENDLINE:
+         case _BEGWORD:
+         case _ENDWORD:
+         case _LIMWORD:
+         case _NOTLIMWORD:
+         case _BACKREF:
+             /* Operands that contribute no required text. */
+             resetmust(mp);
+             break;
+         case _STAR:
+         case _QMARK:
+             /* The operand may be absent, so nothing is required. */
+             if (mp <= musts)
+                 goto done;  /* "cannot happen" */
+             --mp;
+             resetmust(mp);
+             break;
+         case _OR:
+             if (mp < &musts[2])
+                 goto done;  /* "cannot happen" */
+             {
+                 register char ** new;
+                 register must * lmp;
+                 register must * rmp;
+                 register int j, ln, rn, n;
+
+                 rmp = --mp;
+                 lmp = --mp;
+                 /* Guaranteed to be. Unlikely, but. . . */
+                 if (strcmp(lmp->is, rmp->is) != 0)
+                     lmp->is[0] = '\0';
+                 /* Left side--easy: keep the common prefix. */
+                 i = 0;
+                 while (lmp->left[i] != '\0' &&
+                        lmp->left[i] == rmp->left[i])
+                     ++i;
+                 lmp->left[i] = '\0';
+                 /* Right side: keep the common suffix, shifted to the
+                    front of the buffer. */
+                 ln = strlen(lmp->right);
+                 rn = strlen(rmp->right);
+                 n = ln;
+                 if (n > rn)
+                     n = rn;
+                 for (i = 0; i < n; ++i)
+                     if (lmp->right[ln - i - 1] !=
+                         rmp->right[rn - i - 1])
+                         break;
+                 for (j = 0; j < i; ++j)
+                     lmp->right[j] =
+                         lmp->right[(ln - i) + j];
+                 lmp->right[j] = '\0';
+                 new = inboth(lmp->in, rmp->in);
+                 if (new == NULL)
+                     goto done;
+                 freelist(lmp->in);
+                 free((char *) lmp->in);
+                 lmp->in = new;
+             }
+             break;
+         case _PLUS:
+             /* left, right and in carry over; only "is" is lost. */
+             if (mp <= musts)
+                 goto done;  /* "cannot happen" */
+             --mp;
+             mp->is[0] = '\0';
+             break;
+         case _END:
+             if (mp != &musts[1])
+                 goto done;  /* "cannot happen" */
+             /* Pick the first of the longest "in" strings of the root. */
+             for (i = 0; musts[0].in[i] != NULL; ++i)
+                 if (strlen(musts[0].in[i]) > strlen(result))
+                     result = musts[0].in[i];
+             goto done;
+         case _CAT:
+             if (mp < &musts[2])
+                 goto done;  /* "cannot happen" */
+             {
+                 register must * lmp;
+                 register must * rmp;
+
+                 rmp = --mp;
+                 lmp = --mp;
+                 /*
+                 ** In. Everything in left, plus everything in
+                 ** right, plus catenation of
+                 ** left's right and right's left.
+                 */
+                 lmp->in = addlists(lmp->in, rmp->in);
+                 if (lmp->in == NULL)
+                     goto done;
+                 if (lmp->right[0] != '\0' &&
+                     rmp->left[0] != '\0') {
+                     register char * tp;
+
+                     tp = icpyalloc(lmp->right);
+                     if (tp == NULL)
+                         goto done;
+                     tp = icatalloc(tp, rmp->left);
+                     if (tp == NULL)
+                         goto done;
+                     lmp->in = enlist(lmp->in, tp,
+                                      strlen(tp));
+                     free(tp);
+                     if (lmp->in == NULL)
+                         goto done;
+                 }
+                 /* Left-hand */
+                 if (lmp->is[0] != '\0') {
+                     lmp->left = icatalloc(lmp->left,
+                                           rmp->left);
+                     if (lmp->left == NULL)
+                         goto done;
+                 }
+                 /* Right-hand */
+                 if (rmp->is[0] == '\0')
+                     lmp->right[0] = '\0';
+                 lmp->right = icatalloc(lmp->right, rmp->right);
+                 if (lmp->right == NULL)
+                     goto done;
+                 /* Guaranteed to be */
+                 if (lmp->is[0] != '\0' && rmp->is[0] != '\0') {
+                     lmp->is = icatalloc(lmp->is, rmp->is);
+                     if (lmp->is == NULL)
+                         goto done;
+                 }
+             }
+             break;
+         default:
+             if (t < _END) {
+                 /* "cannot happen" */
+                 goto done;
+             } else if (t == '\0') {
+                 /* not on *my* shift */
+                 goto done;
+             } else if (t >= _SET) {
+                 /* easy enough */
+                 resetmust(mp);
+             } else {
+                 /* plain character */
+                 resetmust(mp);
+                 mp->is[0] = mp->left[0] = mp->right[0] = t;
+                 mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+                 mp->in = enlist(mp->in, mp->is, 1);
+                 if (mp->in == NULL)
+                     goto done;
+             }
+             break;
+         }
+         ++mp;
+     }
+ done:
+     /* Copy the answer out before the stack that owns it is torn down. */
+     (void) strncpy(r->must, result, MUST_MAX - 1);
+     r->must[MUST_MAX - 1] = '\0';
+     r->mustn = strlen(r->must);
+     mp = musts;
+     for (i = 0; i <= r->tindex; ++i) {
+         freelist(mp[i].in);
+         ifree((char *) mp[i].in);
+         ifree(mp[i].left);
+         ifree(mp[i].right);
+         ifree(mp[i].is);
+     }
+     free((char *) mp);
+ }
diff --git a/gnu/usr.bin/awk/dfa.h b/gnu/usr.bin/awk/dfa.h
new file mode 100644
index 000000000000..65fc49565a7c
--- /dev/null
+++ b/gnu/usr.bin/awk/dfa.h
@@ -0,0 +1,543 @@
+/* dfa.h - declarations for GNU deterministic regexp compiler
+ Copyright (C) 1988 Free Software Foundation, Inc.
+ Written June, 1988 by Mike Haertel
+
+ NO WARRANTY
+
+ BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
+NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
+WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
+RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
+WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
+AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
+CORRECTION.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
+STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
+WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
+LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
+OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
+DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
+A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
+PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
+
+ GENERAL PUBLIC LICENSE TO COPY
+
+ 1. You may copy and distribute verbatim copies of this source file
+as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy a valid copyright notice "Copyright
+ (C) 1988 Free Software Foundation, Inc."; and include following the
+copyright notice a verbatim copy of the above disclaimer of warranty
+and of this License. You may charge a distribution fee for the
+physical act of transferring a copy.
+
+ 2. You may modify your copy or copies of this source file or
+any portion of it, and copy and distribute such modifications under
+the terms of Paragraph 1 above, provided that you also do the following:
+
+ a) cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change; and
+
+ b) cause the whole of any work that you distribute or publish,
+ that in whole or in part contains or is a derivative of this
+ program or any part thereof, to be licensed at no charge to all
+ third parties on terms identical to those contained in this
+ License Agreement (except that you may choose to grant more extensive
+ warranty protection to some or all third parties, at your option).
+
+ c) You may charge a distribution fee for the physical act of
+ transferring a copy, and you may at your option offer warranty
+ protection in exchange for a fee.
+
+Mere aggregation of another unrelated program with this program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other program under the scope of these terms.
+
+ 3. You may copy and distribute this program or any portion of it in
+compiled, executable or object code form under the terms of Paragraphs
+1 and 2 above provided that you do the following:
+
+ a) accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ b) accompany it with a written offer, valid for at least three
+ years, to give any third party free (except for a nominal
+ shipping charge) a complete machine-readable copy of the
+ corresponding source code, to be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ c) accompany it with the information you received as to where the
+ corresponding source code may be obtained. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form alone.)
+
+For an executable file, complete source code means all the source code for
+all modules it contains; but, as a special exception, it need not include
+source code for modules which are standard libraries that accompany the
+operating system on which the executable file runs.
+
+ 4. You may not copy, sublicense, distribute or transfer this program
+except as expressly provided under this License Agreement. Any attempt
+otherwise to copy, sublicense, distribute or transfer this program is void and
+your rights to use the program under this License agreement shall be
+automatically terminated. However, parties who have received computer
+software programs from you with this License Agreement will not have
+their licenses terminated so long as such parties remain in full compliance.
+
+ 5. If you wish to incorporate parts of this program into other free
+programs whose distribution conditions are different, write to the Free
+Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
+worked out a simple rule that can be stated here, but we will often permit
+this. We will be guided by the two goals of preserving the free status of
+all derivatives our free software and of promoting the sharing and reuse of
+software.
+
+
+In other words, you are welcome to use, share and improve this program.
+You are forbidden to forbid anyone else to use, share and improve
+what you give them. Help stamp out software-hoarding! */
+
+#ifdef __STDC__
+
+#ifdef SOMEDAY /* not defined anywhere in this header; unless it is defined elsewhere, the isascii()-guarded forms are used */
+#define ISALNUM(c) isalnum(c)
+#define ISALPHA(c) isalpha(c)
+#define ISUPPER(c) isupper(c)
+#else /* guard each classifier with isascii(); note that c is evaluated twice */
+#define ISALNUM(c) (isascii(c) && isalnum(c))
+#define ISALPHA(c) (isascii(c) && isalpha(c))
+#define ISUPPER(c) (isascii(c) && isupper(c))
+#endif
+
+#else /* ! __STDC__ */
+
+#define const /* non-__STDC__ compilers: make the `const' keyword expand to nothing */
+
+#define ISALNUM(c) (isascii(c) && isalnum(c))
+#define ISALPHA(c) (isascii(c) && isalpha(c))
+#define ISUPPER(c) (isascii(c) && isupper(c))
+
+#endif /* ! __STDC__ */
+
+/* 1 means plain parentheses serve as grouping, and backslash
+ parentheses are needed for literal searching.
+ 0 means backslash-parentheses are grouping, and plain parentheses
+ are for literal searching. */
+#define RE_NO_BK_PARENS 1L
+
+/* 1 means plain | serves as the "or"-operator, and \| is a literal.
+ 0 means \| serves as the "or"-operator, and | is a literal. */
+#define RE_NO_BK_VBAR (1L << 1)
+
+/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
+ 1 means \+, \? are operators and plain +, ? are literals. */
+#define RE_BK_PLUS_QM (1L << 2)
+
+/* 1 means | binds tighter than ^ or $.
+ 0 means the contrary. */
+#define RE_TIGHT_VBAR (1L << 3)
+
+/* 1 means treat \n as an _OR operator
+ 0 means treat it as a normal character */
+#define RE_NEWLINE_OR (1L << 4)
+
+/* 0 means that special characters (such as *, ^, and $) always have
+ their special meaning regardless of the surrounding context.
+ 1 means that special characters may act as normal characters in some
+ contexts. Specifically, this applies to:
+ ^ - only special at the beginning, or after ( or |
+ $ - only special at the end, or before ) or |
+ *, +, ? - only special when not after the beginning, (, or | */
+#define RE_CONTEXT_INDEP_OPS (1L << 5)
+
+/* 1 means that \ in a character class escapes the next character (typically
+ a hyphen). It also is overloaded to mean that a hyphen at the end of the range
+ is allowable and means that the hyphen is to be taken literally. */
+#define RE_AWK_CLASS_HACK (1L << 6)
+
+/* Now define combinations of bits for the standard possibilities. */
+#ifdef notdef /* these combinations are compiled only if notdef is defined; this header never defines it */
+#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
+#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
+#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
+#define RE_SYNTAX_EMACS 0
+#endif
+
+/* The NULL pointer. Only defined here if no earlier header has already defined it. */
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Number of bits in an unsigned char. May be overridden by defining CHARBITS beforehand. */
+#ifndef CHARBITS
+#define CHARBITS 8
+#endif
+
+/* First integer value that is greater than any character code. */
+#define _NOTCHAR (1 << CHARBITS)
+
+/* INTBITS need not be exact, just a lower bound. */
+#ifndef INTBITS
+#define INTBITS (CHARBITS * sizeof (int))
+#endif
+
+/* Number of ints required to hold a bit for every character (rounded up). */
+#define _CHARSET_INTS ((_NOTCHAR + INTBITS - 1) / INTBITS)
+
+/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
+typedef int _charset[_CHARSET_INTS]; /* e.g. 8 ints when CHARBITS is 8 and sizeof (int) is 4 */
+
+/* The regexp is parsed into an array of tokens in postfix form. Some tokens
+ are operators and others are terminal symbols. Most (but not all) of these
+ codes are returned by the lexical analyzer. */
+#ifdef __STDC__
+
+typedef enum
+{
+ _END = -1, /* _END is a terminal symbol that matches the
+ end of input; any value of _END or less in
+ the parse tree is such a symbol. Accepting
+ states of the DFA are those that would have
+ a transition on _END. */
+
+ /* Ordinary character values are terminal symbols that match themselves. */
+
+ _EMPTY = _NOTCHAR, /* _EMPTY is a terminal symbol that matches
+ the empty string. */
+
+ _BACKREF, /* _BACKREF is generated by \<digit>; it
+ is not completely handled. If the scanner
+ detects a transition on backref, it returns
+ a kind of "semi-success" indicating that
+ the match will have to be verified with
+ a backtracking matcher. */
+
+ _BEGLINE, /* _BEGLINE is a terminal symbol that matches
+ the empty string if it is at the beginning
+ of a line. */
+
+ _ALLBEGLINE, /* _ALLBEGLINE is a terminal symbol that
+ matches the empty string if it is at the
+ beginning of a line; _ALLBEGLINE applies
+ to the entire regexp and can only occur
+ as the first token thereof. _ALLBEGLINE
+ never appears in the parse tree; a _BEGLINE
+ is prepended with _CAT to the entire
+ regexp instead. */
+
+ _ENDLINE, /* _ENDLINE is a terminal symbol that matches
+ the empty string if it is at the end of
+ a line. */
+
+ _ALLENDLINE, /* _ALLENDLINE is to _ENDLINE as _ALLBEGLINE
+ is to _BEGLINE. */
+
+ _BEGWORD, /* _BEGWORD is a terminal symbol that matches
+ the empty string if it is at the beginning
+ of a word. */
+
+ _ENDWORD, /* _ENDWORD is a terminal symbol that matches
+ the empty string if it is at the end of
+ a word. */
+
+ _LIMWORD, /* _LIMWORD is a terminal symbol that matches
+ the empty string if it is at the beginning
+ or the end of a word. */
+
+ _NOTLIMWORD, /* _NOTLIMWORD is a terminal symbol that
+ matches the empty string if it is not at
+ the beginning or end of a word. */
+
+ _QMARK, /* _QMARK is an operator of one argument that
+ matches zero or one occurrences of its
+ argument. */
+
+ _STAR, /* _STAR is an operator of one argument that
+ matches the Kleene closure (zero or more
+ occurrences) of its argument. */
+
+ _PLUS, /* _PLUS is an operator of one argument that
+ matches the positive closure (one or more
+ occurrences) of its argument. */
+
+ _CAT, /* _CAT is an operator of two arguments that
+ matches the concatenation of its
+ arguments. _CAT is never returned by the
+ lexical analyzer. */
+
+ _OR, /* _OR is an operator of two arguments that
+ matches either of its arguments. */
+
+ _LPAREN, /* _LPAREN never appears in the parse tree,
+ it is only a lexeme. */
+
+ _RPAREN, /* _RPAREN never appears in the parse tree. */
+
+ _SET /* _SET (and any value greater) is a
+ terminal symbol that matches any of a
+ class of characters. */
+} _token;
+
+#else /* ! __STDC__ */
+
+typedef short _token; /* without enums, tokens are shorts; the values below follow the same order as the enum above */
+
+#define _END -1
+#define _EMPTY _NOTCHAR
+#define _BACKREF (_EMPTY + 1)
+#define _BEGLINE (_EMPTY + 2)
+#define _ALLBEGLINE (_EMPTY + 3)
+#define _ENDLINE (_EMPTY + 4)
+#define _ALLENDLINE (_EMPTY + 5)
+#define _BEGWORD (_EMPTY + 6)
+#define _ENDWORD (_EMPTY + 7)
+#define _LIMWORD (_EMPTY + 8)
+#define _NOTLIMWORD (_EMPTY + 9)
+#define _QMARK (_EMPTY + 10)
+#define _STAR (_EMPTY + 11)
+#define _PLUS (_EMPTY + 12)
+#define _CAT (_EMPTY + 13)
+#define _OR (_EMPTY + 14)
+#define _LPAREN (_EMPTY + 15)
+#define _RPAREN (_EMPTY + 16)
+#define _SET (_EMPTY + 17)
+
+#endif /* ! __STDC__ */
+
+/* Sets are stored in an array in the compiled regexp; the index of the
+ array corresponding to a given set token is given by _SET_INDEX(t). */
+#define _SET_INDEX(t) ((t) - _SET)
+
+/* Sometimes characters can only be matched depending on the surrounding
+ context. Such context decisions depend on what the previous character
+ was, and the value of the current (lookahead) character. Context
+ dependent constraints are encoded as 8 bit integers. Each bit that
+ is set indicates that the constraint succeeds in the corresponding
+ context.
+
+ bit 7 - previous and current are newlines
+ bit 6 - previous was newline, current isn't
+ bit 5 - previous wasn't newline, current is
+ bit 4 - neither previous nor current is a newline
+ bit 3 - previous and current are word-constituents
+ bit 2 - previous was word-constituent, current isn't
+ bit 1 - previous wasn't word-constituent, current is
+ bit 0 - neither previous nor current is word-constituent
+
+ Word-constituent characters are those that satisfy isalnum().
+
+ The macro _SUCCEEDS_IN_CONTEXT determines whether a given constraint
+ succeeds in a particular context. Prevn is true if the previous character
+ was a newline, currn is true if the lookahead character is a newline.
+ Prevl and currl similarly depend upon whether the previous and current
+ characters are word-constituent letters. */
+#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+ ((constraint) & (1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)))
+#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
+ ((constraint) & (1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0))))
+#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
+ (_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+ && _MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
+
+/* The following macros give information about what a constraint depends on: each is true when the pair of bits for "previous was X" differs from the pair for "previous wasn't X". */
+#define _PREV_NEWLINE_DEPENDENT(constraint) \
+ (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
+#define _PREV_LETTER_DEPENDENT(constraint) \
+ (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
+
+/* Tokens that match the empty string subject to some constraint actually
+ work by applying that constraint to determine what may follow them,
+ taking into account what has gone before. The following values are
+ the constraints corresponding to the special tokens previously defined. */
+#define _NO_CONSTRAINT 0xff /* every context bit set */
+#define _BEGLINE_CONSTRAINT 0xcf /* newline bits 7,6 (previous was a newline); all letter bits */
+#define _ENDLINE_CONSTRAINT 0xaf /* newline bits 7,5 (current is a newline); all letter bits */
+#define _BEGWORD_CONSTRAINT 0xf2 /* all newline bits; letter bit 1 (previous wasn't word-constituent, current is) */
+#define _ENDWORD_CONSTRAINT 0xf4 /* all newline bits; letter bit 2 (previous was word-constituent, current isn't) */
+#define _LIMWORD_CONSTRAINT 0xf6 /* all newline bits; letter bits 2,1 (exactly one of previous/current is word-constituent) */
+#define _NOTLIMWORD_CONSTRAINT 0xf9 /* all newline bits; letter bits 3,0 (both or neither are word-constituent) */
+
+/* States of the recognizer correspond to sets of positions in the parse
+ tree, together with the constraints under which they may be matched.
+ So a position is encoded as an index into the parse tree together with
+ a constraint. */
+typedef struct
+{
+ unsigned index; /* Index into the parse array. */
+ unsigned constraint; /* Constraint for matching this position (an 8-bit context mask as described above). */
+} _position;
+
+/* Sets of positions are stored as arrays. */
+typedef struct
+{
+ _position *elems; /* Elements of this position set. */
+ int nelem; /* Number of elements in this set. */
+} _position_set;
+
+/* A state of the regexp consists of a set of positions, some flags,
+ and the token value of the lowest-numbered position of the state that
+ contains an _END token. */
+typedef struct
+{
+ int hash; /* Hash of the positions of this state. */
+ _position_set elems; /* Positions this state could match. */
+ char newline; /* True if previous state matched newline. */
+ char letter; /* True if previous state matched a letter. */
+ char backref; /* True if this state matches a \<digit>. */
+ unsigned char constraint; /* Constraint for this state to accept; the ACCEPTING macro tests it for nonzero. */
+ int first_end; /* Token value of the first _END in elems; FIRST_MATCHING_REGEXP negates it. */
+} _dfa_state;
+
+/* If an r.e. is at most MUST_MAX characters long, we look for a string which
+ must appear in it; whatever's found is dropped into the must[] member of struct regexp. */
+
+#define MUST_MAX 50
+
+/* A compiled regular expression. */
+struct regexp
+{
+ /* Stuff built by the scanner. */
+ _charset *charsets; /* Array of character sets for _SET tokens. */
+ int cindex; /* Index for adding new charsets. */
+ int calloc; /* Number of charsets currently allocated. */
+
+ /* Stuff built by the parser. */
+ _token *tokens; /* Postfix parse array. */
+ int tindex; /* Index for adding new tokens. */
+ int talloc; /* Number of tokens currently allocated. */
+ int depth; /* Depth required of an evaluation stack
+ used for depth-first traversal of the
+ parse tree. */
+ int nleaves; /* Number of leaves on the parse tree. */
+ int nregexps; /* Count of parallel regexps being built
+ with regparse(). */
+
+ /* Stuff owned by the state builder. */
+ _dfa_state *states; /* States of the regexp. */
+ int sindex; /* Index for adding new states. */
+ int salloc; /* Number of states currently allocated. */
+
+ /* Stuff built by the structure analyzer. */
+ _position_set *follows; /* Array of follow sets, indexed by position
+ index. The follow of a position is the set
+ of positions containing characters that
+ could conceivably follow a character
+ matching the given position in a string
+ matching the regexp. Allocated to the
+ maximum possible position index. */
+ int searchflag; /* True if we are supposed to build a searching
+ as opposed to an exact matcher. A searching
+ matcher finds the first and shortest string
+ matching a regexp anywhere in the buffer,
+ whereas an exact matcher finds the longest
+ string matching, but anchored to the
+ beginning of the buffer. */
+
+ /* Stuff owned by the executor. */
+ int tralloc; /* Number of transition tables that have
+ slots so far. */
+ int trcount; /* Number of transition tables that have
+ actually been built. */
+ int **trans; /* Transition tables for states that can
+ never accept. If the transitions for a
+ state have not yet been computed, or the
+ state could possibly accept, its entry in
+ this table is NULL. */
+ int **realtrans; /* Trans always points to realtrans + 1; this
+ is so trans[-1] can contain NULL. */
+ int **fails; /* Transition tables after failing to accept
+ on a state that potentially could do so. */
+ int *success; /* Table of acceptance conditions used in
+ regexecute and computed in build_state. */
+ int *newlines; /* Transitions on newlines. The entry for a
+ newline in any transition table is always
+ -1 so we can count lines without wasting
+ too many cycles. The transition for a
+ newline is stored separately and handled
+ as a special case. Newline is also used
+ as a sentinel at the end of the buffer. */
+ char must[MUST_MAX]; /* The "must appear" string described above MUST_MAX; NUL-terminated, so at most MUST_MAX - 1 characters. */
+ int mustn; /* strlen(must). */
+};
+
+/* Some macros for user access to regexp internals. Each takes the struct regexp itself (members are reached with `.'), not a pointer to it. */
+
+/* ACCEPTING returns true if s could possibly be an accepting state of r, i.e. the state's constraint is nonzero. */
+#define ACCEPTING(s, r) ((r).states[s].constraint)
+
+/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
+ specified context. */
+#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, reg) \
+ _SUCCEEDS_IN_CONTEXT((reg).states[state].constraint, \
+ prevn, currn, prevl, currl)
+
+/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
+ regexps that a given state could accept. Parallel regexps are numbered
+ starting at 1. */
+#define FIRST_MATCHING_REGEXP(state, reg) (-(reg).states[state].first_end)
+
+/* Entry points. */
+
+#ifdef __STDC__
+
+/* Regsyntax() takes two arguments; the first sets the syntax bits described
+ earlier in this file, and the second sets the case-folding flag. */
+extern void regsyntax(long, int);
+
+/* Compile the given string of the given length into the given struct regexp.
+ Final argument is a flag specifying whether to build a searching or an
+ exact matcher. */
+extern void regcompile(const char *, size_t, struct regexp *, int);
+
+/* Execute the given struct regexp on the buffer of characters. The
+ first char * points to the beginning, and the second points to the
+ first character after the end of the buffer, which must be a writable
+ place so a sentinel end-of-buffer marker can be stored there. The
+ third-from-last argument is a flag telling whether to allow newlines to
+ be part of a string matching the regexp. The next-to-last argument,
+ if non-NULL, points to a place to increment every time we see a
+ newline. The final argument, if non-NULL, points to a flag that will
+ be set if further examination by a backtracking matcher is needed in
+ order to verify backreferencing; otherwise the flag will be cleared.
+ Returns NULL if no match is found, or a pointer to the first
+ character after the first & shortest matching string in the buffer. */
+extern char *regexecute(struct regexp *, char *, char *, int, int *, int *);
+
+/* Free the storage held by the components of a struct regexp. */
+extern void reg_free(struct regexp *);
+
+/* Entry points for people who know what they're doing. */
+
+/* Initialize the components of a struct regexp. */
+extern void reginit(struct regexp *);
+
+/* Incrementally parse a string of given length into a struct regexp. */
+extern void regparse(const char *, size_t, struct regexp *);
+
+/* Analyze a parsed regexp; second argument tells whether to build a searching
+ or an exact matcher. */
+extern void reganalyze(struct regexp *, int);
+
+/* Compute, for each possible character, the transitions out of a given
+ state, storing them in an array of integers. */
+extern void regstate(int, struct regexp *, int []);
+
+/* Error handling. */
+
+/* Reg_error() is called by the regexp routines whenever an error occurs. It
+ takes a single argument, a NUL-terminated string describing the error.
+ The default reg_error() prints the error message to stderr and exits.
+ The user can provide a different reg_error() if so desired. */
+extern void reg_error(const char *);
+
+#else /* ! __STDC__ */
+extern void regsyntax(), regcompile(), reg_free(), reginit(), regparse();
+extern void reganalyze(), regstate(), reg_error();
+extern char *regexecute();
+#endif
diff --git a/gnu/usr.bin/awk/eval.c b/gnu/usr.bin/awk/eval.c
new file mode 100644
index 000000000000..f640f3733ada
--- /dev/null
+++ b/gnu/usr.bin/awk/eval.c
@@ -0,0 +1,1225 @@
+/*
+ * eval.c - gawk parse tree interpreter
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+extern double pow P((double x, double y));
+extern double modf P((double x, double *yp));
+extern double fmod P((double x, double y));
+
+static int eval_condition P((NODE *tree));
+static NODE *op_assign P((NODE *tree));
+static NODE *func_call P((NODE *name, NODE *arg_list));
+static NODE *match_op P((NODE *tree));
+
+NODE *_t; /* used as a temporary in macros */
+#ifdef MSDOS
+double _msc51bug; /* to get around a bug in MSC 5.1 */
+#endif
+NODE *ret_node; /* set by the Node_K_return case of interpret() to a dupnode() of the returned value */
+int OFSlen;
+int ORSlen;
+int OFMTidx;
+int CONVFMTidx;
+
+/* Macros and variables to save and restore function and loop bindings */
+/*
+ * the val variable allows return/continue/break-out-of-context to be
+ * caught and diagnosed: PUSH_BINDING copies jmp_buf x into stack and increments val; RESTORE_BINDING copies it back and decrements val
+ */
+#define PUSH_BINDING(stack, x, val) (memcpy ((char *)(stack), (char *)(x), sizeof (jmp_buf)), val++)
+#define RESTORE_BINDING(stack, x, val) (memcpy ((char *)(x), (char *)(stack), sizeof (jmp_buf)), val--)
+
+static jmp_buf loop_tag; /* always the current binding */
+static int loop_tag_valid = 0; /* nonzero when loop_tag valid */
+static int func_tag_valid = 0; /* presumably nonzero when func_tag valid, by analogy with loop_tag_valid -- TODO confirm */
+static jmp_buf func_tag; /* longjmp target used by the Node_K_return case of interpret() */
+extern int exiting, exit_val; /* both written by the Node_K_exit case of interpret() */
+
+/*
+ * This table is used by the regexp routines to do case independent
+ * matching. Basically, every ascii character maps to itself, except
+ * uppercase letters map to lower case ones. This table has 256
+ * entries, which may be overkill. Note also that if the system this
+ * is compiled on doesn't use 7-bit ascii, casetable[] should not be
+ * defined to the linker, so gawk should not load.
+ *
+ * Do NOT make this array static, it is used in several spots, not
+ * just in this file.
+ */
+#if 'a' == 97 /* it's ascii */
+char casetable[] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ /* ' ' '!' '"' '#' '$' '%' '&' ''' */
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ /* '(' ')' '*' '+' ',' '-' '.' '/' */
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ /* '0' '1' '2' '3' '4' '5' '6' '7' */
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ /* '8' '9' ':' ';' '<' '=' '>' '?' */
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' (uppercase entries hold the lowercase codes) */
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ /* 'x' 'y' 'z' '{' '|' '}' '~' DEL */
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', /* 0200-0377: each entry is its own index */
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+#else
+#include "You lose. You will need a translation table for your character set."
+#endif
+
+/*
+ * interpret - run the rule list, rule, or single statement rooted at tree. Returns 0 only when a rule is ended by an exit statement (TAG_BREAK arriving at rule_tag);
+ * returns 1 otherwise, including for a NULL tree. break, continue, next, exit and return are implemented by longjmp to loop_tag, rule_tag or func_tag.
+ */
+int
+interpret(tree)
+register NODE *volatile tree;
+{
+ jmp_buf volatile loop_tag_stack; /* shallow binding stack for loop_tag */
+ static jmp_buf rule_tag; /* tag the rule currently being run, for NEXT
+ * and EXIT statements. It is static because
+ * there are no nested rules */
+ register NODE *volatile t = NULL; /* temporary */
+ NODE **volatile lhs; /* lhs == Left Hand Side for assigns, etc */
+ NODE *volatile stable_tree; /* copy of tree used for every access inside loop bodies that setjmp may re-enter */
+ int volatile traverse = 1; /* True => loop thru tree (Node_rule_list) */
+
+ if (tree == NULL)
+ return 1;
+ sourceline = tree->source_line;
+ source = tree->source_file;
+ switch (tree->type) {
+ case Node_rule_node:
+ traverse = 0; /* False => one for-loop iteration only */
+ /* FALL THROUGH */
+ case Node_rule_list:
+ for (t = tree; t != NULL; t = t->rnode) {
+ if (traverse)
+ tree = t->lnode;
+ sourceline = tree->source_line;
+ source = tree->source_file;
+ switch (setjmp(rule_tag)) { /* re-armed for every rule; next, exit and out-of-loop continue longjmp here */
+ case 0: /* normal non-jump */
+ /* test pattern, if any */
+ if (tree->lnode == NULL ||
+ eval_condition(tree->lnode))
+ (void) interpret(tree->rnode);
+ break;
+ case TAG_CONTINUE: /* NEXT statement, or `continue' outside of a loop */
+ return 1;
+ case TAG_BREAK: /* exit statement (see Node_K_exit below) */
+ return 0;
+ default:
+ cant_happen();
+ }
+ if (!traverse) /* case Node_rule_node */
+ break; /* don't loop */
+ }
+ break;
+
+ case Node_statement_list:
+ for (t = tree; t != NULL; t = t->rnode)
+ (void) interpret(t->lnode);
+ break;
+
+ case Node_K_if:
+ if (eval_condition(tree->lnode)) {
+ (void) interpret(tree->rnode->lnode);
+ } else {
+ (void) interpret(tree->rnode->rnode);
+ }
+ break;
+
+ case Node_K_while:
+ PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); /* save the enclosing loop's jmp_buf; loop_tag now belongs to this loop */
+
+ stable_tree = tree;
+ while (eval_condition(stable_tree->lnode)) {
+ switch (setjmp(loop_tag)) {
+ case 0: /* normal non-jump */
+ (void) interpret(stable_tree->rnode);
+ break;
+ case TAG_CONTINUE: /* continue statement */
+ break;
+ case TAG_BREAK: /* break statement */
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ return 1; /* leaving the loop is done by returning from this call */
+ default:
+ cant_happen();
+ }
+ }
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ break;
+
+ case Node_K_do:
+ PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ stable_tree = tree;
+ do {
+ switch (setjmp(loop_tag)) {
+ case 0: /* normal non-jump */
+ (void) interpret(stable_tree->rnode);
+ break;
+ case TAG_CONTINUE: /* continue statement */
+ break;
+ case TAG_BREAK: /* break statement */
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ return 1;
+ default:
+ cant_happen();
+ }
+ } while (eval_condition(stable_tree->lnode));
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ break;
+
+ case Node_K_for:
+ PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ (void) interpret(tree->forloop->init);
+ stable_tree = tree;
+ while (eval_condition(stable_tree->forloop->cond)) {
+ switch (setjmp(loop_tag)) {
+ case 0: /* normal non-jump */
+ (void) interpret(stable_tree->lnode);
+ /* fall through */
+ case TAG_CONTINUE: /* continue statement */
+ (void) interpret(stable_tree->forloop->incr); /* the increment runs after the body and after a continue */
+ break;
+ case TAG_BREAK: /* break statement */
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ return 1;
+ default:
+ cant_happen();
+ }
+ }
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ break;
+
+ case Node_K_arrayfor:
+ {
+ volatile struct search l; /* For array_for */
+ Func_ptr after_assign = NULL;
+
+#define hakvar forloop->init /* the loop variable: assigned from l.retval on each iteration */
+#define arrvar forloop->incr /* the array node handed to assoc_scan() */
+ PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ lhs = get_lhs(tree->hakvar, &after_assign);
+ t = tree->arrvar;
+ if (t->type == Node_param_list) /* array named by a function parameter: use the stack_ptr entry instead */
+ t = stack_ptr[t->param_cnt];
+ stable_tree = tree;
+ for (assoc_scan(t, (struct search *)&l);
+ l.retval;
+ assoc_next((struct search *)&l)) {
+ unref(*((NODE **) lhs));
+ *lhs = dupnode(l.retval);
+ if (after_assign)
+ (*after_assign)();
+ switch (setjmp(loop_tag)) {
+ case 0:
+ (void) interpret(stable_tree->lnode); /* FALL THROUGH */
+ case TAG_CONTINUE:
+ break;
+
+ case TAG_BREAK:
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ return 1;
+ default:
+ cant_happen();
+ }
+ }
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ break;
+ }
+
+ case Node_K_break:
+ if (loop_tag_valid == 0)
+ fatal("unexpected break");
+ longjmp(loop_tag, TAG_BREAK);
+ break; /* not reached */
+
+ case Node_K_continue:
+ if (loop_tag_valid == 0) {
+ /*
+ * AT&T nawk treats continue outside of loops like
+ * next. Allow it if not posix, and complain if
+ * lint.
+ */
+ static int warned = 0; /* so the lint warning is printed only once */
+
+ if (do_lint && ! warned) {
+ warning("use of `continue' outside of loop is not portable");
+ warned = 1;
+ }
+ if (do_posix)
+ fatal("use of `continue' outside of loop is not allowed");
+ longjmp(rule_tag, TAG_CONTINUE);
+ } else
+ longjmp(loop_tag, TAG_CONTINUE);
+ break;
+
+ case Node_K_print:
+ do_print(tree);
+ break;
+
+ case Node_K_printf:
+ do_printf(tree);
+ break;
+
+ case Node_K_delete:
+ do_delete(tree->lnode, tree->rnode);
+ break;
+
+ case Node_K_next:
+ longjmp(rule_tag, TAG_CONTINUE);
+ break;
+
+ case Node_K_nextfile:
+ do_nextfile();
+ break;
+
+ case Node_K_exit:
+ /*
+ * In A,K,&W, p. 49, it says that an exit statement "...
+ * causes the program to behave as if the end of input had
+ * occurred; no more input is read, and the END actions, if
+ * any are executed." This implies that the rest of the rules
+ * are not done. So we immediately break out of the main loop.
+ */
+ exiting = 1;
+ if (tree) { /* NOTE(review): always true here (NULL tree returned at function entry), so exit_val is overwritten on every exit -- confirm whether tree->lnode was meant */
+ t = tree_eval(tree->lnode);
+ exit_val = (int) force_number(t);
+ }
+ free_temp(t);
+ longjmp(rule_tag, TAG_BREAK);
+ break;
+
+ case Node_K_return:
+ t = tree_eval(tree->lnode);
+ ret_node = dupnode(t);
+ free_temp(t);
+ longjmp(func_tag, TAG_RETURN); /* NOTE(review): unlike break/continue, no func_tag_valid check is made in this function before jumping */
+ break;
+
+ default:
+ /*
+ * Appears to be an expression statement. Throw away the
+ * value.
+ */
+ if (do_lint && tree->type == Node_var)
+ warning("statement has no effect");
+ t = tree_eval(tree);
+ free_temp(t);
+ break;
+ }
+ return 1;
+}
+
+/*
+ * r_tree_eval --- evaluate an expression subtree and return its value node.
+ *
+ * Dispatches on tree->type.  Logical operators, builtins, getline, `in',
+ * function calls, variable-like nodes, unary minus, ?:, regex matching,
+ * assignment and concatenation are handled (and returned from) in the
+ * first switch.  Anything that gets past it is treated as a binary
+ * operator: both operands are evaluated, comparisons are settled with
+ * cmp_nodes(), and the remaining operators work on the operands' numeric
+ * values.  An unknown node type is a fatal error.
+ *
+ * Values built here come from tmp_number() or are flagged TEMP; the other
+ * cases return whatever the callee or get_lhs() handed back.
+ */
+
+NODE *
+r_tree_eval(tree)
+register NODE *tree;
+{
+	register NODE *r, *t1, *t2;	/* return value & temporary subtrees */
+	register NODE **lhs;
+	register int di;
+	AWKNUM x, x1, x2;
+	long lx;
+#ifdef _CRAY
+	long lx2;	/* guarded by the same symbol as its only use below */
+#endif
+
+#ifdef DEBUG
+	if (tree == NULL)
+		return Nnull_string;
+	if (tree->type == Node_val) {
+		if (tree->stref <= 0) cant_happen();
+		return tree;
+	}
+	if (tree->type == Node_var) {
+		if (tree->var_value->stref <= 0) cant_happen();
+		return tree->var_value;
+	}
+	if (tree->type == Node_param_list) {
+		if (stack_ptr[tree->param_cnt] == NULL)
+			return Nnull_string;
+		else
+			return stack_ptr[tree->param_cnt]->var_value;
+	}
+#endif
+	switch (tree->type) {
+	case Node_and:
+		return tmp_number((AWKNUM) (eval_condition(tree->lnode)
+					    && eval_condition(tree->rnode)));
+
+	case Node_or:
+		return tmp_number((AWKNUM) (eval_condition(tree->lnode)
+					    || eval_condition(tree->rnode)));
+
+	case Node_not:
+		return tmp_number((AWKNUM) ! eval_condition(tree->lnode));
+
+		/* Builtins */
+	case Node_builtin:
+		return ((*tree->proc) (tree->subnode));
+
+	case Node_K_getline:
+		return (do_getline(tree));
+
+	case Node_in_array:
+		return tmp_number((AWKNUM) in_array(tree->lnode, tree->rnode));
+
+	case Node_func_call:
+		return func_call(tree->rnode, tree->lnode);
+
+		/* unary operations */
+	case Node_NR:
+	case Node_FNR:
+	case Node_NF:
+	case Node_FIELDWIDTHS:
+	case Node_FS:
+	case Node_RS:
+	case Node_field_spec:
+	case Node_subscript:
+	case Node_IGNORECASE:
+	case Node_OFS:
+	case Node_ORS:
+	case Node_OFMT:
+	case Node_CONVFMT:
+		/* a NULL hook pointer: this is a read, not an assignment */
+		lhs = get_lhs(tree, (Func_ptr *)0);
+		return *lhs;
+
+	case Node_var_array:
+		fatal("attempt to use an array in a scalar context");
+
+	case Node_unary_minus:
+		t1 = tree_eval(tree->subnode);
+		x = -force_number(t1);
+		free_temp(t1);
+		return tmp_number(x);
+
+	case Node_cond_exp:
+		if (eval_condition(tree->lnode))
+			return tree_eval(tree->rnode->lnode);
+		return tree_eval(tree->rnode->rnode);
+
+	case Node_match:
+	case Node_nomatch:
+	case Node_regex:
+		return match_op(tree);
+
+	case Node_func:
+		fatal("function `%s' called with space between name and (,\n%s",
+			tree->lnode->param,
+			"or used in other expression context");
+
+		/* assignments */
+	case Node_assign:
+		{
+		Func_ptr after_assign = NULL;
+
+		r = tree_eval(tree->rnode);
+		lhs = get_lhs(tree->lnode, &after_assign);
+		if (r != *lhs) {
+			NODE *save;
+
+			/* install the new value before releasing the old one */
+			save = *lhs;
+			*lhs = dupnode(r);
+			unref(save);
+		}
+		free_temp(r);
+		if (after_assign)
+			(*after_assign)();
+		return *lhs;
+		}
+
+	case Node_concat:
+		{
+#define	STACKSIZE	10
+		NODE *stack[STACKSIZE];
+		register NODE **sp;
+		register int len;
+		char *str;
+		register char *dest;
+
+		/*
+		 * Collect the operands of a right-leaning chain of
+		 * concatenations, up to the capacity of stack[]; whatever
+		 * is left of the chain is evaluated as the last operand.
+		 */
+		sp = stack;
+		len = 0;
+		while (tree->type == Node_concat) {
+			*sp = force_string(tree_eval(tree->lnode));
+			tree = tree->rnode;
+			len += (*sp)->stlen;
+			if (++sp == &stack[STACKSIZE-2]) /* one more and NULL */
+				break;
+		}
+		*sp = force_string(tree_eval(tree));
+		len += (*sp)->stlen;
+		*++sp = NULL;
+		emalloc(str, char *, len+2, "tree_eval");
+		dest = str;
+		sp = stack;
+		while (*sp) {
+			memcpy(dest, (*sp)->stptr, (*sp)->stlen);
+			dest += (*sp)->stlen;
+			free_temp(*sp);
+			sp++;
+		}
+		r = make_str_node(str, len, ALREADY_MALLOCED);
+		r->flags |= TEMP;
+		}
+		return r;
+
+		/* other assignment types are easier because they are numeric */
+	case Node_preincrement:
+	case Node_predecrement:
+	case Node_postincrement:
+	case Node_postdecrement:
+	case Node_assign_exp:
+	case Node_assign_times:
+	case Node_assign_quotient:
+	case Node_assign_mod:
+	case Node_assign_plus:
+	case Node_assign_minus:
+		return op_assign(tree);
+	default:
+		break;	/* handled below */
+	}
+
+	/* evaluate subtrees in order to do binary operation, then keep going */
+	t1 = tree_eval(tree->lnode);
+	t2 = tree_eval(tree->rnode);
+
+	switch (tree->type) {
+	case Node_geq:
+	case Node_leq:
+	case Node_greater:
+	case Node_less:
+	case Node_notequal:
+	case Node_equal:
+		di = cmp_nodes(t1, t2);
+		free_temp(t1);
+		free_temp(t2);
+		switch (tree->type) {
+		case Node_equal:
+			return tmp_number((AWKNUM) (di == 0));
+		case Node_notequal:
+			return tmp_number((AWKNUM) (di != 0));
+		case Node_less:
+			return tmp_number((AWKNUM) (di < 0));
+		case Node_greater:
+			return tmp_number((AWKNUM) (di > 0));
+		case Node_leq:
+			return tmp_number((AWKNUM) (di <= 0));
+		case Node_geq:
+			return tmp_number((AWKNUM) (di >= 0));
+		default:
+			cant_happen();
+		}
+		break;
+	default:
+		break;	/* handled below */
+	}
+
+	x1 = force_number(t1);
+	free_temp(t1);
+	x2 = force_number(t2);
+	free_temp(t2);
+	switch (tree->type) {
+	case Node_exp:
+		if ((lx = x2) == x2 && lx >= 0) {	/* integer exponent */
+			if (lx == 0)
+				x = 1;
+			else if (lx == 1)
+				x = x1;
+			else {
+				/* doing it this way should be more precise */
+				for (x = x1; --lx; )
+					x *= x1;
+			}
+		} else
+			x = pow((double) x1, (double) x2);
+		return tmp_number(x);
+
+	case Node_times:
+		return tmp_number(x1 * x2);
+
+	case Node_quotient:
+		if (x2 == 0)
+			fatal("division by zero attempted");
+#ifdef _CRAY
+		/*
+		 * special case for integer division, put in for Cray
+		 */
+		lx2 = x2;
+		if (lx2 == 0)
+			return tmp_number(x1 / x2);
+		lx = (long) x1 / lx2;
+		if (lx * x2 == x1)
+			return tmp_number((AWKNUM) lx);
+		else
+#endif
+			return tmp_number(x1 / x2);
+
+	case Node_mod:
+		if (x2 == 0)
+			fatal("division by zero attempted in mod");
+#ifndef FMOD_MISSING
+		return tmp_number(fmod (x1, x2));
+#else
+		(void) modf(x1 / x2, &x);
+		return tmp_number(x1 - x * x2);
+#endif
+
+	case Node_plus:
+		return tmp_number(x1 + x2);
+
+	case Node_minus:
+		return tmp_number(x1 - x2);
+
+	case Node_var_array:
+		fatal("attempt to use an array in a scalar context");
+
+	default:
+		fatal("illegal type (%d) in tree_eval", tree->type);
+	}
+	return 0;
+}
+
+/*
+ * eval_condition --- decide whether TREE is true.
+ * Returns 0 for false and non-zero for true; a NULL tree counts as true.
+ */
+static int
+eval_condition(tree)
+register NODE *tree;
+{
+	register NODE *val;
+	register int truth;
+
+	if (tree == NULL)
+		return 1;
+
+	if (tree->type == Node_line_range) {
+		/*
+		 * A range pattern keeps its state in tree->triggered and its
+		 * two conditions in tree->condpair.  While not triggered, the
+		 * start condition (lnode) is tested: failure means the range
+		 * is not active, success sets the trigger.  Once triggered --
+		 * possibly just now, so a range can open and close on one
+		 * record -- the end condition (rnode) is tested and clears
+		 * the trigger when it holds.  Either way the record is inside
+		 * the range, so the answer is true.
+		 */
+		if (!tree->triggered) {
+			if (!eval_condition(tree->condpair->lnode))
+				return 0;
+			tree->triggered = 1;
+		}
+		if (eval_condition(tree->condpair->rnode))
+			tree->triggered = 0;
+		return 1;
+	}
+
+	/*
+	 * Any other expression: evaluate it.  A value flagged NUMBER is
+	 * true when non-zero; anything else is true when its string is
+	 * non-empty.
+	 */
+	val = tree_eval(tree);
+	if (val->flags & MAYBE_NUM)
+		(void) force_number(val);
+	truth = (val->flags & NUMBER) ? (val->numbr != 0.0)
+				      : (val->stlen != 0);
+	free_temp(val);
+	return truth;
+}
+
+/*
+ * cmp_nodes --- three-way comparison of two value nodes.
+ *
+ * Returns 0 when the nodes compare equal, a negative value when t1 sorts
+ * before t2 and a positive value otherwise.  force_number() is applied
+ * first to operands flagged MAYBE_NUM; if both nodes then carry NUMBER
+ * they are compared numerically.  Otherwise both are forced to strings
+ * and compared byte by byte, the shorter one sorting first when one is a
+ * prefix of the other.
+ */
+int
+cmp_nodes(t1, t2)
+register NODE *t1, *t2;
+{
+	register int common, diff;
+	register int len1, len2;
+
+	if (t1 == t2)
+		return 0;
+
+	if (t1->flags & MAYBE_NUM)
+		(void) force_number(t1);
+	if (t2->flags & MAYBE_NUM)
+		(void) force_number(t2);
+
+	if ((t1->flags & NUMBER) && (t2->flags & NUMBER)) {
+		if (t1->numbr == t2->numbr)
+			return 0;
+		return (t1->numbr - t2->numbr < 0) ? -1 : 1;
+	}
+
+	(void) force_string(t1);
+	(void) force_string(t2);
+	len1 = t1->stlen;
+	len2 = t2->stlen;
+	if (len1 == 0 || len2 == 0)
+		return len1 - len2;
+
+	common = (len1 <= len2) ? len1 : len2;
+	diff = memcmp(t1->stptr, t2->stptr, common);
+	if (diff != 0)
+		return diff;
+	return len1 - len2;
+}
+
+/*
+ * op_assign --- evaluate ++, -- and the op= assignment operators.
+ *
+ * The left-hand side is located with get_lhs() and its current numeric
+ * value is fetched before anything is released.  Pre-increment and
+ * pre-decrement return the updated variable; the postfix forms return a
+ * temporary holding the old value.  For the op= forms the right-hand
+ * side is evaluated, the combined value is stored, and the variable's
+ * new value node is returned.  Any after-assignment hook reported by
+ * get_lhs() is run once the new value is in place.
+ *
+ * Division and modulus by zero are fatal errors.
+ */
+static NODE *
+op_assign(tree)
+register NODE *tree;
+{
+	AWKNUM rval, lval;
+	NODE **lhs;
+	AWKNUM t1, t2;
+	long ltemp;
+	NODE *tmp;
+	Func_ptr after_assign = NULL;
+
+	lhs = get_lhs(tree->lnode, &after_assign);
+	lval = force_number(*lhs);
+
+	/*
+	 * Can't unref *lhs until we know the type; doing so
+	 * too early breaks x += x sorts of things.
+	 */
+	switch(tree->type) {
+	case Node_preincrement:
+	case Node_predecrement:
+		unref(*lhs);
+		*lhs = make_number(lval +
+			       (tree->type == Node_preincrement ? 1.0 : -1.0));
+		if (after_assign)
+			(*after_assign)();
+		return *lhs;
+
+	case Node_postincrement:
+	case Node_postdecrement:
+		unref(*lhs);
+		*lhs = make_number(lval +
+			       (tree->type == Node_postincrement ? 1.0 : -1.0));
+		if (after_assign)
+			(*after_assign)();
+		return tmp_number(lval);
+	default:
+		break;	/* handled below */
+	}
+
+	tmp = tree_eval(tree->rnode);
+	rval = force_number(tmp);
+	free_temp(tmp);
+	unref(*lhs);
+	switch(tree->type) {
+	case Node_assign_exp:
+		/*
+		 * Only a non-negative integer exponent may use repeated
+		 * multiplication; with a negative one the --ltemp loop
+		 * would count down away from zero.  Negative and fractional
+		 * exponents go through pow(), as the `^' operator in
+		 * r_tree_eval() does.
+		 */
+		if ((ltemp = rval) == rval && ltemp >= 0) {
+			if (ltemp == 0)
+				*lhs = make_number((AWKNUM) 1);
+			else if (ltemp == 1)
+				*lhs = make_number(lval);
+			else {
+				/* doing it this way should be more precise */
+				for (t1 = t2 = lval; --ltemp; )
+					t1 *= t2;
+				*lhs = make_number(t1);
+			}
+		} else
+			*lhs = make_number((AWKNUM) pow((double) lval, (double) rval));
+		break;
+
+	case Node_assign_times:
+		*lhs = make_number(lval * rval);
+		break;
+
+	case Node_assign_quotient:
+		if (rval == (AWKNUM) 0)
+			fatal("division by zero attempted in /=");
+#ifdef _CRAY
+		/*
+		 * special case for integer division, put in for Cray
+		 */
+		ltemp = rval;
+		if (ltemp == 0) {
+			*lhs = make_number(lval / rval);
+			break;
+		}
+		ltemp = (long) lval / ltemp;
+		/* exact only if quotient * divisor gives back the dividend */
+		if (ltemp * rval == lval)
+			*lhs = make_number((AWKNUM) ltemp);
+		else
+#endif
+			*lhs = make_number(lval / rval);
+		break;
+
+	case Node_assign_mod:
+		if (rval == (AWKNUM) 0)
+			fatal("division by zero attempted in %=");
+#ifndef FMOD_MISSING
+		*lhs = make_number(fmod(lval, rval));
+#else
+		(void) modf(lval / rval, &t1);
+		t2 = lval - rval * t1;
+		*lhs = make_number(t2);
+#endif
+		break;
+
+	case Node_assign_plus:
+		*lhs = make_number(lval + rval);
+		break;
+
+	case Node_assign_minus:
+		*lhs = make_number(lval - rval);
+		break;
+	default:
+		cant_happen();
+	}
+	if (after_assign)
+		(*after_assign)();
+	return *lhs;
+}
+
+NODE **stack_ptr;
+
+/*
+ * func_call --- call the user-defined awk function named by `name'.
+ *
+ * Builds one Node_var per declared parameter in a freshly allocated
+ * local_stack (evaluated call arguments first, Nnull_string for the
+ * rest), makes that the current stack_ptr, and runs the function body
+ * with interpret().  A return statement longjmps to func_tag with the
+ * value left in ret_node.  Afterwards the previous stack_ptr, func_tag
+ * and ret_node are restored, the parameter nodes are released, and the
+ * result is returned, flagged TEMP unless it is PERM.
+ *
+ * Array arguments (Node_var_array) are passed by copying the node, and
+ * a parameter that became an array is copied back into a plain Node_var
+ * argument, which gives arrays call-by-reference behaviour.
+ *
+ * Calling an undefined name is fatal; surplus call arguments only
+ * produce a warning and are not evaluated.
+ */
+static NODE *
+func_call(name, arg_list)
+NODE *name; /* name is a Node_val giving function name */
+NODE *arg_list; /* Node_expression_list of calling args. */
+{
+ register NODE *arg, *argp, *r;
+ NODE *n, *f;
+ jmp_buf volatile func_tag_stack;
+ jmp_buf volatile loop_tag_stack;
+ int volatile save_loop_tag_valid = 0;
+ NODE **volatile save_stack, *save_ret_node;
+ NODE **volatile local_stack = NULL, **sp;
+ int count;
+ extern NODE *ret_node;
+
+ /*
+ * retrieve function definition node
+ */
+ f = lookup(name->stptr);
+ if (!f || f->type != Node_func)
+ fatal("function `%s' not defined", name->stptr);
+#ifdef FUNC_TRACE
+ fprintf(stderr, "function %s called\n", name->stptr);
+#endif
+ count = f->lnode->param_cnt;
+ if (count)
+ emalloc(local_stack, NODE **, count*sizeof(NODE *), "func_call");
+ sp = local_stack;
+
+ /*
+ * for each calling arg. add NODE * on stack
+ */
+ for (argp = arg_list; count && argp != NULL; argp = argp->rnode) {
+ arg = argp->lnode;
+ getnode(r);
+ r->type = Node_var;
+ /*
+ * call by reference for arrays; see below also
+ */
+ if (arg->type == Node_param_list)
+ arg = stack_ptr[arg->param_cnt];
+ if (arg->type == Node_var_array)
+ *r = *arg;
+ else {
+ n = tree_eval(arg);
+ r->lnode = dupnode(n);
+ r->rnode = (NODE *) NULL;
+ free_temp(n);
+ }
+ *sp++ = r;
+ count--;
+ }
+ if (argp != NULL) /* left over calling args. */
+ warning(
+ "function `%s' called with more arguments than declared",
+ name->stptr);
+ /*
+ * add remaining params. on stack with null value
+ */
+ while (count-- > 0) {
+ getnode(r);
+ r->type = Node_var;
+ r->lnode = Nnull_string;
+ r->rnode = (NODE *) NULL;
+ *sp++ = r;
+ }
+
+ /*
+ * Execute function body, saving context, as a return statement
+ * will longjmp back here.
+ *
+ * Have to save and restore the loop_tag stuff so that a return
+ * inside a loop in a function body doesn't scrog any loops going
+ * on in the main program. We save the necessary info in variables
+ * local to this function so that function nesting works OK.
+ * We also only bother to save the loop stuff if we're in a loop
+ * when the function is called.
+ */
+ if (loop_tag_valid) {
+ int junk = 0;
+
+ save_loop_tag_valid = (volatile int) loop_tag_valid;
+ PUSH_BINDING(loop_tag_stack, loop_tag, junk);
+ loop_tag_valid = 0;
+ }
+ save_stack = stack_ptr;
+ stack_ptr = local_stack;
+ PUSH_BINDING(func_tag_stack, func_tag, func_tag_valid);
+ save_ret_node = ret_node;
+ ret_node = Nnull_string; /* default return value */
+ /* setjmp() yields non-zero when a return statement longjmp'ed here */
+ if (setjmp(func_tag) == 0)
+ (void) interpret(f->rnode);
+
+ r = ret_node;
+ ret_node = (NODE *) save_ret_node;
+ RESTORE_BINDING(func_tag_stack, func_tag, func_tag_valid);
+ stack_ptr = (NODE **) save_stack;
+
+ /*
+ * here, we pop each parameter and check whether
+ * it was an array. If so, and if the arg. passed in was
+ * a simple variable, then the value should be copied back.
+ * This achieves "call-by-reference" for arrays.
+ */
+ sp = local_stack;
+ count = f->lnode->param_cnt;
+ for (argp = arg_list; count > 0 && argp != NULL; argp = argp->rnode) {
+ arg = argp->lnode;
+ /* stack_ptr is the caller's frame again at this point */
+ if (arg->type == Node_param_list)
+ arg = stack_ptr[arg->param_cnt];
+ n = *sp++;
+ if (arg->type == Node_var && n->type == Node_var_array) {
+ /* should we free arg->var_value ? */
+ arg->var_array = n->var_array;
+ arg->type = Node_var_array;
+ }
+ unref(n->lnode);
+ freenode(n);
+ count--;
+ }
+ /* parameters that had no matching call argument */
+ while (count-- > 0) {
+ n = *sp++;
+ /* if n is an (local) array, all the elements should be freed */
+ if (n->type == Node_var_array) {
+ assoc_clear(n);
+ free(n->var_array);
+ }
+ unref(n->lnode);
+ freenode(n);
+ }
+ if (local_stack)
+ free((char *) local_stack);
+
+ /* Restore the loop_tag stuff if necessary. */
+ if (save_loop_tag_valid) {
+ int junk = 0;
+
+ loop_tag_valid = (int) save_loop_tag_valid;
+ RESTORE_BINDING(loop_tag_stack, loop_tag, junk);
+ }
+
+ if (!(r->flags & PERM))
+ r->flags |= TEMP;
+ return r;
+}
+
+/*
+ * get_lhs --- return a pointer to the NODE * slot holding ptr's value.
+ *
+ * The slot can be read for the current value or stored through to
+ * assign a new one.  If `assign' is non-NULL and the target needs
+ * follow-up work after a store (the special variables and $0 here),
+ * *assign is set to the routine to call once the new value is in place;
+ * it is left untouched otherwise, so callers in this file initialise it
+ * to NULL first.  For NR, FNR, NF and IGNORECASE the variable's node is
+ * first refreshed from the corresponding C variable.
+ *
+ * Arrays and functions are not valid targets and cause a fatal error,
+ * as does a negative field number.
+ */
+
+NODE **
+get_lhs(ptr, assign)
+register NODE *ptr;
+Func_ptr *assign;
+{
+ register NODE **aptr = NULL;
+ register NODE *n;
+
+ switch (ptr->type) {
+ case Node_var_array:
+ fatal("attempt to use an array in a scalar context");
+ case Node_var:
+ aptr = &(ptr->var_value);
+#ifdef DEBUG
+ if (ptr->var_value->stref <= 0)
+ cant_happen();
+#endif
+ break;
+
+ case Node_FIELDWIDTHS:
+ aptr = &(FIELDWIDTHS_node->var_value);
+ if (assign)
+ *assign = set_FIELDWIDTHS;
+ break;
+
+ case Node_RS:
+ aptr = &(RS_node->var_value);
+ if (assign)
+ *assign = set_RS;
+ break;
+
+ case Node_FS:
+ aptr = &(FS_node->var_value);
+ if (assign)
+ *assign = set_FS;
+ break;
+
+ case Node_FNR:
+ unref(FNR_node->var_value);
+ FNR_node->var_value = make_number((AWKNUM) FNR);
+ aptr = &(FNR_node->var_value);
+ if (assign)
+ *assign = set_FNR;
+ break;
+
+ case Node_NR:
+ unref(NR_node->var_value);
+ NR_node->var_value = make_number((AWKNUM) NR);
+ aptr = &(NR_node->var_value);
+ if (assign)
+ *assign = set_NR;
+ break;
+
+ case Node_NF:
+ /* NF == -1 means the current record has not been fully parsed */
+ if (NF == -1)
+ (void) get_field(HUGE-1, assign); /* parse record */
+ unref(NF_node->var_value);
+ NF_node->var_value = make_number((AWKNUM) NF);
+ aptr = &(NF_node->var_value);
+ if (assign)
+ *assign = set_NF;
+ break;
+
+ case Node_IGNORECASE:
+ unref(IGNORECASE_node->var_value);
+ IGNORECASE_node->var_value = make_number((AWKNUM) IGNORECASE);
+ aptr = &(IGNORECASE_node->var_value);
+ if (assign)
+ *assign = set_IGNORECASE;
+ break;
+
+ case Node_OFMT:
+ aptr = &(OFMT_node->var_value);
+ if (assign)
+ *assign = set_OFMT;
+ break;
+
+ case Node_CONVFMT:
+ aptr = &(CONVFMT_node->var_value);
+ if (assign)
+ *assign = set_CONVFMT;
+ break;
+
+ case Node_ORS:
+ aptr = &(ORS_node->var_value);
+ if (assign)
+ *assign = set_ORS;
+ break;
+
+ case Node_OFS:
+ aptr = &(OFS_node->var_value);
+ if (assign)
+ *assign = set_OFS;
+ break;
+
+ case Node_param_list:
+ /* function parameter: look it up in the current call frame */
+ aptr = &(stack_ptr[ptr->param_cnt]->var_value);
+ break;
+
+ case Node_field_spec:
+ {
+ int field_num;
+
+ n = tree_eval(ptr->lnode);
+ field_num = (int) force_number(n);
+ free_temp(n);
+ if (field_num < 0)
+ fatal("attempt to access field %d", field_num);
+ if (field_num == 0 && field0_valid) { /* short circuit */
+ aptr = &fields_arr[0];
+ if (assign)
+ *assign = reset_record;
+ break;
+ }
+ aptr = get_field(field_num, assign);
+ break;
+ }
+ case Node_subscript:
+ n = ptr->lnode;
+ if (n->type == Node_param_list)
+ n = stack_ptr[n->param_cnt];
+ aptr = assoc_lookup(n, concat_exp(ptr->rnode));
+ break;
+
+ case Node_func:
+ fatal ("`%s' is a function, assignment is not allowed",
+ ptr->lnode->param);
+ default:
+ cant_happen();
+ }
+ return aptr;
+}
+
+/*
+ * match_op --- evaluate `str ~ re', `str !~ re' or a bare /re/.
+ *
+ * A bare regex (Node_regex) is matched against $0; for the two binary
+ * forms the left operand is the subject and the right one the pattern.
+ * Returns a temporary number: 1 when the outcome agrees with the
+ * operator (a hit for ~ and bare regex, a miss for !~), otherwise 0.
+ */
+static NODE *
+match_op(tree)
+register NODE *tree;
+{
+	register NODE *subject;
+	register Regexp *re;
+	int want_match;
+	int found;
+	int result;
+
+	want_match = (tree->type != Node_nomatch);
+
+	if (tree->type == Node_regex)
+		subject = *get_field(0, (Func_ptr *) 0);
+	else {
+		subject = force_string(tree_eval(tree->lnode));
+		tree = tree->rnode;
+	}
+
+	re = re_update(tree);
+	found = (research(re, subject->stptr, 0, subject->stlen, 0) != -1);
+	result = (found == want_match);
+	free_temp(subject);
+	return tmp_number((AWKNUM) result);
+}
+
+/*
+ * set_IGNORECASE --- after-assignment hook for IGNORECASE.
+ * Warns once when running with do_lint or do_unix, caches the variable's
+ * truth value in the C variable IGNORECASE, and then calls set_FS().
+ */
+void
+set_IGNORECASE()
+{
+	static int warned = 0;
+
+	if (! warned && (do_lint || do_unix)) {
+		warned = 1;
+		warning("IGNORECASE not supported in compatibility mode");
+	}
+
+	if (force_number(IGNORECASE_node->var_value) != 0.0)
+		IGNORECASE = 1;
+	else
+		IGNORECASE = 0;
+	set_FS();
+}
+
+/*
+ * set_OFS --- after-assignment hook for OFS: cache the string pointer
+ * and length of the new value in OFS / OFSlen and NUL-terminate it.
+ */
+void
+set_OFS()
+{
+	NODE *val;
+
+	val = OFS_node->var_value;
+	OFS = force_string(val)->stptr;
+	OFSlen = val->stlen;
+	OFS[OFSlen] = '\0';
+}
+
+/*
+ * set_ORS --- after-assignment hook for ORS: cache the string pointer
+ * and length of the new value in ORS / ORSlen and NUL-terminate it.
+ */
+void
+set_ORS()
+{
+	NODE *val;
+
+	val = ORS_node->var_value;
+	ORS = force_string(val)->stptr;
+	ORSlen = val->stlen;
+	ORS[ORSlen] = '\0';
+}
+
+static NODE **fmt_list = NULL;
+static int fmt_ok P((NODE *n));
+static int fmt_index P((NODE *n));
+
+/*
+ * fmt_ok --- check a numeric format specification.
+ * Currently a stub: every format is accepted (returns 1) and `n' is
+ * not examined.
+ */
+static int
+fmt_ok(n)
+NODE *n;
+{
+ /* to be done later */
+ return 1;
+}
+
+/*
+ * fmt_index --- map a format string to its slot in fmt_list.
+ *
+ * Returns the index of an entry that compares equal to `n' (per
+ * cmp_nodes()), or, when there is none, appends a dupnode() copy of `n'
+ * and returns the new index.  The table starts with room for four
+ * entries and doubles whenever it is full.  `n' is forced to a string,
+ * and NUL-terminated before it is stored.
+ */
+static int
+fmt_index(n)
+NODE *n;
+{
+	register int ix = 0;
+	static int fmt_num = 4;		/* allocated slots in fmt_list */
+	static int fmt_hiwater = 0;	/* slots in use */
+
+	if (fmt_list == NULL)
+		emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
+	(void) force_string(n);
+	while (ix < fmt_hiwater) {
+		if (cmp_nodes(fmt_list[ix], n) == 0)
+			return ix;
+		ix++;
+	}
+	/* not found */
+	n->stptr[n->stlen] = '\0';
+	if (!fmt_ok(n))
+		warning("bad FMT specification");
+	if (fmt_hiwater >= fmt_num) {
+		/*
+		 * Grow in place: the entries already stored must survive,
+		 * and the size argument is in bytes, not elements.
+		 */
+		fmt_num *= 2;
+		erealloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
+	}
+	fmt_list[fmt_hiwater] = dupnode(n);
+	return fmt_hiwater++;
+}
+
+/*
+ * set_OFMT --- after-assignment hook for OFMT: register the new format
+ * with fmt_index() and cache its index and string pointer.
+ */
+void
+set_OFMT()
+{
+	NODE *fmt;
+
+	OFMTidx = fmt_index(OFMT_node->var_value);
+	fmt = fmt_list[OFMTidx];
+	OFMT = fmt->stptr;
+}
+
+/*
+ * set_CONVFMT --- after-assignment hook for CONVFMT: register the new
+ * format with fmt_index() and cache its index and string pointer.
+ */
+void
+set_CONVFMT()
+{
+	NODE *fmt;
+
+	CONVFMTidx = fmt_index(CONVFMT_node->var_value);
+	fmt = fmt_list[CONVFMTidx];
+	CONVFMT = fmt->stptr;
+}
diff --git a/gnu/usr.bin/awk/field.c b/gnu/usr.bin/awk/field.c
new file mode 100644
index 000000000000..d8f9a5455631
--- /dev/null
+++ b/gnu/usr.bin/awk/field.c
@@ -0,0 +1,645 @@
+/*
+ * field.c - routines for dealing with fields and record parsing
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+static int (*parse_field) P((int, char **, int, NODE *,
+ Regexp *, void (*)(), NODE *));
+static void rebuild_record P((void));
+static int re_parse_field P((int, char **, int, NODE *,
+ Regexp *, void (*)(), NODE *));
+static int def_parse_field P((int, char **, int, NODE *,
+ Regexp *, void (*)(), NODE *));
+static int sc_parse_field P((int, char **, int, NODE *,
+ Regexp *, void (*)(), NODE *));
+static int fw_parse_field P((int, char **, int, NODE *,
+ Regexp *, void (*)(), NODE *));
+static void set_element P((int, char *, int, NODE *));
+static void grow_fields_arr P((int num));
+static void set_field P((int num, char *str, int len, NODE *dummy));
+
+
+static Regexp *FS_regexp = NULL;
+static char *parse_extent; /* marks where to restart parse of record */
+static int parse_high_water=0; /* field number that we have parsed so far */
+static int nf_high_water = 0; /* size of fields_arr */
+static int resave_fs;
+static NODE *save_FS; /* save current value of FS when line is read,
+ * to be used in deferred parsing
+ */
+
+NODE **fields_arr; /* array of pointers to the field nodes */
+int field0_valid; /* $(>0) has not been changed yet */
+int default_FS;
+static NODE **nodes; /* permanent repository of field nodes */
+static int *FIELDWIDTHS = NULL;
+
+/*
+ * init_fields --- one-time setup of the field tables.
+ * Allocates one-slot fields_arr and nodes arrays, makes $0 a copy of
+ * Nnull_string, starts the parse position at its string, remembers the
+ * current FS value in save_FS and marks $0 as valid.
+ */
+void
+init_fields()
+{
+	NODE *rec;
+
+	emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
+	emalloc(nodes, NODE **, sizeof(NODE *), "init_fields");
+
+	getnode(rec);
+	*rec = *Nnull_string;
+	nodes[0] = rec;
+	fields_arr[0] = rec;
+
+	parse_extent = rec->stptr;
+	save_FS = dupnode(FS_node->var_value);
+	field0_valid = 1;
+}
+
+
+/*
+ * grow_fields_arr --- make fields_arr and nodes large enough for field
+ * number `num', giving each new slot its own node initialised as a copy
+ * of Nnull_string, and record `num' as the new nf_high_water.
+ */
+static void
+grow_fields_arr(num)
+int num;
+{
+	register int slot;
+	register NODE *fresh;
+
+	erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
+	erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field");
+
+	slot = nf_high_water + 1;
+	while (slot <= num) {
+		getnode(fresh);
+		*fresh = *Nnull_string;
+		nodes[slot] = fresh;
+		fields_arr[slot] = fresh;
+		slot++;
+	}
+	nf_high_water = num;
+}
+
+/*
+ * set_field --- make field `num' refer to the `len' bytes at `str'.
+ * The text is not copied: the field's permanent node from nodes[] is
+ * pointed at it and installed in fields_arr.  The tables are grown
+ * first if `num' is beyond nf_high_water.
+ */
+/*ARGSUSED*/
+static void
+set_field(num, str, len, dummy)
+int num;
+char *str;
+int len;
+NODE *dummy;	/* not used -- just to make interface same as set_element */
+{
+	register NODE *fld;
+
+	if (num > nf_high_water)
+		grow_fields_arr(num);
+
+	fld = nodes[num];
+	fields_arr[num] = fld;
+	fld->flags = (PERM|STR|STRING|MAYBE_NUM);
+	fld->stlen = len;
+	fld->stptr = str;
+}
+
+/*
+ * rebuild_record --- regenerate $0 after a field has been assigned to.
+ *
+ * Joins $1 .. $NF with the current value of OFS into a freshly
+ * allocated string, installs it as fields_arr[0] (releasing the old
+ * node) and marks $0 as valid again.
+ */
+static void
+rebuild_record()
+{
+	register int i;
+	register int total;
+	register int seplen;
+	register NODE *fld;
+	register char *out;
+	NODE *sep;
+	char *rec;
+
+	sep = force_string(OFS_node->var_value);
+	seplen = sep->stlen;
+
+	/* pass 1: make every field a string and add up the lengths */
+	total = 0;
+	for (i = NF; i > 0; i--) {
+		fld = force_string(fields_arr[i]);
+		total += fld->stlen;
+	}
+	total += (NF - 1) * seplen;
+	if (total < 0)
+		total = 0;
+
+	emalloc(rec, char *, total + 2, "fix_fields");
+	out = rec;
+	rec[0] = '\0';
+
+	/* pass 2: copy the fields, with a separator between neighbours */
+	for (i = 1; i <= NF; i++) {
+		fld = fields_arr[i];
+		if (fld->stlen == 1)
+			*out++ = fld->stptr[0];
+		else if (fld->stlen != 0) {
+			memcpy(out, fld->stptr, fld->stlen);
+			out += fld->stlen;
+		}
+
+		if (i == NF)
+			continue;	/* no separator after the last field */
+
+		if (seplen == 1)
+			*out++ = sep->stptr[0];
+		else if (seplen != 0) {
+			memcpy(out, sep->stptr, seplen);
+			out += seplen;
+		}
+	}
+
+	fld = make_str_node(rec, total, ALREADY_MALLOCED);
+	unref(fields_arr[0]);
+	fields_arr[0] = fld;
+	field0_valid = 1;
+}
+
+/*
+ * set_record --- install a new $0 without splitting it.
+ *
+ * Parsing of the remaining fields is deferred until something refers to
+ * $(>0) or NF, and then goes only as far as needed.  Here the fields
+ * parsed from the previous record are released and NF is reset to -1
+ * ("not parsed yet").  With `freeold' set, the old $0 is released,
+ * save_FS is refreshed if a re-save was requested, and the permanent
+ * node nodes[0] is pointed at the `cnt' bytes in `buf'.
+ */
+void
+set_record(buf, cnt, freeold)
+char *buf;
+int cnt;
+int freeold;
+{
+	register int fld;
+	NODE *rec;
+
+	NF = -1;
+	fld = 1;
+	while (fld <= parse_high_water) {
+		unref(fields_arr[fld]);
+		fld++;
+	}
+	parse_high_water = 0;
+
+	if (freeold) {
+		unref(fields_arr[0]);
+		if (resave_fs) {
+			resave_fs = 0;
+			unref(save_FS);
+			save_FS = dupnode(FS_node->var_value);
+		}
+		rec = nodes[0];
+		rec->stptr = buf;
+		rec->stlen = cnt;
+		rec->stref = 1;
+		rec->flags = (STRING|STR|PERM|MAYBE_NUM);
+		fields_arr[0] = rec;
+	}
+
+	fields_arr[0]->flags |= MAYBE_NUM;
+	field0_valid = 1;
+}
+
+/*
+ * reset_record --- after-assignment hook for $0: force the current $0
+ * to a string and re-install it with set_record(), keeping the node
+ * (freeold == 0) so that the fields get re-parsed on demand.
+ */
+void
+reset_record()
+{
+	NODE *rec;
+
+	rec = fields_arr[0];
+	(void) force_string(rec);
+	set_record(rec->stptr, rec->stlen, 0);
+}
+
+/*
+ * set_NF --- after-assignment hook for NF.
+ * Takes the new field count from NF_node, grows the field tables if
+ * needed, sets every field beyond parse_high_water up to NF to
+ * Nnull_string, and marks $0 as needing to be rebuilt.
+ */
+void
+set_NF()
+{
+	register int fld;
+
+	NF = (int) force_number(NF_node->var_value);
+	if (NF > nf_high_water)
+		grow_fields_arr(NF);
+
+	fld = parse_high_water + 1;
+	while (fld <= NF) {
+		unref(fields_arr[fld]);
+		fields_arr[fld] = Nnull_string;
+		fld++;
+	}
+	field0_valid = 0;
+}
+
+/*
+ * re_parse_field --- split a string into fields using a regular
+ * expression as the separator.
+ *
+ * this is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a regular
+ * expression -- either user-defined or because RS=="" and FS==" "
+ *
+ * Each field found is reported through (*set)(field number, start,
+ * length, n).  Parsing stops after field `up_to' or at the end of the
+ * `len' bytes at *buf; *buf is updated to where the scan stopped and
+ * the number of the last field set is returned.  `fs' is not used by
+ * this variant.
+ */
+static int
+re_parse_field(up_to, buf, len, fs, rp, set, n)
+int up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+void (*set) (); /* routine to set the value of the parsed field */
+NODE *n;
+{
+ register char *scan = *buf;
+ register int nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+
+ /*
+ * up_to == HUGE: number the fields from 1 rather than continuing
+ * after parse_high_water (presumably the do_split() case -- confirm
+ * against the caller).
+ */
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+
+ /* RS == "" with the default FS: skip leading white space */
+ if (*RS == 0 && default_FS)
+ while (scan < end && isspace(*scan))
+ scan++;
+ field = scan;
+ while (scan < end
+ && research(rp, scan, 0, (int)(end - scan), 1) != -1
+ && nf < up_to) {
+ if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
+ /* an empty match cannot separate fields: step past one char */
+ scan++;
+ if (scan == end) {
+ (*set)(++nf, field, scan - field, n);
+ up_to = nf;
+ break;
+ }
+ continue;
+ }
+ (*set)(++nf, field, scan + RESTART(rp, scan) - field, n);
+ scan += REEND(rp, scan);
+ field = scan;
+ if (scan == end) /* FS at end of record */
+ (*set)(++nf, field, 0, n);
+ }
+ /* whatever follows the last separator is the final field */
+ if (nf != up_to && scan < end) {
+ (*set)(++nf, scan, (int)(end - scan), n);
+ scan = end;
+ }
+ *buf = scan;
+ return (nf);
+}
+
+/*
+ * def_parse_field --- split a string into fields on runs of blanks.
+ *
+ * this is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a single space
+ * character.
+ *
+ * Fields are separated by spaces and tabs; leading separators are
+ * skipped.  Each field is reported through (*set)(field number, start,
+ * length, n).  Parsing stops after field `up_to' or at the end of the
+ * `len' bytes at *buf; *buf is updated to where the scan stopped and
+ * the number of the last field set is returned.  `fs' and `rp' are not
+ * used by this variant.
+ *
+ * The byte at (*buf)[len] is overwritten temporarily with a sentinel
+ * and restored before returning, so it has to be writable.
+ */
+static int
+def_parse_field(up_to, buf, len, fs, rp, set, n)
+int up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+void (*set) (); /* routine to set the value of the parsed field */
+NODE *n;
+{
+ register char *scan = *buf;
+ register int nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+ char sav;
+
+ /*
+ * up_to == HUGE: number the fields from 1 rather than continuing
+ * after parse_high_water (presumably the do_split() case -- confirm
+ * against the caller).
+ */
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+
+ /* before doing anything save the char at *end */
+ sav = *end;
+ /* because it will be destroyed now: */
+
+ *end = ' '; /* sentinel character */
+ for (; nf < up_to; scan++) {
+ /*
+ * special case: fs is single space, strip leading whitespace
+ */
+ while (scan < end && (*scan == ' ' || *scan == '\t'))
+ scan++;
+ if (scan >= end)
+ break;
+ field = scan;
+ /* no bounds check needed: the sentinel at *end stops this loop */
+ while (*scan != ' ' && *scan != '\t')
+ scan++;
+ (*set)(++nf, field, (int)(scan - field), n);
+ if (scan == end)
+ break;
+ }
+
+ /* everything done, restore original char at *end */
+ *end = sav;
+
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * sc_parse_field --- split a string into fields on a single character.
+ *
+ * this is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a single character
+ * other than space.
+ *
+ * The separator is the first character of `fs', or a newline when RS is
+ * the empty string and `fs' is empty.  Every occurrence separates two
+ * fields, so adjacent separators yield empty fields.  Each field is
+ * reported through (*set)(field number, start, length, n).  Parsing
+ * stops after field `up_to' or at the end of the `len' bytes at *buf;
+ * *buf is updated to where the scan stopped and the number of the last
+ * field set is returned.  `rp' is not used by this variant.
+ *
+ * The byte at (*buf)[len] is overwritten temporarily with a sentinel
+ * and restored before returning, so it has to be writable.
+ */
+static int
+sc_parse_field(up_to, buf, len, fs, rp, set, n)
+int up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+void (*set) (); /* routine to set the value of the parsed field */
+NODE *n;
+{
+ register char *scan = *buf;
+ register char fschar;
+ register int nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+ char sav;
+
+ /*
+ * up_to == HUGE: number the fields from 1 rather than continuing
+ * after parse_high_water (presumably the do_split() case -- confirm
+ * against the caller).
+ */
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+
+ if (*RS == 0 && fs->stlen == 0)
+ fschar = '\n';
+ else
+ fschar = fs->stptr[0];
+
+ /* before doing anything save the char at *end */
+ sav = *end;
+ /* because it will be destroyed now: */
+ *end = fschar; /* sentinel character */
+
+ /* the scan++ in the loop header steps over the separator just found */
+ for (; nf < up_to; scan++) {
+ field = scan;
+ /* no bounds check needed: the sentinel at *end stops this loop */
+ while (*scan++ != fschar)
+ ;
+ scan--;
+ (*set)(++nf, field, (int)(scan - field), n);
+ if (scan == end)
+ break;
+ }
+
+ /* everything done, restore original char at *end */
+ *end = sav;
+
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * fw_parse_field --- split a string into fixed-width fields.
+ *
+ * this is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when fields have fixed
+ * widths.
+ *
+ * Widths are taken from FIELDWIDTHS[], indexed by field number and
+ * terminated by -1; a width that runs past the end of the data is cut
+ * short.  Each field is reported through (*set)(field number, start,
+ * length, n).  Returns the number of the last field set.  *buf is moved
+ * to the end of the data once the width list is exhausted, otherwise to
+ * where the scan stopped.  `fs' and `rp' are not used by this variant.
+ */
+static int
+fw_parse_field(up_to, buf, len, fs, rp, set, n)
+int up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+void (*set) (); /* routine to set the value of the parsed field */
+NODE *n;
+{
+ register char *scan = *buf;
+ register int nf = parse_high_water;
+ register char *end = scan + len;
+
+ /*
+ * up_to == HUGE: number the fields from 1 rather than continuing
+ * after parse_high_water (presumably the do_split() case -- confirm
+ * against the caller).
+ */
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+ /* from here on `len' is reused to hold the width of the next field */
+ for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) {
+ if (len > end - scan)
+ len = end - scan;
+ (*set)(++nf, scan, len, n);
+ scan += len;
+ }
+ /* -1 means the width list ran out: treat the record as consumed */
+ if (len == -1)
+ *buf = end;
+ else
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * get_field --- return a pointer to the slot holding field `requested'.
+ *
+ * For $0 the record is rebuilt first if a field was changed since it
+ * was last valid, and a non-NULL `assign' receives reset_record as the
+ * after-assignment hook.  For other fields the record is parsed lazily,
+ * only as far as the requested field.  A non-NULL `assign' means the
+ * field is about to be assigned to: $0 is marked as needing
+ * reconstruction, and a field beyond the end of the record makes the
+ * record grow (the gap is filled with Nnull_string and NF is raised).
+ * A plain reference beyond the end yields a pointer to Nnull_string.
+ *
+ * Passing HUGE-1 as `requested' parses the whole record so that NF
+ * becomes known.
+ */
+NODE **
+get_field(requested, assign)
+register int requested;
+Func_ptr *assign; /* this field is on the LHS of an assign */
+{
+ /*
+ * if requesting whole line but some other field has been altered,
+ * then the whole line must be rebuilt
+ */
+ if (requested == 0) {
+ if (!field0_valid) {
+ /* first, parse remainder of input record */
+ if (NF == -1) {
+ NF = (*parse_field)(HUGE-1, &parse_extent,
+ fields_arr[0]->stlen -
+ (parse_extent - fields_arr[0]->stptr),
+ save_FS, FS_regexp, set_field,
+ (NODE *)NULL);
+ parse_high_water = NF;
+ }
+ rebuild_record();
+ }
+ if (assign)
+ *assign = reset_record;
+ return &fields_arr[0];
+ }
+
+ /* assert(requested > 0); */
+
+ if (assign)
+ field0_valid = 0; /* $0 needs reconstruction */
+
+ if (requested <= parse_high_water) /* already parsed this field */
+ return &fields_arr[requested];
+
+ if (NF == -1) { /* have not yet parsed to end of record */
+ /*
+ * parse up to requested fields, calling set_field() for each,
+ * saving in parse_extent the point where the parse left off
+ */
+ if (parse_high_water == 0) /* starting at the beginning */
+ parse_extent = fields_arr[0]->stptr;
+ parse_high_water = (*parse_field)(requested, &parse_extent,
+ fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
+ save_FS, FS_regexp, set_field, (NODE *)NULL);
+
+ /*
+ * if we reached the end of the record, set NF to the number of
+ * fields so far. Note that requested might actually refer to
+ * a field that is beyond the end of the record, but we won't
+ * set NF to that value at this point, since this is only a
+ * reference to the field and NF only gets set if the field
+ * is assigned to -- this case is handled below
+ */
+ if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
+ NF = parse_high_water;
+ if (requested == HUGE-1) /* HUGE-1 means set NF */
+ requested = parse_high_water;
+ }
+ if (parse_high_water < requested) { /* requested beyond end of record */
+ if (assign) { /* expand record */
+ register int i;
+
+ if (requested > nf_high_water)
+ grow_fields_arr(requested);
+
+ /* fill in fields that don't exist */
+ for (i = parse_high_water + 1; i <= requested; i++)
+ fields_arr[i] = Nnull_string;
+
+ NF = requested;
+ parse_high_water = requested;
+ } else
+ return &Nnull_string;
+ }
+
+ return &fields_arr[requested];
+}
+/*
+ * set_element --- store piece number num, the len bytes starting at s,
+ * in the array n under the subscript num. do_split() passes this as
+ * the (*set)() routine of the parse functions.
+ */
+static void
+set_element(num, s, len, n)
+int num;
+char *s;
+int len;
+NODE *n;
+{
+ register NODE *it;
+
+ it = make_string(s, len);
+ it->flags |= MAYBE_NUM; /* presumably lets the value act as a number if it looks like one -- confirm against MAYBE_NUM's definition */
+ *assoc_lookup(n, tmp_number((AWKNUM) (num))) = it; /* the subscript is the piece number */
+}
+/*
+ * do_split --- implement split(): break the string given by the first
+ * argument into pieces stored in the array given by the second, using
+ * the separator described by the third, and return the number of
+ * pieces as a temporary number node.
+ */
+NODE *
+do_split(tree)
+NODE *tree;
+{
+ NODE *t1, *t2, *t3, *tmp;
+ NODE *fs;
+ char *s;
+ int (*parseit)P((int, char **, int, NODE *,
+ Regexp *, void (*)(), NODE *));
+ Regexp *rp = NULL;
+
+ t1 = tree_eval(tree->lnode); /* the string to split */
+ t2 = tree->rnode->lnode; /* the destination array */
+ t3 = tree->rnode->rnode->lnode; /* the separator */
+
+ (void) force_string(t1);
+
+ if (t2->type == Node_param_list) /* array is a function parameter: use the stack entry it names */
+ t2 = stack_ptr[t2->param_cnt];
+ if (t2->type != Node_var && t2->type != Node_var_array)
+ fatal("second argument of split is not a variable");
+ assoc_clear(t2); /* presumably empties the array before it is refilled -- confirm */
+
+ if (t3->re_flags & FS_DFLT) { /* FS_DFLT set: use the current FS and its parse routine */
+ parseit = parse_field;
+ fs = force_string(FS_node->var_value);
+ rp = FS_regexp;
+ } else {
+ tmp = force_string(tree_eval(t3->re_exp));
+ if (tmp->stlen == 1) { /* single-character separator */
+ if (tmp->stptr[0] == ' ')
+ parseit = def_parse_field;
+ else
+ parseit = sc_parse_field;
+ } else { /* any other length: treat the separator as a regular expression */
+ parseit = re_parse_field;
+ rp = re_update(t3);
+ }
+ fs = tmp;
+ }
+
+ s = t1->stptr;
+ tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int)t1->stlen, /* HUGE as the limit: split the whole string */
+ fs, rp, set_element, t2));
+ free_temp(t1);
+ free_temp(t3); /* NOTE(review): the evaluated separator string was held in tmp, not t3 -- confirm it is released somewhere */
+ return tmp;
+}
+/*
+ * set_FS --- choose the field-parsing routine (parse_field) and, where
+ * one is needed, compile the regular expression (FS_regexp) that go
+ * with the current values of FS, RS and IGNORECASE. Also sets
+ * default_FS to 1 when FS is a single space, and to 0 otherwise.
+ */
+void
+set_FS()
+{
+ NODE *tmp = NULL; /* not used below */
+ char buf[10]; /* holds a short bracket expression built from a one-character FS */
+ NODE *fs;
+
+ buf[0] = '\0';
+ default_FS = 0;
+ if (FS_regexp) { /* drop the regexp left over from the previous setting */
+ refree(FS_regexp);
+ FS_regexp = NULL;
+ }
+ fs = force_string(FS_node->var_value);
+ if (fs->stlen > 1) /* FS longer than one character: regular expression */
+ parse_field = re_parse_field;
+ else if (*RS == 0) { /* RS is the empty string: newline is added to the separator pattern */
+ parse_field = sc_parse_field;
+ if (fs->stlen == 1) {
+ if (fs->stptr[0] == ' ') {
+ default_FS = 1;
+ strcpy(buf, "[ \t\n]+");
+ } else if (fs->stptr[0] != '\n')
+ sprintf(buf, "[%c\n]", fs->stptr[0]);
+ }
+ } else {
+ parse_field = def_parse_field;
+ if (fs->stptr[0] == ' ' && fs->stlen == 1)
+ default_FS = 1;
+ else if (fs->stptr[0] != ' ' && fs->stlen == 1) {
+ if (IGNORECASE == 0) /* case matters: the single-character routine is enough */
+ parse_field = sc_parse_field;
+ else /* ignoring case: match the character through a regexp instead */
+ sprintf(buf, "[%c]", fs->stptr[0]);
+ }
+ }
+ if (buf[0]) { /* a pattern was built above: it overrides the routine chosen so far */
+ FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, 1);
+ parse_field = re_parse_field;
+ } else if (parse_field == re_parse_field) { /* multi-character FS: compile FS itself */
+ FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, 1);
+ } else
+ FS_regexp = NULL;
+ resave_fs = 1; /* flag read elsewhere; presumably requests a refresh of save_FS -- confirm */
+}
+/*
+ * set_RS --- point the global RS at the string value of the RS
+ * variable, then call set_FS(), whose choices depend on *RS.
+ */
+void
+set_RS()
+{
+ (void) force_string(RS_node->var_value); /* make sure the value has a string form */
+ RS = RS_node->var_value->stptr;
+ set_FS();
+}
+
+/*
+ * set_FIELDWIDTHS --- parse the string value of the FIELDWIDTHS
+ * variable, a list of numbers separated by white space, into the
+ * FIELDWIDTHS[] array and switch field splitting to fw_parse_field().
+ *
+ * FIELDWIDTHS[0] is set to 0, the widths start at index 1, and the
+ * list is terminated by -1. fw_parse_field() uses each width as a
+ * byte count and -1 as the end marker, so a negative width is
+ * rejected here as a fatal error instead of being stored.
+ *
+ * Does nothing beyond the optional lint warning when do_unix is set.
+ */
+void
+set_FIELDWIDTHS()
+{
+ register char *scan;
+ char *end;
+ register int i;
+ static int fw_alloc = 1; /* number of ints currently allocated */
+ static int warned = 0; /* lint warning is given only once */
+ extern double strtod();
+
+ if (do_lint && ! warned) {
+ warned = 1;
+ warning("use of FIELDWIDTHS is a gawk extension");
+ }
+ if (do_unix) /* quick and dirty, does the trick */
+ return;
+
+ parse_field = fw_parse_field;
+ scan = force_string(FIELDWIDTHS_node->var_value)->stptr;
+ end = scan + 1; /* overwritten by strtod() below */
+ if (FIELDWIDTHS == NULL)
+ emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
+ FIELDWIDTHS[0] = 0;
+ for (i = 1; ; i++) {
+ if (i >= fw_alloc) { /* double the array so that slot i exists */
+ fw_alloc *= 2;
+ erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
+ }
+ FIELDWIDTHS[i] = (int) strtod(scan, &end);
+ if (end == scan) /* no number converted: end of the list */
+ break;
+ /*
+ * a negative width would be used as a byte count by
+ * fw_parse_field(), and -1 would be mistaken for the
+ * terminator, silently cutting the list short
+ */
+ if (FIELDWIDTHS[i] < 0)
+ fatal("negative field width in FIELDWIDTHS");
+ scan = end;
+ }
+ FIELDWIDTHS[i] = -1; /* terminator; slot i is the one that failed to convert */
+}
diff --git a/gnu/usr.bin/awk/gawk.texi b/gnu/usr.bin/awk/gawk.texi
new file mode 100644
index 000000000000..b2802623136d
--- /dev/null
+++ b/gnu/usr.bin/awk/gawk.texi
@@ -0,0 +1,11270 @@
+\input texinfo @c -*-texinfo-*-
+@c %**start of header (This is for running Texinfo on a region.)
+@setfilename gawk.info
+@settitle The GAWK Manual
+@c @smallbook
+@c %**end of header (This is for running Texinfo on a region.)
+
+@ifinfo
+@synindex fn cp
+@synindex vr cp
+@end ifinfo
+@iftex
+@syncodeindex fn cp
+@syncodeindex vr cp
+@end iftex
+
+@c If "finalout" is commented out, the printed output will show
+@c black boxes that mark lines that are too long. Thus, it is
+@c unwise to comment it out when running a master in case there are
+@c overfulls which are deemed okay.
+
+@iftex
+@finalout
+@end iftex
+
+@c ===> NOTE! <==
+@c Determine the edition number in *four* places by hand:
+@c 1. First ifinfo section 2. title page 3. copyright page 4. top node
+@c To find the locations, search for !!set
+
+@ifinfo
+This file documents @code{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+@c !!set edition, date, version
+This is Edition 0.15 of @cite{The GAWK Manual}, @*
+for the 2.15 version of the GNU implementation @*
+of AWK.
+
+Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+@ignore
+Permission is granted to process this file through TeX and print the
+results, provided the printed document carries copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+
+@end ignore
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end ifinfo
+
+@setchapternewpage odd
+
+@c !!set edition, date, version
+@titlepage
+@title The GAWK Manual
+@subtitle Edition 0.15
+@subtitle April 1993
+@author Diane Barlow Close
+@author Arnold D. Robbins
+@author Paul H. Rubin
+@author Richard Stallman
+
+@c Include the Distribution inside the titlepage environment so
+@c that headings are turned off. Headings on and off do not work.
+
+@page
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+@sp 2
+
+@c !!set edition, date, version
+This is Edition 0.15 of @cite{The GAWK Manual}, @*
+for the 2.15 version of the GNU implementation @*
+of AWK.
+
+@sp 2
+Published by the Free Software Foundation @*
+675 Massachusetts Avenue @*
+Cambridge, MA 02139 USA @*
+Printed copies are available for $20 each.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end titlepage
+
+@ifinfo
+@node Top, Preface, (dir), (dir)
+@comment node-name, next, previous, up
+@top General Introduction
+@c Preface or Licensing nodes should come right after the Top
+@c node, in `unnumbered' sections, then the chapter, `What is gawk'.
+
+This file documents @code{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+@c !!set edition, date, version
+This is Edition 0.15 of @cite{The GAWK Manual}, @*
+for the 2.15 version of the GNU implementation @*
+of AWK.
+
+@end ifinfo
+
+@menu
+* Preface:: What you can do with @code{awk}; brief history
+ and acknowledgements.
+* Copying:: Your right to copy and distribute @code{gawk}.
+* This Manual:: Using this manual.
+ Includes sample input files that you can use.
+* Getting Started:: A basic introduction to using @code{awk}.
+ How to run an @code{awk} program.
+ Command line syntax.
+* Reading Files:: How to read files and manipulate fields.
+* Printing:: How to print using @code{awk}. Describes the
+ @code{print} and @code{printf} statements.
+ Also describes redirection of output.
+* One-liners:: Short, sample @code{awk} programs.
+* Patterns:: The various types of patterns
+ explained in detail.
+* Actions:: The various types of actions are
+ introduced here. Describes
+ expressions and the various operators in
+ detail. Also describes comparison expressions.
+* Expressions:: Expressions are the basic building
+ blocks of statements.
+* Statements:: The various control statements are
+ described in detail.
+* Arrays:: The description and use of arrays.
+ Also includes array-oriented control
+ statements.
+* Built-in:: The built-in functions are summarized here.
+* User-defined:: User-defined functions are described in detail.
+* Built-in Variables:: Built-in Variables
+* Command Line:: How to run @code{gawk}.
+* Language History:: The evolution of the @code{awk} language.
+* Installation:: Installing @code{gawk} under
+ various operating systems.
+* Gawk Summary:: @code{gawk} Options and Language Summary.
+* Sample Program:: A sample @code{awk} program with a
+ complete explanation.
+* Bugs:: Reporting Problems and Bugs.
+* Notes:: Something about the
+ implementation of @code{gawk}.
+* Glossary:: An explanation of some unfamiliar terms.
+* Index::
+@end menu
+
+@node Preface, Copying, Top, Top
+@comment node-name, next, previous, up
+@unnumbered Preface
+
+@iftex
+@cindex what is @code{awk}
+@end iftex
+If you are like many computer users, you would frequently like to make
+changes in various text files wherever certain patterns appear, or
+extract data from parts of certain lines while discarding the rest. To
+write a program to do this in a language such as C or Pascal is a
+time-consuming inconvenience that may take many lines of code. The job
+may be easier with @code{awk}.
+
+The @code{awk} utility interprets a special-purpose programming language
+that makes it possible to handle simple data-reformatting jobs easily
+with just a few lines of code.
+
+The GNU implementation of @code{awk} is called @code{gawk}; it is fully
+upward compatible with the System V Release 4 version of
+@code{awk}. @code{gawk} is also upward compatible with the @sc{posix}
+(draft) specification of the @code{awk} language. This means that all
+properly written @code{awk} programs should work with @code{gawk}.
+Thus, we usually don't distinguish between @code{gawk} and other @code{awk}
+implementations in this manual.@refill
+
+@cindex uses of @code{awk}
+This manual teaches you what @code{awk} does and how you can use
+@code{awk} effectively. You should already be familiar with basic
+system commands such as @code{ls}. Using @code{awk} you can: @refill
+
+@itemize @bullet
+@item
+manage small, personal databases
+
+@item
+generate reports
+
+@item
+validate data
+@item
+produce indexes, and perform other document preparation tasks
+
+@item
+even experiment with algorithms that can be adapted later to other computer
+languages
+@end itemize
+
+@iftex
+This manual has the difficult task of being both a tutorial and a reference.
+If you are a novice, feel free to skip over details that seem too complex.
+You should also ignore the many cross references; they are for the
+expert user, and for the on-line Info version of the manual.
+@end iftex
+
+@menu
+* History:: The history of @code{gawk} and
+ @code{awk}. Acknowledgements.
+@end menu
+
+@node History, , Preface, Preface
+@comment node-name, next, previous, up
+@unnumberedsec History of @code{awk} and @code{gawk}
+
+@cindex acronym
+@cindex history of @code{awk}
+The name @code{awk} comes from the initials of its designers: Alfred V.
+Aho, Peter J. Weinberger, and Brian W. Kernighan. The original version of
+@code{awk} was written in 1977. In 1985 a new version made the programming
+language more powerful, introducing user-defined functions, multiple input
+streams, and computed regular expressions.
+This new version became generally available with System V Release 3.1.
+The version in System V Release 4 added some new features and also cleaned
+up the behavior in some of the ``dark corners'' of the language.
+The specification for @code{awk} in the @sc{posix} Command Language
+and Utilities standard further clarified the language based on feedback
+from both the @code{gawk} designers and the original @code{awk}
+designers.@refill
+
+The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin
+and Jay Fenlason, with advice from Richard Stallman. John Woods
+contributed parts of the code as well. In 1988 and 1989, David Trueman, with
+help from Arnold Robbins, thoroughly reworked @code{gawk} for compatibility
+with the newer @code{awk}. Current development (1992) focuses on bug fixes,
+performance improvements, and standards compliance.
+
+We need to thank many people for their assistance in producing this
+manual. Jay Fenlason contributed many ideas and sample programs. Richard
+Mlynarik and Robert J. Chassell gave helpful comments on early drafts of this
+manual. The paper @cite{A Supplemental Document for @code{awk}} by John W.
+Pierce of the Chemistry Department at UC San Diego pinpointed several
+issues relevant both to @code{awk} implementation and to this manual that
+would otherwise have escaped us. David Trueman, Pat Rankin, and Michal
+Jaegermann also contributed sections of the manual.@refill
+
+The following people provided many helpful comments on this edition of
+the manual: Rick Adams, Michael Brennan, Rich Burridge, Diane Close,
+Christopher (``Topher'') Eliot, Michael Lijewski, Pat Rankin, Miriam Robbins,
+and Michal Jaegermann. Robert J. Chassell provided much valuable advice on
+the use of Texinfo.
+
+Finally, we would like to thank Brian Kernighan of Bell Labs for invaluable
+assistance during the testing and debugging of @code{gawk}, and for
+help in clarifying numerous points about the language.@refill
+
+@node Copying, This Manual, Preface, Top
+@unnumbered GNU GENERAL PUBLIC LICENSE
+@center Version 2, June 1991
+
+@display
+Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
+675 Mass Ave, Cambridge, MA 02139, USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@c fakenode --- for prepinfo
+@unnumberedsec Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software---to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+@iftex
+@c fakenode --- for prepinfo
+@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end iftex
+@ifinfo
+@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end ifinfo
+
+@enumerate
+@item
+This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The ``Program'', below,
+refers to any such program or work, and a ``work based on the Program''
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term ``modification''.) Each licensee is addressed as ``you''.
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+@item
+You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+@item
+You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+@enumerate a
+@item
+You must cause the modified files to carry prominent notices
+stating that you changed the files and the date of any change.
+
+@item
+You must cause any work that you distribute or publish, that in
+whole or in part contains or is derived from the Program or any
+part thereof, to be licensed as a whole at no charge to all third
+parties under the terms of this License.
+
+@item
+If the modified program normally reads commands interactively
+when run, you must cause it, when started running for such
+interactive use in the most ordinary way, to print or display an
+announcement including an appropriate copyright notice and a
+notice that there is no warranty (or else, saying that you provide
+a warranty) and that users may redistribute the program under
+these conditions, and telling the user how to view a copy of this
+License. (Exception: if the Program itself is interactive but
+does not normally print such an announcement, your work based on
+the Program is not required to print an announcement.)
+@end enumerate
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+@item
+You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+@enumerate a
+@item
+Accompany it with the complete corresponding machine-readable
+source code, which must be distributed under the terms of Sections
+1 and 2 above on a medium customarily used for software interchange; or,
+
+@item
+Accompany it with a written offer, valid for at least three
+years, to give any third party, for a charge no more than your
+cost of physically performing source distribution, a complete
+machine-readable copy of the corresponding source code, to be
+distributed under the terms of Sections 1 and 2 above on a medium
+customarily used for software interchange; or,
+
+@item
+Accompany it with the information you received as to the offer
+to distribute corresponding source code. (This alternative is
+allowed only for noncommercial distribution and only if you
+received the program in object code or executable form with such
+an offer, in accord with Subsection b above.)
+@end enumerate
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+@item
+You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+@item
+You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+@item
+Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+@item
+If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+@item
+If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+@item
+The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and ``any
+later version'', you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+@item
+If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+@iftex
+@c fakenode --- for prepinfo
+@heading NO WARRANTY
+@end iftex
+@ifinfo
+@center NO WARRANTY
+@end ifinfo
+
+@item
+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+@item
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+@end enumerate
+
+@iftex
+@c fakenode --- for prepinfo
+@heading END OF TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center END OF TERMS AND CONDITIONS
+@end ifinfo
+
+@page
+@c fakenode --- for prepinfo
+@unnumberedsec How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the ``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and a brief idea of what it does.}
+Copyright (C) 19@var{yy} @var{name of author}
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+@smallexample
+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
+type `show w'.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type `show c' for details.
+@end smallexample
+
+The hypothetical commands @samp{show w} and @samp{show c} should show
+the appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than @samp{show w} and
+@samp{show c}; they could even be mouse-clicks or menu items---whatever
+suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a ``copyright disclaimer'' for the program, if
+necessary. Here is a sample; alter the names:
+
+@smallexample
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+`Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+@var{signature of Ty Coon}, 1 April 1989
+Ty Coon, President of Vice
+@end smallexample
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+@node This Manual, Getting Started, Copying, Top
+@chapter Using this Manual
+@cindex manual, using this
+@cindex using this manual
+@cindex language, @code{awk}
+@cindex program, @code{awk}
+@cindex @code{awk} language
+@cindex @code{awk} program
+
+The term @code{awk} refers to a particular program, and to the language you
+use to tell this program what to do. When we need to be careful, we call
+the program ``the @code{awk} utility'' and the language ``the @code{awk}
+language.'' The term @code{gawk} refers to a version of @code{awk} developed
+as part of the GNU project. The purpose of this manual is to explain
+both the
+@code{awk} language and how to run the @code{awk} utility.@refill
+
+While concentrating on the features of @code{gawk}, the manual will also
+attempt to describe important differences between @code{gawk} and other
+@code{awk} implementations. In particular, any features that are not
+in the @sc{posix} standard for @code{awk} will be noted. @refill
+
+The term @dfn{@code{awk} program} refers to a program written by you in
+the @code{awk} programming language.@refill
+
+@xref{Getting Started, ,Getting Started with @code{awk}}, for the bare
+essentials you need to know to start using @code{awk}.
+
+Some useful ``one-liners'' are included to give you a feel for the
+@code{awk} language (@pxref{One-liners, ,Useful ``One-liners''}).
+
+@ignore
+@strong{I deleted four paragraphs here because they would confuse the
+beginner more than help him. They mention terms such as ``field,''
+``pattern,'' ``action,'' ``built-in function'' which the beginner
+doesn't know.}
+
+@strong{If you can find a way to introduce several of these concepts here,
+enough to give the reader a map of what is to follow, that might
+be useful. I'm not sure that can be done without taking up more
+space than ought to be used here. There may be no way to win.}
+
+@strong{ADR: I'd like to tackle this in phase 2 of my editing.}
+@end ignore
+
+A sample @code{awk} program has been provided for you
+(@pxref{Sample Program}).@refill
+
+If you find terms that you aren't familiar with, try looking them
+up in the glossary (@pxref{Glossary}).@refill
+
+The entire @code{awk} language is summarized for quick reference in
+@ref{Gawk Summary, ,@code{gawk} Summary}. Look there if you just need
+to refresh your memory about a particular feature.@refill
+
+Most of the time complete @code{awk} programs are used as examples, but in
+some of the more advanced sections, only the part of the @code{awk} program
+that illustrates the concept being described is shown.@refill
+
+@menu
+* Sample Data Files:: Sample data files for use in the @code{awk}
+ programs illustrated in this manual.
+@end menu
+
+@node Sample Data Files, , This Manual, This Manual
+@section Data Files for the Examples
+
+@cindex input file, sample
+@cindex sample input file
+@cindex @file{BBS-list} file
+Many of the examples in this manual take their input from two sample
+data files. The first, called @file{BBS-list}, represents a list of
+computer bulletin board systems together with information about those systems.
+The second data file, called @file{inventory-shipped}, contains
+information about shipments on a monthly basis. Each line of these
+files is one @dfn{record}.
+
+In the file @file{BBS-list}, each record contains the name of a computer
+bulletin board, its phone number, the board's baud rate, and a code for
+the number of hours it is operational. An @samp{A} in the last column
+means the board operates 24 hours a day. A @samp{B} in the last
+column means the board operates evening and weekend hours only. A
+@samp{C} means the board operates only on weekends.
+
+@example
+aardvark 555-5553 1200/300 B
+alpo-net 555-3412 2400/1200/300 A
+barfly 555-7685 1200/300 A
+bites 555-1675 2400/1200/300 A
+camelot 555-0542 300 C
+core 555-2912 1200/300 C
+fooey 555-1234 2400/1200/300 B
+foot 555-6699 1200/300 B
+macfoo 555-6480 1200/300 A
+sdace 555-3430 2400/1200/300 A
+sabafoo 555-2127 1200/300 C
+@end example
+
+@cindex @file{inventory-shipped} file
+The second data file, called @file{inventory-shipped}, represents
+information about shipments during the year.
+Each record contains the month of the year, the number
+of green crates shipped, the number of red boxes shipped, the number of
+orange bags shipped, and the number of blue packages shipped,
+respectively. There are 16 entries, covering the 12 months of one year
+and 4 months of the next year.@refill
+
+@example
+Jan 13 25 15 115
+Feb 15 32 24 226
+Mar 15 24 34 228
+Apr 31 52 63 420
+May 16 34 29 208
+Jun 31 42 75 492
+Jul 24 34 67 436
+Aug 15 34 47 316
+Sep 13 55 37 277
+Oct 29 54 68 525
+Nov 20 87 82 577
+Dec 17 35 61 401
+
+Jan 21 36 64 620
+Feb 26 58 80 652
+Mar 24 75 70 495
+Apr 21 70 74 514
+@end example
+
+@ifinfo
+If you are reading this in GNU Emacs using Info, you can copy the regions
+of text showing these sample files into your own test files. This way you
+can try out the examples shown in the remainder of this document. You do
+this by using the command @kbd{M-x write-region} to copy text from the Info
+file into a file for use with @code{awk}
+(@xref{Misc File Ops, , , emacs, GNU Emacs Manual},
+for more information). Using this information, create your own
+@file{BBS-list} and @file{inventory-shipped} files, and practice what you
+learn in this manual.
+@end ifinfo
+
+@node Getting Started, Reading Files, This Manual, Top
+@chapter Getting Started with @code{awk}
+@cindex script, definition of
+@cindex rule, definition of
+@cindex program, definition of
+@cindex basic function of @code{gawk}
+
+The basic function of @code{awk} is to search files for lines (or other
+units of text) that contain certain patterns. When a line matches one
+of the patterns, @code{awk} performs specified actions on that line.
+@code{awk} keeps processing input lines in this way until the end of the
+input file is reached.@refill
+
+When you run @code{awk}, you specify an @code{awk} @dfn{program} which
+tells @code{awk} what to do. The program consists of a series of
+@dfn{rules}. (It may also contain @dfn{function definitions}, but that
+is an advanced feature, so we will ignore it for now.
+@xref{User-defined, ,User-defined Functions}.) Each rule specifies one
+pattern to search for, and one action to perform when that pattern is found.
+
+Syntactically, a rule consists of a pattern followed by an action. The
+action is enclosed in curly braces to separate it from the pattern.
+Rules are usually separated by newlines. Therefore, an @code{awk}
+program looks like this:
+
+@example
+@var{pattern} @{ @var{action} @}
+@var{pattern} @{ @var{action} @}
+@dots{}
+@end example
+
+@menu
+* Very Simple:: A very simple example.
+* Two Rules:: A less simple one-line example with two rules.
+* More Complex:: A more complex example.
+* Running gawk:: How to run @code{gawk} programs;
+ includes command line syntax.
+* Comments:: Adding documentation to @code{gawk} programs.
+* Statements/Lines:: Subdividing or combining statements into lines.
+* When:: When to use @code{gawk} and
+ when to use other things.
+@end menu
+
+@node Very Simple, Two Rules, Getting Started, Getting Started
+@section A Very Simple Example
+
+@cindex @samp{print $0}
+The following command runs a simple @code{awk} program that searches the
+input file @file{BBS-list} for the string of characters: @samp{foo}. (A
+string of characters is usually called a @dfn{string}.
+The term @dfn{string} is perhaps based on similar usage in English, such
+as ``a string of pearls,'' or, ``a string of cars in a train.'')
+
+@example
+awk '/foo/ @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+When lines containing @samp{foo} are found, they are printed, because
+@w{@samp{print $0}} means print the current line. (Just @samp{print} by
+itself means the same thing, so we could have written that
+instead.)
+
+You will notice that slashes, @samp{/}, surround the string @samp{foo}
+in the actual @code{awk} program. The slashes indicate that @samp{foo}
+is a pattern to search for. This type of pattern is called a
+@dfn{regular expression}, and is covered in more detail later
+(@pxref{Regexp, ,Regular Expressions as Patterns}). There are
+single-quotes around the @code{awk} program so that the shell won't
+interpret any of it as special shell characters.@refill
+
+Here is what this program prints:
+
+@example
+@group
+fooey 555-1234 2400/1200/300 B
+foot 555-6699 1200/300 B
+macfoo 555-6480 1200/300 A
+sabafoo 555-2127 1200/300 C
+@end group
+@end example
+
+@cindex action, default
+@cindex pattern, default
+@cindex default action
+@cindex default pattern
+In an @code{awk} rule, either the pattern or the action can be omitted,
+but not both. If the pattern is omitted, then the action is performed
+for @emph{every} input line. If the action is omitted, the default
+action is to print all lines that match the pattern.
+
+Thus, we could leave out the action (the @code{print} statement and the curly
+braces) in the above example, and the result would be the same: all
+lines matching the pattern @samp{foo} would be printed. By comparison,
+omitting the @code{print} statement but retaining the curly braces makes an
+empty action that does nothing; then no lines would be printed.
+
+@node Two Rules, More Complex, Very Simple, Getting Started
+@section An Example with Two Rules
+@cindex how @code{awk} works
+
+The @code{awk} utility reads the input files one line at a
+time. For each line, @code{awk} tries the patterns of each of the rules.
+If several patterns match then several actions are run, in the order in
+which they appear in the @code{awk} program. If no patterns match, then
+no actions are run.
+
+After processing all the rules (perhaps none) that match the line,
+@code{awk} reads the next line (however,
+@pxref{Next Statement, ,The @code{next} Statement}). This continues
+until the end of the file is reached.@refill
+
+For example, the @code{awk} program:
+
+@example
+/12/ @{ print $0 @}
+/21/ @{ print $0 @}
+@end example
+
+@noindent
+contains two rules. The first rule has the string @samp{12} as the
+pattern and @samp{print $0} as the action. The second rule has the
+string @samp{21} as the pattern and also has @samp{print $0} as the
+action. Each rule's action is enclosed in its own pair of braces.
+
+This @code{awk} program prints every line that contains the string
+@samp{12} @emph{or} the string @samp{21}. If a line contains both
+strings, it is printed twice, once by each rule.
+
+If we run this program on our two sample data files, @file{BBS-list} and
+@file{inventory-shipped}, as shown here:
+
+@example
+awk '/12/ @{ print $0 @}
+ /21/ @{ print $0 @}' BBS-list inventory-shipped
+@end example
+
+@noindent
+we get the following output:
+
+@example
+aardvark 555-5553 1200/300 B
+alpo-net 555-3412 2400/1200/300 A
+barfly 555-7685 1200/300 A
+bites 555-1675 2400/1200/300 A
+core 555-2912 1200/300 C
+fooey 555-1234 2400/1200/300 B
+foot 555-6699 1200/300 B
+macfoo 555-6480 1200/300 A
+sdace 555-3430 2400/1200/300 A
+sabafoo 555-2127 1200/300 C
+sabafoo 555-2127 1200/300 C
+Jan 21 36 64 620
+Apr 21 70 74 514
+@end example
+
+@noindent
+Note how the line in @file{BBS-list} beginning with @samp{sabafoo}
+was printed twice, once for each rule.
+
+@node More Complex, Running gawk, Two Rules, Getting Started
+@comment node-name, next, previous, up
+@section A More Complex Example
+
+Here is an example to give you an idea of what typical @code{awk}
+programs do. This example shows how @code{awk} can be used to
+summarize, select, and rearrange the output of another utility. It uses
+features that haven't been covered yet, so don't worry if you don't
+understand all the details.
+
+@example
+ls -l | awk '$5 == "Nov" @{ sum += $4 @}
+ END @{ print sum @}'
+@end example
+
+This command prints the total number of bytes in all the files in the
+current directory that were last modified in November (of any year).
+(In the C shell you would need to type a semicolon and then a backslash
+at the end of the first line; in a @sc{posix}-compliant shell, such as the
+Bourne shell or the Bourne-Again shell, you can type the example as shown.)
+
+The @w{@samp{ls -l}} part of this example is a command that gives you a
+listing of the files in a directory, including file size and date.
+Its output looks like this:@refill
+
+@example
+-rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile
+-rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h
+-rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h
+-rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y
+-rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c
+-rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c
+-rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c
+-rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c
+@end example
+
+@noindent
+The first field contains read-write permissions, the second field contains
+the number of links to the file, and the third field identifies the owner of
+the file. The fourth field contains the size of the file in bytes. The
+fifth, sixth, and seventh fields contain the month, day, and time,
+respectively, that the file was last modified. Finally, the eighth field
+contains the name of the file.
+
+The @code{$5 == "Nov"} in our @code{awk} program is an expression that
+tests whether the fifth field of the output from @w{@samp{ls -l}}
+matches the string @samp{Nov}. Each time a line has the string
+@samp{Nov} in its fifth field, the action @samp{@{ sum += $4 @}} is
+performed. This adds the fourth field (the file size) to the variable
+@code{sum}. As a result, when @code{awk} has finished reading all the
+input lines, @code{sum} is the sum of the sizes of files whose
+lines matched the pattern. (This works because @code{awk} variables
+are automatically initialized to zero.)@refill
+
+After the last line of output from @code{ls} has been processed, the
+@code{END} rule is executed, and the value of @code{sum} is
+printed. In this example, the value of @code{sum} would be 80600.@refill
+
+These more advanced @code{awk} techniques are covered in later sections
+(@pxref{Actions, ,Overview of Actions}). Before you can move on to more
+advanced @code{awk} programming, you have to know how @code{awk} interprets
+your input and displays your output. By manipulating fields and using
+@code{print} statements, you can produce some very useful and
+spectacular-looking reports.@refill
+
+@node Running gawk, Comments, More Complex, Getting Started
+@section How to Run @code{awk} Programs
+
+@ignore
+Date: Mon, 26 Aug 91 09:48:10 +0200
+From: gatech!vsoc07.cern.ch!matheys (Jean-Pol Matheys (CERN - ECP Division))
+To: uunet.UU.NET!skeeve!arnold
+Subject: RE: status check
+
+The introduction of Chapter 2 (i.e. before 2.1) should include
+the whole of section 2.4 - it's better to tell people how to run awk programs
+before giving any examples
+
+ADR --- he's right. but for now, don't do this because the rest of the
+chapter would need some rewriting.
+@end ignore
+
+@cindex command line formats
+@cindex running @code{awk} programs
+There are several ways to run an @code{awk} program. If the program is
+short, it is easiest to include it in the command that runs @code{awk},
+like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of patterns and actions, as
+described earlier.
+
+When the program is long, it is usually more convenient to put it in a file
+and run it with a command like this:
+
+@example
+awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@menu
+* One-shot:: Running a short throw-away @code{awk} program.
+* Read Terminal:: Using no input files (input from
+ terminal instead).
+* Long:: Putting permanent @code{awk} programs in files.
+* Executable Scripts:: Making self-contained @code{awk} programs.
+@end menu
+
+@node One-shot, Read Terminal, Running gawk, Running gawk
+@subsection One-shot Throw-away @code{awk} Programs
+
+Once you are familiar with @code{awk}, you will often type simple
+programs at the moment you want to use them. Then you can write the
+program as the first argument of the @code{awk} command, like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of @var{patterns} and
+@var{actions}, as described earlier.
+
+@cindex single quotes, why needed
+This command format instructs the shell to start @code{awk} and use the
+@var{program} to process records in the input file(s). There are single
+quotes around @var{program} so that the shell doesn't interpret any
+@code{awk} characters as special shell characters. They also cause the
+shell to treat all of @var{program} as a single argument for
+@code{awk} and allow @var{program} to be more than one line long.@refill
+
+This format is also useful for running short or medium-sized @code{awk}
+programs from shell scripts, because it avoids the need for a separate
+file for the @code{awk} program. A self-contained shell script is more
+reliable since there are no other files to misplace.
+
+@node Read Terminal, Long, One-shot, Running gawk
+@subsection Running @code{awk} without Input Files
+
+@cindex standard input
+@cindex input, standard
+You can also run @code{awk} without any input files. If you type the
+command line:@refill
+
+@example
+awk '@var{program}'
+@end example
+
+@noindent
+then @code{awk} applies the @var{program} to the @dfn{standard input},
+which usually means whatever you type on the terminal. This continues
+until you indicate end-of-file by typing @kbd{Control-d}.
+
+For example, if you execute this command:
+
+@example
+awk '/th/'
+@end example
+
+@noindent
+whatever you type next is taken as data for that @code{awk}
+program. If you go on to type the following data:
+
+@example
+Kathy
+Ben
+Tom
+Beth
+Seth
+Karen
+Thomas
+@kbd{Control-d}
+@end example
+
+@noindent
+then @code{awk} prints this output:
+
+@example
+Kathy
+Beth
+Seth
+@end example
+
+@noindent
+@cindex case sensitivity
+@cindex pattern, case sensitive
+as matching the pattern @samp{th}. Notice that it did not recognize
+@samp{Thomas} as matching the pattern. The @code{awk} language is
+@dfn{case sensitive}, and matches patterns exactly. (However, you can
+override this with the variable @code{IGNORECASE}.
+@xref{Case-sensitivity, ,Case-sensitivity in Matching}.)
+
+@node Long, Executable Scripts, Read Terminal, Running gawk
+@subsection Running Long Programs
+
+@cindex running long programs
+@cindex @samp{-f} option
+@cindex program file
+@cindex file, @code{awk} program
+Sometimes your @code{awk} programs can be very long. In this case it is
+more convenient to put the program into a separate file. To tell
+@code{awk} to use that file for its program, you type:@refill
+
+@example
+awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+The @samp{-f} instructs the @code{awk} utility to get the @code{awk} program
+from the file @var{source-file}. Any file name can be used for
+@var{source-file}. For example, you could put the program:@refill
+
+@example
+/th/
+@end example
+
+@noindent
+into the file @file{th-prog}. Then this command:
+
+@example
+awk -f th-prog
+@end example
+
+@noindent
+does the same thing as this one:
+
+@example
+awk '/th/'
+@end example
+
+@noindent
+which was explained earlier (@pxref{Read Terminal, ,Running @code{awk} without Input Files}).
+Note that you don't usually need single quotes around the file name that you
+specify with @samp{-f}, because most file names don't contain any of the shell's
+special characters. Notice that in @file{th-prog}, the @code{awk}
+program did not have single quotes around it. The quotes are only needed
+for programs that are provided on the @code{awk} command line.
+
+If you want to identify your @code{awk} program files clearly as such,
+you can add the extension @file{.awk} to the file name. This doesn't
+affect the execution of the @code{awk} program, but it does make
+``housekeeping'' easier.
+
+@node Executable Scripts, , Long, Running gawk
+@c node-name, next, previous, up
+@subsection Executable @code{awk} Programs
+@cindex executable scripts
+@cindex scripts, executable
+@cindex self-contained programs
+@cindex program, self-contained
+@cindex @samp{#!}
+
+Once you have learned @code{awk}, you may want to write self-contained
+@code{awk} scripts, using the @samp{#!} script mechanism. You can do
+this on many Unix systems@footnote{The @samp{#!} mechanism works on
+Unix systems derived from Berkeley Unix, System V Release 4, and some System
+V Release 3 systems.} (and someday on GNU).@refill
+
+For example, you could create a text file named @file{hello}, containing
+the following (where @samp{BEGIN} is a feature we have not yet
+discussed):
+
+@example
+#! /bin/awk -f
+
+# a sample awk program
+BEGIN @{ print "hello, world" @}
+@end example
+
+@noindent
+After making this file executable (with the @code{chmod} command), you
+can simply type:
+
+@example
+hello
+@end example
+
+@noindent
+at the shell, and the system will arrange to run @code{awk}@footnote{The
+line beginning with @samp{#!} lists the full pathname of an interpreter
+to be run, and an optional initial command line argument to pass to that
+interpreter. The operating system then runs the interpreter with the given
+argument and the full argument list of the executed program. The first argument
+in the list is the full pathname of the @code{awk} program. The rest of the
+argument list will either be options to @code{awk}, or data files,
+or both.} as if you had typed:@refill
+
+@example
+awk -f hello
+@end example
+
+@noindent
+Self-contained @code{awk} scripts are useful when you want to write a
+program which users can invoke without knowing that the program is
+written in @code{awk}.
+
+@cindex shell scripts
+@cindex scripts, shell
+If your system does not support the @samp{#!} mechanism, you can get a
+similar effect using a regular shell script. It would look something
+like this:
+
+@example
+: The colon makes sure this script is executed by the Bourne shell.
+awk '@var{program}' "$@@"
+@end example
+
+Using this technique, it is @emph{vital} to enclose the @var{program} in
+single quotes to protect it from interpretation by the shell. If you
+omit the quotes, only a shell wizard can predict the results.
+
+The @samp{"$@@"} causes the shell to forward all the command line
+arguments to the @code{awk} program, without interpretation. The first
+line, which starts with a colon, is used so that this shell script will
+work even if invoked by a user who uses the C shell.
+@c Someday: (See @cite{The Bourne Again Shell}, by ??.)
+
+@node Comments, Statements/Lines, Running gawk, Getting Started
+@section Comments in @code{awk} Programs
+@cindex @samp{#}
+@cindex comments
+@cindex use of comments
+@cindex documenting @code{awk} programs
+@cindex programs, documenting
+
+A @dfn{comment} is some text that is included in a program for the sake
+of human readers, and that is not really part of the program. Comments
+can explain what the program does, and how it works. Nearly all
+programming languages have provisions for comments, because programs are
+typically hard to understand without their extra help.
+
+In the @code{awk} language, a comment starts with the sharp sign
+character, @samp{#}, and continues to the end of the line. The
+@code{awk} language ignores the rest of a line following a sharp sign.
+For example, we could have put the following into @file{th-prog}:@refill
+
+@smallexample
+# This program finds records containing the pattern @samp{th}. This is how
+# you continue comments on additional lines.
+/th/
+@end smallexample
+
+You can put comment lines into keyboard-composed throw-away @code{awk}
+programs also, but this usually isn't very useful; the purpose of a
+comment is to help you or another person understand the program at
+a later time.@refill
+
+@node Statements/Lines, When, Comments, Getting Started
+@section @code{awk} Statements versus Lines
+
+Most often, each line in an @code{awk} program is a separate statement or
+separate rule, like this:
+
+@example
+awk '/12/ @{ print $0 @}
+ /21/ @{ print $0 @}' BBS-list inventory-shipped
+@end example
+
+But sometimes statements can be more than one line, and lines can
+contain several statements. You can split a statement into multiple
+lines by inserting a newline after any of the following:@refill
+
+@example
+, @{ ? : || && do else
+@end example
+
+@noindent
+A newline at any other point is considered the end of the statement.
+(Splitting lines after @samp{?} and @samp{:} is a minor @code{gawk}
+extension. The @samp{?} and @samp{:} referred to here are those of the
+three-operand conditional expression described in
+@ref{Conditional Exp, ,Conditional Expressions}.)@refill
+
+@cindex backslash continuation
+@cindex continuation of lines
+If you would like to split a single statement into two lines at a point
+where a newline would terminate it, you can @dfn{continue} it by ending the
+first line with a backslash character, @samp{\}. This is allowed
+absolutely anywhere in the statement, even in the middle of a string or
+regular expression. For example:
+
+@example
+awk '/This program is too long, so continue it\
+ on the next line/ @{ print $1 @}'
+@end example
+
+@noindent
+We have generally not used backslash continuation in the sample programs in
+this manual. Since in @code{gawk} there is no limit on the length of a line,
+it is never strictly necessary; it just makes programs prettier. We have
+preferred to make them even prettier by keeping the statements short.
+Backslash continuation is most useful when your @code{awk} program is in a
+separate source file, instead of typed in on the command line. You should
+also note that many @code{awk} implementations are more picky about where
+you may use backslash continuation. For maximal portability of your @code{awk}
+programs, it is best not to split your lines in the middle of a regular
+expression or a string.@refill
+
+@strong{Warning: backslash continuation does not work as described above
+with the C shell.} Continuation with backslash works for @code{awk}
+programs in files, and also for one-shot programs @emph{provided} you
+are using a @sc{posix}-compliant shell, such as the Bourne shell or the
+Bourne-again shell. But the C shell used on Berkeley Unix behaves
+differently! There, you must use two backslashes in a row, followed by
+a newline.@refill
+
+@cindex multiple statements on one line
+When @code{awk} statements within one rule are short, you might want to put
+more than one of them on a line. You do this by separating the statements
+with a semicolon, @samp{;}.
+This also applies to the rules themselves.
+Thus, the previous program could have been written:@refill
+
+@example
+/12/ @{ print $0 @} ; /21/ @{ print $0 @}
+@end example
+
+@noindent
+@strong{Note:} the requirement that rules on the same line must be
+separated with a semicolon is a recent change in the @code{awk}
+language; it was done for consistency with the treatment of statements
+within an action.
+
+@node When, , Statements/Lines, Getting Started
+@section When to Use @code{awk}
+
+@cindex when to use @code{awk}
+@cindex applications of @code{awk}
+You might wonder how @code{awk} might be useful for you. Using additional
+utility programs, more advanced patterns, field separators, arithmetic
+statements, and other selection criteria, you can produce much more
+complex output. The @code{awk} language is very useful for producing
+reports from large amounts of raw data, such as summarizing information
+from the output of other utility programs like @code{ls}.
+(@xref{More Complex, ,A More Complex Example}.)
+
+Programs written with @code{awk} are usually much smaller than they would
+be in other languages. This makes @code{awk} programs easy to compose and
+use. Often @code{awk} programs can be quickly composed at your terminal,
+used once, and thrown away. Since @code{awk} programs are interpreted, you
+can avoid the usually lengthy edit-compile-test-debug cycle of software
+development.
+
+Complex programs have been written in @code{awk}, including a complete
+retargetable assembler for 8-bit microprocessors (@pxref{Glossary}, for
+more information) and a microcode assembler for a special purpose Prolog
+computer. However, @code{awk}'s capabilities are strained by tasks of
+such complexity.
+
+If you find yourself writing @code{awk} scripts of more than, say, a few
+hundred lines, you might consider using a different programming
+language. Emacs Lisp is a good choice if you need sophisticated string
+or pattern matching capabilities. The shell is also good at string and
+pattern matching; in addition, it allows powerful use of the system
+utilities. More conventional languages, such as C, C++, and Lisp, offer
+better facilities for system programming and for managing the complexity
+of large programs. Programs in these languages may require more lines
+of source code than the equivalent @code{awk} programs, but they are
+easier to maintain and usually run more efficiently.@refill
+
+@node Reading Files, Printing, Getting Started, Top
+@chapter Reading Input Files
+
+@cindex reading files
+@cindex input
+@cindex standard input
+@vindex FILENAME
+In the typical @code{awk} program, all input is read either from the
+standard input (by default the keyboard, but often a pipe from another
+command) or from files whose names you specify on the @code{awk} command
+line. If you specify input files, @code{awk} reads them in order, reading
+all the data from one before going on to the next. The name of the current
+input file can be found in the built-in variable @code{FILENAME}
+(@pxref{Built-in Variables}).@refill
+
+The input is read in units called records, and processed by the
+rules one record at a time. By default, each record is one line. Each
+record is split automatically into fields, to make it more
+convenient for a rule to work on its parts.
+
+On rare occasions you will need to use the @code{getline} command,
+which can do explicit input from any number of files
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
+
+@menu
+* Records:: Controlling how data is split into records.
+* Fields:: An introduction to fields.
+* Non-Constant Fields:: Non-constant Field Numbers.
+* Changing Fields:: Changing the Contents of a Field.
+* Field Separators:: The field separator and how to change it.
+* Constant Size:: Reading constant width data.
+* Multiple Line:: Reading multi-line records.
+* Getline:: Reading files under explicit program control
+ using the @code{getline} function.
+* Close Input:: Closing an input file (so you can read from
+ the beginning once more).
+@end menu
+
+@node Records, Fields, Reading Files, Reading Files
+@section How Input is Split into Records
+
+@cindex record separator
+The @code{awk} language divides its input into records and fields.
+Records are separated by a character called the @dfn{record separator}.
+By default, the record separator is the newline character, defining
+a record to be a single line of text.@refill
+
+@iftex
+@cindex changing the record separator
+@end iftex
+@vindex RS
+Sometimes you may want to use a different character to separate your
+records. You can use a different character by changing the built-in
+variable @code{RS}. The value of @code{RS} is a string that says how
+to separate records; the default value is @code{"\n"}, the string containing
+just a newline character. This is why records are, by default, single lines.
+
+@code{RS} can have any string as its value, but only the first character
+of the string is used as the record separator. The other characters are
+ignored. @code{RS} is exceptional in this regard; @code{awk} uses the
+full value of all its other built-in variables.@refill
+
+@ignore
+Someday this should be true!
+
+The value of @code{RS} is not limited to a one-character string. It can
+be any regular expression (@pxref{Regexp, ,Regular Expressions as Patterns}).
+In general, each record
+ends at the next string that matches the regular expression; the next
+record starts at the end of the matching string. This general rule is
+actually at work in the usual case, where @code{RS} contains just a
+newline: a record ends at the beginning of the next matching string (the
+next newline in the input) and the following record starts just after
+the end of this string (at the first character of the following line).
+The newline, since it matches @code{RS}, is not part of either record.@refill
+@end ignore
+
+You can change the value of @code{RS} in the @code{awk} program with the
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+The new record-separator character should be enclosed in quotation marks to make
+a string constant. Often the right time to do this is at the beginning
+of execution, before any input has been processed, so that the very
+first record will be read with the proper separator. To do this, use
+the special @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}). For
+example:@refill
+
+@example
+awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+changes the value of @code{RS} to @code{"/"}, before reading any input.
+This is a string whose first character is a slash; as a result, records
+are separated by slashes. Then the input file is read, and the second
+rule in the @code{awk} program (the action with no pattern) prints each
+record. Since each @code{print} statement adds a newline at the end of
+its output, the effect of this @code{awk} program is to copy the input
+with each slash changed to a newline.
+
+Another way to change the record separator is on the command line,
+using the variable-assignment feature
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@example
+awk '@{ print $0 @}' RS="/" BBS-list
+@end example
+
+@noindent
+This sets @code{RS} to @samp{/} before processing @file{BBS-list}.
+
+Reaching the end of an input file terminates the current input record,
+even if the last character in the file is not the character in @code{RS}.
+
+@ignore
+@c merge the preceding paragraph and this stuff into one paragraph
+@c and put it in an `expert info' section.
+This produces correct behavior in the vast majority of cases, although
+the following (extreme) pipeline prints a surprising @samp{1}. (There
+is one field, consisting of a newline.)
+
+@example
+echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}'
+@end example
+
+@end ignore
+
+The empty string, @code{""} (a string of no characters), has a special meaning
+as the value of @code{RS}: it means that records are separated only
+by blank lines. @xref{Multiple Line, ,Multiple-Line Records}, for more details.
+
+@cindex number of records, @code{NR} or @code{FNR}
+@vindex NR
+@vindex FNR
+The @code{awk} utility keeps track of the number of records that have
+been read so far from the current input file. This value is stored in a
+built-in variable called @code{FNR}. It is reset to zero when a new
+file is started. Another built-in variable, @code{NR}, is the total
+number of input records read so far from all files. It starts at zero
+but is never automatically reset to zero.
+
+If you change the value of @code{RS} in the middle of an @code{awk} run,
+the new value is used to delimit subsequent records, but the record
+currently being processed (and records already processed) are not
+affected.
+
+@node Fields, Non-Constant Fields, Records, Reading Files
+@section Examining Fields
+
+@cindex examining fields
+@cindex fields
+@cindex accessing fields
+When @code{awk} reads an input record, the record is
+automatically separated or @dfn{parsed} by the interpreter into chunks
+called @dfn{fields}. By default, fields are separated by whitespace,
+like words in a line.
+Whitespace in @code{awk} means any string of one or more spaces and/or
+tabs; other characters such as newline, formfeed, and so on, that are
+considered whitespace by other languages are @emph{not} considered
+whitespace by @code{awk}.@refill
+
+The purpose of fields is to make it more convenient for you to refer to
+these pieces of the record. You don't have to use them---you can
+operate on the whole record if you wish---but fields are what make
+simple @code{awk} programs so powerful.
+
+@cindex @code{$} (field operator)
+@cindex operators, @code{$}
+To refer to a field in an @code{awk} program, you use a dollar-sign,
+@samp{$}, followed by the number of the field you want. Thus, @code{$1}
+refers to the first field, @code{$2} to the second, and so on. For
+example, suppose the following is a line of input:@refill
+
+@example
+This seems like a pretty nice example.
+@end example
+
+@noindent
+Here the first field, or @code{$1}, is @samp{This}; the second field, or
+@code{$2}, is @samp{seems}; and so on. Note that the last field,
+@code{$7}, is @samp{example.}. Because there is no space between the
+@samp{e} and the @samp{.}, the period is considered part of the seventh
+field.@refill
+
+No matter how many fields there are, the last field in a record can be
+represented by @code{$NF}. So, in the example above, @code{$NF} would
+be the same as @code{$7}, which is @samp{example.}. Why this works is
+explained below (@pxref{Non-Constant Fields, ,Non-constant Field Numbers}).
+If you try to refer to a field beyond the last one, such as @code{$8}
+when the record has only 7 fields, you get the empty string.@refill
+
+@vindex NF
+@cindex number of fields, @code{NF}
+Plain @code{NF}, with no @samp{$}, is a built-in variable whose value
+is the number of fields in the current record.
+
+@code{$0}, which looks like an attempt to refer to the zeroth field, is
+a special case: it represents the whole input record. This is what you
+would use if you weren't interested in fields.
+
+Here are some more examples:
+
+@example
+awk '$1 ~ /foo/ @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+This example prints each record in the file @file{BBS-list} whose first
+field contains the string @samp{foo}. The operator @samp{~} is called a
+@dfn{matching operator} (@pxref{Comparison Ops, ,Comparison Expressions});
+it tests whether a string (here, the field @code{$1}) matches a given regular
+expression.@refill
+
+By contrast, the following example:
+
+@example
+awk '/foo/ @{ print $1, $NF @}' BBS-list
+@end example
+
+@noindent
+looks for @samp{foo} in @emph{the entire record} and prints the first
+field and the last field for each input record containing a
+match.@refill
+
+@node Non-Constant Fields, Changing Fields, Fields, Reading Files
+@section Non-constant Field Numbers
+
+The number of a field does not need to be a constant. Any expression in
+the @code{awk} language can be used after a @samp{$} to refer to a
+field. The value of the expression specifies the field number. If the
+value is a string, rather than a number, it is converted to a number.
+Consider this example:@refill
+
+@example
+awk '@{ print $NR @}'
+@end example
+
+@noindent
+Recall that @code{NR} is the number of records read so far: 1 in the
+first record, 2 in the second, etc. So this example prints the first
+field of the first record, the second field of the second record, and so
+on. For the twentieth record, field number 20 is printed; most likely,
+the record has fewer than 20 fields, so this prints a blank line.
+
+Here is another example of using expressions as field numbers:
+
+@example
+awk '@{ print $(2*2) @}' BBS-list
+@end example
+
+The @code{awk} language must evaluate the expression @code{(2*2)} and use
+its value as the number of the field to print. The @samp{*} sign
+represents multiplication, so the expression @code{2*2} evaluates to 4.
+The parentheses are used so that the multiplication is done before the
+@samp{$} operation; they are necessary whenever there is a binary
+operator in the field-number expression. This example, then, prints the
+hours of operation (the fourth field) for every line of the file
+@file{BBS-list}.@refill
+
+If the field number you compute is zero, you get the entire record.
+Thus, @code{$(2-2)} has the same value as @code{$0}. Negative field
+numbers are not allowed.
+
+The number of fields in the current record is stored in the built-in
+variable @code{NF} (@pxref{Built-in Variables}). The expression
+@code{$NF} is not a special feature: it is the direct consequence of
+evaluating @code{NF} and using its value as a field number.
+
+@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files
+@section Changing the Contents of a Field
+
+@cindex field, changing contents of
+@cindex changing contents of a field
+@cindex assignment to fields
+You can change the contents of a field as seen by @code{awk} within an
+@code{awk} program; this changes what @code{awk} perceives as the
+current input record. (The actual input is untouched: @code{awk} never
+modifies the input file.)
+
+Consider this example:
+
+@smallexample
+awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped
+@end smallexample
+
+@noindent
+The @samp{-} sign represents subtraction, so this program reassigns
+field three, @code{$3}, to be the value of field two minus ten,
+@code{$2 - 10}. (@xref{Arithmetic Ops, ,Arithmetic Operators}.)
+Then field two, and the new value for field three, are printed.
+
+In order for this to work, the text in field @code{$2} must make sense
+as a number; the string of characters must be converted to a number in
+order for the computer to do arithmetic on it. The number resulting
+from the subtraction is converted back to a string of characters which
+then becomes field three.
+@xref{Conversion, ,Conversion of Strings and Numbers}.@refill
+
+When you change the value of a field (as perceived by @code{awk}), the
+text of the input record is recalculated to contain the new field where
+the old one was. Therefore, @code{$0} changes to reflect the altered
+field. Thus,
+
+@smallexample
+awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped
+@end smallexample
+
+@noindent
+prints a copy of the input file, with 10 subtracted from the second
+field of each line.
+
+You can also assign contents to fields that are out of range. For
+example:
+
+@smallexample
+awk '@{ $6 = ($5 + $4 + $3 + $2) ; print $6 @}' inventory-shipped
+@end smallexample
+
+@noindent
+We've just created @code{$6}, whose value is the sum of fields
+@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign
+represents addition. For the file @file{inventory-shipped}, @code{$6}
+represents the total number of parcels shipped for a particular month.
+
+Creating a new field changes the internal @code{awk} copy of the current
+input record---the value of @code{$0}. Thus, if you do @samp{print $0}
+after adding a field, the record printed includes the new field, with
+the appropriate number of field separators between it and the previously
+existing fields.
+
+This recomputation affects and is affected by several features not yet
+discussed, in particular, the @dfn{output field separator}, @code{OFS},
+which is used to separate the fields (@pxref{Output Separators}), and
+@code{NF} (the number of fields; @pxref{Fields, ,Examining Fields}).
+For example, the value of @code{NF} is set to the number of the highest
+field you create.@refill
+
+Note, however, that merely @emph{referencing} an out-of-range field
+does @emph{not} change the value of either @code{$0} or @code{NF}.
+Referencing an out-of-range field merely produces a null string. For
+example:@refill
+
+@smallexample
+if ($(NF+1) != "")
+ print "can't happen"
+else
+ print "everything is normal"
+@end smallexample
+
+@noindent
+should print @samp{everything is normal}, because @code{NF+1} is certain
+to be out of range. (@xref{If Statement, ,The @code{if} Statement},
+for more information about @code{awk}'s @code{if-else} statements.)@refill
+
+It is important to note that assigning to an existing field will change
+the value of @code{$0}, but will not change the value of @code{NF},
+even when you assign the null string to a field. For example:
+
+@smallexample
+echo a b c d | awk '@{ OFS = ":"; $2 = "" ; print ; print NF @}'
+@end smallexample
+
+@noindent
+prints
+
+@smallexample
+a::c:d
+4
+@end smallexample
+
+@noindent
+The field is still there; it just has an empty value. You can tell
+because there are two colons in a row.
+
+@node Field Separators, Constant Size, Changing Fields, Reading Files
+@section Specifying how Fields are Separated
+@vindex FS
+@cindex fields, separating
+@cindex field separator, @code{FS}
+@cindex @samp{-F} option
+
+(This section is rather long; it describes one of the most fundamental
+operations in @code{awk}. If you are a novice with @code{awk}, we
+recommend that you re-read this section after you have studied the
+section on regular expressions, @ref{Regexp, ,Regular Expressions as Patterns}.)
+
+The way @code{awk} splits an input record into fields is controlled by
+the @dfn{field separator}, which is a single character or a regular
+expression. @code{awk} scans the input record for matches for the
+separator; the fields themselves are the text between the matches. For
+example, if the field separator is @samp{oo}, then the following line:
+
+@smallexample
+moo goo gai pan
+@end smallexample
+
+@noindent
+would be split into three fields: @samp{m}, @samp{@ g} and @samp{@ gai@
+pan}.
+
+The field separator is represented by the built-in variable @code{FS}.
+Shell programmers take note! @code{awk} does not use the name @code{IFS}
+which is used by the shell.@refill
+
+You can change the value of @code{FS} in the @code{awk} program with the
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+Often the right time to do this is at the beginning of execution,
+before any input has been processed, so that the very first record
+will be read with the proper separator. To do this, use the special
+@code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).
+For example, here we set the value of @code{FS} to the string
+@code{","}:@refill
+
+@smallexample
+awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
+@end smallexample
+
+@noindent
+Given the input line,
+
+@smallexample
+John Q. Smith, 29 Oak St., Walamazoo, MI 42139
+@end smallexample
+
+@noindent
+this @code{awk} program extracts the string @samp{@ 29 Oak St.}.
+
+@cindex field separator, choice of
+@cindex regular expressions as field separators
+Sometimes your input data will contain separator characters that don't
+separate fields the way you thought they would. For instance, the
+person's name in the example we've been using might have a title or
+suffix attached, such as @samp{John Q. Smith, LXIX}. From input
+containing such a name:
+
+@smallexample
+John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139
+@end smallexample
+
+@noindent
+the previous sample program would extract @samp{@ LXIX}, instead of
+@samp{@ 29 Oak St.}. If you were expecting the program to print the
+address, you would be surprised. So choose your data layout and
+separator characters carefully to prevent such problems.
+
+As you know, by default, fields are separated by whitespace sequences
+(spaces and tabs), not by single spaces: two spaces in a row do not
+delimit an empty field. The default value of the field separator is a
+string @w{@code{" "}} containing a single space. If this value were
+interpreted in the usual way, each space character would separate
+fields, so two spaces in a row would make an empty field between them.
+The reason this does not happen is that a single space as the value of
+@code{FS} is a special case: it is taken to specify the default manner
+of delimiting fields.
+
+If @code{FS} is any other single character, such as @code{","}, then
+each occurrence of that character separates two fields. Two consecutive
+occurrences delimit an empty field. If the character occurs at the
+beginning or the end of the line, that too delimits an empty field. The
+space character is the only single character which does not follow these
+rules.
+
+More generally, the value of @code{FS} may be a string containing any
+regular expression. Then each match in the record for the regular
+expression separates fields. For example, the assignment:@refill
+
+@smallexample
+FS = ", \t"
+@end smallexample
+
+@noindent
+makes every area of an input line that consists of a comma followed by a
+space and a tab into a field separator. (@samp{\t} stands for a
+tab.)@refill
+
+For a less trivial example of a regular expression, suppose you want
+single spaces to separate fields the way single commas were used above.
+You can set @code{FS} to @w{@code{"[@ ]"}}. This regular expression
+matches a single space and nothing else.
+
+@c the following index entry is an overfull hbox. --mew 30jan1992
+@cindex field separator: on command line
+@cindex command line, setting @code{FS} on
+@code{FS} can be set on the command line. You use the @samp{-F} argument to
+do so. For example:
+
+@smallexample
+awk -F, '@var{program}' @var{input-files}
+@end smallexample
+
+@noindent
+sets @code{FS} to be the @samp{,} character. Notice that the argument uses
+a capital @samp{F}. Contrast this with @samp{-f}, which specifies a file
+containing an @code{awk} program. Case is significant in command options:
+the @samp{-F} and @samp{-f} options have nothing to do with each other.
+You can use both options at the same time to set the @code{FS} variable
+@emph{and} get an @code{awk} program from a file.@refill
+
+@c begin expert info
+The value used for the argument to @samp{-F} is processed in exactly the
+same way as assignments to the built-in variable @code{FS}. This means that
+if the field separator contains special characters, they must be escaped
+appropriately. For example, to use a @samp{\} as the field separator, you
+would have to type:
+
+@smallexample
+# same as FS = "\\"
+awk -F\\\\ '@dots{}' files @dots{}
+@end smallexample
+
+@noindent
+Since @samp{\} is used for quoting in the shell, @code{awk} will see
+@samp{-F\\}. Then @code{awk} processes the @samp{\\} for escape
+characters (@pxref{Constants, ,Constant Expressions}), finally yielding
+a single @samp{\} to be used for the field separator.
+@c end expert info
+
+As a special case, in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), if the
+argument to @samp{-F} is @samp{t}, then @code{FS} is set to the tab
+character. (This is because if you type @samp{-F\t}, without the quotes,
+at the shell, the @samp{\} gets deleted, so @code{awk} figures that you
+really want your fields to be separated with tabs, and not @samp{t}s.
+Use @samp{-v FS="t"} on the command line if you really do want to separate
+your fields with @samp{t}s.)@refill
+
+For example, let's use an @code{awk} program file called @file{baud.awk}
+that contains the pattern @code{/300/}, and the action @samp{print $1}.
+Here is the program:
+
+@smallexample
+/300/ @{ print $1 @}
+@end smallexample
+
+Let's also set @code{FS} to be the @samp{-} character, and run the
+program on the file @file{BBS-list}. The following command prints a
+list of the names of the bulletin boards that operate at 300 baud and
+the first three digits of their phone numbers:@refill
+
+@smallexample
+awk -F- -f baud.awk BBS-list
+@end smallexample
+
+@noindent
+It produces this output:
+
+@smallexample
+aardvark 555
+alpo
+barfly 555
+bites 555
+camelot 555
+core 555
+fooey 555
+foot 555
+macfoo 555
+sdace 555
+sabafoo 555
+@end smallexample
+
+@noindent
+Note the second line of output. If you check the original file, you will
+see that the second line looked like this:
+
+@smallexample
+alpo-net 555-3412 2400/1200/300 A
+@end smallexample
+
+The @samp{-} as part of the system's name was used as the field
+separator, instead of the @samp{-} in the phone number that was
+originally intended. This demonstrates why you have to be careful in
+choosing your field and record separators.
+
+The following program searches the system password file, and prints
+the entries for users who have no password:
+
+@smallexample
+awk -F: '$2 == ""' /etc/passwd
+@end smallexample
+
+@noindent
+Here we use the @samp{-F} option on the command line to set the field
+separator. Note that fields in @file{/etc/passwd} are separated by
+colons. The second field represents a user's encrypted password, but if
+the field is empty, that user has no password.
+
+@c begin expert info
+According to the @sc{posix} standard, @code{awk} is supposed to behave
+as if each record is split into fields at the time that it is read.
+In particular, this means that you can change the value of @code{FS}
+after a record is read, but before any of the fields are referenced.
+The value of the fields (i.e., how they were split) should reflect the
+old value of @code{FS}, not the new one.
+
+However, many implementations of @code{awk} do not do this. Instead,
+they defer splitting the fields until a field reference actually happens,
+using the @emph{current} value of @code{FS}! This behavior can be difficult
+to diagnose. The following example illustrates the results of the two methods.
+(The @code{sed} command prints just the first line of @file{/etc/passwd}.)
+
+@smallexample
+sed 1q /etc/passwd | awk '@{ FS = ":" ; print $1 @}'
+@end smallexample
+
+@noindent
+This command will usually print
+
+@smallexample
+root
+@end smallexample
+
+@noindent
+on an incorrect implementation of @code{awk}, while @code{gawk}
+will print something like
+
+@smallexample
+root:nSijPlPhZZwgE:0:0:Root:/:
+@end smallexample
+@c end expert info
+
+@c begin expert info
+There is an important difference between the two cases of @samp{FS = @w{" "}}
+(a single blank) and @samp{FS = @w{"[ \t]+"}} (which is a regular expression
+matching one or more blanks or tabs). For both values of @code{FS}, fields
+are separated by runs of blanks and/or tabs. However, when the value of
+@code{FS} is @code{" "}, @code{awk} will strip leading and trailing whitespace
+from the record, and then decide where the fields are.
+
+For example, the following pipeline prints @samp{b}:
+
+@smallexample
+echo ' a b c d ' | awk '@{ print $2 @}'
+@end smallexample
+
+@noindent
+However, the following prints @samp{a}:
+
+@smallexample
+echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t]+" @} ; @{ print $2 @}'
+@end smallexample
+
+@noindent
+In this case, the first field is null.
+
+The stripping of leading and trailing whitespace also comes into
+play whenever @code{$0} is recomputed. For instance, this pipeline
+
+@smallexample
+echo ' a b c d' | awk '@{ print; $2 = $2; print @}'
+@end smallexample
+
+@noindent
+produces this output:
+
+@smallexample
+ a b c d
+a b c d
+@end smallexample
+
+@noindent
+The first @code{print} statement prints the record as it was read,
+with leading whitespace intact. The assignment to @code{$2} rebuilds
+@code{$0} by concatenating @code{$1} through @code{$NF} together,
+separated by the value of @code{OFS}. Since the leading whitespace
+was ignored when finding @code{$1}, it is not part of the new @code{$0}.
+Finally, the last @code{print} statement prints the new @code{$0}.
+@c end expert info
+
+The following table summarizes how fields are split, based on the
+value of @code{FS}.
+
+@table @code
+@item FS == " "
+Fields are separated by runs of whitespace. Leading and trailing
+whitespace are ignored. This is the default.
+
+@item FS == @var{any single character}
+Fields are separated by each occurrence of the character. Multiple
+successive occurrences delimit empty fields, as do leading and
+trailing occurrences.
+
+@item FS == @var{regexp}
+Fields are separated by occurrences of characters that match @var{regexp}.
+Leading and trailing matches of @var{regexp} delimit empty fields.
+@end table
+
+@node Constant Size, Multiple Line, Field Separators, Reading Files
+@section Reading Fixed-width Data
+
+(This section discusses an advanced, experimental feature. If you are
+a novice @code{awk} user, you may wish to skip it on the first reading.)
+
+@code{gawk} 2.13 introduced a new facility for dealing with fixed-width fields
+with no distinctive field separator. Data of this nature arises typically
+in one of at least two ways: the input for old FORTRAN programs where
+numbers are run together, and the output of programs that did not anticipate
+the use of their output as input for other programs.
+
+An example of the latter is a table where all the columns are lined up by
+the use of a variable number of spaces and @emph{empty fields are just
+spaces}. Clearly, @code{awk}'s normal field splitting based on @code{FS}
+will not work well in this case. (Although a portable @code{awk} program
+can use a series of @code{substr} calls on @code{$0}, this is awkward and
+inefficient for a large number of fields.)@refill
+
+The splitting of an input record into fixed-width fields is specified by
+assigning a string containing space-separated numbers to the built-in
+variable @code{FIELDWIDTHS}. Each number specifies the width of the field
+@emph{including} columns between fields. If you want to ignore the columns
+between fields, you can specify the width as a separate field that is
+subsequently ignored.
+
+The following data is the output of the @code{w} utility. It is useful
+to illustrate the use of @code{FIELDWIDTHS}.
+
+@smallexample
+ 10:06pm up 21 days, 14:04, 23 users
+User tty login@ idle JCPU PCPU what
+hzuo ttyV0 8:58pm 9 5 vi p24.tex
+hzang ttyV3 6:37pm 50 -csh
+eklye ttyV5 9:53pm 7 1 em thes.tex
+dportein ttyV6 8:17pm 1:47 -csh
+gierd ttyD3 10:00pm 1 elm
+dave ttyD4 9:47pm 4 4 w
+brent ttyp0 26Jun91 4:46 26:46 4:41 bash
+dave ttyq4 26Jun9115days 46 46 wnewmail
+@end smallexample
+
+The following program takes the above input, converts the idle time to
+number of seconds and prints out the first two fields and the calculated
+idle time. (This program uses a number of @code{awk} features that
+haven't been introduced yet.)@refill
+
+@smallexample
+BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
+NR > 2 @{
+ idle = $4
+ sub(/^ */, "", idle) # strip leading spaces
+ if (idle == "") idle = 0
+ if (idle ~ /:/) @{ split(idle, t, ":"); idle = t[1] * 60 + t[2] @}
+ if (idle ~ /days/) @{ idle *= 24 * 60 * 60 @}
+
+ print $1, $2, idle
+@}
+@end smallexample
+
+Here is the result of running the program on the data:
+
+@smallexample
+hzuo ttyV0 0
+hzang ttyV3 50
+eklye ttyV5 0
+dportein ttyV6 107
+gierd ttyD3 1
+dave ttyD4 0
+brent ttyp0 286
+dave ttyq4 1296000
+@end smallexample
+
+Another (possibly more practical) example of fixed-width input data
+would be the input from a deck of balloting cards. In some parts of
+the United States, voters make their choices by punching holes in computer
+cards. These cards are then processed to count the votes for any particular
+candidate or on any particular issue. Since a voter may choose not to
+vote on some issue, any column on the card may be empty. An @code{awk}
+program for processing such data could use the @code{FIELDWIDTHS} feature
+to simplify reading the data.@refill
+
+@c of course, getting gawk to run on a system with card readers is
+@c another story!
+
+This feature is still experimental, and will likely evolve over time.
+
+@node Multiple Line, Getline, Constant Size, Reading Files
+@section Multiple-Line Records
+
+@cindex multiple line records
+@cindex input, multiple line records
+@cindex reading files, multiple line records
+@cindex records, multiple line
+In some databases, a single line cannot conveniently hold all the
+information in one entry. In such cases, you can use multi-line
+records.
+
+The first step in doing this is to choose your data format: when records
+are not defined as single lines, how do you want to define them?
+What should separate records?
+
+One technique is to use an unusual character or string to separate
+records. For example, you could use the formfeed character (written
+@code{\f} in @code{awk}, as in C) to separate them, making each record
+a page of the file. To do this, just set the variable @code{RS} to
+@code{"\f"} (a string containing the formfeed character). Any
+other character could equally well be used, as long as it won't be part
+of the data in a record.@refill
+
+@ignore
+Another technique is to have blank lines separate records. The string
+@code{"^\n+"} is a regular expression that matches any sequence of
+newlines starting at the beginning of a line---in other words, it
+matches a sequence of blank lines. If you set @code{RS} to this string,
+a record always ends at the first blank line encountered. In
+addition, a regular expression always matches the longest possible
+sequence when there is a choice. So the next record doesn't start until
+the first nonblank line that follows---no matter how many blank lines
+appear in a row, they are considered one record-separator.
+@end ignore
+
+Another technique is to have blank lines separate records. By a special
+dispensation, a null string as the value of @code{RS} indicates that
+records are separated by one or more blank lines. If you set @code{RS}
+to the null string, a record always ends at the first blank line
+encountered. And the next record doesn't start until the first nonblank
+line that follows---no matter how many blank lines appear in a row, they
+are considered one record-separator. (End of file is also considered
+a record separator.)@refill
+@c !!! This use of `end of file' is confusing. Needs to be clarified.
+
+The second step is to separate the fields in the record. One way to do
+this is to put each field on a separate line: to do this, just set the
+variable @code{FS} to the string @code{"\n"}. (This simple regular
+expression matches a single newline.)
+
+Another way to separate fields is to divide each of the lines into fields
+in the normal manner. This happens by default as a result of a special
+feature: when @code{RS} is set to the null string, the newline character
+@emph{always} acts as a field separator. This is in addition to whatever
+field separations result from @code{FS}.
+
+The original motivation for this special exception was probably so that
+you get useful behavior in the default case (i.e., @w{@code{FS == " "}}).
+This feature can be a problem if you really don't want the
+newline character to separate fields, since there is no way to
+prevent it. However, you can work around this by using the @code{split}
+function to break up the record manually
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
+
+@ignore
+Here are two ways to use records separated by blank lines and break each
+line into fields normally:
+
+@example
+awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} @{ print $1 @}' BBS-list
+
+@exdent @r{or}
+
+awk 'BEGIN @{ RS = "^\n+"; FS = "[ \t\n]+" @} @{ print $1 @}' BBS-list
+@end example
+@end ignore
+
+@ignore
+Here is how to use records separated by blank lines and break each
+line into fields normally:
+
+@example
+awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} ; @{ print $1 @}' BBS-list
+@end example
+@end ignore
+
+@node Getline, Close Input, Multiple Line, Reading Files
+@section Explicit Input with @code{getline}
+
+@findex getline
+@cindex input, explicit
+@cindex explicit input
+@cindex input, @code{getline} command
+@cindex reading files, @code{getline} command
+So far we have been getting our input data from @code{awk}'s main
+input stream---either the standard input (usually your terminal) or the
+files specified on the command line. The @code{awk} language has a
+special built-in command called @code{getline} that
+can be used to read input under your explicit control.@refill
+
+This command is quite complex and should @emph{not} be used by
+beginners. It is covered here because this is the chapter on input.
+The examples that follow the explanation of the @code{getline} command
+include material that has not been covered yet. Therefore, come back
+and study the @code{getline} command @emph{after} you have reviewed the
+rest of this manual and have a good knowledge of how @code{awk} works.
+
+@vindex ERRNO
+@cindex differences: @code{gawk} and @code{awk}
+@code{getline} returns 1 if it finds a record, and 0 if the end of the
+file is encountered. If there is some error in getting a record, such
+as a file that cannot be opened, then @code{getline} returns @minus{}1.
+In this case, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
+In the following examples, @var{command} stands for a string value that
+represents a shell command.
+
+@table @code
+@item getline
+The @code{getline} command can be used without arguments to read input
+from the current input file. All it does in this case is read the next
+input record and split it up into fields. This is useful if you've
+finished processing the current record, but you want to do some special
+processing @emph{right now} on the next record. Here's an
+example:@refill
+
+@example
+awk '@{
+ if (t = index($0, "/*")) @{
+ if (t > 1)
+ tmp = substr($0, 1, t - 1)
+ else
+ tmp = ""
+ u = index(substr($0, t + 2), "*/")
+ while (u == 0) @{
+ getline
+ t = -1
+ u = index($0, "*/")
+ @}
+ if (u <= length($0) - 2)
+ $0 = tmp substr($0, t + u + 3)
+ else
+ $0 = tmp
+ @}
+ print $0
+@}'
+@end example
+
+This @code{awk} program deletes all C-style comments, @samp{/* @dots{}
+*/}, from the input. By replacing the @samp{print $0} with other
+statements, you could perform more complicated processing on the
+decommented input, like searching for matches of a regular
+expression. (This program has a subtle problem---can you spot it?)
+
+@c the program to remove comments doesn't work if one
+@c comment ends and another begins on the same line. (Your
+@c idea for restart would be useful here). --- brennan@boeing.com
+
+This form of the @code{getline} command sets @code{NF} (the number of
+fields; @pxref{Fields, ,Examining Fields}), @code{NR} (the number of
+records read so far; @pxref{Records, ,How Input is Split into Records}),
+@code{FNR} (the number of records read from this input file), and the
+value of @code{$0}.
+
+@strong{Note:} the new value of @code{$0} is used in testing
+the patterns of any subsequent rules. The original value
+of @code{$0} that triggered the rule which executed @code{getline}
+is lost. By contrast, the @code{next} statement reads a new record
+but immediately begins processing it normally, starting with the first
+rule in the program. @xref{Next Statement, ,The @code{next} Statement}.
+
+@item getline @var{var}
+This form of @code{getline} reads a record into the variable @var{var}.
+This is useful when you want your program to read the next record from
+the current input file, but you don't want to subject the record to the
+normal input processing.
+
+For example, suppose the next line is a comment, or a special string,
+and you want to read it, but you must make certain that it won't trigger
+any rules. This version of @code{getline} allows you to read that line
+and store it in a variable so that the main
+read-a-line-and-check-each-rule loop of @code{awk} never sees it.
+
+The following example swaps every two lines of input. For example, given:
+
+@example
+wan
+tew
+free
+phore
+@end example
+
+@noindent
+it outputs:
+
+@example
+tew
+wan
+phore
+free
+@end example
+
+@noindent
+Here's the program:
+
+@example
+@group
+awk '@{
+ if ((getline tmp) > 0) @{
+ print tmp
+ print $0
+ @} else
+ print $0
+@}'
+@end group
+@end example
+
+The @code{getline} function used in this way sets only the variables
+@code{NR} and @code{FNR} (and of course, @var{var}). The record is not
+split into fields, so the values of the fields (including @code{$0}) and
+the value of @code{NF} do not change.@refill
+
+@item getline < @var{file}
+@cindex input redirection
+@cindex redirection of input
+This form of the @code{getline} function takes its input from the file
+@var{file}. Here @var{file} is a string-valued expression that
+specifies the file name. @samp{< @var{file}} is called a @dfn{redirection}
+since it directs input to come from a different place.
+
+This form is useful if you want to read your input from a particular
+file, instead of from the main input stream. For example, the following
+program reads its input record from the file @file{foo.input} when it
+encounters a first field with a value equal to 10 in the current input
+file.@refill
+
+@example
+awk '@{
+ if ($1 == 10) @{
+ getline < "foo.input"
+ print
+ @} else
+ print
+@}'
+@end example
+
+Since the main input stream is not used, the values of @code{NR} and
+@code{FNR} are not changed. But the record read is split into fields in
+the normal manner, so the values of @code{$0} and other fields are
+changed. So is the value of @code{NF}.
+
+This does not cause the record to be tested against all the patterns
+in the @code{awk} program, in the way that would happen if the record
+were read normally by the main processing loop of @code{awk}. However,
+the new record is tested against any subsequent rules, just as when
+@code{getline} is used without a redirection.
+
+@item getline @var{var} < @var{file}
+This form of the @code{getline} function takes its input from the file
+@var{file} and puts it in the variable @var{var}. As above, @var{file}
+is a string-valued expression that specifies the file from which to read.
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields. The only variable
+changed is @var{var}.
+
+For example, the following program copies all the input files to the
+output, except for records that say @w{@samp{@@include @var{filename}}}.
+Such a record is replaced by the contents of the file
+@var{filename}.@refill
+
+@example
+awk '@{
+ if (NF == 2 && $1 == "@@include") @{
+ while ((getline line < $2) > 0)
+ print line
+ close($2)
+ @} else
+ print
+@}'
+@end example
+
+Note here how the name of the extra input file is not built into
+the program; it is taken from the data, from the second field on
+the @samp{@@include} line.@refill
+
+The @code{close} function is called to ensure that if two identical
+@samp{@@include} lines appear in the input, the entire specified file is
+included twice. @xref{Close Input, ,Closing Input Files and Pipes}.@refill
+
+One deficiency of this program is that it does not process nested
+@samp{@@include} statements the way a true macro preprocessor would.
+
+@item @var{command} | getline
+You can @dfn{pipe} the output of a command into @code{getline}. A pipe is
+simply a way to link the output of one program to the input of another. In
+this case, the string @var{command} is run as a shell command and its output
+is piped into @code{awk} to be used as input. This form of @code{getline}
+reads one record from the pipe.
+
+For example, the following program copies input to output, except for lines
+that begin with @samp{@@execute}, which are replaced by the output produced by
+running the rest of the line as a shell command:
+
+@example
+awk '@{
+ if ($1 == "@@execute") @{
+ tmp = substr($0, 10)
+ while ((tmp | getline) > 0)
+ print
+ close(tmp)
+ @} else
+ print
+@}'
+@end example
+
+@noindent
+The @code{close} function is called to ensure that if two identical
+@samp{@@execute} lines appear in the input, the command is run for
+each one. @xref{Close Input, ,Closing Input Files and Pipes}.
+
+Given the input:
+
+@example
+foo
+bar
+baz
+@@execute who
+bletch
+@end example
+
+@noindent
+the program might produce:
+
+@example
+foo
+bar
+baz
+hack ttyv0 Jul 13 14:22
+hack ttyp0 Jul 13 14:23 (gnu:0)
+hack ttyp1 Jul 13 14:23 (gnu:0)
+hack ttyp2 Jul 13 14:23 (gnu:0)
+hack ttyp3 Jul 13 14:23 (gnu:0)
+bletch
+@end example
+
+@noindent
+Notice that this program ran the command @code{who} and printed the result.
+(If you try this program yourself, you will get different results, showing
+you who is logged in on your system.)
+
+This variation of @code{getline} splits the record into fields, sets the
+value of @code{NF} and recomputes the value of @code{$0}. The values of
+@code{NR} and @code{FNR} are not changed.
+
+@item @var{command} | getline @var{var}
+The output of the command @var{command} is sent through a pipe to
+@code{getline} and into the variable @var{var}. For example, the
+following program reads the current date and time into the variable
+@code{current_time}, using the @code{date} utility, and then
+prints it.@refill
+
+@example
+awk 'BEGIN @{
+ "date" | getline current_time
+ close("date")
+ print "Report printed on " current_time
+@}'
+@end example
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields.
+@end table
+
+@node Close Input, , Getline, Reading Files
+@section Closing Input Files and Pipes
+@cindex closing input files and pipes
+@findex close
+
+If the same file name or the same shell command is used with
+@code{getline} more than once during the execution of an @code{awk}
+program, the file is opened (or the command is executed) only the first time.
+At that time, the first record of input is read from that file or command.
+The next time the same file or command is used in @code{getline}, another
+record is read from it, and so on.
+
+This implies that if you want to start reading the same file again from
+the beginning, or if you want to rerun a shell command (rather than
+reading more output from the command), you must take special steps.
+What you must do is use the @code{close} function, as follows:
+
+@example
+close(@var{filename})
+@end example
+
+@noindent
+or
+
+@example
+close(@var{command})
+@end example
+
+The argument @var{filename} or @var{command} can be any expression. Its
+value must exactly equal the string that was used to open the file or
+start the command---for example, if you open a pipe with this:
+
+@example
+"sort -r names" | getline foo
+@end example
+
+@noindent
+then you must close it with this:
+
+@example
+close("sort -r names")
+@end example
+
+Once this function call is executed, the next @code{getline} from that
+file or command will reopen the file or rerun the command.
+
+@iftex
+@vindex ERRNO
+@cindex differences: @code{gawk} and @code{awk}
+@end iftex
+@code{close} returns a value of zero if the close succeeded.
+Otherwise, the value will be non-zero.
+In this case, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
+@node Printing, One-liners, Reading Files, Top
+@chapter Printing Output
+
+@cindex printing
+@cindex output
+One of the most common things that actions do is to output or @dfn{print}
+some or all of the input. For simple output, use the @code{print}
+statement. For fancier formatting use the @code{printf} statement.
+Both are described in this chapter.
+
+@menu
+* Print:: The @code{print} statement.
+* Print Examples:: Simple examples of @code{print} statements.
+* Output Separators:: The output separators and how to change them.
+* OFMT:: Controlling Numeric Output with @code{print}.
+* Printf:: The @code{printf} statement.
+* Redirection:: How to redirect output to multiple
+ files and pipes.
+* Special Files:: File name interpretation in @code{gawk}.
+ @code{gawk} allows access to
+ inherited file descriptors.
+@end menu
+
+@node Print, Print Examples, Printing, Printing
+@section The @code{print} Statement
+@cindex @code{print} statement
+
+The @code{print} statement does output with simple, standardized
+formatting. You specify only the strings or numbers to be printed, in a
+list separated by commas. They are output, separated by single spaces,
+followed by a newline. The statement looks like this:
+
+@example
+print @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of items may optionally be enclosed in parentheses. The
+parentheses are necessary if any of the item expressions uses a
+relational operator; otherwise it could be confused with a redirection
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+The relational operators are @samp{==},
+@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
+@samp{!~} (@pxref{Comparison Ops, ,Comparison Expressions}).@refill
+
+The items printed can be constant strings or numbers, fields of the
+current record (such as @code{$1}), variables, or any @code{awk}
+expressions. The @code{print} statement is completely general for
+computing @emph{what} values to print. With two exceptions,
+you cannot specify @emph{how} to print them---how many
+columns, whether to use exponential notation or not, and so on.
+(@xref{Output Separators}, and
+@ref{OFMT, ,Controlling Numeric Output with @code{print}}.)
+For full control over formatting, you need the @code{printf} statement
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
+
+The simple statement @samp{print} with no items is equivalent to
+@samp{print $0}: it prints the entire current record. To print a blank
+line, use @samp{print ""}, where @code{""} is the null, or empty,
+string.
+
+To print a fixed piece of text, use a string constant such as
+@w{@code{"Hello there"}} as one item. If you forget to use the
+double-quote characters, your text will be taken as an @code{awk}
+expression, and you will probably get an error. Keep in mind that a
+space is printed between any two items.
+
+Most often, each @code{print} statement makes one line of output. But it
+isn't limited to one line. If an item value is a string that contains a
+newline, the newline is output along with the rest of the string. A
+single @code{print} can make any number of lines this way.
+
+@node Print Examples, Output Separators, Print, Printing
+@section Examples of @code{print} Statements
+
+Here is an example of printing a string that contains embedded newlines:
+
+@example
+awk 'BEGIN @{ print "line one\nline two\nline three" @}'
+@end example
+
+@noindent
+produces output like this:
+
+@example
+line one
+line two
+line three
+@end example
+
+Here is an example that prints the first two fields of each input record,
+with a space between them:
+
+@example
+awk '@{ print $1, $2 @}' inventory-shipped
+@end example
+
+@noindent
+Its output looks like this:
+
+@example
+Jan 13
+Feb 15
+Mar 15
+@dots{}
+@end example
+
+A common mistake in using the @code{print} statement is to omit the comma
+between two items. This often has the effect of making the items run
+together in the output, with no space. The reason for this is that
+juxtaposing two string expressions in @code{awk} means to concatenate
+them. For example, without the comma:
+
+@example
+awk '@{ print $1 $2 @}' inventory-shipped
+@end example
+
+@noindent
+prints:
+
+@example
+@group
+Jan13
+Feb15
+Mar15
+@dots{}
+@end group
+@end example
+
+Neither example's output makes much sense to someone unfamiliar with the
+file @file{inventory-shipped}. A heading line at the beginning would make
+it clearer. Let's add some headings to our table of months (@code{$1}) and
+green crates shipped (@code{$2}). We do this using the @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}) to force the headings to be printed only once:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, $2 @}' inventory-shipped
+@end example
+
+@noindent
+Did you already guess what happens? This program prints the following:
+
+@example
+@group
+Month Crates
+----- ------
+Jan 13
+Feb 15
+Mar 15
+@dots{}
+@end group
+@end example
+
+@noindent
+The headings and the table data don't line up! We can fix this by printing
+some spaces between the two fields:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end example
+
+You can imagine that this way of lining up columns can get pretty
+complicated when you have many columns to fix. Counting spaces for two
+or three columns can be simple, but with more than this you can get
+``lost'' quite easily. This is why the @code{printf} statement was
+created (@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing});
+one of its specialties is lining up columns of data.@refill
+
+@node Output Separators, OFMT, Print Examples, Printing
+@section Output Separators
+
+@cindex output field separator, @code{OFS}
+@vindex OFS
+@vindex ORS
+@cindex output record separator, @code{ORS}
+As mentioned previously, a @code{print} statement contains a list
+of items, separated by commas. In the output, the items are normally
+separated by single spaces. But they do not have to be spaces; a
+single space is only the default. You can specify any string of
+characters to use as the @dfn{output field separator} by setting the
+built-in variable @code{OFS}. The initial value of this variable
+is the string @w{@code{" "}}, that is, just a single space.@refill
+
+The output from an entire @code{print} statement is called an
+@dfn{output record}. Each @code{print} statement outputs one output
+record and then outputs a string called the @dfn{output record separator}.
+The built-in variable @code{ORS} specifies this string. The initial
+value of the variable is the string @code{"\n"} containing a newline
+character; thus, normally each @code{print} statement makes a separate line.
+
+You can change how output fields and records are separated by assigning
+new values to the variables @code{OFS} and/or @code{ORS}. The usual
+place to do this is in the @code{BEGIN} rule
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}), so
+that it happens before any input is processed. You may also do this
+with assignments on the command line, before the names of your input
+files.@refill
+
+The following example prints the first and second fields of each input
+record separated by a semicolon, with a blank line added after each
+line:@refill
+
+@example
+@group
+awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}
+ @{ print $1, $2 @}' BBS-list
+@end group
+@end example
+
+If the value of @code{ORS} does not contain a newline, all your output
+will be run together on a single line, unless you output newlines some
+other way.
+
+@node OFMT, Printf, Output Separators, Printing
+@section Controlling Numeric Output with @code{print}
+@vindex OFMT
+When you use the @code{print} statement to print numeric values,
+@code{awk} internally converts the number to a string of characters,
+and prints that string. @code{awk} uses the @code{sprintf} function
+to do this conversion. For now, it suffices to say that the @code{sprintf}
+function accepts a @dfn{format specification} that tells it how to format
+numbers (or strings), and that there are a number of different ways that
+numbers can be formatted. The different format specifications are discussed
+more fully in
+@ref{Printf, ,Using @code{printf} Statements for Fancier Printing}.@refill
+
+The built-in variable @code{OFMT} contains the default format specification
+that @code{print} uses with @code{sprintf} when it wants to convert a
+number to a string for printing. By supplying different format specifications
+as the value of @code{OFMT}, you can change how @code{print} will print
+your numbers. As a brief example:
+
+@example
+@group
+awk 'BEGIN @{ OFMT = "%d" # print numbers as integers
+ print 17.23 @}'
+@end group
+@end example
+
+@noindent
+will print @samp{17}.
+
+@node Printf, Redirection, OFMT, Printing
+@section Using @code{printf} Statements for Fancier Printing
+@cindex formatted output
+@cindex output, formatted
+
+If you want more precise control over the output format than
+@code{print} gives you, use @code{printf}. With @code{printf} you can
+specify the width to use for each item, and you can specify various
+stylistic choices for numbers (such as what radix to use, whether to
+print an exponent, whether to print a sign, and how many digits to print
+after the decimal point). You do this by specifying a string, called
+the @dfn{format string}, which controls how and where to print the other
+arguments.
+
+@menu
+* Basic Printf:: Syntax of the @code{printf} statement.
+* Control Letters:: Format-control letters.
+* Format Modifiers:: Format-specification modifiers.
+* Printf Examples:: Several examples.
+@end menu
+
+@node Basic Printf, Control Letters, Printf, Printf
+@subsection Introduction to the @code{printf} Statement
+
+@cindex @code{printf} statement, syntax of
+The @code{printf} statement looks like this:@refill
+
+@example
+printf @var{format}, @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of arguments may optionally be enclosed in parentheses. The
+parentheses are necessary if any of the item expressions uses a
+relational operator; otherwise it could be confused with a redirection
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+The relational operators are @samp{==},
+@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
+@samp{!~} (@pxref{Comparison Ops, ,Comparison Expressions}).@refill
+
+@cindex format string
+The difference between @code{printf} and @code{print} is the argument
+@var{format}. This is an expression whose value is taken as a string; it
+specifies how to output each of the other arguments. It is called
+the @dfn{format string}.
+
+The format string is the same as in the @sc{ansi} C library function
+@code{printf}. Most of @var{format} is text to be output verbatim.
+Scattered among this text are @dfn{format specifiers}, one per item.
+Each format specifier says to output the next item at that place in the
+format.@refill
+
+The @code{printf} statement does not automatically append a newline to its
+output. It outputs only what the format specifies. So if you want
+a newline, you must include one in the format. The output separator
+variables @code{OFS} and @code{ORS} have no effect on @code{printf}
+statements.@refill
+
+@node Control Letters, Format Modifiers, Basic Printf, Printf
+@subsection Format-Control Letters
+@cindex @code{printf}, format-control characters
+@cindex format specifier
+
+A format specifier starts with the character @samp{%} and ends with a
+@dfn{format-control letter}; it tells the @code{printf} statement how
+to output one item. (If you actually want to output a @samp{%}, write
+@samp{%%}.) The format-control letter specifies what kind of value to
+print. The rest of the format specifier is made up of optional
+@dfn{modifiers} which are parameters such as the field width to use.@refill
+
+Here is a list of the format-control letters:
+
+@table @samp
+@item c
+This prints a number as an ASCII character. Thus, @samp{printf "%c",
+65} outputs the letter @samp{A}. The output for a string value is
+the first character of the string.
+
+@item d
+This prints a decimal integer.
+
+@item i
+This also prints a decimal integer.
+
+@item e
+This prints a number in scientific (exponential) notation.
+For example,
+
+@example
+printf "%4.3e", 1950
+@end example
+
+@noindent
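+prints @samp{1.950e+03}, which has four significant figures, three of
+which follow the decimal point as requested by the precision @samp{.3}.
+The width @samp{4} is only a minimum, which this output already exceeds.
+The @samp{4.3} are @dfn{modifiers}, discussed below.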
+
+@item f
+This prints a number in floating point notation.
+
+@item g
+This prints a number in either scientific notation or floating point
+notation, whichever uses fewer characters.
+@ignore
+From: gatech!ames!elroy!cit-vax!EQL.Caltech.Edu!rankin (Pat Rankin)
+
+In the description of printf formats (p.43), the information for %g
+is incorrect (mainly, it's too much of an oversimplification). It's
+wrong in the AWK book too, and in the gawk man page. I suggested to
+David Trueman before 2.13 was released that the latter be revised, so
+that it matched gawk's behavior (rather than trying to change gawk to
+match the docs ;-). The documented description is nice and simple, but
+it doesn't match the actual underlying behavior of %g in the various C
+run-time libraries that gawk relies on. The precision value for g format
+is different than for f and e formats, so it's inaccurate to say 'g' is
+the shorter of 'e' or 'f'. For 'g', precision represents the number of
+significant digits rather than the number of decimal places, and it has
+special rules about how to format numbers with range between 10E-1 and
+10E-4. All in all, it's pretty messy, and I had to add that clumsy
+GFMT_WORKAROUND code because the VMS run-time library doesn't conform to
+the ANSI-C specifications.
+@end ignore
+
+@item o
+This prints an unsigned octal integer.
+
+@item s
+This prints a string.
+
+@item x
+This prints an unsigned hexadecimal integer.
+
+@item X
+This prints an unsigned hexadecimal integer. However, for the values 10
+through 15, it uses the letters @samp{A} through @samp{F} instead of
+@samp{a} through @samp{f}.
+
+@item %
+This isn't really a format-control letter, but it does have a meaning
+when used after a @samp{%}: the sequence @samp{%%} outputs one
+@samp{%}. It does not consume an argument.
+@end table
+
+@node Format Modifiers, Printf Examples, Control Letters, Printf
+@subsection Modifiers for @code{printf} Formats
+
+@cindex @code{printf}, modifiers
+@cindex modifiers (in format specifiers)
+A format specification can also include @dfn{modifiers} that can control
+how much of the item's value is printed and how much space it gets. The
+modifiers come between the @samp{%} and the format-control letter. Here
+are the possible modifiers, in the order in which they may appear:
+
+@table @samp
+@item -
+The minus sign, used before the width modifier, says to left-justify
+the argument within its specified width. Normally the argument
+is printed right-justified in the specified width. Thus,
+
+@example
+printf "%-4s", "foo"
+@end example
+
+@noindent
+prints @samp{foo }.
+
+@item @var{width}
+This is a number representing the desired width of a field. Inserting any
+number between the @samp{%} sign and the format-control letter forces the
+field to be expanded to this width. The default way to do this is to
+pad with spaces on the left. For example,
+
+@example
+printf "%4s", "foo"
+@end example
+
+@noindent
+prints @samp{ foo}.
+
+The value of @var{width} is a minimum width, not a maximum. If the item
+value requires more than @var{width} characters, it can be as wide as
+necessary. Thus,
+
+@example
+printf "%4s", "foobar"
+@end example
+
+@noindent
+prints @samp{foobar}.
+
+Preceding the @var{width} with a minus sign causes the output to be
+padded with spaces on the right, instead of on the left.
+
+@item .@var{prec}
+This is a number that specifies the precision to use when printing.
+This specifies the number of digits you want printed to the right of the
+decimal point. For a string, it specifies the maximum number of
+characters from the string that should be printed.
+@end table
+
+The C library @code{printf}'s dynamic @var{width} and @var{prec}
+capability (for example, @code{"%*.*s"}) is supported. Instead of
+supplying explicit @var{width} and/or @var{prec} values in the format
+string, you pass them in the argument list. For example:@refill
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "<%*.*s>\n", w, p, s
+@end example
+
+@noindent
+is exactly equivalent to
+
+@example
+s = "abcdefg"
+printf "<%5.3s>\n", s
+@end example
+
+@noindent
+Both programs output @samp{@w{<@bullet{}@bullet{}abc>}}. (We have
+used the bullet symbol ``@bullet{}'' to represent a space, to clearly
+show you that there are two spaces in the output.)@refill
+
+Earlier versions of @code{awk} did not support this capability. You may
+simulate it by using concatenation to build up the format string,
+like so:@refill
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "<%" w "." p "s>\n", s
+@end example
+
+@noindent
+This is not particularly easy to read, however.
+
+@node Printf Examples, , Format Modifiers, Printf
+@subsection Examples of Using @code{printf}
+
+Here is how to use @code{printf} to make an aligned table:
+
+@example
+awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end example
+
+@noindent
+prints the names of bulletin boards (@code{$1}) of the file
+@file{BBS-list} as a string of 10 characters, left justified. It also
+prints the phone numbers (@code{$2}) afterward on the line. This
+produces an aligned two-column table of names and phone numbers:@refill
+
+@example
+@group
+aardvark 555-5553
+alpo-net 555-3412
+barfly 555-7685
+bites 555-1675
+camelot 555-0542
+core 555-2912
+fooey 555-1234
+foot 555-6699
+macfoo 555-6480
+sdace 555-3430
+sabafoo 555-2127
+@end group
+@end example
+
+Did you notice that we did not specify that the phone numbers be printed
+as numbers? They had to be printed as strings because the numbers are
+separated by a dash. If we had tried to print the phone numbers as
+numbers, @code{awk} would have converted only the leading digits, and all
+we would have gotten is @samp{555}. This would have led to some pretty
+confusing results.
+
+We did not specify a width for the phone numbers because they are the
+last things on their lines. We don't need to put spaces after them.
+
+We could make our table look even nicer by adding headings to the tops
+of the columns. To do this, use the @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns})
+to force the header to be printed only once, at the beginning of
+the @code{awk} program:@refill
+
+@example
+@group
+awk 'BEGIN @{ print "Name Number"
+ print "---- ------" @}
+ @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
+@end example
+
+Did you notice that we mixed @code{print} and @code{printf} statements in
+the above example? We could have used just @code{printf} statements to get
+the same results:
+
+@example
+@group
+awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
+ printf "%-10s %s\n", "----", "------" @}
+ @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
+@end example
+
+@noindent
+By outputting each column heading with the same format specification
+used for the elements of the column, we have made sure that the headings
+are aligned just like the columns.
+
+The fact that the same format specification is used three times can be
+emphasized by storing it in a variable, like this:
+
+@example
+awk 'BEGIN @{ format = "%-10s %s\n"
+ printf format, "Name", "Number"
+ printf format, "----", "------" @}
+ @{ printf format, $1, $2 @}' BBS-list
+@end example
+
+See if you can use the @code{printf} statement to line up the headings and
+table data for our @file{inventory-shipped} example covered earlier in the
+section on the @code{print} statement
+(@pxref{Print, ,The @code{print} Statement}).@refill
+
+@node Redirection, Special Files, Printf, Printing
+@section Redirecting Output of @code{print} and @code{printf}
+
+@cindex output redirection
+@cindex redirection of output
+So far we have been dealing only with output that prints to the standard
+output, usually your terminal. Both @code{print} and @code{printf} can
+also send their output to other places.
+This is called @dfn{redirection}.@refill
+
+A redirection appears after the @code{print} or @code{printf} statement.
+Redirections in @code{awk} are written just like redirections in shell
+commands, except that they are written inside the @code{awk} program.
+
+@menu
+* File/Pipe Redirection:: Redirecting Output to Files and Pipes.
+* Close Output:: How to close output files and pipes.
+@end menu
+
+@node File/Pipe Redirection, Close Output, Redirection, Redirection
+@subsection Redirecting Output to Files and Pipes
+
+Here are the three forms of output redirection. They are all shown for
+the @code{print} statement, but they work identically for @code{printf}
+also.@refill
+
+@table @code
+@item print @var{items} > @var{output-file}
+This type of redirection prints the items onto the output file
+@var{output-file}. The file name @var{output-file} can be any
+expression. Its value is changed to a string and then used as a
+file name (@pxref{Expressions, ,Expressions as Action Statements}).@refill
+
+When this type of redirection is used, the @var{output-file} is erased
+before the first output is written to it. Subsequent writes do not
+erase @var{output-file}, but append to it. If @var{output-file} does
+not exist, then it is created.@refill
+
+For example, here is how one @code{awk} program can write a list of
+BBS names to a file @file{name-list} and a list of phone numbers to a
+file @file{phone-list}. Each output file contains one name or number
+per line.
+
+@smallexample
+awk '@{ print $2 > "phone-list"
+ print $1 > "name-list" @}' BBS-list
+@end smallexample
+
+@item print @var{items} >> @var{output-file}
+This type of redirection prints the items onto the output file
+@var{output-file}. The difference between this and the
+single-@samp{>} redirection is that the old contents (if any) of
+@var{output-file} are not erased. Instead, the @code{awk} output is
+appended to the file.
+
+@cindex pipes for output
+@cindex output, piping
+@item print @var{items} | @var{command}
+It is also possible to send output through a @dfn{pipe} instead of into a
+file. This type of redirection opens a pipe to @var{command} and writes
+the values of @var{items} through this pipe, to another process created
+to execute @var{command}.@refill
+
+The redirection argument @var{command} is actually an @code{awk}
+expression. Its value is converted to a string, whose contents give the
+shell command to be run.
+
+For example, this produces two files, one unsorted list of BBS names
+and one list sorted in reverse alphabetical order:
+
+@smallexample
+awk '@{ print $1 > "names.unsorted"
+ print $1 | "sort -r > names.sorted" @}' BBS-list
+@end smallexample
+
+Here the unsorted list is written with an ordinary redirection while
+the sorted list is written by piping through the @code{sort} utility.
+
+Here is an example that uses redirection to mail a message to a mailing
+list @samp{bug-system}. This might be useful when trouble is encountered
+in an @code{awk} script run periodically for system maintenance.
+
+@smallexample
+report = "mail bug-system"
+print "Awk script failed:", $0 | report
+print "at record number", FNR, "of", FILENAME | report
+close(report)
+@end smallexample
+
+We call the @code{close} function here because it's a good idea to close
+the pipe as soon as all the intended output has been sent to it.
+@xref{Close Output, ,Closing Output Files and Pipes}, for more information
+on this. This example also illustrates the use of a variable to represent
+a @var{file} or @var{command}: it is not necessary to always
+use a string constant. Using a variable is generally a good idea,
+since @code{awk} requires you to spell the string value identically
+every time.
+@end table
+
+Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system
+to open a file or pipe only if the particular @var{file} or @var{command}
+you've specified has not already been written to by your program, or if
+it has been closed since it was last written to.@refill
+
+@node Close Output, , File/Pipe Redirection, Redirection
+@subsection Closing Output Files and Pipes
+@cindex closing output files and pipes
+@findex close
+
+When a file or pipe is opened, the file name or command associated with
+it is remembered by @code{awk} and subsequent writes to the same file or
+command are appended to the previous writes. The file or pipe stays
+open until @code{awk} exits. This is usually convenient.
+
+Sometimes there is a reason to close an output file or pipe earlier
+than that. To do this, use the @code{close} function, as follows:
+
+@example
+close(@var{filename})
+@end example
+
+@noindent
+or
+
+@example
+close(@var{command})
+@end example
+
+The argument @var{filename} or @var{command} can be any expression.
+Its value must exactly equal the string used to open the file or pipe
+to begin with---for example, if you open a pipe with this:
+
+@example
+print $1 | "sort -r > names.sorted"
+@end example
+
+@noindent
+then you must close it with this:
+
+@example
+close("sort -r > names.sorted")
+@end example
+
+Here are some reasons why you might need to close an output file:
+
+@itemize @bullet
+@item
+To write a file and read it back later on in the same @code{awk}
+program. Close the file when you are finished writing it; then
+you can start reading it with @code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
+
+@item
+To write numerous files, successively, in the same @code{awk}
+program. If you don't close the files, eventually you may exceed a
+system limit on the number of open files in one process. So close
+each one when you are finished writing it.
+
+@item
+To make a command finish. When you redirect output through a pipe,
+the command reading the pipe normally continues to try to read input
+as long as the pipe is open. Often this means the command cannot
+really do its work until the pipe is closed. For example, if you
+redirect output to the @code{mail} program, the message is not
+actually sent until the pipe is closed.
+
+@item
+To run the same program a second time, with the same arguments.
+This is not the same thing as giving more input to the first run!
+
+For example, suppose you pipe output to the @code{mail} program. If you
+output several lines redirected to this pipe without closing it, they make
+a single message of several lines. By contrast, if you close the pipe
+after each line of output, then each line makes a separate message.
+@end itemize
+
+@iftex
+@vindex ERRNO
+@cindex differences: @code{gawk} and @code{awk}
+@end iftex
+@code{close} returns a value of zero if the close succeeded.
+Otherwise, the value will be non-zero.
+In this case, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
+@node Special Files, , Redirection, Printing
+@section Standard I/O Streams
+@cindex standard input
+@cindex standard output
+@cindex standard error output
+@cindex file descriptors
+
+Running programs conventionally have three input and output streams
+already available to them for reading and writing. These are known as
+the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error
+output}. These streams are, by default, terminal input and output, but
+they are often redirected with the shell, via the @samp{<}, @samp{<<},
+@samp{>}, @samp{>>}, @samp{>&} and @samp{|} operators. Standard error
+is used only for writing error messages; the reason we have two separate
+streams, standard output and standard error, is so that they can be
+redirected separately.
+
+@iftex
+@cindex differences: @code{gawk} and @code{awk}
+@end iftex
+In other implementations of @code{awk}, the only way to write an error
+message to standard error in an @code{awk} program is as follows:
+
+@smallexample
+print "Serious error detected!\n" | "cat 1>&2"
+@end smallexample
+
+@noindent
+This works by opening a pipeline to a shell command which can access the
+standard error stream which it inherits from the @code{awk} process.
+This is far from elegant, and is also inefficient, since it requires a
+separate process. So people writing @code{awk} programs have often
+neglected to do this. Instead, they have sent the error messages to the
+terminal, like this:
+
+@smallexample
+@group
+NF != 4 @{
+ printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/tty"
+@}
+@end group
+@end smallexample
+
+@noindent
+This has the same effect most of the time, but not always: although the
+standard error stream is usually the terminal, it can be redirected, and
+when that happens, writing to the terminal is not correct. In fact, if
+@code{awk} is run from a background job, it may not have a terminal at all.
+Then opening @file{/dev/tty} will fail.
+
+@code{gawk} provides special file names for accessing the three standard
+streams. When you redirect input or output in @code{gawk}, if the file name
+matches one of these special names, then @code{gawk} directly uses the
+stream it stands for.
+
+@cindex @file{/dev/stdin}
+@cindex @file{/dev/stdout}
+@cindex @file{/dev/stderr}
+@cindex @file{/dev/fd/}
+@table @file
+@item /dev/stdin
+The standard input (file descriptor 0).
+
+@item /dev/stdout
+The standard output (file descriptor 1).
+
+@item /dev/stderr
+The standard error output (file descriptor 2).
+
+@item /dev/fd/@var{N}
+The file associated with file descriptor @var{N}. Such a file must have
+been opened by the program initiating the @code{awk} execution (typically
+the shell). Unless you take special pains, only descriptors 0, 1 and 2
+are available.
+@end table
+
+The file names @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
+respectively, but they are more self-explanatory.
+
+The proper way to write an error message in a @code{gawk} program
+is to use @file{/dev/stderr}, like this:
+
+@smallexample
+NF != 4 @{
+ printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/stderr"
+@}
+@end smallexample
+
+@code{gawk} also provides special file names that give access to information
+about the running @code{gawk} process. Each of these ``files'' provides
+a single record of information. To read them more than once, you must
+first close them with the @code{close} function
+(@pxref{Close Input, ,Closing Input Files and Pipes}).
+The filenames are:
+
+@cindex @file{/dev/pid}
+@cindex @file{/dev/pgrpid}
+@cindex @file{/dev/ppid}
+@cindex @file{/dev/user}
+@table @file
+@item /dev/pid
+Reading this file returns the process ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/ppid
+Reading this file returns the parent process ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/pgrpid
+Reading this file returns the process group ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/user
+Reading this file returns a single record terminated with a newline.
+The fields are separated with blanks. The fields represent the
+following information:
+
+@table @code
+@item $1
+The value of the @code{getuid} system call.
+
+@item $2
+The value of the @code{geteuid} system call.
+
+@item $3
+The value of the @code{getgid} system call.
+
+@item $4
+The value of the @code{getegid} system call.
+@end table
+
+If there are any additional fields, they are the group IDs returned by
+the @code{getgroups} system call.
+(Multiple groups may not be supported on all systems.)@refill
+@end table
+
+These special file names may be used on the command line as data
+files, as well as for I/O redirections within an @code{awk} program.
+They may not be used as source files with the @samp{-f} option.
+
+Recognition of these special file names is disabled if @code{gawk} is in
+compatibility mode (@pxref{Command Line, ,Invoking @code{awk}}).
+
+@quotation
+@strong{Caution}: Unless your system actually has a @file{/dev/fd} directory
+(or any of the other above listed special files),
+the interpretation of these file names is done by @code{gawk} itself.
+For example, using @samp{/dev/fd/4} for output will actually write on
+file descriptor 4, and not on a new file descriptor that was @code{dup}'ed
+from file descriptor 4. Most of the time this does not matter; however, it
+is important to @emph{not} close any of the files related to file descriptors
+0, 1, and 2. If you do close one of these files, unpredictable behavior
+will result.
+@end quotation
+
+@node One-liners, Patterns, Printing, Top
+@chapter Useful ``One-liners''
+
+@cindex one-liners
+Useful @code{awk} programs are often short, just a line or two. Here is a
+collection of useful, short programs to get you started. Some of these
+programs contain constructs that haven't been covered yet. The description
+of the program will give you a good idea of what is going on, but please
+read the rest of the manual to become an @code{awk} expert!
+
+@c Per suggestions from Michal Jaegermann
+@ifinfo
+Since you are reading this in Info, each line of the example code is
+enclosed in quotes, to represent text that you would type literally.
+The examples themselves represent shell commands that use single quotes
+to keep the shell from interpreting the contents of the program.
+When reading the examples, focus on the text between the open and close
+quotes.
+@end ifinfo
+
+@table @code
+@item awk '@{ if (NF > max) max = NF @}
+@itemx @ @ @ @ @ END @{ print max @}'
+This program prints the maximum number of fields on any input line.
+
+@item awk 'length($0) > 80'
+This program prints every line longer than 80 characters. The sole
+rule has a relational expression as its pattern, and has no action (so the
+default action, printing the record, is used).
+
+@item awk 'NF > 0'
+This program prints every line that has at least one field. This is an
+easy way to delete blank lines from a file (or rather, to create a new
+file similar to the old file but from which the blank lines have been
+deleted).
+
+@item awk '@{ if (NF > 0) print @}'
+This program also prints every line that has at least one field. Here we
+allow the rule to match every line, then decide in the action whether
+to print.
+
+@item awk@ 'BEGIN@ @{@ for (i = 1; i <= 7; i++)
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ print int(101 * rand()) @}'
+This program prints 7 random numbers from 0 to 100, inclusive.
+
+@item ls -l @var{files} | awk '@{ x += $4 @} ; END @{ print "total bytes: " x @}'
+This program prints the total number of bytes used by @var{files}.
+
+@item expand@ @var{file}@ |@ awk@ '@{ if (x < length()) x = length() @}
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "maximum line length is " x @}'
+This program prints the maximum line length of @var{file}. The input
+is piped through the @code{expand} program to change tabs into spaces,
+so the widths compared are actually the right-margin columns.
+
+@item awk 'BEGIN @{ FS = ":" @}
+@itemx @ @ @ @ @ @{ print $1 | "sort" @}' /etc/passwd
+This program prints a sorted list of the login names of all users.
+
+@item awk '@{ nlines++ @}
+@itemx @ @ @ @ @ END@ @{ print nlines @}'
+This program counts lines in a file.
+
+@item awk 'END @{ print NR @}'
+This program also counts lines in a file, but lets @code{awk} do the work.
+
+@item awk '@{ print NR, $0 @}'
+This program adds line numbers to all its input files,
+similar to @samp{cat -n}.
+@end table
+
+@node Patterns, Actions, One-liners, Top
+@chapter Patterns
+@cindex pattern, definition of
+
+Patterns in @code{awk} control the execution of rules: a rule is
+executed when its pattern matches the current input record. This
+chapter tells all about how to write patterns.
+
+@menu
+* Kinds of Patterns:: A list of all kinds of patterns.
+ The following subsections describe
+ them in detail.
+* Regexp:: Regular expressions such as @samp{/foo/}.
+* Comparison Patterns:: Comparison expressions such as @code{$1 > 10}.
+* Boolean Patterns:: Combining comparison expressions.
+* Expression Patterns:: Any expression can be used as a pattern.
+* Ranges:: Pairs of patterns specify record ranges.
+* BEGIN/END:: Specifying initialization and cleanup rules.
+* Empty:: The empty pattern, which matches every record.
+@end menu
+
+@node Kinds of Patterns, Regexp, Patterns, Patterns
+@section Kinds of Patterns
+@cindex patterns, types of
+
+Here is a summary of the types of patterns supported in @code{awk}.
+@c At the next rewrite, check to see that this order matches the
+@c order in the text. It might not matter to a reader, but it's good
+@c style. Also, it might be nice to mention all the topics of sections
+@c that follow in this list; that way people can scan and know when to
+@c expect a specific topic. Specifically please also make an entry
+@c for Boolean operators as patterns in the right place. --mew
+
+@table @code
+@item /@var{regular expression}/
+A regular expression as a pattern. It matches when the text of the
+input record fits the regular expression.
+(@xref{Regexp, ,Regular Expressions as Patterns}.)@refill
+
+@item @var{expression}
+A single expression. It matches when its value
+is nonzero (if a number) or nonnull (if a string).
+(@xref{Expression Patterns, ,Expressions as Patterns}.)@refill
+
+@item @var{pat1}, @var{pat2}
+A pair of patterns separated by a comma, specifying a range of records.
+(@xref{Ranges, ,Specifying Record Ranges with Patterns}.)
+
+@item BEGIN
+@itemx END
+Special patterns to supply start-up or clean-up information to
+@code{awk}. (@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.)
+
+@item @var{null}
+The empty pattern matches every input record.
+(@xref{Empty, ,The Empty Pattern}.)@refill
+@end table
+
+
+@node Regexp, Comparison Patterns, Kinds of Patterns, Patterns
+@section Regular Expressions as Patterns
+@cindex pattern, regular expressions
+@cindex regexp
+@cindex regular expressions as patterns
+
+A @dfn{regular expression}, or @dfn{regexp}, is a way of describing a
+class of strings. A regular expression enclosed in slashes (@samp{/})
+is an @code{awk} pattern that matches every input record whose text
+belongs to that class.
+
+The simplest regular expression is a sequence of letters, numbers, or
+both. Such a regexp matches any string that contains that sequence.
+Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
+Therefore, the pattern @code{/foo/} matches any input record containing
+@samp{foo}. Other kinds of regexps let you specify more complicated
+classes of strings.
+
+@menu
+* Regexp Usage:: How to Use Regular Expressions
+* Regexp Operators:: Regular Expression Operators
+* Case-sensitivity:: How to do case-insensitive matching.
+@end menu
+
+@node Regexp Usage, Regexp Operators, Regexp, Regexp
+@subsection How to Use Regular Expressions
+
+A regular expression can be used as a pattern by enclosing it in
+slashes. Then the regular expression is matched against the
+entire text of each record. (Normally, it only needs
+to match some part of the text in order to succeed.) For example, this
+prints the second field of each record that contains @samp{foo} anywhere:
+
+@example
+awk '/foo/ @{ print $2 @}' BBS-list
+@end example
+
+@cindex regular expression matching operators
+@cindex string-matching operators
+@cindex operators, string-matching
+@cindex operators, regexp matching
+@cindex regexp search operators
+Regular expressions can also be used in comparison expressions. Then
+you can specify the string to match against; it need not be the entire
+current input record. These comparison expressions can be used as
+patterns or in @code{if}, @code{while}, @code{for}, and @code{do} statements.
+
+@table @code
+@item @var{exp} ~ /@var{regexp}/
+This is true if the expression @var{exp} (taken as a character string)
+is matched by @var{regexp}. The following example matches, or selects,
+all input records with the upper-case letter @samp{J} somewhere in the
+first field:@refill
+
+@example
+awk '$1 ~ /J/' inventory-shipped
+@end example
+
+So does this:
+
+@example
+awk '@{ if ($1 ~ /J/) print @}' inventory-shipped
+@end example
+
+@item @var{exp} !~ /@var{regexp}/
+This is true if the expression @var{exp} (taken as a character string)
+is @emph{not} matched by @var{regexp}. The following example matches,
+or selects, all input records whose first field @emph{does not} contain
+the upper-case letter @samp{J}:@refill
+
+@example
+awk '$1 !~ /J/' inventory-shipped
+@end example
+@end table
+
+@cindex computed regular expressions
+@cindex regular expressions, computed
+@cindex dynamic regular expressions
+The right hand side of a @samp{~} or @samp{!~} operator need not be a
+constant regexp (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated, and converted if
+necessary to a string; the contents of the string are used as the
+regexp. A regexp that is computed in this way is called a @dfn{dynamic
+regexp}. For example:
+
+@example
+identifier_regexp = "[A-Za-z_][A-Za-z_0-9]*"
+$0 ~ identifier_regexp
+@end example
+
+@noindent
+sets @code{identifier_regexp} to a regexp that describes @code{awk}
+variable names, and tests if the input record matches this regexp.
+
+@node Regexp Operators, Case-sensitivity, Regexp Usage, Regexp
+@subsection Regular Expression Operators
+@cindex metacharacters
+@cindex regular expression metacharacters
+
+You can combine regular expressions with the following characters,
+called @dfn{regular expression operators}, or @dfn{metacharacters}, to
+increase the power and versatility of regular expressions.
+
+Here is a table of metacharacters. All characters not listed in the
+table stand for themselves.
+
+@table @code
+@item ^
+This matches the beginning of the string or the beginning of a line
+within the string. For example:
+
+@example
+^@@chapter
+@end example
+
+@noindent
+matches the @samp{@@chapter} at the beginning of a string, and can be used
+to identify chapter beginnings in Texinfo source files.
+
+@item $
+This is similar to @samp{^}, but it matches only at the end of a string
+or the end of a line within the string. For example:
+
+@example
+p$
+@end example
+
+@noindent
+matches a record that ends with a @samp{p}.
+
+@item .
+This matches any single character except a newline. For example:
+
+@example
+.P
+@end example
+
+@noindent
+matches any single character followed by a @samp{P} in a string. Using
+concatenation we can make regular expressions like @samp{U.A}, which
+matches any three-character sequence that begins with @samp{U} and ends
+with @samp{A}.
+
+@item [@dots{}]
+This is called a @dfn{character set}. It matches any one of the
+characters that are enclosed in the square brackets. For example:
+
+@example
+[MVX]
+@end example
+
+@noindent
+matches any one of the characters @samp{M}, @samp{V}, or @samp{X} in a
+string.@refill
+
+Ranges of characters are indicated by using a hyphen between the beginning
+and ending characters, and enclosing the whole thing in brackets. For
+example:@refill
+
+@example
+[0-9]
+@end example
+
+@noindent
+matches any digit.
+
+To include the character @samp{\}, @samp{]}, @samp{-} or @samp{^} in a
+character set, put a @samp{\} in front of it. For example:
+
+@example
+[d\]]
+@end example
+
+@noindent
+matches either @samp{d}, or @samp{]}.@refill
+
+This treatment of @samp{\} is compatible with other @code{awk}
+implementations, and is also mandated by the @sc{posix} Command Language
+and Utilities standard. The regular expressions in @code{awk} are a superset
+of the @sc{posix} specification for Extended Regular Expressions (EREs).
+@sc{posix} EREs are based on the regular expressions accepted by the
+traditional @code{egrep} utility.
+
+In @code{egrep} syntax, backslash is not syntactically special within
+square brackets. This means that special tricks have to be used to
+represent the characters @samp{]}, @samp{-} and @samp{^} as members of a
+character set.
+
+In @code{egrep} syntax, to match @samp{-}, write it as @samp{---},
+which is a range containing only @w{@samp{-}.} You may also give @samp{-}
+as the first or last character in the set. To match @samp{^}, put it
+anywhere except as the first character of a set. To match a @samp{]},
+make it the first character in the set. For example:@refill
+
+@example
+[]d^]
+@end example
+
+@noindent
+matches either @samp{]}, @samp{d} or @samp{^}.@refill
+
+@item [^ @dots{}]
+This is a @dfn{complemented character set}. The first character after
+the @samp{[} @emph{must} be a @samp{^}. It matches any characters
+@emph{except} those in the square brackets (or newline). For example:
+
+@example
+[^0-9]
+@end example
+
+@noindent
+matches any character that is not a digit.
+
+@item |
+This is the @dfn{alternation operator} and it is used to specify
+alternatives. For example:
+
+@example
+^P|[0-9]
+@end example
+
+@noindent
+matches any string that matches either @samp{^P} or @samp{[0-9]}. This
+means it matches any string that contains a digit or starts with @samp{P}.
+
+The alternation applies to the largest possible regexps on either side.
+@item (@dots{})
+Parentheses are used for grouping in regular expressions as in
+arithmetic. They can be used to concatenate regular expressions
+containing the alternation operator, @samp{|}.
+
+@item *
+This symbol means that the preceding regular expression is to be
+repeated as many times as possible to find a match. For example:
+
+@example
+ph*
+@end example
+
+@noindent
+applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
+to one @samp{p} followed by any number of @samp{h}s. This will also match
+just @samp{p} if no @samp{h}s are present.
+
+The @samp{*} repeats the @emph{smallest} possible preceding expression.
+(Use parentheses if you wish to repeat a larger expression.) It finds
+as many repetitions as possible. For example:
+
+@example
+awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample
+@end example
+
+@noindent
+prints every record in the input containing a string of the form
+@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.@refill
+
+@item +
+This symbol is similar to @samp{*}, but the preceding expression must be
+matched at least once. This means that:
+
+@example
+wh+y
+@end example
+
+@noindent
+would match @samp{why} and @samp{whhy} but not @samp{wy}, whereas
+@samp{wh*y} would match all three of these strings. This is a simpler
+way of writing the last @samp{*} example:
+
+@example
+awk '/\(c[ad]+r x\)/ @{ print @}' sample
+@end example
+
+@item ?
+This symbol is similar to @samp{*}, but the preceding expression can be
+matched once or not at all. For example:
+
+@example
+fe?d
+@end example
+
+@noindent
+will match @samp{fed} and @samp{fd}, but nothing else.@refill
+
+@item \
+This is used to suppress the special meaning of a character when
+matching. For example:
+
+@example
+\$
+@end example
+
+@noindent
+matches the character @samp{$}.
+
+The escape sequences used for string constants
+(@pxref{Constants, ,Constant Expressions}) are
+valid in regular expressions as well; they are also introduced by a
+@samp{\}.@refill
+@end table
+
+In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators have
+the highest precedence, followed by concatenation, and finally by @samp{|}.
+As in arithmetic, parentheses can change how operators are grouped.@refill
+
+@node Case-sensitivity, , Regexp Operators, Regexp
+@subsection Case-sensitivity in Matching
+
+Case is normally significant in regular expressions, both when matching
+ordinary characters (i.e., not metacharacters), and inside character
+sets. Thus a @samp{w} in a regular expression matches only a lower case
+@samp{w} and not an upper case @samp{W}.
+
+The simplest way to do a case-independent match is to use a character
+set: @samp{[Ww]}. However, this can be cumbersome if you need to use it
+often; and it can make the regular expressions harder for humans to
+read. There are two other alternatives that you might prefer.
+
+One way to do a case-insensitive match at a particular point in the
+program is to convert the data to a single case, using the
+@code{tolower} or @code{toupper} built-in string functions (which we
+haven't discussed yet;
+@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+For example:@refill
+
+@example
+tolower($1) ~ /foo/ @{ @dots{} @}
+@end example
+
+@noindent
+converts the first field to lower case before matching against it.
+
+Another method is to set the variable @code{IGNORECASE} to a nonzero
+value (@pxref{Built-in Variables}). When @code{IGNORECASE} is not zero,
+@emph{all} regexp operations ignore case. Changing the value of
+@code{IGNORECASE} dynamically controls the case sensitivity of your
+program as it runs. Case is significant by default because
+@code{IGNORECASE} (like most variables) is initialized to zero.
+
+@example
+x = "aB"
+if (x ~ /ab/) @dots{} # this test will fail
+
+IGNORECASE = 1
+if (x ~ /ab/) @dots{} # now it will succeed
+@end example
+
+In general, you cannot use @code{IGNORECASE} to make certain rules
+case-insensitive and other rules case-sensitive, because there is no way
+to set @code{IGNORECASE} just for the pattern of a particular rule. To
+do this, you must use character sets or @code{tolower}. However, one
+thing you can do only with @code{IGNORECASE} is turn case-sensitivity on
+or off dynamically for all the rules at once.@refill
+
+@code{IGNORECASE} can be set on the command line, or in a @code{BEGIN}
+rule. Setting @code{IGNORECASE} from the command line is a way to make
+a program case-insensitive without having to edit it.
+
+The value of @code{IGNORECASE} has no effect if @code{gawk} is in
+compatibility mode (@pxref{Command Line, ,Invoking @code{awk}}).
+Case is always significant in compatibility mode.@refill
+
+@node Comparison Patterns, Boolean Patterns, Regexp, Patterns
+@section Comparison Expressions as Patterns
+@cindex comparison expressions as patterns
+@cindex pattern, comparison expressions
+@cindex relational operators
+@cindex operators, relational
+
+@dfn{Comparison patterns} test relationships such as equality between
+two strings or numbers. They are a special case of expression patterns
+(@pxref{Expression Patterns, ,Expressions as Patterns}). They are written
+with @dfn{relational operators}, which are a superset of those in C.
+Here is a table of them:@refill
+
+@table @code
+@item @var{x} < @var{y}
+True if @var{x} is less than @var{y}.
+
+@item @var{x} <= @var{y}
+True if @var{x} is less than or equal to @var{y}.
+
+@item @var{x} > @var{y}
+True if @var{x} is greater than @var{y}.
+
+@item @var{x} >= @var{y}
+True if @var{x} is greater than or equal to @var{y}.
+
+@item @var{x} == @var{y}
+True if @var{x} is equal to @var{y}.
+
+@item @var{x} != @var{y}
+True if @var{x} is not equal to @var{y}.
+
+@item @var{x} ~ @var{y}
+True if @var{x} matches the regular expression described by @var{y}.
+
+@item @var{x} !~ @var{y}
+True if @var{x} does not match the regular expression described by @var{y}.
+@end table
+
+The operands of a relational operator are compared as numbers if they
+are both numbers. Otherwise they are converted to, and compared as,
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers},
+for the detailed rules). Strings are compared by comparing the first
+character of each, then the second character of each,
+and so on, until there is a difference. If the two strings are equal until
+the shorter one runs out, the shorter one is considered to be less than the
+longer one. Thus, @code{"10"} is less than @code{"9"}, and @code{"abc"}
+is less than @code{"abcd"}.@refill
+
+The left operand of the @samp{~} and @samp{!~} operators is a string.
+The right operand is either a constant regular expression enclosed in
+slashes (@code{/@var{regexp}/}), or any expression, whose string value
+is used as a dynamic regular expression
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).@refill
+
+The following example prints the second field of each input record
+whose first field is precisely @samp{foo}.
+
+@example
+awk '$1 == "foo" @{ print $2 @}' BBS-list
+@end example
+
+@noindent
+Contrast this with the following regular expression match, which would
+accept any record with a first field that contains @samp{foo}:
+
+@example
+awk '$1 ~ "foo" @{ print $2 @}' BBS-list
+@end example
+
+@noindent
+or, equivalently, this one:
+
+@example
+awk '$1 ~ /foo/ @{ print $2 @}' BBS-list
+@end example
+
+@node Boolean Patterns, Expression Patterns, Comparison Patterns, Patterns
+@section Boolean Operators and Patterns
+@cindex patterns, boolean
+@cindex boolean patterns
+
+A @dfn{boolean pattern} is an expression which combines other patterns
+using the @dfn{boolean operators} ``or'' (@samp{||}), ``and''
+(@samp{&&}), and ``not'' (@samp{!}). Whether the boolean pattern
+matches an input record depends on whether its subpatterns match.
+
+For example, the following command prints all records in the input file
+@file{BBS-list} that contain both @samp{2400} and @samp{foo}.@refill
+
+@example
+awk '/2400/ && /foo/' BBS-list
+@end example
+
+The following command prints all records in the input file
+@file{BBS-list} that contain @emph{either} @samp{2400} or @samp{foo}, or
+both.@refill
+
+@example
+awk '/2400/ || /foo/' BBS-list
+@end example
+
+The following command prints all records in the input file
+@file{BBS-list} that do @emph{not} contain the string @samp{foo}.
+
+@example
+awk '! /foo/' BBS-list
+@end example
+
+Note that boolean patterns are a special case of expression patterns
+(@pxref{Expression Patterns, ,Expressions as Patterns}); they are
+expressions that use the boolean operators.
+@xref{Boolean Ops, ,Boolean Expressions}, for complete information
+on the boolean operators.@refill
+
+The subpatterns of a boolean pattern can be constant regular
+expressions, comparisons, or any other @code{awk} expressions. Range
+patterns are not expressions, so they cannot appear inside boolean
+patterns. Likewise, the special patterns @code{BEGIN} and @code{END},
+which never match any input record, are not expressions and cannot
+appear inside boolean patterns.
+
+@node Expression Patterns, Ranges, Boolean Patterns, Patterns
+@section Expressions as Patterns
+
+Any @code{awk} expression is also valid as an @code{awk} pattern.
+Then the pattern ``matches'' if the expression's value is nonzero (if a
+number) or nonnull (if a string).
+
+The expression is reevaluated each time the rule is tested against a new
+input record. If the expression uses fields such as @code{$1}, the
+value depends directly on the new input record's text; otherwise, it
+depends only on what has happened so far in the execution of the
+@code{awk} program, but that may still be useful.
+
+Comparison patterns are actually a special case of this. For
+example, the expression @code{$5 == "foo"} has the value 1 when the
+value of @code{$5} equals @code{"foo"}, and 0 otherwise; therefore, this
+expression as a pattern matches when the two values are equal.
+
+Boolean patterns are also special cases of expression patterns.
+
+A constant regexp as a pattern is also a special case of an expression
+pattern. @code{/foo/} as an expression has the value 1 if @samp{foo}
+appears in the current input record; thus, as a pattern, @code{/foo/}
+matches any record containing @samp{foo}.
+
+Other implementations of @code{awk} that are not yet @sc{posix} compliant
+are less general than @code{gawk}: they allow comparison expressions, and
+boolean combinations thereof (optionally with parentheses), but not
+necessarily other kinds of expressions.
+
+@node Ranges, BEGIN/END, Expression Patterns, Patterns
+@section Specifying Record Ranges with Patterns
+
+@cindex range pattern
+@cindex patterns, range
+A @dfn{range pattern} is made of two patterns separated by a comma, of
+the form @code{@var{begpat}, @var{endpat}}. It matches ranges of
+consecutive input records. The first pattern @var{begpat} controls
+where the range begins, and the second one @var{endpat} controls where
+it ends. For example,@refill
+
+@example
+awk '$1 == "on", $1 == "off"'
+@end example
+
+@noindent
+prints every record between @samp{on}/@samp{off} pairs, inclusive.
+
+A range pattern starts out by matching @var{begpat}
+against every input record; when a record matches @var{begpat}, the
+range pattern becomes @dfn{turned on}. The range pattern matches this
+record. As long as it stays turned on, it automatically matches every
+input record read. It also matches @var{endpat} against
+every input record; when that succeeds, the range pattern is turned
+off again for the following record. Now it goes back to checking
+@var{begpat} against each record.
+
+The record that turns on the range pattern and the one that turns it
+off both match the range pattern. If you don't want to operate on
+these records, you can write @code{if} statements in the rule's action
+to distinguish them.
+
+It is possible for a range pattern to be turned both on and off by the
+same record, if both conditions are satisfied by that record. Then the
+action is executed for just that record.
+
+@node BEGIN/END, Empty, Ranges, Patterns
+@section @code{BEGIN} and @code{END} Special Patterns
+
+@cindex @code{BEGIN} special pattern
+@cindex patterns, @code{BEGIN}
+@cindex @code{END} special pattern
+@cindex patterns, @code{END}
+@code{BEGIN} and @code{END} are special patterns. They are not used to
+match input records. Rather, they are used for supplying start-up or
+clean-up information to your @code{awk} script. A @code{BEGIN} rule is
+executed, once, before the first input record has been read. An @code{END}
+rule is executed, once, after all the input has been read. For
+example:@refill
+
+@example
+awk 'BEGIN @{ print "Analysis of \"foo\"" @}
+ /foo/ @{ ++foobar @}
+ END @{ print "\"foo\" appears " foobar " times." @}' BBS-list
+@end example
+
+This program finds the number of records in the input file @file{BBS-list}
+that contain the string @samp{foo}. The @code{BEGIN} rule prints a title
+for the report. There is no need to use the @code{BEGIN} rule to
+initialize the counter @code{foobar} to zero, as @code{awk} does this
+for us automatically (@pxref{Variables}).
+
+The second rule increments the variable @code{foobar} every time a
+record containing the pattern @samp{foo} is read. The @code{END} rule
+prints the value of @code{foobar} at the end of the run.@refill
+
+The special patterns @code{BEGIN} and @code{END} cannot be used in ranges
+or with boolean operators (indeed, they cannot be used with any operators).
+
+An @code{awk} program may have multiple @code{BEGIN} and/or @code{END}
+rules. They are executed in the order they appear, all the @code{BEGIN}
+rules at start-up and all the @code{END} rules at termination.
+
+Multiple @code{BEGIN} and @code{END} sections are useful for writing
+library functions, since each library can have its own @code{BEGIN} or
+@code{END} rule to do its own initialization and/or cleanup. Note that
+the order in which library functions are named on the command line
+controls the order in which their @code{BEGIN} and @code{END} rules are
+executed. Therefore you have to be careful to write such rules in
+library files so that the order in which they are executed doesn't matter.
+@xref{Command Line, ,Invoking @code{awk}}, for more information on
+using library functions.
+
+If an @code{awk} program only has a @code{BEGIN} rule, and no other
+rules, then the program exits after the @code{BEGIN} rule has been run.
+(Older versions of @code{awk} used to keep reading and ignoring input
+until end of file was seen.) However, if an @code{END} rule exists as
+well, then the input will be read, even if there are no other rules in
+the program. This is necessary in case the @code{END} rule checks the
+@code{NR} variable.
+
+@code{BEGIN} and @code{END} rules must have actions; there is no default
+action for these rules since there is no current record when they run.
+
+@node Empty, , BEGIN/END, Patterns
+@comment node-name, next, previous, up
+@section The Empty Pattern
+
+@cindex empty pattern
+@cindex pattern, empty
+An empty pattern is considered to match @emph{every} input record. For
+example, the program:@refill
+
+@example
+awk '@{ print $1 @}' BBS-list
+@end example
+
+@noindent
+prints the first field of every record.
+
+@node Actions, Expressions, Patterns, Top
+@chapter Overview of Actions
+@cindex action, definition of
+@cindex curly braces
+@cindex action, curly braces
+@cindex action, separating statements
+
+An @code{awk} program or script consists of a series of
+rules and function definitions, interspersed. (Functions are
+described later. @xref{User-defined, ,User-defined Functions}.)
+
+A rule contains a pattern and an action, either of which may be
+omitted. The purpose of the @dfn{action} is to tell @code{awk} what to do
+once a match for the pattern is found. Thus, the entire program
+looks somewhat like this:
+
+@example
+@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
+@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
+@dots{}
+function @var{name} (@var{args}) @{ @dots{} @}
+@dots{}
+@end example
+
+An action consists of one or more @code{awk} @dfn{statements}, enclosed
+in curly braces (@samp{@{} and @samp{@}}). Each statement specifies one
+thing to be done. The statements are separated by newlines or
+semicolons.
+
+The curly braces around an action must be used even if the action
+contains only one statement, or even if it contains no statements at
+all. However, if you omit the action entirely, omit the curly braces as
+well. (An omitted action is equivalent to @samp{@{ print $0 @}}.)
+
+Here are the kinds of statements supported in @code{awk}:
+
+@itemize @bullet
+@item
+Expressions, which can call functions or assign values to variables
+(@pxref{Expressions, ,Expressions as Action Statements}). Executing
+this kind of statement simply computes the value of the expression and
+then ignores it. This is useful when the expression has side effects
+(@pxref{Assignment Ops, ,Assignment Expressions}).@refill
+
+@item
+Control statements, which specify the control flow of @code{awk}
+programs. The @code{awk} language gives you C-like constructs
+(@code{if}, @code{for}, @code{while}, and so on) as well as a few
+special ones (@pxref{Statements, ,Control Statements in Actions}).@refill
+
+@item
+Compound statements, which consist of one or more statements enclosed in
+curly braces. A compound statement is used in order to put several
+statements together in the body of an @code{if}, @code{while}, @code{do}
+or @code{for} statement.
+
+@item
+Input control, using the @code{getline} command
+(@pxref{Getline, ,Explicit Input with @code{getline}}), and the @code{next}
+statement (@pxref{Next Statement, ,The @code{next} Statement}).
+
+@item
+Output statements, @code{print} and @code{printf}.
+@xref{Printing, ,Printing Output}.@refill
+
+@item
+Deletion statements, for deleting array elements.
+@xref{Delete, ,The @code{delete} Statement}.@refill
+@end itemize
+
+@iftex
+The next two chapters cover in detail expressions and control
+statements, respectively. We go on to treat arrays and built-in
+functions, both of which are used in expressions. Then we proceed
+to discuss how to define your own functions.
+@end iftex
+
+@node Expressions, Statements, Actions, Top
+@chapter Expressions as Action Statements
+@cindex expression
+
+Expressions are the basic building block of @code{awk} actions. An
+expression evaluates to a value, which you can print, test, store in a
+variable or pass to a function. Beyond that, an expression can assign a
+new value to a variable or a field, with an assignment operator.
+
+An expression can serve as a statement on its own. Most other kinds of
+statements contain one or more expressions which specify data to be
+operated on. As in other languages, expressions in @code{awk} include
+variables, array references, constants, and function calls, as well as
+combinations of these with various operators.
+
+@menu
+* Constants:: String, numeric, and regexp constants.
+* Variables:: Variables give names to values for later use.
+* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, etc.)
+* Concatenation:: Concatenating strings.
+* Comparison Ops:: Comparison of numbers and strings
+ with @samp{<}, etc.
+* Boolean Ops:: Combining comparison expressions
+ using boolean operators
+ @samp{||} (``or''), @samp{&&} (``and'') and @samp{!} (``not'').
+
+* Assignment Ops:: Changing the value of a variable or a field.
+* Increment Ops:: Incrementing the numeric value of a variable.
+
+* Conversion:: The conversion of strings to numbers
+ and vice versa.
+* Values:: The whole truth about numbers and strings.
+* Conditional Exp:: Conditional expressions select
+ between two subexpressions under control
+ of a third subexpression.
+* Function Calls:: A function call is an expression.
+* Precedence:: How various operators nest.
+@end menu
+
+@node Constants, Variables, Expressions, Expressions
+@section Constant Expressions
+@cindex constants, types of
+@cindex string constants
+
+The simplest type of expression is the @dfn{constant}, which always has
+the same value. There are three types of constants: numeric constants,
+string constants, and regular expression constants.
+
+@cindex numeric constant
+@cindex numeric value
+A @dfn{numeric constant} stands for a number. This number can be an
+integer, a decimal fraction, or a number in scientific (exponential)
+notation. Note that all numeric values are represented within
+@code{awk} in double-precision floating point. Here are some examples
+of numeric constants, which all have the same value:
+
+@example
+105
+1.05e+2
+1050e-1
+@end example
+
+A string constant consists of a sequence of characters enclosed in
+double-quote marks. For example:
+
+@example
+"parrot"
+@end example
+
+@noindent
+@iftex
+@cindex differences between @code{gawk} and @code{awk}
+@end iftex
+represents the string whose contents are @samp{parrot}. Strings in
+@code{gawk} can be of any length and they can contain all the possible
+8-bit ASCII characters including ASCII NUL. Other @code{awk}
+implementations may have difficulty with some character codes.@refill
+
+@cindex escape sequence notation
+Some characters cannot be included literally in a string constant. You
+represent them instead with @dfn{escape sequences}, which are character
+sequences beginning with a backslash (@samp{\}).
+
+One use of an escape sequence is to include a double-quote character in
+a string constant. Since a plain double-quote would end the string, you
+must use @samp{\"} to represent a single double-quote character as a
+part of the string.
+The
+backslash character itself is another character that cannot be
+included normally; you write @samp{\\} to put one backslash in the
+string. Thus, the string whose contents are the two characters
+@samp{"\} must be written @code{"\"\\"}.
+
+Another use of backslash is to represent unprintable characters
+such as newline. While there is nothing to stop you from writing most
+of these characters directly in a string constant, they may look ugly.
+
+Here is a table of all the escape sequences used in @code{awk}:
+
+@table @code
+@item \\
+Represents a literal backslash, @samp{\}.
+
+@item \a
+Represents the ``alert'' character, control-g, ASCII code 7.
+
+@item \b
+Represents a backspace, control-h, ASCII code 8.
+
+@item \f
+Represents a formfeed, control-l, ASCII code 12.
+
+@item \n
+Represents a newline, control-j, ASCII code 10.
+
+@item \r
+Represents a carriage return, control-m, ASCII code 13.
+
+@item \t
+Represents a horizontal tab, control-i, ASCII code 9.
+
+@item \v
+Represents a vertical tab, control-k, ASCII code 11.
+
+@item \@var{nnn}
+Represents the octal value @var{nnn}, where @var{nnn} are one to three
+digits between 0 and 7. For example, the code for the ASCII ESC
+(escape) character is @samp{\033}.@refill
+
+@item \x@var{hh}@dots{}
+Represents the hexadecimal value @var{hh}, where @var{hh} are hexadecimal
+digits (@samp{0} through @samp{9} and either @samp{A} through @samp{F} or
+@samp{a} through @samp{f}). Like the same construct in @sc{ansi} C, the escape
+sequence continues until the first non-hexadecimal digit is seen. However,
+using more than two hexadecimal digits produces undefined results. (The
+@samp{\x} escape sequence is not allowed in @sc{posix} @code{awk}.)@refill
+@end table
+
+A @dfn{constant regexp} is a regular expression description enclosed in
+slashes, such as @code{/^beginning and end$/}. Most regexps used in
+@code{awk} programs are constant, but the @samp{~} and @samp{!~}
+operators can also match computed or ``dynamic'' regexps
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).@refill
+
+Constant regexps may be used like simple expressions. When a
+constant regexp is not on the right-hand side of the @samp{~} or
+@samp{!~} operators, it has the same meaning as if it appeared
+in a pattern, i.e. @samp{($0 ~ /foo/)}
+(@pxref{Expression Patterns, ,Expressions as Patterns}).
+This means that the two code segments,@refill
+
+@example
+if ($0 ~ /barfly/ || $0 ~ /camelot/)
+ print "found"
+@end example
+
+@noindent
+and
+
+@example
+if (/barfly/ || /camelot/)
+ print "found"
+@end example
+
+@noindent
+are exactly equivalent. One rather bizarre consequence of this rule is
+that the following boolean expression is legal, but does not do what the user
+intended:@refill
+
+@example
+if (/foo/ ~ $1) print "found foo"
+@end example
+
+This code is ``obviously'' testing @code{$1} for a match against the regexp
+@code{/foo/}. But in fact, the expression @code{(/foo/ ~ $1)} actually means
+@code{(($0 ~ /foo/) ~ $1)}. In other words, first match the input record
+against the regexp @code{/foo/}. The result will be either a 1 or a 0,
+depending upon the success or failure of the match. Then match that result
+against the first field in the record.@refill
+
+Since it is unlikely that you would ever really wish to make this kind of
+test, @code{gawk} will issue a warning when it sees this construct in
+a program.@refill
+
+Another consequence of this rule is that the assignment statement
+
+@example
+matches = /foo/
+@end example
+
+@noindent
+will assign either 0 or 1 to the variable @code{matches}, depending
+upon the contents of the current input record.
+
+Constant regular expressions are also used as the first argument for
+the @code{sub} and @code{gsub} functions
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
+
+This feature of the language was never well documented until the
+@sc{posix} specification.
+
+You may be wondering, when is
+
+@example
+$1 ~ /foo/ @{ @dots{} @}
+@end example
+
+@noindent
+preferable to
+
+@example
+$1 ~ "foo" @{ @dots{} @}
+@end example
+
+Since the right-hand sides of both @samp{~} operators are constants,
+it is more efficient to use the @samp{/foo/} form: @code{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. In the second form, @code{awk}
+must first convert the string into this internal form, and then perform
+the pattern matching. The first form is also better style; it shows
+clearly that you intend a regexp match.
+
+@node Variables, Arithmetic Ops, Constants, Expressions
+@section Variables
+@cindex variables, user-defined
+@cindex user-defined variables
+@c there should be more than one subsection, ideally. Not a big deal.
+@c But usually there are supposed to be at least two. One way to get
+@c around this is to write the info in the subsection as the info in the
+@c section itself and not have any subsections.. --mew
+
+Variables let you give names to values and refer to them later. You have
+already seen variables in many of the examples. The name of a variable
+must be a sequence of letters, digits and underscores, but it may not begin
+with a digit. Case is significant in variable names; @code{a} and @code{A}
+are distinct variables.
+
+A variable name is a valid expression by itself; it represents the
+variable's current value. Variables are given new values with
+@dfn{assignment operators} and @dfn{increment operators}.
+@xref{Assignment Ops, ,Assignment Expressions}.
+
+A few variables have special built-in meanings, such as @code{FS}, the
+field separator, and @code{NF}, the number of fields in the current
+input record. @xref{Built-in Variables}, for a list of them. These
+built-in variables can be used and assigned just like all other
+variables, but their values are also used or changed automatically by
+@code{awk}. Each built-in variable's name is made entirely of upper case
+letters.
+
+Variables in @code{awk} can be assigned either numeric or string
+values. By default, variables are initialized to the null string, which
+is effectively zero if converted to a number. There is no need to
+``initialize'' each variable explicitly in @code{awk}, the way you would in C or most other traditional languages.
+
+@menu
+* Assignment Options:: Setting variables on the command line
+ and a summary of command line syntax.
+ This is an advanced method of input.
+@end menu
+
+@node Assignment Options, , Variables, Variables
+@subsection Assigning Variables on the Command Line
+
+You can set any @code{awk} variable by including a @dfn{variable assignment}
+among the arguments on the command line when you invoke @code{awk}
+(@pxref{Command Line, ,Invoking @code{awk}}). Such an assignment has
+this form:@refill
+
+@example
+@var{variable}=@var{text}
+@end example
+
+@noindent
+With it, you can set a variable either at the beginning of the
+@code{awk} run or in between input files.
+
+If you precede the assignment with the @samp{-v} option, like this:
+
+@example
+-v @var{variable}=@var{text}
+@end example
+
+@noindent
+then the variable is set at the very beginning, before even the
+@code{BEGIN} rules are run. The @samp{-v} option and its assignment
+must precede all the file name arguments, as well as the program text.
+
+Otherwise, the variable assignment is performed at a time determined by
+its position among the input file arguments: after the processing of the
+preceding input file argument. For example:
+
+@example
+awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list
+@end example
+
+@noindent
+prints the value of field number @code{n} for all input records. Before
+the first file is read, the command line sets the variable @code{n}
+equal to 4. This causes the fourth field to be printed in lines from
+the file @file{inventory-shipped}. After the first file has finished,
+but before the second file is started, @code{n} is set to 2, so that the
+second field is printed in lines from @file{BBS-list}.
+
+Command line arguments are made available for explicit examination by
+the @code{awk} program in an array named @code{ARGV}
+(@pxref{Built-in Variables}).@refill
+
+@code{awk} processes the values of command line assignments for escape
+sequences (@pxref{Constants, ,Constant Expressions}).
+
+@node Arithmetic Ops, Concatenation, Variables, Expressions
+@section Arithmetic Operators
+@cindex arithmetic operators
+@cindex operators, arithmetic
+@cindex addition
+@cindex subtraction
+@cindex multiplication
+@cindex division
+@cindex remainder
+@cindex quotient
+@cindex exponentiation
+
+The @code{awk} language uses the common arithmetic operators when
+evaluating expressions. All of these arithmetic operators follow normal
+precedence rules, and work as you would expect them to. This example
+divides field three by field four, adds field two, stores the result
+into field one, and prints the resulting altered input record:
+
+@example
+awk '@{ $1 = $2 + $3 / $4; print @}' inventory-shipped
+@end example
+
+The arithmetic operators in @code{awk} are:
+
+@table @code
+@item @var{x} + @var{y}
+Addition.
+
+@item @var{x} - @var{y}
+Subtraction.
+
+@item - @var{x}
+Negation.
+
+@item + @var{x}
+Unary plus. No real effect on the expression.
+
+@item @var{x} * @var{y}
+Multiplication.
+
+@item @var{x} / @var{y}
+Division. Since all numbers in @code{awk} are double-precision
+floating point, the result is not rounded to an integer: @code{3 / 4}
+has the value 0.75.
+
+@item @var{x} % @var{y}
+@iftex
+@cindex differences between @code{gawk} and @code{awk}
+@end iftex
+Remainder. The quotient is rounded toward zero to an integer,
+multiplied by @var{y} and this result is subtracted from @var{x}.
+This operation is sometimes known as ``trunc-mod.'' The following
+relation always holds:
+
+@example
+b * int(a / b) + (a % b) == a
+@end example
+
+One possibly undesirable effect of this definition of remainder is that
+@code{@var{x} % @var{y}} is negative if @var{x} is negative. Thus,
+
+@example
+-17 % 8 = -1
+@end example
+
+In other @code{awk} implementations, the signedness of the remainder
+may be machine dependent.
+
+@item @var{x} ^ @var{y}
+@itemx @var{x} ** @var{y}
+Exponentiation: @var{x} raised to the @var{y} power. @code{2 ^ 3} has
+the value 8. The character sequence @samp{**} is equivalent to
+@samp{^}. (The @sc{posix} standard only specifies the use of @samp{^}
+for exponentiation.)
+@end table
+
+@node Concatenation, Comparison Ops, Arithmetic Ops, Expressions
+@section String Concatenation
+
+@cindex string operators
+@cindex operators, string
+@cindex concatenation
+There is only one string operation: concatenation. It does not have a
+specific operator to represent it. Instead, concatenation is performed by
+writing expressions next to one another, with no operator. For example:
+
+@example
+awk '@{ print "Field number one: " $1 @}' BBS-list
+@end example
+
+@noindent
+produces, for the first record in @file{BBS-list}:
+
+@example
+Field number one: aardvark
+@end example
+
+Without the space in the string constant after the @samp{:}, the line
+would run together. For example:
+
+@example
+awk '@{ print "Field number one:" $1 @}' BBS-list
+@end example
+
+@noindent
+produces, for the first record in @file{BBS-list}:
+
+@example
+Field number one:aardvark
+@end example
+
+Since string concatenation does not have an explicit operator, it is
+often necessary to ensure that it happens where you want it to by
+enclosing the items to be concatenated in parentheses. For example, the
+following code fragment does not concatenate @code{file} and @code{name}
+as you might expect:
+
+@example
+file = "file"
+name = "name"
+print "something meaningful" > file name
+@end example
+
+@noindent
+It is necessary to use the following:
+
+@example
+print "something meaningful" > (file name)
+@end example
+
+We recommend you use parentheses around concatenation in all but the
+most common contexts (such as in the right-hand operand of @samp{=}).
+
+@ignore
+@code{gawk} actually now allows a concatenation on the right hand
+side of a @code{>} redirection, but other @code{awk}s don't. So for
+now we won't mention that fact.
+@end ignore
+
+@node Comparison Ops, Boolean Ops, Concatenation, Expressions
+@section Comparison Expressions
+@cindex comparison expressions
+@cindex expressions, comparison
+@cindex relational operators
+@cindex operators, relational
+@cindex regexp operators
+
+@dfn{Comparison expressions} compare strings or numbers for
+relationships such as equality. They are written using @dfn{relational
+operators}, which are a superset of those in C. Here is a table of
+them:
+
+@table @code
+@item @var{x} < @var{y}
+True if @var{x} is less than @var{y}.
+
+@item @var{x} <= @var{y}
+True if @var{x} is less than or equal to @var{y}.
+
+@item @var{x} > @var{y}
+True if @var{x} is greater than @var{y}.
+
+@item @var{x} >= @var{y}
+True if @var{x} is greater than or equal to @var{y}.
+
+@item @var{x} == @var{y}
+True if @var{x} is equal to @var{y}.
+
+@item @var{x} != @var{y}
+True if @var{x} is not equal to @var{y}.
+
+@item @var{x} ~ @var{y}
+True if the string @var{x} matches the regexp denoted by @var{y}.
+
+@item @var{x} !~ @var{y}
+True if the string @var{x} does not match the regexp denoted by @var{y}.
+
+@item @var{subscript} in @var{array}
+True if array @var{array} has an element with the subscript @var{subscript}.
+@end table
+
+Comparison expressions have the value 1 if true and 0 if false.
+
+The rules @code{gawk} uses for performing comparisons are based on those
+in draft 11.2 of the @sc{posix} standard. The @sc{posix} standard introduced
+the concept of a @dfn{numeric string}, which is simply a string that looks
+like a number, for example, @code{@w{" +2"}}.
+
+@vindex CONVFMT
+When performing a relational operation, @code{gawk} considers the type of an
+operand to be the type it received on its last @emph{assignment}, rather
+than the type of its last @emph{use}
+(@pxref{Values, ,Numeric and String Values}).
+This type is @emph{unknown} when the operand is from an ``external'' source:
+field variables, command line arguments, array elements resulting from a
+@code{split} operation, and the value of an @code{ENVIRON} element.
+In this case only, if the operand is a numeric string, then it is
+considered to be of both string type and numeric type. If at least one
+operand of a comparison is of string type only, then a string
+comparison is performed. Any numeric operand will be converted to a
+string using the value of @code{CONVFMT}
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+If one operand of a comparison is numeric, and the other operand is
+either numeric or both numeric and string, then @code{gawk} does a
+numeric comparison. If both operands have both types, then the
+comparison is numeric. Strings are compared
+by comparing the first character of each, then the second character of each,
+and so on. Thus @code{"10"} is less than @code{"9"}. If there are two
+strings where one is a prefix of the other, the shorter string is less than
+the longer one. Thus @code{"abc"} is less than @code{"abcd"}.@refill
+
+Here are some sample expressions, how @code{gawk} compares them, and what
+the result of the comparison is.
+
+@table @code
+@item 1.5 <= 2.0
+numeric comparison (true)
+
+@item "abc" >= "xyz"
+string comparison (false)
+
+@item 1.5 != " +2"
+string comparison (true)
+
+@item "1e2" < "3"
+string comparison (true)
+
+@item a = 2; b = "2"
+@itemx a == b
+string comparison (true)
+@end table
+
+@example
+echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}'
+@end example
+
+@noindent
+prints @samp{false} since both @code{$1} and @code{$2} are numeric
+strings; they therefore have both string and numeric types, which
+dictates a numeric comparison.
+
+The purpose of the comparison rules and the use of numeric strings is
+to attempt to produce the behavior that is ``least surprising,'' while
+still ``doing the right thing.''
+
+String comparisons and regular expression comparisons are very different.
+For example,
+
+@example
+$1 == "foo"
+@end example
+
+@noindent
+has the value of 1, or is true, if the first field of the current input
+record is precisely @samp{foo}. By contrast,
+
+@example
+$1 ~ /foo/
+@end example
+
+@noindent
+has the value 1 if the first field contains @samp{foo}, such as @samp{foobar}.
+
+The right hand operand of the @samp{~} and @samp{!~} operators may be
+either a constant regexp (@code{/@dots{}/}), or it may be an ordinary
+expression, in which case the value of the expression as a string is a
+dynamic regexp (@pxref{Regexp Usage, ,How to Use Regular Expressions}).
+
+@cindex regexp as expression
+In very recent implementations of @code{awk}, a constant regular
+expression in slashes by itself is also an expression. The regexp
+@code{/@var{regexp}/} is an abbreviation for this comparison expression:
+
+@example
+$0 ~ /@var{regexp}/
+@end example
+
+In some contexts it may be necessary to write parentheses around the
+regexp to avoid confusing the @code{gawk} parser. For example,
+@code{(/x/ - /y/) > threshold} is not allowed, but @code{((/x/) - (/y/))
+> threshold} parses properly.
+
+One special place where @code{/foo/} is @emph{not} an abbreviation for
+@code{$0 ~ /foo/} is when it is the right-hand operand of @samp{~} or
+@samp{!~}! @xref{Constants, ,Constant Expressions}, where this is
+discussed in more detail.
+
+@node Boolean Ops, Assignment Ops, Comparison Ops, Expressions
+@section Boolean Expressions
+@cindex expressions, boolean
+@cindex boolean expressions
+@cindex operators, boolean
+@cindex boolean operators
+@cindex logical operations
+@cindex and operator
+@cindex or operator
+@cindex not operator
+
+A @dfn{boolean expression} is a combination of comparison expressions or
+matching expressions, using the boolean operators ``or''
+(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with
+parentheses to control nesting. The truth of the boolean expression is
+computed by combining the truth values of the component expressions.
+
+Boolean expressions can be used wherever comparison and matching
+expressions can be used. They can be used in @code{if}, @code{while},
+@code{do} and @code{for} statements. They have numeric values (1 if true,
+0 if false), which come into play if the result of the boolean expression
+is stored in a variable, or used in arithmetic.@refill
+
+In addition, every boolean expression is also a valid boolean pattern, so
+you can use it as a pattern to control the execution of rules.
+
+Here are descriptions of the three boolean operators, with an example of
+each. It may be instructive to compare these examples with the
+analogous examples of boolean patterns
+(@pxref{Boolean Patterns, ,Boolean Operators and Patterns}), which
+use the same boolean operators in patterns instead of expressions.@refill
+
+@table @code
+@item @var{boolean1} && @var{boolean2}
+True if both @var{boolean1} and @var{boolean2} are true. For example,
+the following statement prints the current input record if it contains
+both @samp{2400} and @samp{foo}.@refill
+
+@smallexample
+if ($0 ~ /2400/ && $0 ~ /foo/) print
+@end smallexample
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is true. This can make a difference when @var{boolean2} contains
+expressions that have side effects: in the case of @code{$0 ~ /foo/ &&
+($2 == bar++)}, the variable @code{bar} is not incremented if there is
+no @samp{foo} in the record.
+
+@item @var{boolean1} || @var{boolean2}
+True if at least one of @var{boolean1} or @var{boolean2} is true.
+For example, the following command prints all records in the input
+file @file{BBS-list} that contain @emph{either} @samp{2400} or
+@samp{foo}, or both.@refill
+
+@smallexample
+awk '@{ if ($0 ~ /2400/ || $0 ~ /foo/) print @}' BBS-list
+@end smallexample
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is false. This can make a difference when @var{boolean2} contains
+expressions that have side effects.
+
+@item !@var{boolean}
+True if @var{boolean} is false. For example, the following program prints
+all records in the input file @file{BBS-list} that do @emph{not} contain the
+string @samp{foo}.
+
+@smallexample
+awk '@{ if (! ($0 ~ /foo/)) print @}' BBS-list
+@end smallexample
+@end table
+
+@node Assignment Ops, Increment Ops, Boolean Ops, Expressions
+@section Assignment Expressions
+@cindex assignment operators
+@cindex operators, assignment
+@cindex expressions, assignment
+
+An @dfn{assignment} is an expression that stores a new value into a
+variable. For example, let's assign the value 1 to the variable
+@code{z}:@refill
+
+@example
+z = 1
+@end example
+
+After this expression is executed, the variable @code{z} has the value 1.
+Whatever old value @code{z} had before the assignment is forgotten.
+
+Assignments can store string values also. For example, this would store
+the value @code{"this food is good"} in the variable @code{message}:
+
+@example
+thing = "food"
+predicate = "good"
+message = "this " thing " is " predicate
+@end example
+
+@noindent
+(This also illustrates concatenation of strings.)
+
+The @samp{=} sign is called an @dfn{assignment operator}. It is the
+simplest assignment operator because the value of the right-hand
+operand is stored unchanged.
+
+@cindex side effect
+Most operators (addition, concatenation, and so on) have no effect
+except to compute a value. If you ignore the value, you might as well
+not use the operator. An assignment operator is different; it does
+produce a value, but even if you ignore the value, the assignment still
+makes itself felt through the alteration of the variable. We call this
+a @dfn{side effect}.
+
+@cindex lvalue
+The left-hand operand of an assignment need not be a variable
+(@pxref{Variables}); it can also be a field
+(@pxref{Changing Fields, ,Changing the Contents of a Field}) or
+an array element (@pxref{Arrays, ,Arrays in @code{awk}}).
+These are all called @dfn{lvalues},
+which means they can appear on the left-hand side of an assignment operator.
+The right-hand operand may be any expression; it produces the new value
+which the assignment stores in the specified variable, field or array
+element.@refill
+
+It is important to note that variables do @emph{not} have permanent types.
+The type of a variable is simply the type of whatever value it happens
+to hold at the moment. In the following program fragment, the variable
+@code{foo} has a numeric value at first, and a string value later on:
+
+@example
+foo = 1
+print foo
+foo = "bar"
+print foo
+@end example
+
+@noindent
+When the second assignment gives @code{foo} a string value, the fact that
+it previously had a numeric value is forgotten.
+
+An assignment is an expression, so it has a value: the same value that
+is assigned. Thus, @code{z = 1} as an expression has the value 1.
+One consequence of this is that you can write multiple assignments together:
+
+@example
+x = y = z = 0
+@end example
+
+@noindent
+This example stores the value 0 in all three variables. It does this because the
+value of @code{z = 0}, which is 0, is stored into @code{y}, and then
+the value of @code{y = z = 0}, which is 0, is stored into @code{x}.
+
+You can use an assignment anywhere an expression is called for. For
+example, it is valid to write @code{x != (y = 1)} to set @code{y} to 1
+and then test whether @code{x} equals 1. But this style tends to make
+programs hard to read; except in a one-shot program, you should
+rewrite it to get rid of such nesting of assignments. This is never very
+hard.
+
+Aside from @samp{=}, there are several other assignment operators that
+do arithmetic with the old value of the variable. For example, the
+operator @samp{+=} computes a new value by adding the right-hand value
+to the old value of the variable. Thus, the following assignment adds
+5 to the value of @code{foo}:
+
+@example
+foo += 5
+@end example
+
+@noindent
+For a simple variable such as @code{foo}, this is equivalent to the following:
+
+@example
+foo = foo + 5
+@end example
+
+@noindent
+Use whichever one makes the meaning of your program clearer.
+
+Here is a table of the arithmetic assignment operators. In each
+case, the right-hand operand is an expression whose value is converted
+to a number.
+
+@table @code
+@item @var{lvalue} += @var{increment}
+Adds @var{increment} to the value of @var{lvalue} to make the new value
+of @var{lvalue}.
+
+@item @var{lvalue} -= @var{decrement}
+Subtracts @var{decrement} from the value of @var{lvalue}.
+
+@item @var{lvalue} *= @var{coefficient}
+Multiplies the value of @var{lvalue} by @var{coefficient}.
+
+@item @var{lvalue} /= @var{divisor}
+Divides the value of @var{lvalue} by @var{divisor}.
+
+@item @var{lvalue} %= @var{modulus}
+Sets @var{lvalue} to its remainder by @var{modulus}.
+
+@item @var{lvalue} ^= @var{power}
+@itemx @var{lvalue} **= @var{power}
+Raises @var{lvalue} to the power @var{power}.
+(Only the @code{^=} operator is specified by @sc{posix}.)
+@end table
+
+@ignore
+From: gatech!ames!elroy!cit-vax!EQL.Caltech.Edu!rankin (Pat Rankin)
+ In the discussion of assignment operators, it states that
+``foo += 5'' "is precisely equivalent to" ``foo = foo + 5'' (p.77). That
+may be true for simple variables, but it's not true for expressions with
+side effects, like array references. For proof, try
+ BEGIN {
+ foo[rand()] += 5; for (x in foo) print x, foo[x]
+ bar[rand()] = bar[rand()] + 5; for (x in bar) print x, bar[x]
+ }
+I suspect that the original statement is simply untrue--that '+=' is more
+efficient in all cases.
+
+ADR --- Try to add something about this here for the next go 'round.
+@end ignore
+
+@node Increment Ops, Conversion, Assignment Ops, Expressions
+@section Increment Operators
+
+@cindex increment operators
+@cindex operators, increment
+@dfn{Increment operators} increase or decrease the value of a variable
+by 1. You could do the same thing with an assignment operator, so
+the increment operators add no power to the @code{awk} language; but they
+are convenient abbreviations for something very common.
+
+The operator to add 1 is written @samp{++}. It can be used to increment
+a variable either before or after taking its value.
+
+To pre-increment a variable @var{v}, write @code{++@var{v}}. This adds
+1 to the value of @var{v} and that new value is also the value of this
+expression. The assignment expression @code{@var{v} += 1} is completely
+equivalent.
+
+Writing the @samp{++} after the variable specifies post-increment. This
+increments the variable value just the same; the difference is that the
+value of the increment expression itself is the variable's @emph{old}
+value. Thus, if @code{foo} has the value 4, then the expression @code{foo++}
+has the value 4, but it changes the value of @code{foo} to 5.
+
+The post-increment @code{foo++} is nearly equivalent to writing @code{(foo
++= 1) - 1}. It is not perfectly equivalent because all numbers in
+@code{awk} are floating point: in floating point, @code{foo + 1 - 1} does
+not necessarily equal @code{foo}. But the difference is minute as
+long as you stick to numbers that are fairly small (less than a trillion).
+
+Any lvalue can be incremented. Fields and array elements are incremented
+just like variables. (Use @samp{$(i++)} when you wish to do a field reference
+and a variable increment at the same time. The parentheses are necessary
+because of the precedence of the field reference operator, @samp{$}.)
+@c expert information in the last parenthetical remark
+
+The decrement operator @samp{--} works just like @samp{++} except that
+it subtracts 1 instead of adding. Like @samp{++}, it can be used before
+the lvalue to pre-decrement or after it to post-decrement.
+
+Here is a summary of increment and decrement expressions.
+
+@table @code
+@item ++@var{lvalue}
+This expression increments @var{lvalue} and the new value becomes the
+value of this expression.
+
+@item @var{lvalue}++
+This expression causes the contents of @var{lvalue} to be incremented.
+The value of the expression is the @emph{old} value of @var{lvalue}.
+
+@item --@var{lvalue}
+Like @code{++@var{lvalue}}, but instead of adding, it subtracts. It
+decrements @var{lvalue} and delivers the value that results.
+
+@item @var{lvalue}--
+Like @code{@var{lvalue}++}, but instead of adding, it subtracts. It
+decrements @var{lvalue}. The value of the expression is the @emph{old}
+value of @var{lvalue}.
+@end table
+
+@node Conversion, Values, Increment Ops, Expressions
+@section Conversion of Strings and Numbers
+
+@cindex conversion of strings and numbers
+Strings are converted to numbers, and numbers to strings, if the context
+of the @code{awk} program demands it. For example, if the value of
+either @code{foo} or @code{bar} in the expression @code{foo + bar}
+happens to be a string, it is converted to a number before the addition
+is performed. If numeric values appear in string concatenation, they
+are converted to strings. Consider this:@refill
+
+@example
+two = 2; three = 3
+print (two three) + 4
+@end example
+
+@noindent
+This eventually prints the (numeric) value 27. The numeric values of
+the variables @code{two} and @code{three} are converted to strings and
+concatenated together, and the resulting string is converted back to the
+number 23, to which 4 is then added.
+
+If, for some reason, you need to force a number to be converted to a
+string, concatenate the null string with that number. To force a string
+to be converted to a number, add zero to that string.
+
+A string is converted to a number by interpreting a numeric prefix
+of the string as numerals:
+@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1000, and @code{"25fix"}
+has a numeric value of 25.
+Strings that can't be interpreted as valid numbers are converted to
+zero.
+
+@vindex CONVFMT
+The exact manner in which numbers are converted into strings is controlled
+by the @code{awk} built-in variable @code{CONVFMT} (@pxref{Built-in Variables}).
+Numbers are converted using a special version of the @code{sprintf} function
+(@pxref{Built-in, ,Built-in Functions}) with @code{CONVFMT} as the format
+specifier.@refill
+
+@code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with
+at most six significant digits. For some applications you will want to
+change it to specify more precision. Double precision on most modern
+machines gives you 16 or 17 decimal digits of precision.
+
+Strange results can happen if you set @code{CONVFMT} to a string that doesn't
+tell @code{sprintf} how to format floating point numbers in a useful way.
+For example, if you forget the @samp{%} in the format, all numbers will be
+converted to the same constant string.@refill
+
+As a special case, if a number is an integer, then the result of converting
+it to a string is @emph{always} an integer, no matter what the value of
+@code{CONVFMT} may be. Given the following code fragment:
+
+@example
+CONVFMT = "%2.2f"
+a = 12
+b = a ""
+@end example
+
+@noindent
+@code{b} has the value @code{"12"}, not @code{"12.00"}.
+
+@ignore
+For the 2.14 version, describe the ``stickyness'' of conversions. Right now
+the manual assumes everywhere that variables are either numbers or strings;
+in fact both kinds of values may be valid. If both happen to be valid, a
+conversion isn't necessary and isn't done. Revising the manual to be
+consistent with this, though, is too big a job to tackle at the moment.
+
+7/92: This has sort of been done, only the section isn't completely right!
+ What to do?
+7/92: Pretty much fixed, at least for the short term, thanks to text
+ from David.
+@end ignore
+
+@vindex OFMT
+Prior to the @sc{posix} standard, @code{awk} specified that the value
+of @code{OFMT} was used for converting numbers to strings. @code{OFMT}
+specifies the output format to use when printing numbers with @code{print}.
+@code{CONVFMT} was introduced in order to separate the semantics of
+conversions from the semantics of printing. Both @code{CONVFMT} and
+@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority
+of cases, old @code{awk} programs will not change their behavior.
+However, this use of @code{OFMT} is something to keep in mind if you must
+port your program to other implementations of @code{awk}; we recommend
+that instead of changing your programs, you just port @code{gawk} itself!@refill
+
+@node Values, Conditional Exp, Conversion, Expressions
+@section Numeric and String Values
+@cindex conversion of strings and numbers
+
+Through most of this manual, we present @code{awk} values (such as constants,
+fields, or variables) as @emph{either} numbers @emph{or} strings. This is
+a convenient way to think about them, since typically they are used in only
+one way, or the other.
+
+In truth though, @code{awk} values can be @emph{both} string and
+numeric, at the same time. Internally, @code{awk} represents values
+with a string, a (floating point) number, and an indication that one,
+the other, or both representations of the value are valid.
+
+Keeping track of both kinds of values is important for execution
+efficiency: a variable can acquire a string value the first time it
+is used as a string, and then that string value can be used until the
+variable is assigned a new value. Thus, if a variable with only a numeric
+value is used in several concatenations in a row, it only has to be given
+a string representation once. The numeric value remains valid, so that
+no conversion back to a number is necessary if the variable is later used
+in an arithmetic expression.
+
+Tracking both kinds of values is also important for precise numerical
+calculations. Consider the following:
+
+@smallexample
+a = 123.321
+CONVFMT = "%3.1f"
+b = a " is a number"
+c = a + 1.654
+@end smallexample
+
+@noindent
+The variable @code{a} receives a string value in the concatenation and
+assignment to @code{b}. The string value of @code{a} is @code{"123.3"}.
+If the numeric value was lost when it was converted to a string, then the
+numeric use of @code{a} in the last statement would lose information.
+@code{c} would be assigned the value 124.954 instead of 124.975.
+Such errors accumulate rapidly, and very adversely affect numeric
+computations.@refill
+
+Once a numeric value acquires a corresponding string value, it stays valid
+until a new assignment is made. If @code{CONVFMT}
+(@pxref{Conversion, ,Conversion of Strings and Numbers}) changes in the
+meantime, the old string value will still be used. For example:@refill
+
+@smallexample
+BEGIN @{
+ CONVFMT = "%2.2f"
+ a = 123.456
+ b = a "" # force `a' to have string value too
+ printf "a = %s\n", a
+ CONVFMT = "%.6g"
+ printf "a = %s\n", a
+ a += 0 # make `a' numeric only again
+ printf "a = %s\n", a # use `a' as string
+@}
+@end smallexample
+
+@noindent
+This program prints @samp{a = 123.46} twice, and then prints
+@samp{a = 123.456}.
+
+@xref{Conversion, ,Conversion of Strings and Numbers}, for the rules that
+specify how string values are made from numeric values.
+
+@node Conditional Exp, Function Calls, Values, Expressions
+@section Conditional Expressions
+@cindex conditional expression
+@cindex expression, conditional
+
+A @dfn{conditional expression} is a special kind of expression with
+three operands. It allows you to use one expression's value to select
+one of two other expressions.
+
+The conditional expression looks the same as in the C language:
+
+@example
+@var{selector} ? @var{if-true-exp} : @var{if-false-exp}
+@end example
+
+@noindent
+There are three subexpressions. The first, @var{selector}, is always
+computed first. If it is ``true'' (not zero and not null) then
+@var{if-true-exp} is computed next and its value becomes the value of
+the whole expression. Otherwise, @var{if-false-exp} is computed next
+and its value becomes the value of the whole expression.@refill
+
+For example, this expression produces the absolute value of @code{x}:
+
+@example
+x > 0 ? x : -x
+@end example
+
+Each time the conditional expression is computed, exactly one of
+@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored.
+This is important when the expressions contain side effects. For example,
+this conditional expression examines element @code{i} of either array
+@code{a} or array @code{b}, and increments @code{i}.
+
+@example
+x == y ? a[i++] : b[i++]
+@end example
+
+@noindent
+This is guaranteed to increment @code{i} exactly once, because each time
+only one of the two increment expressions is executed,
+and the other is not.
+
+@node Function Calls, Precedence, Conditional Exp, Expressions
+@section Function Calls
+@cindex function call
+@cindex calling a function
+
+A @dfn{function} is a name for a particular calculation. Because it has
+a name, you can ask for it by name at any point in the program. For
+example, the function @code{sqrt} computes the square root of a number.
+
+A fixed set of functions are @dfn{built-in}, which means they are
+available in every @code{awk} program. The @code{sqrt} function is one
+of these. @xref{Built-in, ,Built-in Functions}, for a list of built-in
+functions and their descriptions. In addition, you can define your own
+functions in the program for use elsewhere in the same program.
+@xref{User-defined, ,User-defined Functions}, for how to do this.@refill
+
+@cindex arguments in function call
+The way to use a function is with a @dfn{function call} expression,
+which consists of the function name followed by a list of
+@dfn{arguments} in parentheses. The arguments are expressions which
+give the raw materials for the calculation that the function will do.
+When there is more than one argument, they are separated by commas. If
+there are no arguments, write just @samp{()} after the function name.
+Here are some examples:
+
+@example
+sqrt(x^2 + y^2) # @r{One argument}
+atan2(y, x) # @r{Two arguments}
+rand() # @r{No arguments}
+@end example
+
+@strong{Do not put any space between the function name and the
+open-parenthesis!} A user-defined function name looks just like the name of
+a variable, and space would make the expression look like concatenation
+of a variable with an expression inside parentheses. Space before the
+parenthesis is harmless with built-in functions, but it is best not to get
+into the habit of using space there, so that you avoid mistakes with
+user-defined functions.
+
+Each function expects a particular number of arguments. For example, the
+@code{sqrt} function must be called with a single argument, the number
+to take the square root of:
+
+@example
+sqrt(@var{argument})
+@end example
+
+Some of the built-in functions allow you to omit the final argument.
+If you do so, they use a reasonable default.
+@xref{Built-in, ,Built-in Functions}, for full details. If arguments
+are omitted in calls to user-defined functions, then those arguments are
+treated as local variables, initialized to the null string
+(@pxref{User-defined, ,User-defined Functions}).@refill
+
+Like every other expression, the function call has a value, which is
+computed by the function based on the arguments you give it. In this
+example, the value of @code{sqrt(@var{argument})} is the square root of the
+argument. A function can also have side effects, such as assigning the
+values of certain variables or doing I/O.
+
+Here is a command to read numbers, one number per line, and print the
+square root of each one:
+
+@example
+awk '@{ print "The square root of", $1, "is", sqrt($1) @}'
+@end example
+
+@node Precedence, , Function Calls, Expressions
+@section Operator Precedence (How Operators Nest)
+@cindex precedence
+@cindex operator precedence
+
+@dfn{Operator precedence} determines how operators are grouped, when
+different operators appear close by in one expression. For example,
+@samp{*} has higher precedence than @samp{+}; thus, @code{a + b * c}
+means to multiply @code{b} and @code{c}, and then add @code{a} to the
+product (i.e., @code{a + (b * c)}).
+
+You can overrule the precedence of the operators by using parentheses.
+You can think of the precedence rules as saying where the
+parentheses are assumed if you do not write parentheses yourself. In
+fact, it is wise to always use parentheses whenever you have an unusual
+combination of operators, because other people who read the program may
+not remember what the precedence is in this case. You might forget,
+too; then you could make a mistake. Explicit parentheses will help prevent
+any such mistake.
+
+When operators of equal precedence are used together, the leftmost
+operator groups first, except for the assignment, conditional and
+exponentiation operators, which group in the opposite order.
+Thus, @code{a - b + c} groups as @code{(a - b) + c};
+@code{a = b = c} groups as @code{a = (b = c)}.@refill
+
+The precedence of prefix unary operators does not matter as long as only
+unary operators are involved, because there is only one way to parse
+them---innermost first. Thus, @code{$++i} means @code{$(++i)} and
+@code{++$x} means @code{++($x)}. However, when another operator follows
+the operand, then the precedence of the unary operators can matter.
+Thus, @code{$x^2} means @code{($x)^2}, but @code{-x^2} means
+@code{-(x^2)}, because @samp{-} has lower precedence than @samp{^}
+while @samp{$} has higher precedence.
+
+Here is a table of the operators of @code{awk}, in order of increasing
+precedence:
+
+@table @asis
+@item assignment
+@samp{=}, @samp{+=}, @samp{-=}, @samp{*=}, @samp{/=}, @samp{%=},
+@samp{^=}, @samp{**=}. These operators group right-to-left.
+(The @samp{**=} operator is not specified by @sc{posix}.)
+
+@item conditional
+@samp{?:}. This operator groups right-to-left.
+
+@item logical ``or''.
+@samp{||}.
+
+@item logical ``and''.
+@samp{&&}.
+
+@item array membership
+@samp{in}.
+
+@item matching
+@samp{~}, @samp{!~}.
+
+@item relational, and redirection
+The relational operators and the redirections have the same precedence
+level. Characters such as @samp{>} serve both as relationals and as
+redirections; the context distinguishes between the two meanings.
+
+The relational operators are @samp{<}, @samp{<=}, @samp{==}, @samp{!=},
+@samp{>=} and @samp{>}.
+
+The I/O redirection operators are @samp{<}, @samp{>}, @samp{>>} and
+@samp{|}.
+
+Note that I/O redirection operators in @code{print} and @code{printf}
+statements belong to the statement level, not to expressions. The
+redirection does not produce an expression which could be the operand of
+another operator. As a result, it does not make sense to use a
+redirection operator near another operator of lower precedence, without
+parentheses. Such combinations, for example @samp{print foo > a ? b :
+c}, result in syntax errors.
+
+@item concatenation
+No special token is used to indicate concatenation.
+The operands are simply written side by side.
+
+@item add, subtract
+@samp{+}, @samp{-}.
+
+@item multiply, divide, mod
+@samp{*}, @samp{/}, @samp{%}.
+
+@item unary plus, minus, ``not''
+@samp{+}, @samp{-}, @samp{!}.
+
+@item exponentiation
+@samp{^}, @samp{**}. These operators group right-to-left.
+(The @samp{**} operator is not specified by @sc{posix}.)
+
+@item increment, decrement
+@samp{++}, @samp{--}.
+
+@item field
+@samp{$}.
+@end table
+
+@node Statements, Arrays, Expressions, Top
+@chapter Control Statements in Actions
+@cindex control statement
+
+@dfn{Control statements} such as @code{if}, @code{while}, and so on
+control the flow of execution in @code{awk} programs. Most of the
+control statements in @code{awk} are patterned on similar statements in
+C.
+
+All the control statements start with special keywords such as @code{if}
+and @code{while}, to distinguish them from simple expressions.
+
+Many control statements contain other statements; for example, the
+@code{if} statement contains another statement which may or may not be
+executed. The contained statement is called the @dfn{body}. If you
+want to include more than one statement in the body, group them into a
+single compound statement with curly braces, separating them with
+newlines or semicolons.
+
+@menu
+* If Statement:: Conditionally execute
+ some @code{awk} statements.
+* While Statement:: Loop until some condition is satisfied.
+* Do Statement:: Do specified action while looping until some
+ condition is satisfied.
+* For Statement:: Another looping statement, that provides
+ initialization and increment clauses.
+* Break Statement:: Immediately exit the innermost enclosing loop.
+* Continue Statement:: Skip to the end of the innermost
+ enclosing loop.
+* Next Statement:: Stop processing the current input record.
+* Next File Statement:: Stop processing the current file.
+* Exit Statement:: Stop execution of @code{awk}.
+@end menu
+
+@node If Statement, While Statement, Statements, Statements
+@section The @code{if} Statement
+
+@cindex @code{if} statement
+The @code{if}-@code{else} statement is @code{awk}'s decision-making
+statement. It looks like this:@refill
+
+@example
+if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
+@end example
+
+@noindent
+@var{condition} is an expression that controls what the rest of the
+statement will do. If @var{condition} is true, @var{then-body} is
+executed; otherwise, @var{else-body} is executed (assuming that the
+@code{else} clause is present). The @code{else} part of the statement is
+optional. The condition is considered false if its value is zero or
+the null string, and true otherwise.@refill
+
+Here is an example:
+
+@example
+if (x % 2 == 0)
+ print "x is even"
+else
+ print "x is odd"
+@end example
+
+In this example, if the expression @code{x % 2 == 0} is true (that is,
+the value of @code{x} is divisible by 2), then the first @code{print}
+statement is executed; otherwise, the second @code{print} statement is
+executed.@refill
+
+If the @code{else} appears on the same line as @var{then-body}, and
+@var{then-body} is not a compound statement (i.e., not surrounded by
+curly braces), then a semicolon must separate @var{then-body} from
+@code{else}. To illustrate this, let's rewrite the previous example:
+
+@example
+awk '@{ if (x % 2 == 0) print "x is even"; else
+ print "x is odd" @}'
+@end example
+
+@noindent
+If you forget the @samp{;}, @code{awk} won't be able to parse the
+statement, and you will get a syntax error.
+
+We would not actually write this example this way, because a human
+reader might fail to see the @code{else} if it were not the first thing
+on its line.
+
+@node While Statement, Do Statement, If Statement, Statements
+@section The @code{while} Statement
+@cindex @code{while} statement
+@cindex loop
+@cindex body of a loop
+
+In programming, a @dfn{loop} means a part of a program that is (or at least can
+be) executed two or more times in succession.
+
+The @code{while} statement is the simplest looping statement in
+@code{awk}. It repeatedly executes a statement as long as a condition is
+true. It looks like this:
+
+@example
+while (@var{condition})
+ @var{body}
+@end example
+
+@noindent
+Here @var{body} is a statement that we call the @dfn{body} of the loop,
+and @var{condition} is an expression that controls how long the loop
+keeps running.
+
+The first thing the @code{while} statement does is test @var{condition}.
+If @var{condition} is true, it executes the statement @var{body}.
+(@var{condition} is true when the value
+is not zero and not a null string.) After @var{body} has been executed,
+@var{condition} is tested again, and if it is still true, @var{body} is
+executed again. This process repeats until @var{condition} is no longer
+true. If @var{condition} is initially false, the body of the loop is
+never executed.@refill
+
+This example prints the first three fields of each record, one per line.
+
+@example
+awk '@{ i = 1
+ while (i <= 3) @{
+ print $i
+ i++
+ @}
+@}'
+@end example
+
+@noindent
+Here the body of the loop is a compound statement enclosed in braces,
+containing two statements.
+
+The loop works like this: first, the value of @code{i} is set to 1.
+Then, the @code{while} tests whether @code{i} is less than or equal to
+three. This is the case when @code{i} equals one, so the @code{i}-th
+field is printed. Then the @code{i++} increments the value of @code{i}
+and the loop repeats. The loop terminates when @code{i} reaches 4.
+
+As you can see, a newline is not required between the condition and the
+body; but using one makes the program clearer unless the body is a
+compound statement or is very simple. The newline after the open-brace
+that begins the compound statement is not required either, but the
+program would be hard to read without it.
+
+@node Do Statement, For Statement, While Statement, Statements
+@section The @code{do}-@code{while} Statement
+
+The @code{do} loop is a variation of the @code{while} looping statement.
+The @code{do} loop executes the @var{body} once, then repeats @var{body}
+as long as @var{condition} is true. It looks like this:
+
+@example
+do
+ @var{body}
+while (@var{condition})
+@end example
+
+Even if @var{condition} is false at the start, @var{body} is executed at
+least once (and only once, unless executing @var{body} makes
+@var{condition} true). Contrast this with the corresponding
+@code{while} statement:
+
+@example
+while (@var{condition})
+ @var{body}
+@end example
+
+@noindent
+This statement does not execute @var{body} even once if @var{condition}
+is false to begin with.
+
+Here is an example of a @code{do} statement:
+
+@example
+awk '@{ i = 1
+ do @{
+ print $0
+ i++
+ @} while (i <= 10)
+@}'
+@end example
+
+@noindent
+This program prints each input record ten times. It isn't a very realistic example,
+since in this case an ordinary @code{while} would do just as well. But
+this reflects actual experience; there is only occasionally a real use
+for a @code{do} statement.@refill
+
+@node For Statement, Break Statement, Do Statement, Statements
+@section The @code{for} Statement
+@cindex @code{for} statement
+
+The @code{for} statement makes it more convenient to count iterations of a
+loop. The general form of the @code{for} statement looks like this:@refill
+
+@example
+for (@var{initialization}; @var{condition}; @var{increment})
+ @var{body}
+@end example
+
+@noindent
+This statement starts by executing @var{initialization}. Then, as long
+as @var{condition} is true, it repeatedly executes @var{body} and then
+@var{increment}. Typically @var{initialization} sets a variable to
+either zero or one, @var{increment} adds 1 to it, and @var{condition}
+compares it against the desired number of iterations.
+
+Here is an example of a @code{for} statement:
+
+@example
+@group
+awk '@{ for (i = 1; i <= 3; i++)
+ print $i
+@}'
+@end group
+@end example
+
+@noindent
+This prints the first three fields of each input record, one field per
+line.
+
+In the @code{for} statement, @var{body} stands for any statement, but
+@var{initialization}, @var{condition} and @var{increment} are just
+expressions. You cannot set more than one variable in the
+@var{initialization} part unless you use a multiple assignment statement
+such as @code{x = y = 0}, which is possible only if all the initial values
+are equal. (But you can initialize additional variables by writing
+their assignments as separate statements preceding the @code{for} loop.)
+
+The same is true of the @var{increment} part; to increment additional
+variables, you must write separate statements at the end of the loop.
+The C compound expression, using C's comma operator, would be useful in
+this context, but it is not supported in @code{awk}.
+
+Most often, @var{increment} is an increment expression, as in the
+example above. But this is not required; it can be any expression
+whatever. For example, this statement prints all the powers of 2
+between 1 and 100:
+
+@example
+for (i = 1; i <= 100; i *= 2)
+ print i
+@end example
+
+Any of the three expressions in the parentheses following the @code{for} may
+be omitted if there is nothing to be done there. Thus, @w{@samp{for (;x
+> 0;)}} is equivalent to @w{@samp{while (x > 0)}}. If the
+@var{condition} is omitted, it is treated as @var{true}, effectively
+yielding an @dfn{infinite loop} (i.e., a loop that will never
+terminate).@refill
+
+In most cases, a @code{for} loop is an abbreviation for a @code{while}
+loop, as shown here:
+
+@example
+@var{initialization}
+while (@var{condition}) @{
+ @var{body}
+ @var{increment}
+@}
+@end example
+
+@noindent
+The only exception is when the @code{continue} statement
+(@pxref{Continue Statement, ,The @code{continue} Statement}) is used
+inside the loop; changing a @code{for} statement to a @code{while}
+statement in this way can change the effect of the @code{continue}
+statement inside the loop.@refill
+
+There is an alternate version of the @code{for} loop, for iterating over
+all the indices of an array:
+
+@example
+for (i in array)
+ @var{do something with} array[i]
+@end example
+
+@noindent
+@xref{Arrays, ,Arrays in @code{awk}}, for more information on this
+version of the @code{for} loop.
+
+The @code{awk} language has a @code{for} statement in addition to a
+@code{while} statement because often a @code{for} loop is both less work to
+type and more natural to think of. Counting the number of iterations is
+very common in loops. It can be easier to think of this counting as part
+of looping rather than as something to do inside the loop.
+
+The next section has more complicated examples of @code{for} loops.
+
+@node Break Statement, Continue Statement, For Statement, Statements
+@section The @code{break} Statement
+@cindex @code{break} statement
+@cindex loops, exiting
+
+The @code{break} statement jumps out of the innermost @code{for},
+@code{while}, or @code{do}-@code{while} loop that encloses it. The
+following example finds the smallest divisor of any integer, and also
+identifies prime numbers:@refill
+
+@smallexample
+awk '# find smallest divisor of num
+ @{ num = $1
+ for (div = 2; div*div <= num; div++)
+ if (num % div == 0)
+ break
+ if (num % div == 0)
+ printf "Smallest divisor of %d is %d\n", num, div
+ else
+ printf "%d is prime\n", num @}'
+@end smallexample
+
+When the remainder is zero in the first @code{if} statement, @code{awk}
+immediately @dfn{breaks out} of the containing @code{for} loop. This means
+that @code{awk} proceeds immediately to the statement following the loop
+and continues processing. (This is very different from the @code{exit}
+statement, which stops the entire @code{awk} program.
+@xref{Exit Statement, ,The @code{exit} Statement}.)@refill
+
+Here is another program equivalent to the previous one. It illustrates how
+the @var{condition} of a @code{for} or @code{while} could just as well be
+replaced with a @code{break} inside an @code{if}:
+
+@smallexample
+@group
+awk '# find smallest divisor of num
+ @{ num = $1
+ for (div = 2; ; div++) @{
+ if (num % div == 0) @{
+ printf "Smallest divisor of %d is %d\n", num, div
+ break
+ @}
+ if (div*div > num) @{
+ printf "%d is prime\n", num
+ break
+ @}
+ @}
+@}'
+@end group
+@end smallexample
+
+@node Continue Statement, Next Statement, Break Statement, Statements
+@section The @code{continue} Statement
+
+@cindex @code{continue} statement
+The @code{continue} statement, like @code{break}, is used only inside
+@code{for}, @code{while}, and @code{do}-@code{while} loops. It skips
+over the rest of the loop body, causing the next cycle around the loop
+to begin immediately. Contrast this with @code{break}, which jumps out
+of the loop altogether. Here is an example:@refill
+
+@example
+# print names that don't contain the string "ignore"
+
+# first, save the text of each line
+@{ names[NR] = $0 @}
+
+# print what we're interested in
+END @{
+ for (x in names) @{
+ if (names[x] ~ /ignore/)
+ continue
+ print names[x]
+ @}
+@}
+@end example
+
+If one of the input records contains the string @samp{ignore}, this
+example skips the print statement for that record, and continues back to
+the first statement in the loop.
+
+This is not a practical example of @code{continue}, since it would be
+just as easy to write the loop like this:
+
+@example
+for (x in names)
+ if (names[x] !~ /ignore/)
+ print names[x]
+@end example
+
+@ignore
+from brennan@boeing.com:
+
+page 90, section 9.6. The example is too artificial as
+the one line program
+
+ !/ignore/
+
+does the same thing.
+@end ignore
+@c ADR --- he's right, but don't worry about this for now
+
+The @code{continue} statement in a @code{for} loop directs @code{awk} to
+skip the rest of the body of the loop, and resume execution with the
+increment-expression of the @code{for} statement. The following program
+illustrates this fact:@refill
+
+@example
+awk 'BEGIN @{
+ for (x = 0; x <= 20; x++) @{
+ if (x == 5)
+ continue
+ printf ("%d ", x)
+ @}
+ print ""
+@}'
+@end example
+
+@noindent
+This program prints all the numbers from 0 to 20, except for 5, for
+which the @code{printf} is skipped. Since the increment @code{x++}
+is not skipped, @code{x} does not remain stuck at 5. Contrast the
+@code{for} loop above with the @code{while} loop:
+
+@example
+awk 'BEGIN @{
+ x = 0
+ while (x <= 20) @{
+ if (x == 5)
+ continue
+ printf ("%d ", x)
+ x++
+ @}
+ print ""
+@}'
+@end example
+
+@noindent
+This program loops forever once @code{x} gets to 5.
+
+As described above, the @code{continue} statement has no meaning when
+used outside the body of a loop. However, although it was never documented,
+historical implementations of @code{awk} have treated the @code{continue}
+statement outside of a loop as if it were a @code{next} statement
+(@pxref{Next Statement, ,The @code{next} Statement}).
+By default, @code{gawk} silently supports this usage. However, if
+@samp{-W posix} has been specified on the command line
+(@pxref{Command Line, ,Invoking @code{awk}}),
+it will be treated as an error, since the @sc{posix} standard specifies
+that @code{continue} should only be used inside the body of a loop.@refill
+
+@node Next Statement, Next File Statement, Continue Statement, Statements
+@section The @code{next} Statement
+@cindex @code{next} statement
+
+The @code{next} statement forces @code{awk} to immediately stop processing
+the current record and go on to the next record. This means that no
+further rules are executed for the current record. The rest of the
+current rule's action is not executed either.
+
+Contrast this with the effect of the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}). That too causes
+@code{awk} to read the next record immediately, but it does not alter the
+flow of control in any way. So the rest of the current action executes
+with a new input record.
+
+At the highest level, @code{awk} program execution is a loop that reads
+an input record and then tests each rule's pattern against it. If you
+think of this loop as a @code{for} statement whose body contains the
+rules, then the @code{next} statement is analogous to a @code{continue}
+statement: it skips to the end of the body of this implicit loop, and
+executes the increment (which reads another record).
+
+For example, if your @code{awk} program works only on records with four
+fields, and you don't want it to fail when given bad input, you might
+use this rule near the beginning of the program:
+
+@smallexample
+NF != 4 @{
+ printf("line %d skipped: doesn't have 4 fields", FNR) > "/dev/stderr"
+ next
+@}
+@end smallexample
+
+@noindent
+so that the following rules will not see the bad record. The error
+message is redirected to the standard error output stream, as error
+messages should be. @xref{Special Files, ,Standard I/O Streams}.
+
+According to the @sc{posix} standard, the behavior is undefined if
+the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
+@code{gawk} will treat it as a syntax error.
+
+If the @code{next} statement causes the end of the input to be reached,
+then the code in the @code{END} rules, if any, will be executed.
+@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.
+
+@node Next File Statement, Exit Statement, Next Statement, Statements
+@section The @code{next file} Statement
+
+@cindex @code{next file} statement
+The @code{next file} statement is similar to the @code{next} statement.
+However, instead of abandoning processing of the current record, the
+@code{next file} statement instructs @code{awk} to stop processing the
+current data file.
+
+Upon execution of the @code{next file} statement, @code{FILENAME} is
+updated to the name of the next data file listed on the command line,
+@code{FNR} is reset to 1, and processing starts over with the first
+rule in the program. @xref{Built-in Variables}.
+
+If the @code{next file} statement causes the end of the input to be reached,
+then the code in the @code{END} rules, if any, will be executed.
+@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.
+
+The @code{next file} statement is a @code{gawk} extension; it is not
+(currently) available in any other @code{awk} implementation. You can
+simulate its behavior by creating a library file named @file{nextfile.awk},
+with the following contents. (This sample program uses user-defined
+functions, a feature that has not been presented yet.
+@xref{User-defined, ,User-defined Functions},
+for more information.)@refill
+
+@smallexample
+# nextfile --- function to skip remaining records in current file
+
+# this should be read in before the "main" awk program
+
+function nextfile() @{ _abandon_ = FILENAME; next @}
+
+_abandon_ == FILENAME && FNR > 1 @{ next @}
+_abandon_ == FILENAME && FNR == 1 @{ _abandon_ = "" @}
+@end smallexample
+
+The @code{nextfile} function simply sets a ``private'' variable@footnote{Since
+all variables in @code{awk} are global, this program uses the common
+practice of prefixing the variable name with an underscore. In fact, it
+also suffixes the variable name with an underscore, as extra insurance
+against using a variable name that might be used in some other library
+file.} to the name of the current data file, and then retrieves the next
+record. Since this file is read before the main @code{awk} program,
+the rules that follow the function definition will be executed before the
+rules in the main program. The first rule continues to skip records as long as
+the name of the input file has not changed, and this is not the first
+record in the file. This rule is sufficient most of the time. But what if
+the @emph{same} data file is named twice in a row on the command line?
+This rule would not process the data file the second time. The second rule
+catches this case: If the data file name is what was being skipped, but
+@code{FNR} is 1, then this is the second time the file is being processed,
+and it should not be skipped.
+
+The @code{next file} statement would be useful if you have many data
+files to process, and due to the nature of the data, you expect that you
+would not want to process every record in the file. Without the
+@code{next file} statement, in order to move on to the next data file,
+you would have to continue scanning the unwanted records (as described
+above). The @code{next file} statement accomplishes
+this much more efficiently.
+
+@ignore
+Would it make sense down the road to nuke `next file' in favor of
+semantics that would make this work?
+
+ function nextfile() { ARGIND++ ; next }
+@end ignore
+
+@node Exit Statement, , Next File Statement, Statements
+@section The @code{exit} Statement
+
+@cindex @code{exit} statement
+The @code{exit} statement causes @code{awk} to immediately stop
+executing the current rule and to stop processing input; any remaining input
+is ignored.@refill
+
+If an @code{exit} statement is executed from a @code{BEGIN} rule, the
+program stops processing everything immediately. No input records are
+read. However, if an @code{END} rule is present, it is executed
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).
+
+If @code{exit} is used as part of an @code{END} rule, it causes
+the program to stop immediately.
+
+An @code{exit} statement that is part of an ordinary rule (that is, not part
+of a @code{BEGIN} or @code{END} rule) stops the execution of any further
+automatic rules, but the @code{END} rule is executed if there is one.
+If you do not want the @code{END} rule to do its job in this case, you
+can set a variable to nonzero before the @code{exit} statement, and check
+that variable in the @code{END} rule.
+
+If an argument is supplied to @code{exit}, its value is used as the exit
+status code for the @code{awk} process. If no argument is supplied,
+@code{exit} returns status zero (success).@refill
+
+For example, let's say you've discovered an error condition you really
+don't know how to handle. Conventionally, programs report this by
+exiting with a nonzero status. Your @code{awk} program can do this
+using an @code{exit} statement with a nonzero argument. Here's an
+example of this:@refill
+
+@example
+@group
+BEGIN @{
+ if (("date" | getline date_now) < 0) @{
+ print "Can't get system date" > "/dev/stderr"
+ exit 4
+ @}
+@}
+@end group
+@end example
+
+@node Arrays, Built-in, Statements, Top
+@chapter Arrays in @code{awk}
+
+An @dfn{array} is a table of values, called @dfn{elements}. The
+elements of an array are distinguished by their indices. @dfn{Indices}
+may be either numbers or strings. Each array has a name, which looks
+like a variable name, but must not be in use as a variable name in the
+same @code{awk} program.
+
+@menu
+* Array Intro:: Introduction to Arrays
+* Reference to Elements:: How to examine one element of an array.
+* Assigning Elements:: How to change an element of an array.
+* Array Example:: Basic Example of an Array
+* Scanning an Array:: A variation of the @code{for} statement.
+ It loops through the indices of
+ an array's existing elements.
+* Delete:: The @code{delete} statement removes
+ an element from an array.
+* Numeric Array Subscripts:: How to use numbers as subscripts in @code{awk}.
+* Multi-dimensional:: Emulating multi-dimensional arrays in @code{awk}.
+* Multi-scanning:: Scanning multi-dimensional arrays.
+@end menu
+
+@node Array Intro, Reference to Elements, Arrays, Arrays
+@section Introduction to Arrays
+
+@cindex arrays
+The @code{awk} language has one-dimensional @dfn{arrays} for storing groups
+of related strings or numbers.
+
+Every @code{awk} array must have a name. Array names have the same
+syntax as variable names; any valid variable name would also be a valid
+array name. But you cannot use one name in both ways (as an array and
+as a variable) in one @code{awk} program.
+
+Arrays in @code{awk} superficially resemble arrays in other programming
+languages; but there are fundamental differences. In @code{awk}, you
+don't need to specify the size of an array before you start to use it.
+Additionally, any number or string in @code{awk} may be used as an
+array index.
+
+In most other languages, you have to @dfn{declare} an array and specify
+how many elements or components it contains. In such languages, the
+declaration causes a contiguous block of memory to be allocated for that
+many elements. An index in the array must be a nonnegative integer; for
+example, the index 0 specifies the first element in the array, which is
+actually stored at the beginning of the block of memory. Index 1
+specifies the second element, which is stored in memory right after the
+first element, and so on. It is impossible to add more elements to the
+array, because it has room for only as many elements as you declared.
+
+A contiguous array of four elements might look like this,
+conceptually, if the element values are @code{8}, @code{"foo"},
+@code{""} and @code{30}:@refill
+
+@example
++---------+---------+--------+---------+
+| 8 | "foo" | "" | 30 | @r{value}
++---------+---------+--------+---------+
+ 0 1 2 3 @r{index}
+@end example
+
+@noindent
+Only the values are stored; the indices are implicit from the order of
+the values. @code{8} is the value at index 0, because @code{8} appears in the
+position with 0 elements before it.
+
+@cindex arrays, definition of
+@cindex associative arrays
+Arrays in @code{awk} are different: they are @dfn{associative}. This means
+that each array is a collection of pairs: an index, and its corresponding
+array element value:
+
+@example
+@r{Element} 4 @r{Value} 30
+@r{Element} 2 @r{Value} "foo"
+@r{Element} 1 @r{Value} 8
+@r{Element} 3 @r{Value} ""
+@end example
+
+@noindent
+We have shown the pairs in jumbled order because their order is irrelevant.
+
+One advantage of an associative array is that new pairs can be added
+at any time. For example, suppose we add to the above array a tenth element
+whose value is @w{@code{"number ten"}}. The result is this:
+
+@example
+@r{Element} 10 @r{Value} "number ten"
+@r{Element} 4 @r{Value} 30
+@r{Element} 2 @r{Value} "foo"
+@r{Element} 1 @r{Value} 8
+@r{Element} 3 @r{Value} ""
+@end example
+
+@noindent
+Now the array is @dfn{sparse} (i.e., some indices are missing): it has
+elements 1--4 and 10, but doesn't have elements 5, 6, 7, 8, or 9.@refill
+
+Another consequence of associative arrays is that the indices don't
+have to be positive integers. Any number, or even a string, can be
+an index. For example, here is an array which translates words from
+English into French:
+
+@example
+@r{Element} "dog" @r{Value} "chien"
+@r{Element} "cat" @r{Value} "chat"
+@r{Element} "one" @r{Value} "un"
+@r{Element} 1 @r{Value} "un"
+@end example
+
+@noindent
+Here we decided to translate the number 1 in both spelled-out and
+numeric form---thus illustrating that a single array can have both
+numbers and strings as indices.
+
+When @code{awk} creates an array for you, e.g., with the @code{split}
+built-in function,
+that array's indices are consecutive integers starting at 1.
+(@xref{String Functions, ,Built-in Functions for String Manipulation}.)
+
+@node Reference to Elements, Assigning Elements, Array Intro, Arrays
+@section Referring to an Array Element
+@cindex array reference
+@cindex element of array
+@cindex reference to array
+
+The principal way of using an array is to refer to one of its elements.
+An array reference is an expression which looks like this:
+
+@example
+@var{array}[@var{index}]
+@end example
+
+@noindent
+Here, @var{array} is the name of an array. The expression @var{index} is
+the index of the element of the array that you want.
+
+The value of the array reference is the current value of that array
+element. For example, @code{foo[4.3]} is an expression for the element
+of array @code{foo} at index 4.3.
+
+If you refer to an array element that has no recorded value, the value
+of the reference is @code{""}, the null string. This includes elements
+to which you have not assigned any value, and elements that have been
+deleted (@pxref{Delete, ,The @code{delete} Statement}). Such a reference
+automatically creates that array element, with the null string as its value.
+(In some cases, this is unfortunate, because it might waste memory inside
+@code{awk}.)
+
+@cindex arrays, presence of elements
+You can find out if an element exists in an array at a certain index with
+the expression:
+
+@example
+@var{index} in @var{array}
+@end example
+
+@noindent
+This expression tests whether or not the particular index exists,
+without the side effect of creating that element if it is not present.
+The expression has the value 1 (true) if @code{@var{array}[@var{index}]}
+exists, and 0 (false) if it does not exist.@refill
+
+For example, to test whether the array @code{frequencies} contains the
+index @code{"2"}, you could write this statement:@refill
+
+@smallexample
+if ("2" in frequencies) print "Subscript \"2\" is present."
+@end smallexample
+
+Note that this is @emph{not} a test of whether or not the array
+@code{frequencies} contains an element whose @emph{value} is @code{"2"}.
+(There is no way to do that except to scan all the elements.) Also, this
+@emph{does not} create @code{frequencies["2"]}, while the following
+(incorrect) alternative would do so:@refill
+
+@smallexample
+if (frequencies["2"] != "") print "Subscript \"2\" is present."
+@end smallexample
+
+@node Assigning Elements, Array Example, Reference to Elements, Arrays
+@section Assigning Array Elements
+@cindex array assignment
+@cindex element assignment
+
+Array elements are lvalues: they can be assigned values just like
+@code{awk} variables:
+
+@example
+@var{array}[@var{subscript}] = @var{value}
+@end example
+
+@noindent
+Here @var{array} is the name of your array. The expression
+@var{subscript} is the index of the element of the array that you want
+to assign a value. The expression @var{value} is the value you are
+assigning to that element of the array.@refill
+
+@node Array Example, Scanning an Array, Assigning Elements, Arrays
+@section Basic Example of an Array
+
+The following program takes a list of lines, each beginning with a line
+number, and prints them out in order of line number. The line numbers are
+not in order, however, when they are first read: they are scrambled. This
+program sorts the lines by making an array using the line numbers as
+subscripts. It then prints out the lines in sorted order of their numbers.
+It is a very simple program, and gets confused if it encounters repeated
+numbers, gaps, or lines that don't begin with a number.@refill
+
+@example
+@{
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
+@}
+
+END @{
+ for (x = 1; x <= max; x++)
+ print arr[x]
+@}
+@end example
+
+The first rule keeps track of the largest line number seen so far;
+it also stores each line into the array @code{arr}, at an index that
+is the line's number.
+
+The second rule runs after all the input has been read, to print out
+all the lines.
+
+When this program is run with the following input:
+
+@example
+5 I am the Five man
+2 Who are you? The new number two!
+4 . . . And four on the floor
+1 Who is number one?
+3 I three you.
+@end example
+
+@noindent
+its output is this:
+
+@example
+1 Who is number one?
+2 Who are you? The new number two!
+3 I three you.
+4 . . . And four on the floor
+5 I am the Five man
+@end example
+
+If a line number is repeated, the last line with a given number overrides
+the others.
+
+Gaps in the line numbers can be handled with an easy improvement to the
+program's @code{END} rule:
+
+@example
+END @{
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
+@}
+@end example
+
+@node Scanning an Array, Delete, Array Example, Arrays
+@section Scanning all Elements of an Array
+@cindex @code{for (x in @dots{})}
+@cindex arrays, special @code{for} statement
+@cindex scanning an array
+
+In programs that use arrays, often you need a loop that executes
+once for each element of an array. In other languages, where arrays are
+contiguous and indices are limited to nonnegative integers, this is
+easy: the largest index is one less than the length of the array, and you can
+find all the valid indices by counting from zero up to that value. This
+technique won't do the job in @code{awk}, since any number or string
+may be an array index. So @code{awk} has a special kind of @code{for}
+statement for scanning an array:
+
+@example
+for (@var{var} in @var{array})
+ @var{body}
+@end example
+
+@noindent
+This loop executes @var{body} once for each different value that your
+program has previously used as an index in @var{array}, with the
+variable @var{var} set to that index.@refill
+
+Here is a program that uses this form of the @code{for} statement. The
+first rule scans the input records and notes which words appear (at
+least once) in the input, by storing a 1 into the array @code{used} with
+the word as index. The second rule scans the elements of @code{used} to
+find all the distinct words that appear in the input. It prints each
+word that is more than 10 characters long, and also prints the number of
+such words. @xref{Built-in, ,Built-in Functions}, for more information
+on the built-in function @code{length}.
+
+@smallexample
+# Record a 1 for each word that is used at least once.
+@{
+ for (i = 1; i <= NF; i++)
+ used[$i] = 1
+@}
+
+# Find number of distinct words more than 10 characters long.
+END @{
+ for (x in used)
+ if (length(x) > 10) @{
+ ++num_long_words
+ print x
+ @}
+ print num_long_words, "words longer than 10 characters"
+@}
+@end smallexample
+
+@noindent
+@xref{Sample Program}, for a more detailed example of this type.
+
+The order in which elements of the array are accessed by this statement
+is determined by the internal arrangement of the array elements within
+@code{awk} and cannot be controlled or changed. This can lead to
+problems if new elements are added to @var{array} by statements in
+@var{body}; you cannot predict whether or not the @code{for} loop will
+reach them. Similarly, changing @var{var} inside the loop can produce
+strange results. It is best to avoid such things.@refill
+
+@node Delete, Numeric Array Subscripts, Scanning an Array, Arrays
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements of arrays
+@cindex removing elements of arrays
+@cindex arrays, deleting an element
+
+You can remove an individual element of an array using the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index}]
+@end example
+
+Once you have deleted an array element, you can no longer obtain any
+value the element once had. It is as if you had never referred
+to it and had never given it any value.
+
+Here is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+ delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+
+If you delete an element, a subsequent @code{for} statement to scan the array
+will not report that element, and the @code{in} operator to check for
+the presence of that element will return 0:
+
+@example
+delete foo[4]
+if (4 in foo)
+ print "This will never be printed"
+@end example
+
+It is not an error to delete an element which does not exist.
+
+@node Numeric Array Subscripts, Multi-dimensional, Delete, Arrays
+@section Using Numbers to Subscript Arrays
+
+An important aspect of arrays to remember is that array subscripts
+are @emph{always} strings. If you use a numeric value as a subscript,
+it will be converted to a string value before it is used for subscripting
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+@cindex conversions, during subscripting
+@cindex numbers, used as subscripts
+@vindex CONVFMT
+This means that the value of @code{CONVFMT} can potentially
+affect how your program accesses elements of an array. For example:
+
+@example
+a = b = 12.153
+data[a] = 1
+CONVFMT = "%2.2f"
+if (b in data)
+ printf "%s is in data", b
+else
+ printf "%s is not in data", b
+@end example
+
+@noindent
+should print @samp{12.15 is not in data}. The first statement gives
+both @code{a} and @code{b} the same numeric value. Assigning to
+@code{data[a]} first gives @code{a} the string value @code{"12.153"}
+(using the default conversion value of @code{CONVFMT}, @code{"%.6g"}),
+and then assigns 1 to @code{data["12.153"]}. The program then changes
+the value of @code{CONVFMT}. The test @samp{(b in data)} forces @code{b}
+to be converted to a string, this time @code{"12.15"}, since the value of
+@code{CONVFMT} only allows two digits after the decimal point. This test fails,
+since @code{"12.15"} is a different string from @code{"12.153"}.@refill
+
+According to the rules for conversions
+(@pxref{Conversion, ,Conversion of Strings and Numbers}), integer
+values are always converted to strings as integers, no matter what the
+value of @code{CONVFMT} may happen to be. So the usual case of@refill
+
+@example
+for (i = 1; i <= maxsub; i++)
+ @i{do something with} array[i]
+@end example
+
+@noindent
+will work, no matter what the value of @code{CONVFMT}.
+
+Like many things in @code{awk}, the majority of the time things work
+as you would expect them to work. But it is useful to have a precise
+knowledge of the actual rules, since sometimes they can have a subtle
+effect on your programs.
+
+@node Multi-dimensional, Multi-scanning, Numeric Array Subscripts, Arrays
+@section Multi-dimensional Arrays
+
+@c the following index entry is an overfull hbox. --mew 30jan1992
+@cindex subscripts in arrays
+@cindex arrays, multi-dimensional subscripts
+@cindex multi-dimensional subscripts
+A multi-dimensional array is an array in which an element is identified
+by a sequence of indices, not a single index. For example, a
+two-dimensional array requires two indices. The usual way (in most
+languages, including @code{awk}) to refer to an element of a
+two-dimensional array named @code{grid} is with
+@code{grid[@var{x},@var{y}]}.
+
+@vindex SUBSEP
+Multi-dimensional arrays are supported in @code{awk} through
+concatenation of indices into one string. What happens is that
+@code{awk} converts the indices into strings
+(@pxref{Conversion, ,Conversion of Strings and Numbers}) and
+concatenates them together, with a separator between them. This creates
+a single string that describes the values of the separate indices. The
+combined string is used as a single index into an ordinary,
+one-dimensional array. The separator used is the value of the built-in
+variable @code{SUBSEP}.@refill
+
+For example, suppose we evaluate the expression @code{foo[5,12]="value"}
+when the value of @code{SUBSEP} is @code{"@@"}. The numbers 5 and 12 are
+converted to strings and
+concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus,
+the array element @code{foo["5@@12"]} is set to @code{"value"}.@refill
+
+Once the element's value is stored, @code{awk} has no record of whether
+it was stored with a single index or a sequence of indices. The two
+expressions @code{foo[5,12]} and @w{@code{foo[5 SUBSEP 12]}} always have
+the same value.
+
+The default value of @code{SUBSEP} is the string @code{"\034"},
+which contains a nonprinting character that is unlikely to appear in an
+@code{awk} program or in the input data.
+
+The usefulness of choosing an unlikely character comes from the fact
+that index values that contain a string matching @code{SUBSEP} lead to
+combined strings that are ambiguous. Suppose that @code{SUBSEP} were
+@code{"@@"}; then @w{@code{foo["a@@b", "c"]}} and @w{@code{foo["a",
+"b@@c"]}} would be indistinguishable because both would actually be
+stored as @code{foo["a@@b@@c"]}. Because @code{SUBSEP} is
+@code{"\034"}, such confusion can arise only when an index
+contains the character with ASCII code 034, which is a rare
+event.@refill
+
+You can test whether a particular index-sequence exists in a
+``multi-dimensional'' array with the same operator @code{in} used for
+single-dimensional arrays. Instead of a single index as the left-hand operand,
+write the whole sequence of indices, separated by commas, in
+parentheses:@refill
+
+@example
+(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
+@end example
+
+The following example treats its input as a two-dimensional array of
+fields; it rotates this array 90 degrees clockwise and prints the
+result. It assumes that all lines have the same number of
+elements.
+
+@example
+awk '@{
+ if (max_nf < NF)
+ max_nf = NF
+ max_nr = NR
+ for (x = 1; x <= NF; x++)
+ vector[x, NR] = $x
+@}
+
+END @{
+ for (x = 1; x <= max_nf; x++) @{
+ for (y = max_nr; y >= 1; --y)
+ printf("%s ", vector[x, y])
+ printf("\n")
+ @}
+@}'
+@end example
+
+@noindent
+When given the input:
+
+@example
+@group
+1 2 3 4 5 6
+2 3 4 5 6 1
+3 4 5 6 1 2
+4 5 6 1 2 3
+@end group
+@end example
+
+@noindent
+it produces:
+
+@example
+@group
+4 3 2 1
+5 4 3 2
+6 5 4 3
+1 6 5 4
+2 1 6 5
+3 2 1 6
+@end group
+@end example
+
+@node Multi-scanning, , Multi-dimensional, Arrays
+@section Scanning Multi-dimensional Arrays
+
+There is no special @code{for} statement for scanning a
+``multi-dimensional'' array; there cannot be one, because in truth there
+are no multi-dimensional arrays or elements; there is only a
+multi-dimensional @emph{way of accessing} an array.
+
+However, if your program has an array that is always accessed as
+multi-dimensional, you can get the effect of scanning it by combining
+the scanning @code{for} statement
+(@pxref{Scanning an Array, ,Scanning all Elements of an Array}) with the
+@code{split} built-in function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+It works like this:@refill
+
+@example
+for (combined in @var{array}) @{
+ split(combined, separate, SUBSEP)
+ @dots{}
+@}
+@end example
+
+@noindent
+This finds each concatenated, combined index in the array, and splits it
+into the individual indices by breaking it apart where the value of
+@code{SUBSEP} appears. The split-out indices become the elements of
+the array @code{separate}.
+
+Thus, suppose you have previously stored a value in @code{@var{array}[1,
+"foo"]}; then an element with index @code{"1\034foo"} exists in
+@var{array}. (Recall that the default value of @code{SUBSEP} contains
+the character with code 034.) Sooner or later the @code{for} statement
+will find that index and do an iteration with @code{combined} set to
+@code{"1\034foo"}. Then the @code{split} function is called as
+follows:
+
+@example
+split("1\034foo", separate, "\034")
+@end example
+
+@noindent
+The result of this is to set @code{separate[1]} to 1 and @code{separate[2]}
+to @code{"foo"}. Presto, the original sequence of separate indices has
+been recovered.
+
+@node Built-in, User-defined, Arrays, Top
+@chapter Built-in Functions
+
+@cindex built-in functions
+@dfn{Built-in} functions are functions that are always available for
+your @code{awk} program to call. This chapter defines all the built-in
+functions in @code{awk}; some of them are mentioned in other sections,
+but they are summarized here for your convenience. (You can also define
+new functions yourself. @xref{User-defined, ,User-defined Functions}.)
+
+@menu
+* Calling Built-in:: How to call built-in functions.
+* Numeric Functions:: Functions that work with numbers,
+ including @code{int}, @code{sin} and @code{rand}.
+* String Functions:: Functions for string manipulation,
+ such as @code{split}, @code{match}, and @code{sprintf}.
+* I/O Functions:: Functions for files and shell commands.
+* Time Functions:: Functions for dealing with time stamps.
+@end menu
+
+@node Calling Built-in, Numeric Functions, Built-in, Built-in
+@section Calling Built-in Functions
+
+To call a built-in function, write the name of the function followed
+by arguments in parentheses. For example, @code{atan2(y + z, 1)}
+is a call to the function @code{atan2}, with two arguments.
+
+Whitespace is ignored between the built-in function name and the
+open-parenthesis, but we recommend that you avoid using whitespace
+there. User-defined functions do not permit whitespace in this way, and
+you will find it easier to avoid mistakes by following a simple
+convention which always works: no whitespace after a function name.
+
+Each built-in function accepts a certain number of arguments. In most
+cases, any extra arguments given to built-in functions are ignored. The
+defaults for omitted arguments vary from function to function and are
+described under the individual functions.
+
+When a function is called, expressions that create the function's actual
+parameters are evaluated completely before the function call is performed.
+For example, in the code fragment:
+
+@example
+i = 4
+j = sqrt(i++)
+@end example
+
+@noindent
+the variable @code{i} is set to 5 before @code{sqrt} is called
+with a value of 4 for its actual parameter.
+
+@node Numeric Functions, String Functions, Calling Built-in, Built-in
+@section Numeric Built-in Functions
+@c I didn't make all the examples small because a couple of them were
+@c short already. --mew 29jan1992
+
+Here is a full list of built-in functions that work with numbers:
+
+@table @code
+@item int(@var{x})
+This gives you the integer part of @var{x}, truncated toward 0. This
+produces the nearest integer to @var{x}, located between @var{x} and 0.
+
+For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
+is @minus{}3, and @code{int(-3)} is @minus{}3 as well.@refill
+
+@item sqrt(@var{x})
+This gives you the positive square root of @var{x}. It reports an error
+if @var{x} is negative. Thus, @code{sqrt(4)} is 2.@refill
+
+@item exp(@var{x})
+This gives you the exponential of @var{x}, or reports an error if
+@var{x} is out of range. The range of values @var{x} can have depends
+on your machine's floating point representation.@refill
+
+@item log(@var{x})
+This gives you the natural logarithm of @var{x}, if @var{x} is positive;
+otherwise, it reports an error.@refill
+
+@item sin(@var{x})
+This gives you the sine of @var{x}, with @var{x} in radians.
+
+@item cos(@var{x})
+This gives you the cosine of @var{x}, with @var{x} in radians.
+
+@item atan2(@var{y}, @var{x})
+This gives you the arctangent of @code{@var{y} / @var{x}} in radians.
+
+@item rand()
+This gives you a random number. The values of @code{rand} are
+uniformly-distributed between 0 and 1. The value is never 0 and never
+1.
+
+Often you want random integers instead. Here is a user-defined function
+you can use to obtain a random nonnegative integer less than @var{n}:
+
+@example
+function randint(n) @{
+ return int(n * rand())
+@}
+@end example
+
+@noindent
+The multiplication produces a random real number greater than 0 and less
+than @var{n}. We then make it an integer (using @code{int}) between 0
+and @code{@var{n} @minus{} 1}.
+
+Here is an example where a similar function is used to produce
+random integers between 1 and @var{n}. Note that this program will
+print a new random number for each input record.
+
+@smallexample
+awk '
+# Function to roll a simulated die.
+function roll(n) @{ return 1 + int(rand() * n) @}
+
+# Roll 3 six-sided dice and print total number of points.
+@{
+ printf("%d points\n", roll(6)+roll(6)+roll(6))
+@}'
+@end smallexample
+
+@strong{Note:} @code{rand} starts generating numbers from the same
+point, or @dfn{seed}, each time you run @code{awk}. This means that
+a program will produce the same results each time you run it.
+The numbers are random within one @code{awk} run, but predictable
+from run to run. This is convenient for debugging, but if you want
+a program to do different things each time it is used, you must change
+the seed to a value that will be different in each run. To do this,
+use @code{srand}.
+
+@item srand(@var{x})
+The function @code{srand} sets the starting point, or @dfn{seed},
+for generating random numbers to the value @var{x}.
+
+Each seed value leads to a particular sequence of ``random'' numbers.
+Thus, if you set the seed to the same value a second time, you will get
+the same sequence of ``random'' numbers again.
+
+If you omit the argument @var{x}, as in @code{srand()}, then the current
+date and time of day are used for a seed. This is the way to get random
+numbers that are truly unpredictable.
+
+The return value of @code{srand} is the previous seed. This makes it
+easy to keep track of the seeds for use in consistently reproducing
+sequences of random numbers.
+@end table
+
+@node String Functions, I/O Functions, Numeric Functions, Built-in
+@section Built-in Functions for String Manipulation
+
+The functions in this section look at or change the text of one or more
+strings.
+
+@table @code
+@item index(@var{in}, @var{find})
+@findex index
+This searches the string @var{in} for the first occurrence of the string
+@var{find}, and returns the position in characters where that occurrence
+begins in the string @var{in}. For example:@refill
+
+@smallexample
+awk 'BEGIN @{ print index("peanut", "an") @}'
+@end smallexample
+
+@noindent
+prints @samp{3}. If @var{find} is not found, @code{index} returns 0.
+(Remember that string indices in @code{awk} start at 1.)
+
+@item length(@var{string})
+@findex length
+This gives you the number of characters in @var{string}. If
+@var{string} is a number, the length of the digit string representing
+that number is returned. For example, @code{length("abcde")} is 5. By
+contrast, @code{length(15 * 35)} works out to 3. How? Well, 15 * 35 =
+525, and 525 is then converted to the string @samp{"525"}, which has
+three characters.
+
+If no argument is supplied, @code{length} returns the length of @code{$0}.
+
+In older versions of @code{awk}, you could call the @code{length} function
+without any parentheses. Doing so is marked as ``deprecated'' in the
+@sc{posix} standard. This means that while you can do this in your
+programs, it is a feature that can eventually be removed from a future
+version of the standard. Therefore, for maximal portability of your
+@code{awk} programs you should always supply the parentheses.
+
+@item match(@var{string}, @var{regexp})
+@findex match
+The @code{match} function searches the string, @var{string}, for the
+longest, leftmost substring matched by the regular expression,
+@var{regexp}. It returns the character position, or @dfn{index}, of
+where that substring begins (1, if it starts at the beginning of
+@var{string}). If no match is found, it returns 0.
+
+@vindex RSTART
+@vindex RLENGTH
+The @code{match} function sets the built-in variable @code{RSTART} to
+the index. It also sets the built-in variable @code{RLENGTH} to the
+length in characters of the matched substring. If no match is found,
+@code{RSTART} is set to 0, and @code{RLENGTH} to @minus{}1.
+
+For example:
+
+@smallexample
+awk '@{
+ if ($1 == "FIND")
+ regex = $2
+ else @{
+ where = match($0, regex)
+ if (where)
+ print "Match of", regex, "found at", where, "in", $0
+ @}
+@}'
+@end smallexample
+
+@noindent
+This program looks for lines that match the regular expression stored in
+the variable @code{regex}. This regular expression can be changed. If the
+first word on a line is @samp{FIND}, @code{regex} is changed to be the
+second word on that line. Therefore, given:
+
+@smallexample
+FIND fo*bar
+My program was a foobar
+But none of it would doobar
+FIND Melvin
+JF+KM
+This line is property of The Reality Engineering Co.
+This file created by Melvin.
+@end smallexample
+
+@noindent
+@code{awk} prints:
+
+@smallexample
+Match of fo*bar found at 18 in My program was a foobar
+Match of Melvin found at 22 in This file created by Melvin.
+@end smallexample
+
+@item split(@var{string}, @var{array}, @var{fieldsep})
+@findex split
+This divides @var{string} into pieces separated by @var{fieldsep},
+and stores the pieces in @var{array}. The first piece is stored in
+@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
+forth. The string value of the third argument, @var{fieldsep}, is
+a regexp describing where to split @var{string} (much as @code{FS} can
+be a regexp describing where to split input records). If
+@var{fieldsep} is omitted, the value of @code{FS} is used.
+@code{split} returns the number of elements created.@refill
+
+The @code{split} function, then, splits strings into pieces in a
+manner similar to the way input lines are split into fields. For example:
+
+@smallexample
+split("auto-da-fe", a, "-")
+@end smallexample
+
+@noindent
+splits the string @samp{auto-da-fe} into three fields using @samp{-} as the
+separator. It sets the contents of the array @code{a} as follows:
+
+@smallexample
+a[1] = "auto"
+a[2] = "da"
+a[3] = "fe"
+@end smallexample
+
+@noindent
+The value returned by this call to @code{split} is 3.
+
+As with input field-splitting, when the value of @var{fieldsep} is
+@code{" "}, leading and trailing whitespace is ignored, and the elements
+are separated by runs of whitespace.
+
+@item sprintf(@var{format}, @var{expression1},@dots{})
+@findex sprintf
+This returns (without printing) the string that @code{printf} would
+have printed out with the same arguments
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+For example:@refill
+
+@smallexample
+sprintf("pi = %.2f (approx.)", 22/7)
+@end smallexample
+
+@noindent
+returns the string @w{@code{"pi = 3.14 (approx.)"}}.
+
+@item sub(@var{regexp}, @var{replacement}, @var{target})
+@findex sub
+The @code{sub} function alters the value of @var{target}.
+It searches this value, which should be a string, for the
+leftmost substring matched by the regular expression, @var{regexp},
+extending this match as far as possible. Then the entire string is
+changed by replacing the matched text with @var{replacement}.
+The modified string becomes the new value of @var{target}.
+
+This function is peculiar because @var{target} is not simply
+used to compute a value, and not just any expression will do: it
+must be a variable, field or array reference, so that @code{sub} can
+store a modified value there. If this argument is omitted, then the
+default is to use and alter @code{$0}.
+
+For example:@refill
+
+@smallexample
+str = "water, water, everywhere"
+sub(/at/, "ith", str)
+@end smallexample
+
+@noindent
+sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the
+leftmost, longest occurrence of @samp{at} with @samp{ith}.
+
+The @code{sub} function returns the number of substitutions made (either
+one or zero).
+
+If the special character @samp{&} appears in @var{replacement}, it
+stands for the precise substring that was matched by @var{regexp}. (If
+the regexp can match more than one string, then this precise substring
+may vary.) For example:@refill
+
+@smallexample
+awk '@{ sub(/candidate/, "& and his wife"); print @}'
+@end smallexample
+
+@noindent
+changes the first occurrence of @samp{candidate} to @samp{candidate
+and his wife} on each input line.
+
+Here is another example:
+
+@smallexample
+awk 'BEGIN @{
+ str = "daabaaa"
+ sub(/a+/, "c&c", str)
+ print str
+@}'
+@end smallexample
+
+@noindent
+prints @samp{dcaacbaaa}. This shows how @samp{&} can represent a non-constant
+string, and also illustrates the ``leftmost, longest'' rule.
+
+The effect of this special character (@samp{&}) can be turned off by putting a
+backslash before it in the string. As usual, to insert one backslash in
+the string, you must write two backslashes. Therefore, write @samp{\\&}
+in a string constant to include a literal @samp{&} in the replacement.
+For example, here is how to replace the first @samp{|} on each line with
+an @samp{&}:@refill
+
+@smallexample
+awk '@{ sub(/\|/, "\\&"); print @}'
+@end smallexample
+
+@strong{Note:} as mentioned above, the third argument to @code{sub} must
+be an lvalue. Some versions of @code{awk} allow the third argument to
+be an expression which is not an lvalue. In such a case, @code{sub}
+would still search for the pattern and return 0 or 1, but the result of
+the substitution (if any) would be thrown away because there is no place
+to put it. Such versions of @code{awk} accept expressions like
+this:@refill
+
+@smallexample
+sub(/USA/, "United States", "the USA and Canada")
+@end smallexample
+
+@noindent
+But that is considered erroneous in @code{gawk}.
+
+@item gsub(@var{regexp}, @var{replacement}, @var{target})
+@findex gsub
+This is similar to the @code{sub} function, except @code{gsub} replaces
+@emph{all} of the longest, leftmost, @emph{nonoverlapping} matching
+substrings it can find. The @samp{g} in @code{gsub} stands for
+``global,'' which means replace everywhere. For example:@refill
+
+@smallexample
+awk '@{ gsub(/Britain/, "United Kingdom"); print @}'
+@end smallexample
+
+@noindent
+replaces all occurrences of the string @samp{Britain} with @samp{United
+Kingdom} for all input records.@refill
+
+The @code{gsub} function returns the number of substitutions made. If
+the variable to be searched and altered, @var{target}, is
+omitted, then the entire input record, @code{$0}, is used.@refill
+
+As in @code{sub}, the characters @samp{&} and @samp{\} are special, and
+the third argument must be an lvalue.
+
+@item substr(@var{string}, @var{start}, @var{length})
+@findex substr
+This returns a @var{length}-character-long substring of @var{string},
+starting at character number @var{start}. The first character of a
+string is character number one. For example,
+@code{substr("washington", 5, 3)} returns @code{"ing"}.@refill
+
+If @var{length} is not present, this function returns the whole suffix of
+@var{string} that begins at character number @var{start}. For example,
+@code{substr("washington", 5)} returns @code{"ington"}. This is also
+the case if @var{length} is greater than the number of characters remaining
+in the string, counting from character number @var{start}.
+
+@item tolower(@var{string})
+@findex tolower
+This returns a copy of @var{string}, with each upper-case character
+in the string replaced with its corresponding lower-case character.
+Nonalphabetic characters are left unchanged. For example,
+@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
+
+@item toupper(@var{string})
+@findex toupper
+This returns a copy of @var{string}, with each lower-case character
+in the string replaced with its corresponding upper-case character.
+Nonalphabetic characters are left unchanged. For example,
+@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
+@end table
+
+@node I/O Functions, Time Functions, String Functions, Built-in
+@section Built-in Functions for Input/Output
+
+@table @code
+@item close(@var{filename})
+Close the file @var{filename}, for input or output. The argument may
+alternatively be a shell command that was used for redirecting to or
+from a pipe; then the pipe is closed.
+
+@xref{Close Input, ,Closing Input Files and Pipes}, regarding closing
+input files and pipes. @xref{Close Output, ,Closing Output Files and Pipes},
+regarding closing output files and pipes.@refill
+
+@item system(@var{command})
+@findex system
+@c the following index entry is an overfull hbox. --mew 30jan1992
+@cindex interaction, @code{awk} and other programs
+The @code{system} function allows the user to execute operating system commands
+and then return to the @code{awk} program. The @code{system} function
+executes the command given by the string @var{command}. It returns, as
+its value, the status returned by the command that was executed.
+
+For example, if the following fragment of code is put in your @code{awk}
+program:
+
+@smallexample
+END @{
+ system("mail -s 'awk run done' operator < /dev/null")
+@}
+@end smallexample
+
+@noindent
+the system operator will be sent mail when the @code{awk} program
+finishes processing input and begins its end-of-input processing.
+
+Note that much the same result can be obtained by redirecting
+@code{print} or @code{printf} into a pipe. However, if your @code{awk}
+program is interactive, @code{system} is useful for cranking up large
+self-contained programs, such as a shell or an editor.@refill
+
+Some operating systems cannot implement the @code{system} function.
+@code{system} causes a fatal error if it is not supported.
+@end table
+
+@c fakenode --- for prepinfo
+@subheading Controlling Output Buffering with @code{system}
+@cindex flushing buffers
+@cindex buffers, flushing
+@cindex buffering output
+@cindex output, buffering
+
+Many utility programs will @dfn{buffer} their output; they save information
+to be written to a disk file or terminal in memory, until there is enough
+to be written in one operation. This is often more efficient than writing
+every little bit of information as soon as it is ready. However, sometimes
+it is necessary to force a program to @dfn{flush} its buffers; that is,
+write the information to its destination, even if a buffer is not full.
+You can do this from your @code{awk} program by calling @code{system}
+with a null string as its argument:
+
+@example
+system("") # flush output
+@end example
+
+@noindent
+@code{gawk} treats this use of the @code{system} function as a special
+case, and is smart enough not to run a shell (or other command
+interpreter) with the empty command. Therefore, with @code{gawk}, this
+idiom is not only useful, it is efficient. While this idiom should work
+with other @code{awk} implementations, it will not necessarily avoid
+starting an unnecessary shell.
+@ignore
+Need a better explanation, perhaps in a separate paragraph. Explain that
+for
+
+awk 'BEGIN { print "hi"
+ system("echo hello")
+ print "howdy" }'
+
+that the output had better be
+
+ hi
+ hello
+ howdy
+
+and not
+
+ hello
+ hi
+ howdy
+
+which it would be if awk did not flush its buffers before calling system.
+@end ignore
+
+@node Time Functions, , I/O Functions, Built-in
+@section Functions for Dealing with Time Stamps
+
+@cindex time stamps
+@cindex time of day
+A common use for @code{awk} programs is the processing of log files.
+Log files often contain time stamp information, indicating when a
+particular log record was written. Many programs log their time stamp
+in the form returned by the @code{time} system call, which is the
+number of seconds since a particular epoch. On @sc{posix} systems,
+it is the number of seconds since Midnight, January 1, 1970, @sc{utc}.
+
+In order to make it easier to process such log files, and to easily produce
+useful reports, @code{gawk} provides two functions for working with time
+stamps. Both of these are @code{gawk} extensions; they are not specified
+in the @sc{posix} standard, nor are they in any other known version
+of @code{awk}.
+
+@table @code
+@item systime()
+@findex systime
+This function returns the current time as the number of seconds since
+the system epoch. On @sc{posix} systems, this is the number of seconds
+since Midnight, January 1, 1970, @sc{utc}. It may be a different number on
+other systems.
+
+@item strftime(@var{format}, @var{timestamp})
+@findex strftime
+This function returns a string. It is similar to the function of the
+same name in the @sc{ansi} C standard library. The time specified by
+@var{timestamp} is used to produce a string, based on the contents
+of the @var{format} string.
+@end table
+
+The @code{systime} function allows you to compare a time stamp from a
+log file with the current time of day. In particular, it is easy to
+determine how long ago a particular record was logged. It also allows
+you to produce log records using the ``seconds since the epoch'' format.
+
+The @code{strftime} function allows you to easily turn a time stamp
+into human-readable information. It is similar in nature to the @code{sprintf}
+function, copying non-format specification characters verbatim to the
+returned string, and substituting date and time values for format
+specifications in the @var{format} string. If no @var{timestamp} argument
+is supplied, @code{gawk} will use the current time of day as the
+time stamp.@refill
+
+@code{strftime} is guaranteed by the @sc{ansi} C standard to support
+the following date format specifications:
+
+@table @code
+@item %a
+The locale's abbreviated weekday name.
+
+@item %A
+The locale's full weekday name.
+
+@item %b
+The locale's abbreviated month name.
+
+@item %B
+The locale's full month name.
+
+@item %c
+The locale's ``appropriate'' date and time representation.
+
+@item %d
+The day of the month as a decimal number (01--31).
+
+@item %H
+The hour (24-hour clock) as a decimal number (00--23).
+
+@item %I
+The hour (12-hour clock) as a decimal number (01--12).
+
+@item %j
+The day of the year as a decimal number (001--366).
+
+@item %m
+The month as a decimal number (01--12).
+
+@item %M
+The minute as a decimal number (00--59).
+
+@item %p
+The locale's equivalent of the AM/PM designations associated
+with a 12-hour clock.
+
+@item %S
+The second as a decimal number (00--61). (Occasionally there are
+minutes in a year with one or two leap seconds, which is why the
+seconds can go from 0 all the way to 61.)
+
+@item %U
+The week number of the year (the first Sunday as the first day of week 1)
+as a decimal number (00--53).
+
+@item %w
+The weekday as a decimal number (0--6). Sunday is day 0.
+
+@item %W
+The week number of the year (the first Monday as the first day of week 1)
+as a decimal number (00--53).
+
+@item %x
+The locale's ``appropriate'' date representation.
+
+@item %X
+The locale's ``appropriate'' time representation.
+
+@item %y
+The year without century as a decimal number (00--99).
+
+@item %Y
+The year with century as a decimal number.
+
+@item %Z
+The time zone name or abbreviation, or no characters if
+no time zone is determinable.
+
+@item %%
+A literal @samp{%}.
+@end table
+
+@c The parenthetical remark here should really be a footnote, but
+@c it gave formatting problems at the FSF. So for now put it in
+@c parentheses.
+If a conversion specifier is not one of the above, the behavior is
+undefined. (This is because the @sc{ansi} standard for C leaves the
+behavior of the C version of @code{strftime} undefined, and @code{gawk}
+will use the system's version of @code{strftime} if it's there.
+Typically, the conversion specifier will either not appear in the
+returned string, or it will appear literally.)
+
+Informally, a @dfn{locale} is the geographic place in which a program
+is meant to run. For example, a common way to abbreviate the date
+September 4, 1991 in the United States would be ``9/4/91''.
+In many countries in Europe, however, it would be abbreviated ``4.9.91''.
+Thus, the @samp{%x} specification in a @code{"US"} locale might produce
+@samp{9/4/91}, while in a @code{"EUROPE"} locale, it might produce
+@samp{4.9.91}. The @sc{ansi} C standard defines a default @code{"C"}
+locale, which is an environment that is typical of what most C programmers
+are used to.
+
+A public-domain C version of @code{strftime} is shipped with @code{gawk}
+for systems that are not yet fully @sc{ansi}-compliant. If that version is
+used to compile @code{gawk} (@pxref{Installation, ,Installing @code{gawk}}),
+then the following additional format specifications are available:@refill
+
+@table @code
+@item %D
+Equivalent to specifying @samp{%m/%d/%y}.
+
+@item %e
+The day of the month, padded with a blank if it is only one digit.
+
+@item %h
+Equivalent to @samp{%b}, above.
+
+@item %n
+A newline character (ASCII LF).
+
+@item %r
+Equivalent to specifying @samp{%I:%M:%S %p}.
+
+@item %R
+Equivalent to specifying @samp{%H:%M}.
+
+@item %T
+Equivalent to specifying @samp{%H:%M:%S}.
+
+@item %t
+A TAB character.
+
+@item %k
+The hour (24-hour clock) as a decimal number (0--23).
+Single digit numbers are padded with a blank.
+
+@item %l
+The hour (12-hour clock) as a decimal number (1--12).
+Single digit numbers are padded with a blank.
+
+@item %C
+The century, as a number between 00 and 99.
+
+@item %u
+The weekday as a decimal number
+[1 (Monday)--7].
+
+@item %V
+The week number of the year (the first Monday as the first
+day of week 1) as a decimal number (01--53).
+The method for determining the week number is as specified by ISO 8601
+(to wit: if the week containing January 1 has four or more days in the
+new year, then it is week 1, otherwise it is week 53 of the previous year
+and the next week is week 1).@refill
+
+@item %Ec %EC %Ex %Ey %EY %Od %Oe %OH %OI
+@itemx %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
+These are ``alternate representations'' for the specifications
+that use only the second letter (@samp{%c}, @samp{%C}, and so on).
+They are recognized, but their normal representations are used.
+(These facilitate compliance with the @sc{posix} @code{date}
+utility.)@refill
+
+@item %v
+The date in VMS format (e.g. 20-JUN-1991).
+@end table
+
+Here are two examples that use @code{strftime}. The first is an
+@code{awk} version of the C @code{ctime} function. (This is a
+user-defined function, which we have not discussed yet.
+@xref{User-defined, ,User-defined Functions}, for more information.)
+
+@smallexample
+# ctime.awk
+#
+# awk version of C ctime(3) function
+
+function ctime(ts, format)
+@{
+ format = "%a %b %e %H:%M:%S %Z %Y"
+ if (ts == 0)
+ ts = systime() # use current time as default
+ return strftime(format, ts)
+@}
+@end smallexample
+
+This next example is an @code{awk} implementation of the @sc{posix}
+@code{date} utility. Normally, the @code{date} utility prints the
+current date and time of day in a well known format. However, if you
+provide an argument to it that begins with a @samp{+}, @code{date}
+will copy non-format specifier characters to the standard output, and
+will interpret the current time according to the format specifiers in
+the string. For example:
+
+@smallexample
+date '+Today is %A, %B %d, %Y.'
+@end smallexample
+
+@noindent
+might print
+
+@smallexample
+Today is Thursday, July 11, 1991.
+@end smallexample
+
+Here is the @code{awk} version of the @code{date} utility.
+
+@smallexample
+#! /usr/bin/gawk -f
+#
+# date --- implement the P1003.2 Draft 11 'date' command
+#
+# Bug: does not recognize the -u argument.
+
+BEGIN \
+@{
+ format = "%a %b %e %H:%M:%S %Z %Y"
+ exitval = 0
+
+ if (ARGC > 2)
+ exitval = 1
+ else if (ARGC == 2) @{
+ format = ARGV[1]
+ if (format ~ /^\+/)
+ format = substr(format, 2) # remove leading +
+ @}
+ print strftime(format)
+ exit exitval
+@}
+@end smallexample
+
+@node User-defined, Built-in Variables, Built-in, Top
+@chapter User-defined Functions
+
+@cindex user-defined functions
+@cindex functions, user-defined
+Complicated @code{awk} programs can often be simplified by defining
+your own functions. User-defined functions can be called just like
+built-in ones (@pxref{Function Calls}), but it is up to you to define
+them---to tell @code{awk} what they should do.
+
+@menu
+* Definition Syntax:: How to write definitions and what they mean.
+* Function Example:: An example function definition and
+ what it does.
+* Function Caveats:: Things to watch out for.
+* Return Statement:: Specifying the value a function returns.
+@end menu
+
+@node Definition Syntax, Function Example, User-defined, User-defined
+@section Syntax of Function Definitions
+@cindex defining functions
+@cindex function definition
+
+Definitions of functions can appear anywhere between the rules of the
+@code{awk} program. Thus, the general form of an @code{awk} program is
+extended to include sequences of rules @emph{and} user-defined function
+definitions.
+
+The definition of a function named @var{name} looks like this:
+
+@example
+function @var{name} (@var{parameter-list}) @{
+ @var{body-of-function}
+@}
+@end example
+
+@noindent
+@var{name} is the name of the function to be defined. A valid function
+name is like a valid variable name: a sequence of letters, digits and
+underscores, not starting with a digit. Functions share the same pool
+of names as variables and arrays.
+
+@var{parameter-list} is a list of the function's arguments and local
+variable names, separated by commas. When the function is called,
+the argument names are used to hold the argument values given in
+the call. The local variables are initialized to the null string.
+
+The @var{body-of-function} consists of @code{awk} statements. It is the
+most important part of the definition, because it says what the function
+should actually @emph{do}. The argument names exist to give the body a
+way to talk about the arguments; local variables, to give the body
+places to keep temporary values.
+
+Argument names are not distinguished syntactically from local variable
+names; instead, the number of arguments supplied when the function is
+called determines how many argument variables there are. Thus, if three
+argument values are given, the first three names in @var{parameter-list}
+are arguments, and the rest are local variables.
+
+It follows that if the number of arguments is not the same in all calls
+to the function, some of the names in @var{parameter-list} may be
+arguments on some occasions and local variables on others. Another
+way to think of this is that omitted arguments default to the
+null string.
+
+Usually when you write a function you know how many names you intend to
+use for arguments and how many you intend to use as locals. By
+convention, you should write an extra space between the arguments and
+the locals, so other people can follow how your function is
+supposed to be used.
+
+During execution of the function body, the arguments and local variable
+values hide or @dfn{shadow} any variables of the same names used in the
+rest of the program. The shadowed variables are not accessible in the
+function definition, because there is no way to name them while their
+names have been taken away for the local variables. All other variables
+used in the @code{awk} program can be referenced or set normally in the
+function definition.
+
+The arguments and local variables last only as long as the function body
+is executing. Once the body finishes, the shadowed variables come back.
+
+The function body can contain expressions which call functions. They
+can even call this function, either directly or by way of another
+function. When this happens, we say the function is @dfn{recursive}.
+
+There is no need in @code{awk} to put the definition of a function
+before all uses of the function. This is because @code{awk} reads the
+entire program before starting to execute any of it.
+
+In many @code{awk} implementations, the keyword @code{function} may be
+abbreviated @code{func}. However, @sc{posix} only specifies the use of
+the keyword @code{function}. This actually has some practical implications.
+If @code{gawk} is in @sc{posix}-compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then the following
+statement will @emph{not} define a function:@refill
+
+@example
+func foo() @{ a = sqrt($1) ; print a @}
+@end example
+
+@noindent
+Instead it defines a rule that, for each record, concatenates the value
+of the variable @samp{func} with the return value of the function @samp{foo},
+and based on the truth value of the result, executes the corresponding action.
+This is probably not what was desired. (@code{awk} accepts this input as
+syntactically valid, since functions may be used before they are defined
+in @code{awk} programs.)
+
+@node Function Example, Function Caveats, Definition Syntax, User-defined
+@section Function Definition Example
+
+Here is an example of a user-defined function, called @code{myprint}, that
+takes a number and prints it in a specific format.
+
+@example
+function myprint(num)
+@{
+ printf "%6.3g\n", num
+@}
+@end example
+
+@noindent
+To illustrate, here is an @code{awk} rule which uses our @code{myprint}
+function:
+
+@example
+$3 > 0 @{ myprint($3) @}
+@end example
+
+@noindent
+This program prints, in our special format, all the third fields that
+contain a positive number in our input. Therefore, when given:
+
+@example
+ 1.2 3.4 5.6 7.8
+ 9.10 11.12 -13.14 15.16
+17.18 19.20 21.22 23.24
+@end example
+
+@noindent
+this program, using our function to format the results, prints:
+
+@example
+ 5.6
+ 21.2
+@end example
+
+Here is a rather contrived example of a recursive function. It prints a
+string backwards:
+
+@example
+function rev (str, len) @{
+ if (len == 0) @{
+ printf "\n"
+ return
+ @}
+ printf "%c", substr(str, len, 1)
+ rev(str, len - 1)
+@}
+@end example
+
+@node Function Caveats, Return Statement, Function Example, User-defined
+@section Calling User-defined Functions
+
+@dfn{Calling a function} means causing the function to run and do its job.
+A function call is an expression, and its value is the value returned by
+the function.
+
+A function call consists of the function name followed by the arguments
+in parentheses. What you write in the call for the arguments are
+@code{awk} expressions; each time the call is executed, these
+expressions are evaluated, and the values are the actual arguments. For
+example, here is a call to @code{foo} with three arguments (the first
+being a string concatenation):
+
+@example
+foo(x y, "lose", 4 * z)
+@end example
+
+@quotation
+@strong{Caution:} whitespace characters (spaces and tabs) are not allowed
+between the function name and the open-parenthesis of the argument list.
+If you write whitespace by mistake, @code{awk} might think that you mean
+to concatenate a variable with an expression in parentheses. However, it
+notices that you used a function name and not a variable name, and reports
+an error.
+@end quotation
+
+@cindex call by value
+When a function is called, it is given a @emph{copy} of the values of
+its arguments. This is called @dfn{call by value}. The caller may use
+a variable as the expression for the argument, but the called function
+does not know this: it only knows what value the argument had. For
+example, if you write this code:
+
+@example
+foo = "bar"
+z = myfunc(foo)
+@end example
+
+@noindent
+then you should not think of the argument to @code{myfunc} as being
+``the variable @code{foo}.'' Instead, think of the argument as the
+string value, @code{"bar"}.
+
+If the function @code{myfunc} alters the values of its local variables,
+this has no effect on any other variables. In particular, if @code{myfunc}
+does this:
+
+@example
+function myfunc (win) @{
+ print win
+ win = "zzz"
+ print win
+@}
+@end example
+
+@noindent
+to change its first argument variable @code{win}, this @emph{does not}
+change the value of @code{foo} in the caller. The role of @code{foo} in
+calling @code{myfunc} ended when its value, @code{"bar"}, was computed.
+If @code{win} also exists outside of @code{myfunc}, the function body
+cannot alter this outer value, because it is shadowed during the
+execution of @code{myfunc} and cannot be seen or changed from there.
+
+@cindex call by reference
+However, when arrays are the parameters to functions, they are @emph{not}
+copied. Instead, the array itself is made available for direct manipulation
+by the function. This is usually called @dfn{call by reference}.
+Changes made to an array parameter inside the body of a function @emph{are}
+visible outside that function.
+@ifinfo
+This can be @strong{very} dangerous if you do not watch what you are
+doing. For example:@refill
+@end ifinfo
+@iftex
+@emph{This can be very dangerous if you do not watch what you are
+doing.} For example:@refill
+@end iftex
+
+@example
+function changeit (array, ind, nvalue) @{
+ array[ind] = nvalue
+@}
+
+BEGIN @{
+ a[1] = 1 ; a[2] = 2 ; a[3] = 3
+ changeit(a, 2, "two")
+ printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3]
+ @}
+@end example
+
+@noindent
+prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because calling
+@code{changeit} stores @code{"two"} in the second element of @code{a}.
+
+@node Return Statement, , Function Caveats, User-defined
+@section The @code{return} Statement
+@cindex @code{return} statement
+
+The body of a user-defined function can contain a @code{return} statement.
+This statement returns control to the rest of the @code{awk} program. It
+can also be used to return a value for use in the rest of the @code{awk}
+program. It looks like this:@refill
+
+@example
+return @var{expression}
+@end example
+
+The @var{expression} part is optional. If it is omitted, then the returned
+value is undefined and, therefore, unpredictable.
+
+A @code{return} statement with no value expression is assumed at the end of
+every function definition. So if control reaches the end of the function
+body, then the function returns an unpredictable value. @code{awk}
+will not warn you if you use the return value of such a function; you will
+simply get unpredictable or unexpected results.
+
+Here is an example of a user-defined function that returns a value
+for the largest number among the elements of an array:@refill
+
+@example
+@group
+function maxelt (vec,   i, ret) @{
+ for (i in vec) @{
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ @}
+ return ret
+@}
+@end group
+@end example
+
+@noindent
+You call @code{maxelt} with one argument, which is an array name. The local
+variables @code{i} and @code{ret} are not intended to be arguments;
+while there is nothing to stop you from passing two or three arguments
+to @code{maxelt}, the results would be strange. The extra space before
+@code{i} in the function parameter list is to indicate that @code{i} and
+@code{ret} are not supposed to be arguments. This is a convention which
+you should follow when you define functions.
+
+Here is a program that uses our @code{maxelt} function. It loads an
+array, calls @code{maxelt}, and then reports the maximum number in that
+array:@refill
+
+@example
+@group
+awk '
+function maxelt (vec,   i, ret) @{
+ for (i in vec) @{
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ @}
+ return ret
+@}
+@end group
+
+@group
+# Load all fields of each record into nums.
+@{
+ for(i = 1; i <= NF; i++)
+ nums[NR, i] = $i
+@}
+
+END @{
+ print maxelt(nums)
+@}'
+@end group
+@end example
+
+Given the following input:
+
+@example
+@group
+ 1 5 23 8 16
+44 3 5 2 8 26
+256 291 1396 2962 100
+-6 467 998 1101
+99385 11 0 225
+@end group
+@end example
+
+@noindent
+our program tells us (predictably) that:
+
+@example
+99385
+@end example
+
+@noindent
+is the largest number in our array.
+
+@node Built-in Variables, Command Line, User-defined, Top
+@chapter Built-in Variables
+@cindex built-in variables
+
+Most @code{awk} variables are available for you to use for your own
+purposes; they never change except when your program assigns values to
+them, and never affect anything except when your program examines them.
+
+A few variables have special built-in meanings. Some of them @code{awk}
+examines automatically, so that they enable you to tell @code{awk} how
+to do certain things. Others are set automatically by @code{awk}, so
+that they carry information from the internal workings of @code{awk} to
+your program.
+
+This chapter documents all the built-in variables of @code{gawk}. Most
+of them are also documented in the chapters where their areas of
+activity are described.
+
+@menu
+* User-modified:: Built-in variables that you change
+ to control @code{awk}.
+* Auto-set:: Built-in variables where @code{awk}
+ gives you information.
+@end menu
+
+@node User-modified, Auto-set, Built-in Variables, Built-in Variables
+@section Built-in Variables that Control @code{awk}
+@cindex built-in variables, user modifiable
+
+This is a list of the variables which you can change to control how
+@code{awk} does certain things.
+
+@table @code
+@iftex
+@vindex CONVFMT
+@end iftex
+@item CONVFMT
+This string is used by @code{awk} to control conversion of numbers to
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+It works by being passed, in effect, as the first argument to the
+@code{sprintf} function. Its default value is @code{"%.6g"}.
+@code{CONVFMT} was introduced by the @sc{posix} standard.@refill
+
+@iftex
+@vindex FIELDWIDTHS
+@end iftex
+@item FIELDWIDTHS
+This is a space-separated list of column widths that tells @code{gawk}
+how to manage input with fixed, columnar boundaries. It is an
+experimental feature that is still evolving. Assigning to @code{FIELDWIDTHS}
+overrides the use of @code{FS} for field splitting.
+@xref{Constant Size, ,Reading Fixed-width Data}, for more information.@refill
+
+If @code{gawk} is in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then @code{FIELDWIDTHS}
+has no special meaning, and field splitting operations are done based
+exclusively on the value of @code{FS}.@refill
+
+@iftex
+@vindex FS
+@end iftex
+@item FS
+@code{FS} is the input field separator
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).
+The value is a single-character string or a multi-character regular
+expression that matches the separations between fields in an input
+record.@refill
+
+The default value is @w{@code{" "}}, a string consisting of a single
+space. As a special exception, this value actually means that any
+sequence of spaces and tabs is a single separator. It also causes
+spaces and tabs at the beginning or end of a line to be ignored.
+
+You can set the value of @code{FS} on the command line using the
+@samp{-F} option:
+
+@example
+awk -F, '@var{program}' @var{input-files}
+@end example
+
+If @code{gawk} is using @code{FIELDWIDTHS} for field-splitting,
+assigning a value to @code{FS} will cause @code{gawk} to return to
+the normal, regexp-based, field splitting.
+
+@item IGNORECASE
+@iftex
+@vindex IGNORECASE
+@end iftex
+If @code{IGNORECASE} is nonzero, then @emph{all} regular expression
+matching is done in a case-independent fashion. In particular, regexp
+matching with @samp{~} and @samp{!~}, and the @code{gsub}, @code{index},
+@code{match}, @code{split} and @code{sub} functions all ignore case when
+doing their particular regexp operations. @strong{Note:} since field
+splitting with the value of the @code{FS} variable is also a regular
+expression operation, that too is done with case ignored.
+@xref{Case-sensitivity, ,Case-sensitivity in Matching}.
+
+If @code{gawk} is in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then @code{IGNORECASE} has
+no special meaning, and regexp operations are always case-sensitive.@refill
+
+@item OFMT
+@iftex
+@vindex OFMT
+@end iftex
+This string is used by @code{awk} to control conversion of numbers to
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}) for
+printing with the @code{print} statement.
+It works by being passed, in effect, as the first argument to the
+@code{sprintf} function. Its default value is @code{"%.6g"}.
+Earlier versions of @code{awk} also used @code{OFMT} to specify the
+format for converting numbers to strings in general expressions; this
+has been taken over by @code{CONVFMT}.@refill
+
+@item OFS
+@iftex
+@vindex OFS
+@end iftex
+This is the output field separator (@pxref{Output Separators}). It is
+output between the fields output by a @code{print} statement. Its
+default value is @w{@code{" "}}, a string consisting of a single space.
+
+@item ORS
+@iftex
+@vindex ORS
+@end iftex
+This is the output record separator. It is output at the end of every
+@code{print} statement. Its default value is a string containing a
+single newline character, which could be written as @code{"\n"}.
+(@xref{Output Separators}.)@refill
+
+@item RS
+@iftex
+@vindex RS
+@end iftex
+This is @code{awk}'s input record separator. Its default value is a string
+containing a single newline character, which means that an input record
+consists of a single line of text.
+(@xref{Records, ,How Input is Split into Records}.)@refill
+
+@item SUBSEP
+@iftex
+@vindex SUBSEP
+@end iftex
+@code{SUBSEP} is the subscript separator. It has the default value of
+@code{"\034"}, and is used to separate the parts of the name of a
+multi-dimensional array. Thus, if you access @code{foo[12,3]}, it
+really accesses @code{foo["12\0343"]}
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).@refill
+@end table
+
+@node Auto-set, , User-modified, Built-in Variables
+@section Built-in Variables that Convey Information
+
+This is a list of the variables that are set automatically by @code{awk}
+on certain occasions so as to provide information to your program.
+
+@table @code
+@item ARGC
+@itemx ARGV
+@iftex
+@vindex ARGC
+@vindex ARGV
+@end iftex
+The command-line arguments available to @code{awk} programs are stored in
+an array called @code{ARGV}. @code{ARGC} is the number of command-line
+arguments present. @xref{Command Line, ,Invoking @code{awk}}.
+@code{ARGV} is indexed from zero to @w{@code{ARGC - 1}}. For example:@refill
+
+@example
+awk 'BEGIN @{
+ for (i = 0; i < ARGC; i++)
+ print ARGV[i]
+ @}' inventory-shipped BBS-list
+@end example
+
+@noindent
+In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
+contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains
+@code{"BBS-list"}. The value of @code{ARGC} is 3, one more than the
+index of the last element in @code{ARGV} since the elements are numbered
+from zero.@refill
+
+The names @code{ARGC} and @code{ARGV}, as well as the convention of indexing
+the array from 0 to @w{@code{ARGC - 1}}, are derived from the C language's
+method of accessing command line arguments.@refill
+
+Notice that the @code{awk} program is not entered in @code{ARGV}. The
+other special command line options, with their arguments, are also not
+entered. But variable assignments on the command line @emph{are}
+treated as arguments, and do show up in the @code{ARGV} array.
+
+Your program can alter @code{ARGC} and the elements of @code{ARGV}.
+Each time @code{awk} reaches the end of an input file, it uses the next
+element of @code{ARGV} as the name of the next input file. By storing a
+different string there, your program can change which files are read.
+You can use @code{"-"} to represent the standard input. By storing
+additional elements and incrementing @code{ARGC} you can cause
+additional files to be read.
+
+If you decrease the value of @code{ARGC}, that eliminates input files
+from the end of the list. By recording the old value of @code{ARGC}
+elsewhere, your program can treat the eliminated arguments as
+something other than file names.
+
+To eliminate a file from the middle of the list, store the null string
+(@code{""}) into @code{ARGV} in place of the file's name. As a
+special feature, @code{awk} ignores file names that have been
+replaced with the null string.
+
+@ignore
+see getopt.awk in the examples...
+@end ignore
+
+@item ARGIND
+@vindex ARGIND
+The index in @code{ARGV} of the current file being processed.
+Every time @code{gawk} opens a new data file for processing, it sets
+@code{ARGIND} to the index in @code{ARGV} of the file name. Thus, the
+condition @samp{FILENAME == ARGV[ARGIND]} is always true.
+
+This variable is useful in file processing; it allows you to tell how far
+along you are in the list of data files, and to distinguish between
+multiple successive instances of the same filename on the command line.
+
+While you can change the value of @code{ARGIND} within your @code{awk}
+program, @code{gawk} will automatically set it to a new value when the
+next file is opened.
+
+This variable is a @code{gawk} extension; in other @code{awk} implementations
+it is not special.
+
+@item ENVIRON
+@vindex ENVIRON
+This is an array that contains the values of the environment. The array
+indices are the environment variable names; the values are the values of
+the particular environment variables. For example,
+@code{ENVIRON["HOME"]} might be @file{/u/close}. Changing this array
+does not affect the environment passed on to any programs that
+@code{awk} may spawn via redirection or the @code{system} function.
+(In a future version of @code{gawk}, it may do so.)
+
+Some operating systems may not have environment variables.
+On such systems, the array @code{ENVIRON} is empty.
+
+@item ERRNO
+@iftex
+@vindex ERRNO
+@end iftex
+If a system error occurs during a redirection for @code{getline},
+during a read for @code{getline}, or during a @code{close} operation,
+then @code{ERRNO} will contain a string describing the error.
+
+This variable is a @code{gawk} extension; in other @code{awk} implementations
+it is not special.
+
+@item FILENAME
+@iftex
+@vindex FILENAME
+@end iftex
+This is the name of the file that @code{awk} is currently reading.
+If @code{awk} is reading from the standard input (in other words,
+there are no files listed on the command line),
+@code{FILENAME} is set to @code{"-"}.
+@code{FILENAME} is changed each time a new file is read
+(@pxref{Reading Files, ,Reading Input Files}).@refill
+
+@item FNR
+@iftex
+@vindex FNR
+@end iftex
+@code{FNR} is the current record number in the current file. @code{FNR} is
+incremented each time a new record is read
+(@pxref{Getline, ,Explicit Input with @code{getline}}). It is reinitialized
+to 0 each time a new input file is started.@refill
+
+@item NF
+@iftex
+@vindex NF
+@end iftex
+@code{NF} is the number of fields in the current input record.
+@code{NF} is set each time a new record is read, when a new field is
+created, or when @code{$0} changes (@pxref{Fields, ,Examining Fields}).@refill
+
+@item NR
+@iftex
+@vindex NR
+@end iftex
+This is the number of input records @code{awk} has processed since
+the beginning of the program's execution.
+(@xref{Records, ,How Input is Split into Records}.)
+@code{NR} is set each time a new record is read.@refill
+
+@item RLENGTH
+@iftex
+@vindex RLENGTH
+@end iftex
+@code{RLENGTH} is the length of the substring matched by the
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RLENGTH} is set by invoking the @code{match} function. Its value
+is the length of the matched string, or @minus{}1 if no match was found.@refill
+
+@item RSTART
+@iftex
+@vindex RSTART
+@end iftex
+@code{RSTART} is the start-index in characters of the substring matched by the
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RSTART} is set by invoking the @code{match} function. Its value
+is the position in the string where the matched substring starts, or 0
+if no match was found.@refill
+@end table
+
+@node Command Line, Language History, Built-in Variables, Top
+@c node-name, next, previous, up
+@chapter Invoking @code{awk}
+@cindex command line
+@cindex invocation of @code{gawk}
+@cindex arguments, command line
+@cindex options, command line
+@cindex long options
+@cindex options, long
+
+There are two ways to run @code{awk}: with an explicit program, or with
+one or more program files. Here are templates for both of them; items
+enclosed in @samp{@r{[}@dots{}@r{]}} in these templates are optional.
+
+Besides traditional one-letter @sc{posix}-style options, @code{gawk} also
+supports GNU long named options.
+
+@example
+awk @r{[@var{POSIX or GNU style options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
+awk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+@menu
+* Options:: Command line options and their meanings.
+* Other Arguments:: Input file names and variable assignments.
+* AWKPATH Variable:: Searching directories for @code{awk} programs.
+* Obsolete:: Obsolete Options and/or features.
+* Undocumented:: Undocumented Options and Features.
+@end menu
+
+@node Options, Other Arguments, Command Line, Command Line
+@section Command Line Options
+
+Options begin with a minus sign, and consist of a single character.
+GNU style long named options consist of two minus signs and
+a keyword that can be abbreviated if the abbreviation allows the option
+to be uniquely identified. If the option takes an argument, then the
+keyword is immediately followed by an equals sign (@samp{=}) and the
+argument's value. For brevity, the discussion below only refers to the
+traditional short options; however the long and short options are
+interchangeable in all contexts.
+
+Each long named option for @code{gawk} has a corresponding
+@sc{posix}-style option. The options and their meanings are as follows:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator=@var{fs}
+@iftex
+@cindex @code{-F} option
+@end iftex
+@cindex @code{--field-separator} option
+Sets the @code{FS} variable to @var{fs}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
+
+@item -f @var{source-file}
+@itemx --file=@var{source-file}
+@iftex
+@cindex @code{-f} option
+@end iftex
+@cindex @code{--file} option
+Indicates that the @code{awk} program is to be found in @var{source-file}
+instead of in the first non-option argument.
+
+@item -v @var{var}=@var{val}
+@itemx --assign=@var{var}=@var{val}
+@cindex @samp{-v} option
+@cindex @code{--assign} option
+Sets the variable @var{var} to the value @var{val} @emph{before}
+execution of the program begins. Such variable values are available
+inside the @code{BEGIN} rule (see below for a fuller explanation).
+
+The @samp{-v} option can only set one variable, but you can use
+it more than once, setting another variable each time, like this:
+@samp{@w{-v foo=1} @w{-v bar=2}}.
+
+@item -W @var{gawk-opt}
+@cindex @samp{-W} option
+Following the @sc{posix} standard, options that are implementation
+specific are supplied as arguments to the @samp{-W} option. With @code{gawk},
+these arguments may be separated by commas, or quoted and separated by
+whitespace. Case is ignored when processing these options. These options
+also have corresponding GNU style long named options. The following
+@code{gawk}-specific options are available:
+
+@table @code
+@item -W compat
+@itemx --compat
+@cindex @code{--compat} option
+Specifies @dfn{compatibility mode}, in which the GNU extensions in
+@code{gawk} are disabled, so that @code{gawk} behaves just like Unix
+@code{awk}.
+@xref{POSIX/GNU, ,Extensions in @code{gawk} not in POSIX @code{awk}},
+which summarizes the extensions. Also see
+@ref{Compatibility Mode, ,Downward Compatibility and Debugging}.@refill
+
+@item -W copyleft
+@itemx -W copyright
+@itemx --copyleft
+@itemx --copyright
+@cindex @code{--copyleft} option
+@cindex @code{--copyright} option
+Print the short version of the General Public License.
+This option may disappear in a future version of @code{gawk}.
+
+@item -W help
+@itemx -W usage
+@itemx --help
+@itemx --usage
+@cindex @code{--help} option
+@cindex @code{--usage} option
+Print a ``usage'' message summarizing the short and long style options
+that @code{gawk} accepts, and then exit.
+
+@item -W lint
+@itemx --lint
+@cindex @code{--lint} option
+Provide warnings about constructs that are dubious or non-portable to
+other @code{awk} implementations.
+Some warnings are issued when @code{gawk} first reads your program. Others
+are issued at run-time, as your program executes.
+
+@item -W posix
+@itemx --posix
+@cindex @code{--posix} option
+Operate in strict @sc{posix} mode. This disables all @code{gawk}
+extensions (just like @code{-W compat}), and adds the following additional
+restrictions:
+
+@itemize @bullet{}
+@item
+@code{\x} escape sequences are not recognized
+(@pxref{Constants, ,Constant Expressions}).@refill
+
+@item
+The synonym @code{func} for the keyword @code{function} is not
+recognized (@pxref{Definition Syntax, ,Syntax of Function Definitions}).
+
+@item
+The operators @samp{**} and @samp{**=} cannot be used in
+place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators},
+and also @pxref{Assignment Ops, ,Assignment Expressions}).@refill
+
+@item
+Specifying @samp{-Ft} on the command line does not set the value
+of @code{FS} to be a single tab character
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
+@end itemize
+
+Although you can supply both @samp{-W compat} and @samp{-W posix} on the
+command line, @samp{-W posix} will take precedence.
+
+@item -W source=@var{program-text}
+@itemx --source=@var{program-text}
+@cindex @code{--source} option
+Program source code is taken from the @var{program-text}. This option
+allows you to mix @code{awk} source code in files with program source
+code that you would enter on the command line. This is particularly useful
+when you have library functions that you wish to use from your command line
+programs (@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+
+@item -W version
+@itemx --version
+@cindex @code{--version} option
+Prints version information for this particular copy of @code{gawk}.
+This is so you can determine if your copy of @code{gawk} is up to date
+with respect to whatever the Free Software Foundation is currently
+distributing. This option may disappear in a future version of @code{gawk}.
+@end table
+
+@item --
+Signals the end of the command line options. The following arguments
+are not treated as options even if they begin with @samp{-}. This
+interpretation of @samp{--} follows the @sc{posix} argument parsing
+conventions.
+
+This is useful if you have file names that start with @samp{-},
+or in shell scripts, if you have file names that will be specified
+by the user which could start with @samp{-}.
+@end table
+
+Any other options are flagged as invalid with a warning message, but
+are otherwise ignored.
+
+In compatibility mode, as a special case, if the value of @var{fs} supplied
+to the @samp{-F} option is @samp{t}, then @code{FS} is set to the tab
+character (@code{"\t"}). This is only true for @samp{-W compat}, and not
+for @samp{-W posix}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
+
+If the @samp{-f} option is @emph{not} used, then the first non-option
+command line argument is expected to be the program text.
+
+The @samp{-f} option may be used more than once on the command line.
+If it is, @code{awk} reads its program source from all of the named files, as
+if they had been concatenated together into one big file. This is
+useful for creating libraries of @code{awk} functions. Useful functions
+can be written once, and then retrieved from a standard place, instead
+of having to be included into each individual program. You can still
+type in a program at the terminal and use library functions, by specifying
+@samp{-f /dev/tty}. @code{awk} will read a file from the terminal
+to use as part of the @code{awk} program. After typing your program,
+type @kbd{Control-d} (the end-of-file character) to terminate it.
+(You may also use @samp{-f -} to read program source from the standard
+input, but then you will not be able to also use the standard input as a
+source of data.)
+
+Because it is clumsy using the standard @code{awk} mechanisms to mix source
+file and command line @code{awk} programs, @code{gawk} provides the
+@samp{--source} option. This does not require you to pre-empt the standard
+input for your source code, and allows you to easily mix command line
+and library source code
+(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+
+If no @samp{-f} or @samp{--source} option is specified, then @code{gawk}
+will use the first non-option command line argument as the text of the
+program source code.
+
+@node Other Arguments, AWKPATH Variable, Options, Command Line
+@section Other Command Line Arguments
+
+Any additional arguments on the command line are normally treated as
+input files to be processed in the order specified. However, an
+argument that has the form @code{@var{var}=@var{value}} means to assign
+the value @var{value} to the variable @var{var}---it does not specify a
+file at all.
+
+@vindex ARGV
+All these arguments are made available to your @code{awk} program in the
+@code{ARGV} array (@pxref{Built-in Variables}). Command line options
+and the program text (if present) are omitted from the @code{ARGV}
+array. All other arguments, including variable assignments, are
+included.
+
+The distinction between file name arguments and variable-assignment
+arguments is made when @code{awk} is about to open the next input file.
+At that point in execution, it checks the ``file name'' to see whether
+it is really a variable assignment; if so, @code{awk} sets the variable
+instead of reading a file.
+
+Therefore, the variables actually receive the specified values after all
+previously specified files have been read. In particular, the values of
+variables assigned in this fashion are @emph{not} available inside a
+@code{BEGIN} rule
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}),
+since such rules are run before @code{awk} begins scanning the argument list.
+The values given on the command line are processed for escape sequences
+(@pxref{Constants, ,Constant Expressions}).@refill
+
+In some earlier implementations of @code{awk}, when a variable assignment
+occurred before any file names, the assignment would happen @emph{before}
+the @code{BEGIN} rule was executed. Some applications came to depend
+upon this ``feature.'' When @code{awk} was changed to be more consistent,
+the @samp{-v} option was added to accommodate applications that depended
+upon this old behavior.
+
+The variable assignment feature is most useful for assigning to variables
+such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
+output formats, before scanning the data files. It is also useful for
+controlling state if multiple passes are needed over a data file. For
+example:@refill
+
+@cindex multiple passes over data
+@cindex passes, multiple
+@smallexample
+awk 'pass == 1 @{ @var{pass 1 stuff} @}
+ pass == 2 @{ @var{pass 2 stuff} @}' pass=1 datafile pass=2 datafile
+@end smallexample
+
+Given the variable assignment feature, the @samp{-F} option is not
+strictly necessary. It remains for historical compatibility.
+
+@node AWKPATH Variable, Obsolete, Other Arguments, Command Line
+@section The @code{AWKPATH} Environment Variable
+@cindex @code{AWKPATH} environment variable
+@cindex search path
+@cindex directory search
+@cindex path, search
+@iftex
+@cindex differences between @code{gawk} and @code{awk}
+@end iftex
+
+The previous section described how @code{awk} program files can be named
+on the command line with the @samp{-f} option. In some @code{awk}
+implementations, you must supply a precise path name for each program
+file, unless the file is in the current directory.
+
+But in @code{gawk}, if the file name supplied in the @samp{-f} option
+does not contain a @samp{/}, then @code{gawk} searches a list of
+directories (called the @dfn{search path}), one by one, looking for a
+file with the specified name.
+
+The search path is actually a string consisting of directory names
+separated by colons. @code{gawk} gets its search path from the
+@code{AWKPATH} environment variable. If that variable does not exist,
+@code{gawk} uses the default path, which is
+@samp{.:/usr/lib/awk:/usr/local/lib/awk}. (Programs written by
+system administrators should use an @code{AWKPATH} variable that
+does not include the current directory, @samp{.}.)@refill
+
+The search path feature is particularly useful for building up libraries
+of useful @code{awk} functions. The library files can be placed in a
+standard directory that is in the default path, and then specified on
+the command line with a short file name. Otherwise, the full file name
+would have to be typed for each file.
+
+By combining the @samp{--source} and @samp{-f} options, your command line
+@code{awk} programs can use facilities in @code{awk} library files.
+
+Path searching is not done if @code{gawk} is in compatibility mode.
+This is true for both @samp{-W compat} and @samp{-W posix}.
+@xref{Options, ,Command Line Options}.
+
+@strong{Note:} if you want files in the current directory to be found,
+you must include the current directory in the path, either by writing
+@file{.} as an entry in the path, or by writing a null entry in the
+path. (A null entry is indicated by starting or ending the path with a
+colon, or by placing two colons next to each other (@samp{::}).) If the
+current directory is not included in the path, then files cannot be
+found in the current directory. This path search mechanism is identical
+to the shell's.
+@c someday, @cite{The Bourne Again Shell}....
+
+@node Obsolete, Undocumented, AWKPATH Variable, Command Line
+@section Obsolete Options and/or Features
+
+@cindex deprecated options
+@cindex obsolete options
+@cindex deprecated features
+@cindex obsolete features
+This section describes features and/or command line options from the
+previous release of @code{gawk} that are either not available in the
+current version, or that are still supported but deprecated (meaning that
+they will @emph{not} be in the next release).
+
+@c update this section for each release!
+
+For version 2.15 of @code{gawk}, the following command line options
+from version 2.11.1 are no longer recognized.
+
+@table @samp
+@ignore
+@item -nostalgia
+Use @samp{-W nostalgia} instead.
+@end ignore
+
+@item -c
+Use @samp{-W compat} instead.
+
+@item -V
+Use @samp{-W version} instead.
+
+@item -C
+Use @samp{-W copyright} instead.
+
+@item -a
+@itemx -e
+These options produce an ``unrecognized option'' error message but have
+no effect on the execution of @code{gawk}. The @sc{posix} standard now
+specifies traditional @code{awk} regular expressions for the @code{awk} utility.
+@end table
+
+The public-domain version of @code{strftime} that is distributed with
+@code{gawk} changed for the 2.14 release. The @samp{%V} conversion specifier
+that used to generate the date in VMS format was changed to @samp{%v}.
+This is because the @sc{posix} standard for the @code{date} utility now
+specifies a @samp{%V} conversion specifier.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for details.
+
+@node Undocumented, , Obsolete, Command Line
+@section Undocumented Options and Features
+
+This section intentionally left blank.
+
+@c Read The Source, Luke!
+
+@ignore
+@c If these came out in the Info file or TeX manual, then they wouldn't
+@c be undocumented, would they?
+
+@code{gawk} has one undocumented option:
+
+@table @samp
+@item -W nostalgia
+Print the message @code{"awk: bailing out near line 1"} and dump core.
+This option was inspired by the common behavior of very early versions of
+Unix @code{awk}, and by a t-shirt.
+@end table
+
+Early versions of @code{awk} used to not require any separator (either
+a newline or @samp{;}) between the rules in @code{awk} programs. Thus,
+it was common to see one-line programs like:
+
+@example
+awk '@{ sum += $1 @} END @{ print sum @}'
+@end example
+
+@code{gawk} actually supports this, but it is purposely undocumented
+since it is considered bad style. The correct way to write such a program
+is either
+
+@example
+awk '@{ sum += $1 @} ; END @{ print sum @}'
+@end example
+
+@noindent
+or
+
+@example
+awk '@{ sum += $1 @}
+ END @{ print sum @}' data
+@end example
+
+@noindent
+@xref{Statements/Lines, ,@code{awk} Statements versus Lines}, for a fuller
+explanation.@refill
+
+As an accident of the implementation of the original Unix @code{awk}, if
+a built-in function used @code{$0} as its default argument, it was possible
+to call that function without the parentheses. In particular, it was
+common practice to use the @code{length} function in this fashion.
+For example, the pipeline:
+
+@example
+echo abcdef | awk '@{ print length @}'
+@end example
+
+@noindent
+would print @samp{6}.
+
+For backwards compatibility with old programs, @code{gawk} supports
+this usage, but only for the @code{length} function. New programs should
+@emph{not} call the @code{length} function this way. In particular,
+this usage will not be portable to other @sc{posix} compliant versions
+of @code{awk}. It is also poor style.
+
+@end ignore
+
+@node Language History, Installation, Command Line, Top
+@chapter The Evolution of the @code{awk} Language
+
+This manual describes the GNU implementation of @code{awk}, which is patterned
+after the @sc{posix} specification. Many @code{awk} users are only familiar
+with the original @code{awk} implementation in Version 7 Unix, which is also
+the basis for the version in Berkeley Unix (through 4.3--Reno). This chapter
+briefly describes the evolution of the @code{awk} language.
+
+@menu
+* V7/S5R3.1:: The major changes between V7 and
+ System V Release 3.1.
+* S5R4:: Minor changes between System V
+ Releases 3.1 and 4.
+* POSIX:: New features from the @sc{posix} standard.
+* POSIX/GNU:: The extensions in @code{gawk}
+ not in @sc{posix} @code{awk}.
+@end menu
+
+@node V7/S5R3.1, S5R4, Language History, Language History
+@section Major Changes between V7 and S5R3.1
+
+The @code{awk} language evolved considerably between the release of
+Version 7 Unix (1978) and the new version first made widely available in
+System V Release 3.1 (1987). This section summarizes the changes, with
+cross-references to further details.
+
+@itemize @bullet
+@item
+The requirement for @samp{;} to separate rules on a line
+(@pxref{Statements/Lines, ,@code{awk} Statements versus Lines}).
+
+@item
+User-defined functions, and the @code{return} statement
+(@pxref{User-defined, ,User-defined Functions}).
+
+@item
+The @code{delete} statement (@pxref{Delete, ,The @code{delete} Statement}).
+
+@item
+The @code{do}-@code{while} statement
+(@pxref{Do Statement, ,The @code{do}-@code{while} Statement}).@refill
+
+@item
+The built-in functions @code{atan2}, @code{cos}, @code{sin}, @code{rand} and
+@code{srand} (@pxref{Numeric Functions, ,Numeric Built-in Functions}).
+
+@item
+The built-in functions @code{gsub}, @code{sub}, and @code{match}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@item
+The built-in functions @code{close}, which closes an open file, and
+@code{system}, which allows the user to execute operating system
+commands (@pxref{I/O Functions, ,Built-in Functions for Input/Output}).@refill
+@c Does the above verbiage prevent an overfull hbox? --mew, rjc 24jan1992
+
+@item
+The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART},
+and @code{SUBSEP} built-in variables (@pxref{Built-in Variables}).
+
+@item
+The conditional expression using the operators @samp{?} and @samp{:}
+(@pxref{Conditional Exp, ,Conditional Expressions}).@refill
+
+@item
+The exponentiation operator @samp{^}
+(@pxref{Arithmetic Ops, ,Arithmetic Operators}) and its assignment operator
+form @samp{^=} (@pxref{Assignment Ops, ,Assignment Expressions}).@refill
+
+@item
+C-compatible operator precedence, which breaks some old @code{awk}
+programs (@pxref{Precedence, ,Operator Precedence (How Operators Nest)}).
+
+@item
+Regexps as the value of @code{FS}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}), and as the
+third argument to the @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
+
+@item
+Dynamic regexps as operands of the @samp{~} and @samp{!~} operators
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).
+
+@item
+Escape sequences (@pxref{Constants, ,Constant Expressions}) in regexps.@refill
+
+@item
+The escape sequences @samp{\b}, @samp{\f}, and @samp{\r}
+(@pxref{Constants, ,Constant Expressions}).
+
+@item
+Redirection of input for the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
+
+@item
+Multiple @code{BEGIN} and @code{END} rules
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).@refill
+
+@item
+Simulated multi-dimensional arrays
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).@refill
+@end itemize
+
+@node S5R4, POSIX, V7/S5R3.1, Language History
+@section Changes between S5R3.1 and S5R4
+
+The System V Release 4 version of Unix @code{awk} added these features
+(some of which originated in @code{gawk}):
+
+@itemize @bullet
+@item
+The @code{ENVIRON} variable (@pxref{Built-in Variables}).
+
+@item
+Multiple @samp{-f} options on the command line
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@item
+The @samp{-v} option for assigning variables before program execution begins
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@item
+The @samp{--} option for terminating command line options.
+
+@item
+The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences
+(@pxref{Constants, ,Constant Expressions}).@refill
+
+@item
+A defined return value for the @code{srand} built-in function
+(@pxref{Numeric Functions, ,Numeric Built-in Functions}).
+
+@item
+The @code{toupper} and @code{tolower} built-in string functions
+for case translation
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
+
+@item
+A cleaner specification for the @samp{%c} format-control letter in the
+@code{printf} function
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
+
+@item
+The ability to dynamically pass the field width and precision (@code{"%*.*d"})
+in the argument list of the @code{printf} function
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
+
+@item
+The use of constant regexps such as @code{/foo/} as expressions, where
+they are equivalent to use of the matching operator, as in @code{$0 ~
+/foo/} (@pxref{Constants, ,Constant Expressions}).
+@end itemize
+
+@node POSIX, POSIX/GNU, S5R4, Language History
+@section Changes between S5R4 and POSIX @code{awk}
+
+The @sc{posix} Command Language and Utilities standard for @code{awk}
+introduced the following changes into the language:
+
+@itemize @bullet{}
+@item
+The use of @samp{-W} for implementation-specific options.
+
+@item
+The use of @code{CONVFMT} for controlling the conversion of numbers
+to strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+@item
+The concept of a numeric string, and tighter comparison rules to go
+with it (@pxref{Comparison Ops, ,Comparison Expressions}).
+
+@item
+More complete documentation of many of the previously undocumented
+features of the language.
+@end itemize
+
+@node POSIX/GNU, , POSIX, Language History
+@section Extensions in @code{gawk} not in POSIX @code{awk}
+
+The GNU implementation, @code{gawk}, adds these features:
+
+@itemize @bullet
+@item
+The @code{AWKPATH} environment variable for specifying a path search for
+the @samp{-f} command line option
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@item
+The various @code{gawk} specific features available via the @samp{-W}
+command line option (@pxref{Command Line, ,Invoking @code{awk}}).
+
+@item
+The @code{ARGIND} variable, which tracks the movement of @code{FILENAME}
+through @code{ARGV} (@pxref{Built-in Variables}).
+
+@item
+The @code{ERRNO} variable, which contains the system error message when
+@code{getline} returns @minus{}1, or when @code{close} fails
+(@pxref{Built-in Variables}).
+
+@item
+The @code{IGNORECASE} variable and its effects
+(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}).@refill
+
+@item
+The @code{FIELDWIDTHS} variable and its effects
+(@pxref{Constant Size, ,Reading Fixed-width Data}).@refill
+
+@item
+The @code{next file} statement for skipping to the next data file
+(@pxref{Next File Statement, ,The @code{next file} Statement}).@refill
+
+@item
+The @code{systime} and @code{strftime} built-in functions for obtaining
+and printing time stamps
+(@pxref{Time Functions, ,Functions for Dealing with Time Stamps}).@refill
+
+@item
+The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and
+@file{/dev/fd/@var{n}} file name interpretation
+(@pxref{Special Files, ,Standard I/O Streams}).@refill
+
+@item
+The @samp{-W compat} option to turn off these extensions
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@item
+The @samp{-W posix} option for full @sc{posix} compliance
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@end itemize
+
+@node Installation, Gawk Summary, Language History, Top
+@chapter Installing @code{gawk}
+
+This chapter provides instructions for installing @code{gawk} on the
+various platforms that are supported by the developers. The primary
+developers support Unix (and one day, GNU), while the other ports were
+contributed. The file @file{ACKNOWLEDGMENT} in the @code{gawk}
+distribution lists the electronic mail addresses of the people who did
+the respective ports.@refill
+
+@menu
+* Gawk Distribution:: What is in the @code{gawk} distribution.
+* Unix Installation:: Installing @code{gawk} under various versions
+ of Unix.
+* VMS Installation:: Installing @code{gawk} on VMS.
+* MS-DOS Installation:: Installing @code{gawk} on MS-DOS.
+* Atari Installation:: Installing @code{gawk} on the Atari ST.
+@end menu
+
+@node Gawk Distribution, Unix Installation, Installation, Installation
+@section The @code{gawk} Distribution
+
+This section first describes how to get and extract the @code{gawk}
+distribution, and then discusses what is in the various files and
+subdirectories.
+
+@menu
+* Extracting:: How to get and extract the distribution.
+* Distribution contents:: What is in the distribution.
+@end menu
+
+@node Extracting, Distribution contents, Gawk Distribution, Gawk Distribution
+@subsection Getting the @code{gawk} Distribution
+
+@cindex getting gawk
+@cindex anonymous ftp
+@cindex anonymous uucp
+@cindex ftp, anonymous
+@cindex uucp, anonymous
+@code{gawk} is distributed as a @code{tar} file compressed with the
+GNU Zip program, @code{gzip}. You can
+get it via anonymous @code{ftp} to the Internet host @code{prep.ai.mit.edu}.
+Like all GNU software, it will be archived at other well known systems,
+from which it will be possible to use some sort of anonymous @code{uucp} to
+obtain the distribution as well.
+You can also order @code{gawk} on tape or CD-ROM directly from the
+Free Software Foundation. (The address is on the copyright page.)
+Doing so directly contributes to the support of the foundation and to
+the production of more free software.
+
+Once you have the distribution (for example,
+@file{gawk-2.15.0.tar.z}), first use @code{gzip} to expand the
+file, and then use @code{tar} to extract it. You can use the following
+pipeline to produce the @code{gawk} distribution:
+
+@example
+# Under System V, add 'o' to the tar flags
+gzip -d -c gawk-2.15.0.tar.z | tar -xvpf -
+@end example
+
+@noindent
+This will create a directory named @file{gawk-2.15} in the current
+directory.
+
+The distribution file name is of the form @file{gawk-2.15.@var{n}.tar.z}.
+The @var{n} represents a @dfn{patchlevel}, meaning that minor bugs have
+been fixed in the major release. The current patchlevel is 0, but when
+retrieving distributions, you should get the version with the highest
+patchlevel.@refill
+
+If you are not on a Unix system, you will need to make other arrangements
+for getting and extracting the @code{gawk} distribution. You should consult
+a local expert.
+
+@node Distribution contents, , Extracting, Gawk Distribution
+@subsection Contents of the @code{gawk} Distribution
+
+@code{gawk} has a number of C source files, documentation files,
+subdirectories and files related to the configuration process
+(@pxref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}),
+and several subdirectories related to different, non-Unix,
+operating systems.@refill
+
+@table @asis
+@item various @samp{.c}, @samp{.y}, and @samp{.h} files
+
+The C and YACC source files are the actual @code{gawk} source code.
+@end table
+
+@table @file
+@item README
+@itemx README.VMS
+@itemx README.dos
+@itemx README.rs6000
+@itemx README.ultrix
+Descriptive files: @file{README} for @code{gawk} under Unix, and the
+rest for the various hardware and software combinations.
+
+@item PORTS
+A list of systems to which @code{gawk} has been ported, and which
+have successfully run the test suite.
+
+@item ACKNOWLEDGMENT
+A list of the people who contributed major parts of the code or documentation.
+
+@item NEWS
+A list of changes to @code{gawk} since the last release or patch.
+
+@item COPYING
+The GNU General Public License.
+
+@item FUTURES
+A brief list of features and/or changes being contemplated for future
+releases, with some indication of the time frame for the feature, based
+on its difficulty.
+
+@item LIMITATIONS
+A list of those factors that limit @code{gawk}'s performance.
+Most of these depend on the hardware or operating system software, and
+are not limits in @code{gawk} itself.@refill
+
+@item PROBLEMS
+A file describing known problems with the current release.
+
+@item gawk.1
+The @code{troff} source for a manual page describing @code{gawk}.
+
+@item gawk.texinfo
+@ifinfo
+The @code{texinfo} source file for this Info file.
+It should be processed with @TeX{} to produce a printed manual, and
+with @code{makeinfo} to produce the Info file.@refill
+@end ifinfo
+@iftex
+The @code{texinfo} source file for this manual.
+It should be processed with @TeX{} to produce a printed manual, and
+with @code{makeinfo} to produce the Info file.@refill
+@end iftex
+
+@item Makefile.in
+@itemx config
+@itemx config.in
+@itemx configure
+@itemx missing
+@itemx mungeconf
+These files and subdirectories are used when configuring @code{gawk}
+for various Unix systems. They are explained in detail in
+@ref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}.@refill
+
+@item atari
+Files needed for building @code{gawk} on an Atari ST.
+@xref{Atari Installation, ,Installing @code{gawk} on the Atari ST}, for details.
+
+@item pc
+Files needed for building @code{gawk} under MS-DOS.
+@xref{MS-DOS Installation, ,Installing @code{gawk} on MS-DOS}, for details.
+
+@item vms
+Files needed for building @code{gawk} under VMS.
+@xref{VMS Installation, ,Compiling Installing and Running @code{gawk} on VMS}, for details.
+
+@item test
+Many interesting @code{awk} programs, provided as a test suite for
+@code{gawk}. You can use @samp{make test} from the top level @code{gawk}
+directory to run your version of @code{gawk} against the test suite.
+@c There are many programs here that are useful in their own right.
+If @code{gawk} successfully passes @samp{make test} then you can
+be confident of a successful port.@refill
+@end table
+
+@node Unix Installation, VMS Installation, Gawk Distribution, Installation
+@section Compiling and Installing @code{gawk} on Unix
+
+Often, you can compile and install @code{gawk} by typing only two
+commands. However, if you do not use a supported system, you may need
+to configure @code{gawk} for your system yourself.
+
+@menu
+* Quick Installation:: Compiling @code{gawk} on a
+ supported Unix version.
+* Configuration Philosophy:: How it's all supposed to work.
+* New Configurations:: What to do if there is no supplied
+ configuration for your system.
+@end menu
+
+@node Quick Installation, Configuration Philosophy, Unix Installation, Unix Installation
+@subsection Compiling @code{gawk} for a Supported Unix Version
+
+@cindex installation, unix
+After you have extracted the @code{gawk} distribution, @code{cd}
+to @file{gawk-2.15}. Look in the @file{config} subdirectory for a
+file that matches your hardware/software combination. In general,
+only the software is relevant; for example @code{sunos41} is used
+for SunOS 4.1, on both Sun 3 and Sun 4 hardware.@refill
+
+If you find such a file, run the command:
+
+@example
+# assume you have SunOS 4.1
+./configure sunos41
+@end example
+
+This produces a @file{Makefile} and @file{config.h} tailored to your
+system. You may wish to edit the @file{Makefile} to use a different
+C compiler, such as @code{gcc}, the GNU C compiler, if you have it.
+You may also wish to change the @code{CFLAGS} variable, which controls
+the command line options that are passed to the C compiler (such as
+optimization levels, or compiling for debugging).@refill
+
+After you have configured @file{Makefile} and @file{config.h}, type:
+
+@example
+make
+@end example
+
+@noindent
+and shortly thereafter, you should have an executable version of @code{gawk}.
+That's all there is to it!
+
+@node Configuration Philosophy, New Configurations, Quick Installation, Unix Installation
+@subsection The Configuration Process
+
+(This section is of interest only if you know something about using the
+C language and the Unix operating system.)
+
+The source code for @code{gawk} generally attempts to adhere to industry
+standards wherever possible. This means that @code{gawk} uses library
+routines that are specified by the @sc{ansi} C standard and by the @sc{posix}
+operating system interface standard. When using an @sc{ansi} C compiler,
+function prototypes are provided to help improve the compile-time checking.
+
+Many older Unix systems do not support all of either the @sc{ansi} or the
+@sc{posix} standards. The @file{missing} subdirectory in the @code{gawk}
+distribution contains replacement versions of those subroutines that are
+most likely to be missing.
+
+The @file{config.h} file that is created by the @code{configure} program
+contains definitions that describe features of the particular operating
+system where you are attempting to compile @code{gawk}. For the most
+part, it lists which standard subroutines are @emph{not} available.
+For example, if your system lacks the @samp{getopt} routine, then
+@samp{GETOPT_MISSING} would be defined.
+
+@file{config.h} also defines constants that describe facts about your
+variant of Unix. For example, there may not be an @samp{st_blksize}
+element in the @code{stat} structure. In this case @samp{BLKSIZE_MISSING}
+would be defined.
+
+Based on the list in @file{config.h} of standard subroutines that are
+missing, @file{missing.c} will do a @samp{#include} of the appropriate
+file(s) from the @file{missing} subdirectory.@refill
+
+Conditionally compiled code in the other source files relies on the
+other definitions in the @file{config.h} file.
+
+Besides creating @file{config.h}, @code{configure} produces a @file{Makefile}
+from @file{Makefile.in}. There are a number of lines in @file{Makefile.in}
+that are system or feature specific. For example, there is a line that begins
+with @samp{##MAKE_ALLOCA_C##}. This is normally a comment line, since
+it starts with @samp{#}. If a configuration file has @samp{MAKE_ALLOCA_C}
+in it, then @code{configure} will delete the @samp{##MAKE_ALLOCA_C##}
+from the beginning of the line. This will enable the rules in the
+@file{Makefile} that use a C version of @samp{alloca}. There are several
+similar features that work in this fashion.@refill
+
+@node New Configurations, , Configuration Philosophy, Unix Installation
+@subsection Configuring @code{gawk} for a New System
+
+(This section is of interest only if you know something about using the
+C language and the Unix operating system, and if you have to install
+@code{gawk} on a system that is not supported by the @code{gawk} distribution.
+If you are a C or Unix novice, get help from a local expert.)
+
+If you need to configure @code{gawk} for a Unix system that is not
+supported in the distribution, first see
+@ref{Configuration Philosophy, ,The Configuration Process}.
+Then, copy @file{config.in} to @file{config.h}, and copy
+@file{Makefile.in} to @file{Makefile}.@refill
+
+Next, edit both files. Both files are liberally commented, and the
+necessary changes should be straightforward.
+
+While editing @file{config.h}, you need to determine what library
+routines you do or do not have by consulting your system documentation, or
+by perusing your actual libraries using the @code{ar} or @code{nm} utilities.
+In the worst case, simply do not define @emph{any} of the macros for missing
+subroutines. When you compile @code{gawk}, the final link-editing step
+will fail. The link editor will provide you with a list of unresolved external
+references---these are the missing subroutines. Edit @file{config.h} again
+and recompile, and you should be set.@refill
+
+Editing the @file{Makefile} should also be straightforward. Enable or
+disable the lines that begin with @samp{##MAKE_@var{whatever}##}, as
+appropriate. Select the correct C compiler and @code{CFLAGS} for it.
+Then run @code{make}.
+
+Getting a correct configuration is likely to be an iterative process.
+Do not be discouraged if it takes you several tries. If you have no
+luck whatsoever, please report your system type, and the steps you took.
+Once you do have a working configuration, please send it to the maintainers
+so that support for your system can be added to the official release.
+
+@xref{Bugs, ,Reporting Problems and Bugs}, for information on how to report
+problems in configuring @code{gawk}. You may also use the same mechanisms
+for sending in new configurations.@refill
+
+@node VMS Installation, MS-DOS Installation, Unix Installation, Installation
+@section Compiling, Installing, and Running @code{gawk} on VMS
+
+@c based on material from
+@c Pat Rankin <rankin@eql.caltech.edu>
+
+@cindex installation, vms
+This section describes how to compile and install @code{gawk} under VMS.
+
+@menu
+* VMS Compilation:: How to compile @code{gawk} under VMS.
+* VMS Installation Details:: How to install @code{gawk} under VMS.
+* VMS Running:: How to run @code{gawk} under VMS.
+* VMS POSIX:: Alternate instructions for VMS POSIX.
+@end menu
+
+@node VMS Compilation, VMS Installation Details, VMS Installation, VMS Installation
+@subsection Compiling @code{gawk} under VMS
+
+To compile @code{gawk} under VMS, there is a @code{DCL} command procedure that
+will issue all the necessary @code{CC} and @code{LINK} commands, and there is
+also a @file{Makefile} for use with the @code{MMS} utility. From the source
+directory, use either
+
+@smallexample
+$ @@[.VMS]VMSBUILD.COM
+@end smallexample
+
+@noindent
+or
+
+@smallexample
+$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK
+@end smallexample
+
+Depending upon which C compiler you are using, follow one of the sets
+of instructions in this table:
+
+@table @asis
+@item VAX C V3.x
+Use either @file{vmsbuild.com} or @file{descrip.mms} as is. These use
+@code{CC/OPTIMIZE=NOLINE}, which is essential for Version 3.0.
+
+@item VAX C V2.x
+You must have Version 2.3 or 2.4; older ones won't work. Edit either
+@file{vmsbuild.com} or @file{descrip.mms} according to the comments in them.
+For @file{vmsbuild.com}, this just entails removing two @samp{!} delimiters.
+Also edit @file{config.h} (which is a copy of file @file{[.config]vms-conf.h})
+and comment out or delete the two lines @samp{#define __STDC__ 0} and
+@samp{#define VAXC_BUILTINS} near the end.@refill
+
+@item GNU C
+Edit @file{vmsbuild.com} or @file{descrip.mms}; the changes are different
+from those for VAX C V2.x, but equally straightforward. No changes to
+@file{config.h} should be needed.
+
+@item DEC C
+Edit @file{vmsbuild.com} or @file{descrip.mms} according to their comments.
+No changes to @file{config.h} should be needed.
+@end table
+
+@code{gawk} 2.15 has been tested under VAX/VMS 5.5-1 using VAX C V3.2,
+GNU C 1.40 and 2.3. It should work without modifications for VMS V4.6 and up.
+
+@node VMS Installation Details, VMS Running, VMS Compilation, VMS Installation
+@subsection Installing @code{gawk} on VMS
+
+To install @code{gawk}, all you need is a ``foreign'' command, which is
+a @code{DCL} symbol whose value begins with a dollar sign.
+
+@smallexample
+$ GAWK :== $device:[directory]GAWK
+@end smallexample
+
+@noindent
+(Substitute the actual location of @code{gawk.exe} for
+@samp{device:[directory]}.) The symbol should be placed in the
+@file{login.com} of any user who wishes to run @code{gawk},
+so that it will be defined every time the user logs on.
+Alternatively, the symbol may be placed in the system-wide
+@file{sylogin.com} procedure, which will allow all users
+to run @code{gawk}.@refill
+
+Optionally, the help entry can be loaded into a VMS help library:
+
+@smallexample
+$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP
+@end smallexample
+
+@noindent
+(You may want to substitute a site-specific help library rather than
+the standard VMS library @samp{HELPLIB}.) After loading the help text,
+
+@c this is so tiny, but `should' be smallexample for consistency sake...
+@c I didn't because it was so short. --mew 29jan1992
+@example
+$ HELP GAWK
+@end example
+
+@noindent
+will provide information about both the @code{gawk} implementation and the
+@code{awk} programming language.
+
+The logical name @samp{AWK_LIBRARY} can designate a default location
+for @code{awk} program files. For the @samp{-f} option, if the specified
+filename has no device or directory path information in it, @code{gawk}
+will look in the current directory first, then in the directory specified
+by the translation of @samp{AWK_LIBRARY} if the file was not found.
+If after searching in both directories, the file still is not found,
+then @code{gawk} appends the suffix @samp{.awk} to the filename and the
+file search will be re-tried. If @samp{AWK_LIBRARY} is not defined, that
+portion of the file search will fail benignly.@refill
+
+@node VMS Running, VMS POSIX, VMS Installation Details, VMS Installation
+@subsection Running @code{gawk} on VMS
+
+Command line parsing and quoting conventions are significantly different
+on VMS, so examples in this manual or from other sources often need minor
+changes. They @emph{are} minor though, and all @code{awk} programs
+should run correctly.
+
+Here are a couple of trivial tests:
+
+@smallexample
+$ gawk -- "BEGIN @{print ""Hello, World!""@}"
+$ gawk -"W" version ! could also be -"W version" or "-W version"
+@end smallexample
+
+@noindent
+Note that upper-case and mixed-case text must be quoted.
+
+The VMS port of @code{gawk} includes a @code{DCL}-style interface in addition
+to the original shell-style interface (see the help entry for details).
+One side-effect of dual command line parsing is that if there is only a
+single parameter (as in the quoted string program above), the command
+becomes ambiguous. To work around this, the normally optional @samp{--}
+flag is required to force Unix style rather than @code{DCL} parsing. If any
+other dash-type options (or multiple parameters such as data files to be
+processed) are present, there is no ambiguity and @samp{--} can be omitted.
+
+The default search path when looking for @code{awk} program files specified
+by the @samp{-f} option is @code{"SYS$DISK:[],AWK_LIBRARY:"}. The logical
+name @samp{AWKPATH} can be used to override this default. The format
+of @samp{AWKPATH} is a comma-separated list of directory specifications.
+When defining it, the value should be quoted so that it retains a single
+translation, and not a multi-translation @code{RMS} searchlist.
+
+@node VMS POSIX, , VMS Running, VMS Installation
+@subsection Building and using @code{gawk} under VMS POSIX
+
+Ignore the instructions above, although @file{vms/gawk.hlp} should still
+be made available in a help library. Make sure that the two scripts,
+@file{configure} and @file{mungeconf}, are executable; use @samp{chmod +x}
+on them if necessary. Then execute the following commands:
+
+@smallexample
+$ POSIX
+psx> configure vms-posix
+psx> make awktab.c gawk
+@end smallexample
+
+@noindent
+The first command will construct files @file{config.h} and @file{Makefile}
+out of templates. The second command will compile and link @code{gawk}.
+Due to a @code{make} bug in VMS POSIX V1.0 and V1.1,
+the file @file{awktab.c} must be given as an explicit target or it will
+not be built and the final link step will fail. Ignore the warning
+@samp{"Could not find lib m in lib list"}; it is harmless, caused by the
+explicit use of @samp{-lm} as a linker option which is not needed
+under VMS POSIX. Under V1.1 (but not V1.0) a problem with the @code{yacc}
+skeleton @file{/etc/yyparse.c} will cause a compiler warning for
+@file{awktab.c}, followed by a linker warning about compilation warnings
+in the resulting object module. These warnings can be ignored.@refill
+
+Once built, @code{gawk} will work like any other shell utility. Unlike
+the normal VMS port of @code{gawk}, no special command line manipulation is
+needed in the VMS POSIX environment.
+
+@node MS-DOS Installation, Atari Installation, VMS Installation, Installation
+@section Installing @code{gawk} on MS-DOS
+
+@cindex installation, ms-dos
+The first step is to get all the files in the @code{gawk} distribution
+onto your PC. Move all the files from the @file{pc} directory into
+the main directory where the other files are. Edit the file
+@file{make.bat} so that it will be an acceptable MS-DOS batch file.
+This means making sure that all lines are terminated with the ASCII
+carriage return and line feed characters.
+
+@code{gawk} has only been compiled with version 5.1 of the Microsoft
+C compiler. The file @file{make.bat} from the @file{pc} directory
+assumes that you have this compiler.
+
+Copy the file @file{setargv.obj} from the library directory where it
+resides to the @code{gawk} source code directory.
+
+Run @file{make.bat}. This will compile @code{gawk} for you, and link it.
+That's all there is to it!
+
+@node Atari Installation, , MS-DOS Installation, Installation
+@section Installing @code{gawk} on the Atari ST
+
+@c based on material from
+@c Michal Jaegermann <ntomczak@vm.ucs.ualberta.ca>
+
+@cindex installation, atari
+This section assumes that you are running TOS. It applies to other Atari
+models (STe, TT) as well.
+
+In order to use @code{gawk}, you need to have a shell, either text or
+graphics, that does not map all the characters of a command line to
+upper case. Maintaining case distinction in option flags is very
+important (@pxref{Command Line, ,Invoking @code{awk}}). Popular shells
+like @code{gulam} or @code{gemini} will work, as will newer versions of
+@code{desktop}. Support for I/O redirection is necessary to make it easy
+to import @code{awk} programs from other environments. Pipes are nice to have,
+but not vital.
+
+If you have received an executable version of @code{gawk}, place it,
+as usual, anywhere in your @code{PATH} where your shell will find it.
+
+While executing, @code{gawk} creates a number of temporary files.
+@code{gawk} looks for either of the environment variables @code{TEMP}
+or @code{TMPDIR}, in that order. If either one is found, its value
+is assumed to be a directory for temporary files. This directory
+must exist, and if you can spare the memory, it is a good idea to
+put it on a @sc{ram} drive. If neither @code{TEMP} nor @code{TMPDIR}
+is found, then @code{gawk} uses the current directory for its
+temporary files.
+
+The ST version of @code{gawk} searches for its program files as
+described in @ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
+On the ST, the default value for the @code{AWKPATH} variable is
+@code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}.
+The search path can be modified by explicitly setting @code{AWKPATH} to
+whatever you wish. Note that colons cannot be used on the ST to separate
+elements in the @code{AWKPATH} variable, since they have another, reserved,
+meaning. Instead, you must use a comma to separate elements in the path.
+If you are recompiling @code{gawk} on the ST, then you can choose a new
+default search path, by setting the value of @samp{DEFPATH} in the file
+@file{...\config\atari}. You may choose a different separator character
+by setting the value of @samp{ENVSEP} in the same file. The new values will
+be used when creating the header file @file{config.h}.@refill
+
+@ignore
+As a last resort, small
+adjustments can be made directly on the executable version of @code{gawk}
+using a binary editor.@refill
+@end ignore
+
+Although @code{awk} allows great flexibility in doing I/O redirections
+from within a program, this facility should be used with care on the ST.
+In some circumstances the OS routines for file handle pool processing
+lose track of certain events, causing the computer to crash, and requiring
+a reboot. Often a warm reboot is sufficient. Fortunately, this happens
+infrequently, and in rather esoteric situations. In particular, avoid
+having one part of an @code{awk} program using @code{print}
+statements explicitly redirected to @code{"/dev/stdout"}, while other
+@code{print} statements use the default standard output, and a
+calling shell has redirected standard output to a file.@refill
+@c whew!
+
+When @code{gawk} is compiled with the ST version of @code{gcc} and its
+usual libraries, it will accept both @samp{/} and @samp{\} as path separators.
+While this is convenient, it should be remembered that this removes one,
+technically legal, character (@samp{/}) from your file names, and that
+it may create problems for external programs, called via the @code{system()}
+function, which may not support this convention. Whenever it is possible
+that a file created by @code{gawk} will be used by some other program,
+use only backslashes. Also remember that in @code{awk}, backslashes in
+strings have to be doubled in order to get literal backslashes.
+
+The initial port of @code{gawk} to the ST was done with @code{gcc}.
+If you wish to recompile @code{gawk} from scratch, you will need to use
+a compiler that accepts @sc{ansi} standard C (such as @code{gcc}, Turbo C,
+or Prospero C). If @code{sizeof(int) != @w{sizeof(int *)}}, the correctness
+of the generated code depends heavily on the fact that all function calls
+have function prototypes in the current scope. If your compiler does
+not accept function prototypes, you will probably have to add a
+number of casts to the code.@refill
+
+If you are using @code{gcc}, make sure that you have up-to-date libraries.
+Older versions have problems with some library functions (@code{atan2()},
+@code{strftime()}, the @samp{%g} conversion in @code{sprintf()}) which
+may affect the operation of @code{gawk}.
+
+In the @file{atari} subdirectory of the @code{gawk} distribution is
+a version of the @code{system()} function that has been tested with
+@code{gulam} and @code{msh}; it should work with other shells as well.
+With @code{gulam}, it passes the string to be executed without spawning
+an extra copy of a shell. It is possible to replace this version of
+@code{system()} with a similar function from a library or from some other
+source if that version would be a better choice for the shell you prefer.
+
+The files needed to recompile @code{gawk} on the ST can be found in
+the @file{atari} directory. The provided files and instructions below
+assume that you have the GNU C compiler (@code{gcc}), the @code{gulam} shell,
+and an ST version of @code{sed}. The @file{Makefile} is set up to use
+@file{byacc} as a @file{yacc} replacement. With a different set of tools some
+adjustments and/or editing will be needed.@refill
+
+@code{cd} to the @file{atari} directory. Copy @file{Makefile.st} to
+@file{makefile} in the source (parent) directory. Possibly adjust
+@file{../config/atari} to suit your system. Execute the script @file{mkconf.g}
+which will create the header file @file{../config.h}. Go back to the source
+directory. If you are not using @code{gcc}, check the file @file{missing.c}.
+It may be necessary to change forward slashes in the references to files
+from the @file{atari} subdirectory into backslashes. Type @code{make} and
+enjoy.@refill
+
+Compilation with @code{gcc} of some of the bigger modules, like
+@file{awk_tab.c}, may require a full four megabytes of memory. On smaller
+machines you would need to cut down on optimizations, or you would have to
+switch to another, less memory hungry, compiler.@refill
+
+@node Gawk Summary, Sample Program, Installation, Top
+@appendix @code{gawk} Summary
+
+This appendix provides a brief summary of the @code{gawk} command line and the
+@code{awk} language. It is designed to serve as a ``quick reference.'' It is
+therefore terse, but complete.
+
+@menu
+* Command Line Summary:: Recapitulation of the command line.
+* Language Summary:: A terse review of the language.
+* Variables/Fields:: Variables, fields, and arrays.
+* Rules Summary:: Patterns and Actions, and their
+ component parts.
+* Functions Summary:: Defining and calling functions.
+* Historical Features:: Some undocumented but supported ``features''.
+@end menu
+
+@node Command Line Summary, Language Summary, Gawk Summary, Gawk Summary
+@appendixsec Command Line Options Summary
+
+The command line consists of options to @code{gawk} itself, the
+@code{awk} program text (if not supplied via the @samp{-f} option), and
+values to be made available in the @code{ARGC} and @code{ARGV}
+predefined @code{awk} variables:
+
+@example
+awk @r{[@var{POSIX or GNU style options}]} -f source-file @r{[@code{--}]} @var{file} @dots{}
+awk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+The options that @code{gawk} accepts are:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator=@var{fs}
+Use @var{fs} for the input field separator (the value of the @code{FS}
+predefined variable).
+
+@item -f @var{program-file}
+@itemx --file=@var{program-file}
+Read the @code{awk} program source from the file @var{program-file}, instead
+of from the first command line argument.
+
+@item -v @var{var}=@var{val}
+@itemx --assign=@var{var}=@var{val}
+Assign the variable @var{var} the value @var{val} before program execution
+begins.
+
+@item -W compat
+@itemx --compat
+Specifies compatibility mode, in which @code{gawk} extensions are turned
+off.
+
+@item -W copyleft
+@itemx -W copyright
+@itemx --copyleft
+@itemx --copyright
+Print the short version of the General Public License on the error
+output. This option may disappear in a future version of @code{gawk}.
+
+@item -W help
+@itemx -W usage
+@itemx --help
+@itemx --usage
+Print a relatively short summary of the available options on the error output.
+
+@item -W lint
+@itemx --lint
+Give warnings about dubious or non-portable @code{awk} constructs.
+
+@item -W posix
+@itemx --posix
+Specifies @sc{posix} compatibility mode, in which @code{gawk} extensions
+are turned off and additional restrictions apply.
+
+@item -W source=@var{program-text}
+@itemx --source=@var{program-text}
+Use @var{program-text} as @code{awk} program source code. This option allows
+mixing command line source code with source code from files, and is
+particularly useful for mixing command line programs with library functions.
+
+@item -W version
+@itemx --version
+Print version information for this particular copy of @code{gawk} on the error
+output. This option may disappear in a future version of @code{gawk}.
+
+@item --
+Signal the end of options. This is useful to allow further arguments to the
+@code{awk} program itself to start with a @samp{-}. This is mainly for
+consistency with the argument parsing conventions of @sc{posix}.
+@end table
+
+Any other options are flagged as invalid, but are otherwise ignored.
+@xref{Command Line, ,Invoking @code{awk}}, for more details.
+
+@node Language Summary, Variables/Fields, Command Line Summary, Gawk Summary
+@appendixsec Language Summary
+
+An @code{awk} program consists of a sequence of pattern-action statements
+and optional function definitions.
+
+@example
+@var{pattern} @{ @var{action statements} @}
+
+function @var{name}(@var{parameter list}) @{ @var{action statements} @}
+@end example
+
+@code{gawk} first reads the program source from the
+@var{program-file}(s) if specified, or from the first non-option
+argument on the command line. The @samp{-f} option may be used multiple
+times on the command line. @code{gawk} reads the program text from all
+the @var{program-file} files, effectively concatenating them in the
+order they are specified. This is useful for building libraries of
+@code{awk} functions, without having to include them in each new
+@code{awk} program that uses them. To use a library function in a file
+from a program typed in on the command line, specify @samp{-f /dev/tty};
+then type your program, and end it with a @kbd{Control-d}.
+@xref{Command Line, ,Invoking @code{awk}}.@refill
+
+The environment variable @code{AWKPATH} specifies a search path to use
+when finding source files named with the @samp{-f} option. The default
+path, which is
+@samp{.:/usr/lib/awk:/usr/local/lib/awk} is used if @code{AWKPATH} is not set.
+If a file name given to the @samp{-f} option contains a @samp{/} character,
+no path search is performed.
+@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable},
+for a full description of the @code{AWKPATH} environment variable.@refill
+
+@code{gawk} compiles the program into an internal form, and then proceeds to
+read each file named in the @code{ARGV} array. If there are no files named
+on the command line, @code{gawk} reads the standard input.
+
+If a ``file'' named on the command line has the form
+@samp{@var{var}=@var{val}}, it is treated as a variable assignment: the
+variable @var{var} is assigned the value @var{val}.
+If any of the files have a value that is the null string, that
+element in the list is skipped.@refill
+
+For each line in the input, @code{gawk} tests to see if it matches any
+@var{pattern} in the @code{awk} program. For each pattern that the line
+matches, the associated @var{action} is executed.
+
+@node Variables/Fields, Rules Summary, Language Summary, Gawk Summary
+@appendixsec Variables and Fields
+
+@code{awk} variables are dynamic; they come into existence when they are
+first used. Their values are either floating-point numbers or strings.
+@code{awk} also has one-dimensional arrays; multi-dimensional arrays
+may be simulated. There are several predefined variables that
+@code{awk} sets as a program runs; these are summarized below.
+
+@menu
+* Fields Summary:: Input field splitting.
+* Built-in Summary:: @code{awk}'s built-in variables.
+* Arrays Summary:: Using arrays.
+* Data Type Summary:: Values in @code{awk} are numbers or strings.
+@end menu
+
+@node Fields Summary, Built-in Summary, Variables/Fields, Variables/Fields
+@appendixsubsec Fields
+
+As each input line is read, @code{gawk} splits the line into
+@var{fields}, using the value of the @code{FS} variable as the field
+separator. If @code{FS} is a single character, fields are separated by
+that character. Otherwise, @code{FS} is expected to be a full regular
+expression. In the special case that @code{FS} is a single blank,
+fields are separated by runs of blanks and/or tabs. Note that the value
+of @code{IGNORECASE} (@pxref{Case-sensitivity, ,Case-sensitivity in Matching})
+also affects how fields are split when @code{FS} is a regular expression.@refill
+
+Each field in the input line may be referenced by its position, @code{$1},
+@code{$2}, and so on. @code{$0} is the whole line. The value of a field may
+be assigned to as well. Field numbers need not be constants:
+
+@example
+n = 5
+print $n
+@end example
+
+@noindent
+prints the fifth field in the input line. The variable @code{NF} is set to
+the total number of fields in the input line.
+
+References to nonexistent fields (i.e., fields after @code{$NF}) return
+the null string. However, assigning to a nonexistent field (e.g.,
+@code{$(NF+2) = 5}) increases the value of @code{NF}, creates any
+intervening fields with the null string as their value, and causes the
+value of @code{$0} to be recomputed, with the fields being separated by
+the value of @code{OFS}.@refill
+
+@xref{Reading Files, ,Reading Input Files}, for a full description of the
+way @code{awk} defines and uses fields.
+
+@node Built-in Summary, Arrays Summary, Fields Summary, Variables/Fields
+@appendixsubsec Built-in Variables
+
+@code{awk}'s built-in variables are:
+
+@table @code
+@item ARGC
+The number of command line arguments (not including options or the
+@code{awk} program itself).
+
+@item ARGIND
+The index in @code{ARGV} of the current file being processed.
+It is always true that @samp{FILENAME == ARGV[ARGIND]}.
+
+@item ARGV
+The array of command line arguments. The array is indexed from 0 to
+@code{ARGC} @minus{} 1. Dynamically changing the contents of @code{ARGV}
+can control the files used for data.@refill
+
+@item CONVFMT
+The conversion format to use when converting numbers to strings.
+
+@item FIELDWIDTHS
+A space-separated list of numbers describing the fixed-width input data.
+
+@item ENVIRON
+An array containing the values of the environment variables. The array
+is indexed by variable name, each element being the value of that
+variable. Thus, the environment variable @code{HOME} would be in
+@code{ENVIRON["HOME"]}. Its value might be @file{/u/close}.
+
+Changing this array does not affect the environment seen by programs
+which @code{gawk} spawns via redirection or the @code{system} function.
+(This may change in a future version of @code{gawk}.)
+
+Some operating systems do not have environment variables.
+The array @code{ENVIRON} is empty when running on these systems.
+
+@item ERRNO
+The system error message when an error occurs using @code{getline}
+or @code{close}.
+
+@item FILENAME
+The name of the current input file. If no files are specified on the command
+line, the value of @code{FILENAME} is @samp{-}.
+
+@item FNR
+The input record number in the current input file.
+
+@item FS
+The input field separator, a blank by default.
+
+@item IGNORECASE
+The case-sensitivity flag for regular expression operations. If
+@code{IGNORECASE} has a nonzero value, then pattern matching in rules,
+field splitting with @code{FS}, regular expression matching with
+@samp{~} and @samp{!~}, and the @code{gsub}, @code{index}, @code{match},
+@code{split} and @code{sub} predefined functions all ignore case
+when doing regular expression operations.@refill
+
+@item NF
+The number of fields in the current input record.
+
+@item NR
+The total number of input records seen so far.
+
+@item OFMT
+The output format for numbers for the @code{print} statement,
+@code{"%.6g"} by default.
+
+@item OFS
+The output field separator, a blank by default.
+
+@item ORS
+The output record separator, by default a newline.
+
+@item RS
+The input record separator, by default a newline. @code{RS} is exceptional
+in that only the first character of its string value is used for separating
+records. If @code{RS} is set to the null string, then records are separated by
+blank lines. When @code{RS} is set to the null string, then the newline
+character always acts as a field separator, in addition to whatever value
+@code{FS} may have.@refill
+
+@item RSTART
+The index of the first character matched by @code{match}; 0 if no match.
+
+@item RLENGTH
+The length of the string matched by @code{match}; @minus{}1 if no match.
+
+@item SUBSEP
+The string used to separate multiple subscripts in array elements, by
+default @code{"\034"}.
+@end table
+
+@xref{Built-in Variables}, for more information.
+
+@node Arrays Summary, Data Type Summary, Built-in Summary, Variables/Fields
+@appendixsubsec Arrays
+
+Arrays are subscripted with an expression between square brackets
+(@samp{[} and @samp{]}). Array subscripts are @emph{always} strings;
+numbers are converted to strings as necessary, following the standard
+conversion rules
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).@refill
+
+If you use multiple expressions separated by commas inside the square
+brackets, then the array subscript is a string consisting of the
+concatenation of the individual subscript values, converted to strings,
+separated by the subscript separator (the value of @code{SUBSEP}).
+
+The special operator @code{in} may be used in an @code{if} or
+@code{while} statement to see if an array has an index consisting of a
+particular value.
+
+@example
+if (val in array)
+ print array[val]
+@end example
+
+If the array has multiple subscripts, use @code{(i, j, @dots{}) in array}
+to test for existence of an element.
+
+The @code{in} construct may also be used in a @code{for} loop to iterate
+over all the elements of an array.
+@xref{Scanning an Array, ,Scanning all Elements of an Array}.@refill
+
+An element may be deleted from an array using the @code{delete} statement.
+
+@xref{Arrays, ,Arrays in @code{awk}}, for more detailed information.
+
+@node Data Type Summary, , Arrays Summary, Variables/Fields
+@appendixsubsec Data Types
+
+The value of an @code{awk} expression is always either a number
+or a string.
+
+Certain contexts (such as arithmetic operators) require numeric
+values. They convert strings to numbers by interpreting the text
+of the string as a numeral. If the string does not look like a
+numeral, it converts to 0.
+
+Certain contexts (such as concatenation) require string values.
+They convert numbers to strings by effectively printing them
+with @code{sprintf}.
+@xref{Conversion, ,Conversion of Strings and Numbers}, for the details.@refill
+
+To force conversion of a string value to a number, simply add 0
+to it. If the value you start with is already a number, this
+does not change it.
+
+To force conversion of a numeric value to a string, concatenate it with
+the null string.
+
+The @code{awk} language defines comparisons as being done numerically if
+both operands are numeric, or if one is numeric and the other is a numeric
+string. Otherwise one or both operands are converted to strings and a
+string comparison is performed.
+
+Uninitialized variables have the string value @code{""} (the null, or
+empty, string). In contexts where a number is required, this is
+equivalent to 0.
+
+@xref{Variables}, for more information on variable naming and initialization;
+@pxref{Conversion, ,Conversion of Strings and Numbers}, for more information
+on how variable values are interpreted.@refill
+
+@node Rules Summary, Functions Summary, Variables/Fields, Gawk Summary
+@appendixsec Patterns and Actions
+
+@menu
+* Pattern Summary:: Quick overview of patterns.
+* Regexp Summary:: Quick overview of regular expressions.
+* Actions Summary:: Quick overview of actions.
+@end menu
+
+An @code{awk} program is mostly composed of rules, each consisting of a
+pattern followed by an action. The action is enclosed in @samp{@{} and
+@samp{@}}. Either the pattern may be missing, or the action may be
+missing, but, of course, not both. If the pattern is missing, the
+action is executed for every single line of input. A missing action is
+equivalent to this action,
+
+@example
+@{ print @}
+@end example
+
+@noindent
+which prints the entire line.
+
+Comments begin with the @samp{#} character, and continue until the end of the
+line. Blank lines may be used to separate statements. Normally, a statement
+ends with a newline; however, this is not the case for lines ending in a
+@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}. Lines
+ending in @code{do} or @code{else} also have their statements automatically
+continued on the following line. In other cases, a line can be continued by
+ending it with a @samp{\}, in which case the newline is ignored.@refill
+
+Multiple statements may be put on one line by separating them with a @samp{;}.
+This applies to both the statements within the action part of a rule (the
+usual case), and to the rule statements.
+
+@xref{Comments, ,Comments in @code{awk} Programs}, for information on
+@code{awk}'s commenting convention;
+@pxref{Statements/Lines, ,@code{awk} Statements versus Lines}, for a
+description of the line continuation mechanism in @code{awk}.@refill
+
+@node Pattern Summary, Regexp Summary, Rules Summary, Rules Summary
+@appendixsubsec Patterns
+
+@code{awk} patterns may be one of the following:
+
+@example
+/@var{regular expression}/
+@var{relational expression}
+@var{pattern} && @var{pattern}
+@var{pattern} || @var{pattern}
+@var{pattern} ? @var{pattern} : @var{pattern}
+(@var{pattern})
+! @var{pattern}
+@var{pattern1}, @var{pattern2}
+BEGIN
+END
+@end example
+
+@code{BEGIN} and @code{END} are two special kinds of patterns that are not
+tested against the input. The action parts of all @code{BEGIN} rules are
+merged as if all the statements had been written in a single @code{BEGIN}
+rule. They are executed before any of the input is read. Similarly, all the
+@code{END} rules are merged, and executed when all the input is exhausted (or
+when an @code{exit} statement is executed). @code{BEGIN} and @code{END}
+patterns cannot be combined with other patterns in pattern expressions.
+@code{BEGIN} and @code{END} rules cannot have missing action parts.@refill
+
+For @samp{/@var{regular-expression}/} patterns, the associated statement is
+executed for each input line that matches the regular expression. Regular
+expressions are extensions of those in @code{egrep}, and are summarized below.
+
+A @var{relational expression} may use any of the operators defined below in
+the section on actions. These generally test whether certain fields match
+certain regular expressions.
+
+The @samp{&&}, @samp{||}, and @samp{!} operators are logical ``and,''
+logical ``or,'' and logical ``not,'' respectively, as in C. They do
+short-circuit evaluation, also as in C, and are used for combining more
+primitive pattern expressions. As in most languages, parentheses may be
+used to change the order of evaluation.
+
+The @samp{?:} operator is like the same operator in C. If the first
+pattern matches, then the second pattern is matched against the input
+record; otherwise, the third is matched. Only one of the second and
+third patterns is matched.
+
+The @samp{@var{pattern1}, @var{pattern2}} form of a pattern is called a
+range pattern. It matches all input lines starting with a line that
+matches @var{pattern1}, and continuing until a line that matches
+@var{pattern2}, inclusive. A range pattern cannot be used as an operand
+to any of the pattern operators.
+
+@xref{Patterns}, for a full description of the pattern part of @code{awk}
+rules.
+
+@node Regexp Summary, Actions Summary, Pattern Summary, Rules Summary
+@appendixsubsec Regular Expressions
+
+Regular expressions are the extended kind found in @code{egrep}.
+They are composed of characters as follows:
+
+@table @code
+@item @var{c}
+matches the character @var{c} (assuming @var{c} is a character with no
+special meaning in regexps).
+
+@item \@var{c}
+matches the literal character @var{c}.
+
+@item .
+matches any character except newline.
+
+@item ^
+matches the beginning of a line or a string.
+
+@item $
+matches the end of a line or a string.
+
+@item [@var{abc}@dots{}]
+matches any of the characters @var{abc}@dots{} (character class).
+
+@item [^@var{abc}@dots{}]
+matches any character except @var{abc}@dots{} and newline (negated
+character class).
+
+@item @var{r1}|@var{r2}
+matches either @var{r1} or @var{r2} (alternation).
+
+@item @var{r1r2}
+matches @var{r1}, and then @var{r2} (concatenation).
+
+@item @var{r}+
+matches one or more @var{r}'s.
+
+@item @var{r}*
+matches zero or more @var{r}'s.
+
+@item @var{r}?
+matches zero or one @var{r}'s.
+
+@item (@var{r})
+matches @var{r} (grouping).
+@end table
+
+@xref{Regexp, ,Regular Expressions as Patterns}, for a more detailed
+explanation of regular expressions.
+
+The escape sequences allowed in string constants are also valid in
+regular expressions (@pxref{Constants, ,Constant Expressions}).
+
+@node Actions Summary, , Regexp Summary, Rules Summary
+@appendixsubsec Actions
+
+Action statements are enclosed in braces, @samp{@{} and @samp{@}}.
+Action statements consist of the usual assignment, conditional, and looping
+statements found in most languages. The operators, control statements,
+and input/output statements available are patterned after those in C.
+
+@menu
+* Operator Summary:: @code{awk} operators.
+* Control Flow Summary:: The control statements.
+* I/O Summary:: The I/O statements.
+* Printf Summary:: A summary of @code{printf}.
+* Special File Summary:: Special file names interpreted internally.
+* Numeric Functions Summary:: Built-in numeric functions.
+* String Functions Summary:: Built-in string functions.
+* Time Functions Summary:: Built-in time functions.
+* String Constants Summary:: Escape sequences in strings.
+@end menu
+
+@node Operator Summary, Control Flow Summary, Actions Summary, Actions Summary
+@appendixsubsubsec Operators
+
+The operators in @code{awk}, in order of increasing precedence, are:
+
+@table @code
+@item = += -= *= /= %= ^=
+Assignment. Both absolute assignment (@code{@var{var}=@var{value}})
+and operator assignment (the other forms) are supported.
+
+@item ?:
+A conditional expression, as in C. This has the form @code{@var{expr1} ?
+@var{expr2} : @var{expr3}}. If @var{expr1} is true, the value of the
+expression is @var{expr2}; otherwise it is @var{expr3}. Only one of
+@var{expr2} and @var{expr3} is evaluated.@refill
+
+@item ||
+Logical ``or''.
+
+@item &&
+Logical ``and''.
+
+@item ~ !~
+Regular expression match, negated match.
+
+@item < <= > >= != ==
+The usual relational operators.
+
+@item @var{blank}
+String concatenation.
+
+@item + -
+Addition and subtraction.
+
+@item * / %
+Multiplication, division, and modulus.
+
+@item + - !
+Unary plus, unary minus, and logical negation.
+
+@item ^
+Exponentiation (@samp{**} may also be used, and @samp{**=} for the assignment
+operator, but they are not specified in the @sc{posix} standard).
+
+@item ++ --
+Increment and decrement, both prefix and postfix.
+
+@item $
+Field reference.
+@end table
+
+@xref{Expressions, ,Expressions as Action Statements}, for a full
+description of all the operators listed above.
+@xref{Fields, ,Examining Fields}, for a description of the field
+reference operator.@refill
+
+@node Control Flow Summary, I/O Summary, Operator Summary, Actions Summary
+@appendixsubsubsec Control Statements
+
+The control statements are as follows:
+
+@example
+if (@var{condition}) @var{statement} @r{[} else @var{statement} @r{]}
+while (@var{condition}) @var{statement}
+do @var{statement} while (@var{condition})
+for (@var{expr1}; @var{expr2}; @var{expr3}) @var{statement}
+for (@var{var} in @var{array}) @var{statement}
+break
+continue
+delete @var{array}[@var{index}]
+exit @r{[} @var{expression} @r{]}
+@{ @var{statements} @}
+@end example
+
+@xref{Statements, ,Control Statements in Actions}, for a full description
+of all the control statements listed above.
+
+@node I/O Summary, Printf Summary, Control Flow Summary, Actions Summary
+@appendixsubsubsec I/O Statements
+
+The input/output statements are as follows:
+
+@table @code
+@item getline
+Set @code{$0} from next input record; set @code{NF}, @code{NR}, @code{FNR}.
+
+@item getline <@var{file}
+Set @code{$0} from next record of @var{file}; set @code{NF}.
+
+@item getline @var{var}
+Set @var{var} from next input record; set @code{NR}, @code{FNR}.
+
+@item getline @var{var} <@var{file}
+Set @var{var} from next record of @var{file}.
+
+@item next
+Stop processing the current input record. The next input record is read and
+processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+
+@item next file
+Stop processing the current input file. The next input record read comes
+from the next input file. @code{FILENAME} is updated, @code{FNR} is set to 1,
+and processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+
+@item print
+Prints the current record.
+
+@item print @var{expr-list}
+Prints expressions.
+
+@item print @var{expr-list} > @var{file}
+Prints expressions on @var{file}.
+
+@item printf @var{fmt, expr-list}
+Format and print.
+
+@item printf @var{fmt, expr-list} > @var{file}
+Format and print on @var{file}.
+@end table
+
+Other input/output redirections are also allowed. For @code{print} and
+@code{printf}, @samp{>> @var{file}} appends output to the @var{file},
+and @samp{| @var{command}} writes on a pipe. In a similar fashion,
+@samp{@var{command} | getline} pipes input into @code{getline}.
+@code{getline} returns 0 on end of file, and @minus{}1 on an error.@refill
+
+@xref{Getline, ,Explicit Input with @code{getline}}, for a full description
+of the @code{getline} statement.
+@xref{Printing, ,Printing Output}, for a full description of @code{print} and
+@code{printf}. Finally, @pxref{Next Statement, ,The @code{next} Statement},
+for a description of how the @code{next} statement works.@refill
+
+@node Printf Summary, Special File Summary, I/O Summary, Actions Summary
+@appendixsubsubsec @code{printf} Summary
+
+The @code{awk} @code{printf} statement and @code{sprintf} function
+accept the following conversion specification formats:
+
+@table @code
+@item %c
+An ASCII character. If the argument used for @samp{%c} is numeric, it is
+treated as a character and printed. Otherwise, the argument is assumed to
+be a string, and only the first character of that string is printed.
+
+@item %d
+@itemx %i
+A decimal number (the integer part).
+
+@item %e
+A floating point number of the form
+@samp{@r{[}-@r{]}d.ddddddE@r{[}+-@r{]}dd}.@refill
+
+@item %f
+A floating point number of the form
+@r{[}@code{-}@r{]}@code{ddd.dddddd}.
+
+@item %g
+Use @samp{%e} or @samp{%f} conversion, whichever produces a shorter string,
+with nonsignificant zeros suppressed.
+
+@item %o
+An unsigned octal number (again, an integer).
+
+@item %s
+A character string.
+
+@item %x
+An unsigned hexadecimal number (an integer).
+
+@item %X
+Like @samp{%x}, except use @samp{A} through @samp{F} instead of @samp{a}
+through @samp{f} for decimal 10 through 15.@refill
+
+@item %%
+A single @samp{%} character; no argument is converted.
+@end table
+
+There are optional, additional parameters that may lie between the @samp{%}
+and the control letter:
+
+@table @code
+@item -
+The expression should be left-justified within its field.
+
+@item @var{width}
+The field should be padded to this width. If @var{width} has a leading zero,
+then the field is padded with zeros. Otherwise it is padded with blanks.
+
+@item .@var{prec}
+A number indicating the maximum width of strings or digits to the right
+of the decimal point.
+@end table
+
+Either or both of the @var{width} and @var{prec} values may be specified
+as @samp{*}. In that case, the particular value is taken from the argument
+list.
+
+@xref{Printf, ,Using @code{printf} Statements for Fancier Printing}, for
+examples and for a more detailed description.
+
+@node Special File Summary, Numeric Functions Summary, Printf Summary, Actions Summary
+@appendixsubsubsec Special File Names
+
+When doing I/O redirection from either @code{print} or @code{printf} into a
+file, or via @code{getline} from a file, @code{gawk} recognizes certain special
+file names internally. These file names allow access to open file descriptors
+inherited from @code{gawk}'s parent process (usually the shell). The
+file names are:
+
+@table @file
+@item /dev/stdin
+The standard input.
+
+@item /dev/stdout
+The standard output.
+
+@item /dev/stderr
+The standard error output.
+
+@item /dev/fd/@var{n}
+The file denoted by the open file descriptor @var{n}.
+@end table
+
+In addition, the following files provide process-related information
+about the running @code{gawk} program.
+
+@table @file
+@item /dev/pid
+Reading this file returns the process ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/ppid
+Reading this file returns the parent process ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/pgrpid
+Reading this file returns the process group ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/user
+Reading this file returns a single record terminated with a newline.
+The fields are separated with blanks. The fields represent the
+following information:
+
+@table @code
+@item $1
+The value of the @code{getuid} system call.
+
+@item $2
+The value of the @code{geteuid} system call.
+
+@item $3
+The value of the @code{getgid} system call.
+
+@item $4
+The value of the @code{getegid} system call.
+@end table
+
+If there are any additional fields, they are the group IDs returned by the
+@code{getgroups} system call.
+(Multiple groups may not be supported on all systems.)@refill
+@end table
+
+@noindent
+These file names may also be used on the command line to name data files.
+These file names are only recognized internally if you do not
+actually have files by these names on your system.
+
+@xref{Special Files, ,Standard I/O Streams}, for a longer description that
+provides the motivation for this feature.
+
+@node Numeric Functions Summary, String Functions Summary, Special File Summary, Actions Summary
+@appendixsubsubsec Numeric Functions
+
+@code{awk} has the following predefined arithmetic functions:
+
+@table @code
+@item atan2(@var{y}, @var{x})
+returns the arctangent of @var{y/x} in radians.
+
+@item cos(@var{expr})
+returns the cosine of @var{expr}, which is in radians.
+
+@item exp(@var{expr})
+the exponential function.
+
+@item int(@var{expr})
+truncates to integer.
+
+@item log(@var{expr})
+the natural logarithm function.
+
+@item rand()
+returns a random number between 0 and 1.
+
+@item sin(@var{expr})
+returns the sine of @var{expr}, which is in radians.
+
+@item sqrt(@var{expr})
+the square root function.
+
+@item srand(@var{expr})
+use @var{expr} as a new seed for the random number generator. If no @var{expr}
+is provided, the time of day is used. The return value is the previous
+seed for the random number generator.
+@end table
+
+@node String Functions Summary, Time Functions Summary, Numeric Functions Summary, Actions Summary
+@appendixsubsubsec String Functions
+
+@code{awk} has the following predefined string functions:
+
+@table @code
+@item gsub(@var{r}, @var{s}, @var{t})
+for each substring matching the regular expression @var{r} in the string
+@var{t}, substitute the string @var{s}, and return the number of substitutions.
+If @var{t} is not supplied, use @code{$0}.
+
+@item index(@var{s}, @var{t})
+returns the index of the string @var{t} in the string @var{s}, or 0 if
+@var{t} is not present.
+
+@item length(@var{s})
+returns the length of the string @var{s}. The length of @code{$0}
+is returned if no argument is supplied.
+
+@item match(@var{s}, @var{r})
+returns the position in @var{s} where the regular expression @var{r}
+occurs, or 0 if @var{r} is not present, and sets the values of @code{RSTART}
+and @code{RLENGTH}.
+
+@item split(@var{s}, @var{a}, @var{r})
+splits the string @var{s} into the array @var{a} on the regular expression
+@var{r}, and returns the number of fields. If @var{r} is omitted, @code{FS}
+is used instead.
+
+@item sprintf(@var{fmt}, @var{expr-list})
+prints @var{expr-list} according to @var{fmt}, and returns the resulting string.
+
+@item sub(@var{r}, @var{s}, @var{t})
+this is just like @code{gsub}, but only the first matching substring is
+replaced.
+
+@item substr(@var{s}, @var{i}, @var{n})
+returns the @var{n}-character substring of @var{s} starting at @var{i}.
+If @var{n} is omitted, the rest of @var{s} is used.
+
+@item tolower(@var{str})
+returns a copy of the string @var{str}, with all the upper-case characters in
+@var{str} translated to their corresponding lower-case counterparts.
+Nonalphabetic characters are left unchanged.
+
+@item toupper(@var{str})
+returns a copy of the string @var{str}, with all the lower-case characters in
+@var{str} translated to their corresponding upper-case counterparts.
+Nonalphabetic characters are left unchanged.
+
+@item system(@var{cmd-line})
+Execute the command @var{cmd-line}, and return the exit status.
+@end table
+
+@node Time Functions Summary, String Constants Summary, String Functions Summary, Actions Summary
+@appendixsubsubsec Built-in time functions
+
+The following two functions are available for getting the current
+time of day, and for formatting time stamps.
+
+@table @code
+@item systime()
+returns the current time of day as the number of seconds since a particular
+epoch (Midnight, January 1, 1970 @sc{utc}, on @sc{posix} systems).
+
+@item strftime(@var{format}, @var{timestamp})
+formats @var{timestamp} according to the specification in @var{format}.
+The current time of day is used if no @var{timestamp} is supplied.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for the
+details on the conversion specifiers that @code{strftime} accepts.@refill
+@end table
+
+@iftex
+@xref{Built-in, ,Built-in Functions}, for a description of all of
+@code{awk}'s built-in functions.
+@end iftex
+
+@node String Constants Summary, , Time Functions Summary, Actions Summary
+@appendixsubsubsec String Constants
+
+String constants in @code{awk} are sequences of characters enclosed
+between double quotes (@code{"}). Within strings, certain @dfn{escape sequences}
+are recognized, as in C. These are:
+
+@table @code
+@item \\
+A literal backslash.
+
+@item \a
+The ``alert'' character; usually the ASCII BEL character.
+
+@item \b
+Backspace.
+
+@item \f
+Formfeed.
+
+@item \n
+Newline.
+
+@item \r
+Carriage return.
+
+@item \t
+Horizontal tab.
+
+@item \v
+Vertical tab.
+
+@item \x@var{hex digits}
+The character represented by the string of hexadecimal digits following
+the @samp{\x}. As in @sc{ansi} C, all following hexadecimal digits are
+considered part of the escape sequence. (This feature should tell us
+something about language design by committee.) E.g., @code{"\x1B"} is a
+string containing the ASCII ESC (escape) character. (The @samp{\x}
+escape sequence is not in @sc{posix} @code{awk}.)
+
+@item \@var{ddd}
+The character represented by the 1-, 2-, or 3-digit sequence of octal
+digits. Thus, @code{"\033"} is also a string containing the ASCII ESC
+(escape) character.
+
+@item \@var{c}
+The literal character @var{c}.
+@end table
+
+The escape sequences may also be used inside constant regular expressions
+(e.g., the regexp @code{@w{/[@ \t\f\n\r\v]/}} matches whitespace
+characters).@refill
+
+@xref{Constants, ,Constant Expressions}.
+
+@node Functions Summary, Historical Features, Rules Summary, Gawk Summary
+@appendixsec Functions
+
+Functions in @code{awk} are defined as follows:
+
+@example
+function @var{name}(@var{parameter list}) @{ @var{statements} @}
+@end example
+
+Actual parameters supplied in the function call are used to instantiate
+the formal parameters declared in the function. Arrays are passed by
+reference, other variables are passed by value.
+
+If there are fewer arguments passed than there are names in @var{parameter list},
+the extra names are given the null string as value. Extra names have the
+effect of local variables.
+
+The open-parenthesis in a function call of a user-defined function must
+immediately follow the function name, without any intervening white space.
+This is to avoid a syntactic ambiguity with the concatenation operator.
+
+The word @code{func} may be used in place of @code{function} (but not in
+@sc{posix} @code{awk}).
+
+Use the @code{return} statement to return a value from a function.
+
+@xref{User-defined, ,User-defined Functions}, for a more complete description.
+
+@node Historical Features, , Functions Summary, Gawk Summary
+@appendixsec Historical Features
+
+There are two features of historical @code{awk} implementations that
+@code{gawk} supports. First, it is possible to call the @code{length}
+built-in function not only with no arguments, but even without parentheses!
+
+@example
+a = length
+@end example
+
+@noindent
+is the same as either of
+
+@example
+a = length()
+a = length($0)
+@end example
+
+@noindent
+This feature is marked as ``deprecated'' in the @sc{posix} standard, and
+@code{gawk} will issue a warning about its use if @samp{-W lint} is
+specified on the command line.
+
+The other feature is the use of the @code{continue} statement outside the
+body of a @code{while}, @code{for}, or @code{do} loop. Traditional
+@code{awk} implementations have treated such usage as equivalent to the
+@code{next} statement. @code{gawk} will support this usage if @samp{-W posix}
+has not been specified.
+
+@node Sample Program, Bugs, Gawk Summary, Top
+@appendix Sample Program
+
+The following example is a complete @code{awk} program, which prints
+the number of occurrences of each word in its input. It illustrates the
+associative nature of @code{awk} arrays by using strings as subscripts. It
+also demonstrates the @samp{for @var{x} in @var{array}} construction.
+Finally, it shows how @code{awk} can be used in conjunction with other
+utility programs to do a useful task of some complexity with a minimum of
+effort. Some explanations follow the program listing.@refill
+
+@example
+awk '
+# Print list of word frequencies
+@{
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+@}
+
+END @{
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+@}'
+@end example
+
+The first thing to notice about this program is that it has two rules. The
+first rule, because it has an empty pattern, is executed on every line of
+the input. It uses @code{awk}'s field-accessing mechanism
+(@pxref{Fields, ,Examining Fields}) to pick out the individual words from
+the line, and the built-in variable @code{NF} (@pxref{Built-in Variables})
+to know how many fields are available.@refill
+
+For each input word, an element of the array @code{freq} is incremented to
+reflect that the word has been seen an additional time.@refill
+
+The second rule, because it has the pattern @code{END}, is not executed
+until the input has been exhausted. It prints out the contents of the
+@code{freq} table that has been built up inside the first action.@refill
+
+Note that this program has several problems that would prevent it from being
+useful by itself on real text files:@refill
+
+@itemize @bullet
+@item
+Words are detected using the @code{awk} convention that fields are
+separated by whitespace and that other characters in the input (except
+newlines) don't have any special meaning to @code{awk}. This means that
+punctuation characters count as part of words.@refill
+
+@item
+The @code{awk} language considers upper and lower case characters to be
+distinct. Therefore, @samp{foo} and @samp{Foo} are not treated by this
+program as the same word. This is undesirable since in normal text, words
+are capitalized if they begin sentences, and a frequency analyzer should not
+be sensitive to that.@refill
+
+@item
+The output does not come out in any useful order. You're more likely to be
+interested in which words occur most frequently, or having an alphabetized
+table of how frequently each word occurs.@refill
+@end itemize
+
+The way to solve these problems is to use some of the more advanced
+features of the @code{awk} language. First, we use @code{tolower} to remove
+case distinctions. Next, we use @code{gsub} to remove punctuation
+characters. Finally, we use the system @code{sort} utility to process the
+output of the @code{awk} script. First, here is the new version of
+the program:@refill
+
+@example
+awk '
+# Print list of word frequencies
+@{
+ $0 = tolower($0) # remove case distinctions
+ gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+@}
+
+END @{
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+@}'
+@end example
+
+Assuming we have saved this program in a file named @file{frequency.awk},
+and that the data is in @file{file1}, the following pipeline
+
+@example
+awk -f frequency.awk file1 | sort +1 -nr
+@end example
+
+@noindent
+produces a table of the words appearing in @file{file1} in order of
+decreasing frequency.
+
+The @code{awk} program suitably massages the data and produces a word
+frequency table, which is not ordered.
+
+The @code{awk} script's output is then sorted by the @code{sort} command and
+printed on the terminal. The options given to @code{sort} in this example
+specify to sort using the second field of each input line (skipping one field),
+that the sort keys should be treated as numeric quantities (otherwise
+@samp{15} would come before @samp{5}), and that the sorting should be done
+in descending (reverse) order.@refill
+
+We could have even done the @code{sort} from within the program, by
+changing the @code{END} action to:
+
+@example
+END @{
+ sort = "sort +1 -nr"
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word] | sort
+ close(sort)
+@}'
+@end example
+
+See the general operating system documentation for more information on how
+to use the @code{sort} command.@refill
+
+@ignore
+@strong{ADR: I have some more substantial programs courtesy of Rick Adams
+at UUNET. I am planning on incorporating those either in addition to or
+instead of this program.}
+
+@strong{I would also like to incorporate the general @code{translate}
+function that I have written.}
+
+@strong{I have a ton of other sample programs to include too.}
+@end ignore
+
+@node Bugs, Notes, Sample Program, Top
+@appendix Reporting Problems and Bugs
+
+@c This chapter stolen shamelessly from the GNU m4 manual.
+@c This chapter has been unshamelessly altered to emulate changes made to
+@c make.texi from whence it was originally shamelessly stolen! :-} --mew
+
+If you have problems with @code{gawk} or think that you have found a bug,
+please report it to the developers; we cannot promise to do anything
+but we might well want to fix it.
+
+Before reporting a bug, make sure you have actually found a real bug.
+Carefully reread the documentation and see if it really says you can do
+what you're trying to do. If it's not clear whether you should be able
+to do something or not, report that too; it's a bug in the documentation!
+
+Before reporting a bug or trying to fix it yourself, try to isolate it
+to the smallest possible @code{awk} program and input data file that
+reproduces the problem. Then send us the program and data file,
+some idea of what kind of Unix system you're using, and the exact results
+@code{gawk} gave you. Also say what you expected to occur; this will help
+us decide whether the problem was really in the documentation.
+
+Once you have a precise problem, send e-mail to (Internet)
+@samp{bug-gnu-utils@@prep.ai.mit.edu} or (UUCP)
+@samp{mit-eddie!prep.ai.mit.edu!bug-gnu-utils}. Please include the
+version number of @code{gawk} you are using. You can get this information
+with the command @samp{gawk -W version '@{@}' /dev/null}.
+You should send carbon copies of your mail to David Trueman at
+@samp{david@@cs.dal.ca}, and to Arnold Robbins, who can be reached at
+@samp{arnold@@skeeve.atl.ga.us}. David is most likely to fix code
+problems, while Arnold is most likely to fix documentation problems.@refill
+
+Non-bug suggestions are always welcome as well. If you have questions
+about things that are unclear in the documentation or are just obscure
+features, ask Arnold Robbins; he will try to help you out, although he
+may not have the time to fix the problem. You can send him electronic mail at the Internet address
+above.
+
+If you find bugs in one of the non-Unix ports of @code{gawk}, please send
+an electronic mail message to the person who maintains that port. They
+are listed below, and also in the @file{README} file in the @code{gawk}
+distribution. Information in the @code{README} file should be considered
+authoritative if it conflicts with this manual.
+
+The people maintaining the non-Unix ports of @code{gawk} are:
+
+@table @asis
+@item MS-DOS
+The port to MS-DOS is maintained by Scott Deifik.
+His electronic mail address is @samp{scottd@@amgen.com}.
+
+@item VMS
+The port to VAX VMS is maintained by Pat Rankin.
+His electronic mail address is @samp{rankin@@eql.caltech.edu}.
+
+@item Atari ST
+The port to the Atari ST is maintained by Michal Jaegermann.
+His electronic mail address is @samp{ntomczak@@vm.ucs.ualberta.ca}.
+
+@end table
+
+If your bug is also reproducible under Unix, please send copies of your
+report to the general GNU bug list, as well as to Arnold Robbins and David
+Trueman, at the addresses listed above.
+
+@node Notes, Glossary, Bugs, Top
+@appendix Implementation Notes
+
+This appendix contains information mainly of interest to implementors and
+maintainers of @code{gawk}. Everything in it applies specifically to
+@code{gawk}, and not to other implementations.
+
+@menu
+* Compatibility Mode:: How to disable certain @code{gawk} extensions.
+* Future Extensions:: New features we may implement soon.
+* Improvements:: Suggestions for improvements by volunteers.
+@end menu
+
+@node Compatibility Mode, Future Extensions, Notes, Notes
+@appendixsec Downward Compatibility and Debugging
+
+@xref{POSIX/GNU, ,Extensions in @code{gawk} not in POSIX @code{awk}},
+for a summary of the GNU extensions to the @code{awk} language and program.
+All of these features can be turned off by invoking @code{gawk} with the
+@samp{-W compat} option, or with the @samp{-W posix} option.@refill
+
+If @code{gawk} is compiled for debugging with @samp{-DDEBUG}, then there
+is one more option available on the command line:
+
+@table @samp
+@item -W parsedebug
+Print out the parse stack information as the program is being parsed.
+@end table
+
+This option is intended only for serious @code{gawk} developers,
+and not for the casual user. It probably has not even been compiled into
+your version of @code{gawk}, since it slows down execution.
+
+@node Future Extensions, Improvements, Compatibility Mode, Notes
+@appendixsec Probable Future Extensions
+
+This section briefly lists extensions that indicate the directions we are
+currently considering for @code{gawk}. The file @file{FUTURES} in the
+@code{gawk} distribution lists these extensions, as well as several others.
+
+@table @asis
+@item @code{RS} as a regexp
+The meaning of @code{RS} may be generalized along the lines of @code{FS}.
+
+@item Control of subprocess environment
+Changes made in @code{gawk} to the array @code{ENVIRON} may be
+propagated to subprocesses run by @code{gawk}.
+
+@item Databases
+It may be possible to map a GDBM/NDBM/SDBM file into an @code{awk} array.
+
+@item Single-character fields
+The null string, @code{""}, as a field separator, will cause field
+splitting and the @code{split} function to separate individual characters.
+Thus, @code{split("abcd", a, "")} would yield @code{a[1] == "a"},
+@code{a[2] == "b"}, and so on.
+
+@item More @code{lint} warnings
+There are more things that could be checked for portability.
+
+@item @code{RECLEN} variable for fixed length records
+Along with @code{FIELDWIDTHS}, this would speed up the processing of
+fixed-length records.
+
+@item @code{RT} variable to hold the record terminator
+It is occasionally useful to have access to the actual string of
+characters that matched the @code{RS} variable. The @code{RT}
+variable would hold these characters.
+
+@item A @code{restart} keyword
+After modifying @code{$0}, @code{restart} would restart the pattern
+matching loop, without reading a new record from the input.
+
+@item A @samp{|&} redirection
+The @samp{|&} redirection, in place of @samp{|}, would open a two-way
+pipeline for communication with a sub-process (via @code{getline} and
+@code{print} and @code{printf}).
+
+@item @code{IGNORECASE} affecting all comparisons
+The effects of the @code{IGNORECASE} variable may be generalized to
+all string comparisons, and not just regular expression operations.
+
+@item A way to mix command line source code and library files
+There may be a new option that would make it possible to easily use library
+functions from a program entered on the command line.
+@c probably a @samp{-s} option...
+
+@item GNU-style long options
+We will add GNU-style long options
+to @code{gawk} for compatibility with other GNU programs.
+(For example, @samp{--field-separator=:} would be equivalent to
+@samp{-F:}.)@refill
+
+@c this is @emph{very} long term --- not worth including right now.
+@ignore
+@item The C Comma Operator
+We may add the C comma operator, which takes the form
+@code{@var{expr1},@var{expr2}}. The first expression is evaluated, and the
+result is thrown away. The value of the full expression is the value of
+@var{expr2}.@refill
+@end ignore
+@end table
+
+@node Improvements, , Future Extensions, Notes
+@appendixsec Suggestions for Improvements
+
+Here are some projects that would-be @code{gawk} hackers might like to take
+on. They vary in size from a few days to a few weeks of programming,
+depending on which one you choose and how fast a programmer you are. Please
+send any improvements you write to the maintainers at the GNU
+project.@refill
+
+@enumerate
+@item
+Compilation of @code{awk} programs: @code{gawk} uses a Bison (YACC-like)
+parser to convert the script given it into a syntax tree; the syntax
+tree is then executed by a simple recursive evaluator. This method incurs
+a lot of overhead, since the recursive evaluator performs many procedure
+calls to do even the simplest things.@refill
+
+It should be possible for @code{gawk} to convert the script's parse tree
+into a C program which the user would then compile, using the normal
+C compiler and a special @code{gawk} library to provide all the needed
+functions (regexps, fields, associative arrays, type coercion, and so
+on).@refill
+
+An easier possibility might be for an intermediate phase of @code{gawk} to
+convert the parse tree into a linear byte code form like the one used
+in GNU Emacs Lisp. The recursive evaluator would then be replaced by
+a straight line byte code interpreter that would be intermediate in speed
+between running a compiled program and doing what @code{gawk} does
+now.@refill
+
+This may actually happen for the 3.0 version of @code{gawk}.
+
+@item
+An error message section has not been included in this version of the
+manual. Perhaps some nice beta testers will document some of the messages
+for the future.
+
+@item
+The programs in the test suite could use documenting in this manual.
+
+@item
+The programs and data files in the manual should be available in
+separate files to facilitate experimentation.
+
+@item
+See the @file{FUTURES} file for more ideas. Contact us if you would
+seriously like to tackle any of the items listed there.
+@end enumerate
+
+@node Glossary, Index, Notes, Top
+@appendix Glossary
+
+@table @asis
+@item Action
+A series of @code{awk} statements attached to a rule. If the rule's
+pattern matches an input record, the @code{awk} language executes the
+rule's action. Actions are always enclosed in curly braces.
+@xref{Actions, ,Overview of Actions}.@refill
+
+@item Amazing @code{awk} Assembler
+Henry Spencer at the University of Toronto wrote a retargetable assembler
+completely as @code{awk} scripts. It is thousands of lines long, including
+machine descriptions for several 8-bit microcomputers.
+@c It is distributed with @code{gawk} (as part of the test suite) and
+It is a good example of a
+program that would have been better written in another language.@refill
+
+@item @sc{ansi}
+The American National Standards Institute. This organization produces
+many standards, among them the standard for the C programming language.
+
+@item Assignment
+An @code{awk} expression that changes the value of some @code{awk}
+variable or data object. An object that you can assign to is called an
+@dfn{lvalue}. @xref{Assignment Ops, ,Assignment Expressions}.@refill
+
+@item @code{awk} Language
+The language in which @code{awk} programs are written.
+
+@item @code{awk} Program
+An @code{awk} program consists of a series of @dfn{patterns} and
+@dfn{actions}, collectively known as @dfn{rules}. For each input record
+given to the program, the program's rules are all processed in turn.
+@code{awk} programs may also contain function definitions.@refill
+
+@item @code{awk} Script
+Another name for an @code{awk} program.
+
+@item Built-in Function
+The @code{awk} language provides built-in functions that perform various
+numerical, time stamp related, and string computations. Examples are
+@code{sqrt} (for the square root of a number) and @code{substr} (for a
+substring of a string). @xref{Built-in, ,Built-in Functions}.@refill
+
+@item Built-in Variable
+@code{ARGC}, @code{ARGIND}, @code{ARGV}, @code{CONVFMT}, @code{ENVIRON},
+@code{ERRNO}, @code{FIELDWIDTHS}, @code{FILENAME}, @code{FNR}, @code{FS},
+@code{IGNORECASE}, @code{NF}, @code{NR}, @code{OFMT}, @code{OFS}, @code{ORS},
+@code{RLENGTH}, @code{RSTART}, @code{RS}, and @code{SUBSEP},
+are the variables that have special
+meaning to @code{awk}. Changing some of them affects @code{awk}'s running
+environment. @xref{Built-in Variables}.@refill
+
+@item Braces
+See ``Curly Braces.''
+
+@item C
+The system programming language that most GNU software is written in. The
+@code{awk} programming language has C-like syntax, and this manual
+points out similarities between @code{awk} and C when appropriate.@refill
+
+@item CHEM
+A preprocessor for @code{pic} that reads descriptions of molecules
+and produces @code{pic} input for drawing them. It was written by
+Brian Kernighan, and is available from @code{netlib@@research.att.com}.@refill
+
+@item Compound Statement
+A series of @code{awk} statements, enclosed in curly braces. Compound
+statements may be nested.
+@xref{Statements, ,Control Statements in Actions}.@refill
+
+@item Concatenation
+Concatenating two strings means sticking them together, one after another,
+giving a new string. For example, the string @samp{foo} concatenated with
+the string @samp{bar} gives the string @samp{foobar}.
+@xref{Concatenation, ,String Concatenation}.@refill
+
+@item Conditional Expression
+An expression using the @samp{?:} ternary operator, such as
+@code{@var{expr1} ? @var{expr2} : @var{expr3}}. The expression
+@var{expr1} is evaluated; if the result is true, the value of the whole
+expression is the value of @var{expr2}; otherwise the value is
+@var{expr3}. In either case, only one of @var{expr2} and @var{expr3}
+is evaluated. @xref{Conditional Exp, ,Conditional Expressions}.@refill
+
+@item Constant Regular Expression
+A constant regular expression is a regular expression written within
+slashes, such as @samp{/foo/}. This regular expression is chosen
+when you write the @code{awk} program, and cannot be changed during
+its execution. @xref{Regexp Usage, ,How to Use Regular Expressions}.
+
+@item Comparison Expression
+A relation that is either true or false, such as @code{(a < b)}.
+Comparison expressions are used in @code{if}, @code{while}, and @code{for}
+statements, and in patterns to select which input records to process.
+@xref{Comparison Ops, ,Comparison Expressions}.@refill
+
+@item Curly Braces
+The characters @samp{@{} and @samp{@}}. Curly braces are used in
+@code{awk} for delimiting actions, compound statements, and function
+bodies.@refill
+
+@item Data Objects
+These are numbers and strings of characters. Numbers are converted into
+strings and vice versa, as needed.
+@xref{Conversion, ,Conversion of Strings and Numbers}.@refill
+
+@item Dynamic Regular Expression
+A dynamic regular expression is a regular expression written as an
+ordinary expression. It could be a string constant, such as
+@code{"foo"}, but it may also be an expression whose value may vary.
+@xref{Regexp Usage, ,How to Use Regular Expressions}.
+
+@item Escape Sequences
+A special sequence of characters used for describing nonprinting
+characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII
+ESC (escape) character. @xref{Constants, ,Constant Expressions}.
+
+@item Field
+When @code{awk} reads an input record, it splits the record into pieces
+separated by whitespace (or by a separator regexp which you can
+change by setting the built-in variable @code{FS}). Such pieces are
+called fields. If the pieces are of fixed length, you can use the built-in
+variable @code{FIELDWIDTHS} to describe their lengths.
+@xref{Records, ,How Input is Split into Records}.@refill
+
+@item Format
+Format strings are used to control the appearance of output in the
+@code{printf} statement. Also, data conversions from numbers to strings
+are controlled by the format string contained in the built-in variable
+@code{CONVFMT}. @xref{Control Letters, ,Format-Control Letters}.@refill
+
+@item Function
+A specialized group of statements often used to encapsulate general
+or program-specific tasks. @code{awk} has a number of built-in
+functions, and also allows you to define your own.
+@xref{Built-in, ,Built-in Functions}.
+Also, see @ref{User-defined, ,User-defined Functions}.@refill
+
+@item @code{gawk}
+The GNU implementation of @code{awk}.
+
+@item GNU
+``GNU's not Unix''. An on-going project of the Free Software Foundation
+to create a complete, freely distributable, @sc{posix}-compliant computing
+environment.
+
+@item Input Record
+A single chunk of data read in by @code{awk}. Usually, an @code{awk} input
+record consists of one line of text.
+@xref{Records, ,How Input is Split into Records}.@refill
+
+@item Keyword
+In the @code{awk} language, a keyword is a word that has special
+meaning. Keywords are reserved and may not be used as variable names.
+
+@code{awk}'s keywords are:
+@code{if},
+@code{else},
+@code{while},
+@code{do@dots{}while},
+@code{for},
+@code{for@dots{}in},
+@code{break},
+@code{continue},
+@code{delete},
+@code{next},
+@code{function},
+@code{func},
+and @code{exit}.@refill
+
+@item Lvalue
+An expression that can appear on the left side of an assignment
+operator. In most languages, lvalues can be variables or array
+elements. In @code{awk}, a field designator can also be used as an
+lvalue.@refill
+
+@item Number
+A numeric valued data object. The @code{gawk} implementation uses double
+precision floating point to represent numbers.@refill
+
+@item Pattern
+Patterns tell @code{awk} which input records are interesting to which
+rules.
+
+A pattern is an arbitrary conditional expression against which input is
+tested. If the condition is satisfied, the pattern is said to @dfn{match}
+the input record. A typical pattern might compare the input record against
+a regular expression. @xref{Patterns}.@refill
+
+@item @sc{posix}
+The name for a series of standards being developed by the @sc{ieee}
+that specify a Portable Operating System interface. The ``IX'' denotes
+the Unix heritage of these standards. The main standard of interest for
+@code{awk} users is P1003.2, the Command Language and Utilities standard.
+
+@item Range (of input lines)
+A sequence of consecutive lines from the input file. A pattern
+can specify ranges of input lines for @code{awk} to process, or it can
+specify single lines. @xref{Patterns}.@refill
+
+@item Recursion
+When a function calls itself, either directly or indirectly.
+If this isn't clear, refer to the entry for ``recursion.''
+
+@item Redirection
+Redirection means performing input from other than the standard input
+stream, or output to other than the standard output stream.
+
+You can redirect the output of the @code{print} and @code{printf} statements
+to a file or a system command, using the @samp{>}, @samp{>>}, and @samp{|}
+operators. You can redirect input to the @code{getline} statement using
+the @samp{<} and @samp{|} operators.
+@xref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}.@refill
+
+@item Regular Expression
+See ``regexp.''
+
+@item Regexp
+Short for @dfn{regular expression}. A regexp is a pattern that denotes a
+set of strings, possibly an infinite set. For example, the regexp
+@samp{R.*xp} matches any string starting with the letter @samp{R}
+and ending with the letters @samp{xp}. In @code{awk}, regexps are
+used in patterns and in conditional expressions. Regexps may contain
+escape sequences. @xref{Regexp, ,Regular Expressions as Patterns}.@refill
+
+@item Rule
+A segment of an @code{awk} program that specifies how to process single
+input records. A rule consists of a @dfn{pattern} and an @dfn{action}.
+@code{awk} reads an input record; then, for each rule, if the input record
+satisfies the rule's pattern, @code{awk} executes the rule's action.
+Otherwise, the rule does nothing for that input record.@refill
+
+@item Side Effect
+A side effect occurs when an expression has an effect aside from merely
+producing a value. Assignment expressions, increment expressions and
+function calls have side effects. @xref{Assignment Ops, ,Assignment Expressions}.
+
+@item Special File
+A file name interpreted internally by @code{gawk}, instead of being handed
+directly to the underlying operating system. For example, @file{/dev/stdin}.
+@xref{Special Files, ,Standard I/O Streams}.
+
+@item Stream Editor
+A program that reads records from an input stream and processes them one
+or more at a time. This is in contrast with batch programs, which may
+expect to read their input files in entirety before starting to do
+anything, and with interactive programs, which require input from the
+user.@refill
+
+@item String
+A datum consisting of a sequence of characters, such as @samp{I am a
+string}. Constant strings are written with double-quotes in the
+@code{awk} language, and may contain escape sequences.
+@xref{Constants, ,Constant Expressions}.
+
+@item Whitespace
+A sequence of blank or tab characters occurring inside an input record or a
+string.@refill
+@end table
+
+@node Index, , Glossary, Top
+@unnumbered Index
+@printindex cp
+
+@summarycontents
+@contents
+@bye
+
+Unresolved Issues:
+------------------
+1. From: ntomczak@vm.ucs.ualberta.ca (Michal Jaegermann)
+ Examples of usage tend to suggest that /../ and ".." delimiters
+ can be used for regular expressions, even if definition is consistently
+ using /../. I am not sure what the real rules are and in particular
+ what of the following is a bug and what is a feature:
+ # This program matches everything
+ '"\(" { print }'
+ # This one complains about mismatched parenthesis
+ '$0 ~ "\(" { print }'
+ # This one behaves in an expected manner
+ '/\(/ { print }'
+ You may also try to use "\(" as an argument to match() to see what
+ will happen.
+
+2. From ADR.
+
+ The posix (and original Unix!) notion of awk values as both number
+ and string values needs to be put into the manual. This involves
+ major and minor rewrites of most of the manual, but should help in
+ clarifying many of the weirder points of the language.
+
+3. From ADR.
+
+ The manual should be reorganized. Expressions should be introduced
+ early, building up to regexps as expressions, and from there to their
+ use as patterns and then in actions. Built-in vars should come earlier
+ in the manual too. The 'expert info' sections marked with comments
+ should get their own sections or subsections with nodes and titles.
+ The manual should be gone over thoroughly for indexing.
+
+4. From ADR.
+
+ Robert J. Chassell points out that awk programs should have some indication
+ of how to use them. It would be useful to perhaps have a "programming
+ style" section of the manual that would include this and other tips.
+
+5. From ADR in response to moraes@uunet.ca
+ (This would make the beginnings of a good "puzzles" section...)
+
+ Date: Mon, 2 Dec 91 10:08:05 EST
+ From: gatech!cc!arnold (Arnold Robbins)
+ To: cs.dal.ca!david, uunet.ca!moraes
+ Subject: redirecting to /dev/stderr
+ Cc: skeeve!arnold, boeing.com!brennan, research.att.com!bwk
+
+ In 2.13.3 the following program no longer dumps core:
+
+ BEGIN { print "hello" > /dev/stderr ; exit(1) }
+
+ Instead, it creates a file named `0' with the word `hello' in it. AWK
+ semantics strikes again. The meaning of the statement is
+
+ print "hello" > (($0 ~ /dev/) stderr)
+
+ /dev/ tests $0 for the pattern `dev'. This yields a 0. The variable stderr,
+ having never been used, has a null string in it. The concatenation yields
+ a string value of "0" which is used as the file name. Sigh.
+
+ I think with some more time I can come up with a decent fix, but it will
+ probably only print a diagnostic with -Wlint.
+
+ Arnold
+
diff --git a/gnu/usr.bin/awk/getopt.c b/gnu/usr.bin/awk/getopt.c
new file mode 100644
index 000000000000..bbf345c33ca2
--- /dev/null
+++ b/gnu/usr.bin/awk/getopt.c
@@ -0,0 +1,662 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
+ before changing it!
+
+ Copyright (C) 1987, 88, 89, 90, 91, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifdef GAWK
+#include "config.h"
+#endif
+
+#include <stdio.h>
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#include <string.h>
+#endif /* GNU C library. */
+
+
+#ifndef __STDC__
+#define const
+#endif
+
+/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a
+ long-named option. Because this is not POSIX.2 compliant, it is
+ being phased out. */
+#define GETOPT_COMPAT
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg = 0;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+int optind = 0;
+
+/* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we scan,
+ so that eventually all the non-options are at the end. This allows options
+ to be given in any order, even with programs that were not written to
+ expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were written
+ to expect options and other ARGV-elements in any order and that care about
+ the ordering of the two. We describe each non-option ARGV-element
+ as if it were the argument of an option with character code 1.
+ Using `-' as the first character of the list of option characters
+ selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return EOF with `optind' != ARGC. */
+
+static enum /* policy for non-option ARGV-elements, described in the comment above */
+{
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering; /* chosen by _getopt_internal on the first call, while `optind' is still 0 */
+
+#ifdef __GNU_LIBRARY__
+#include <string.h>
+#define my_index strchr
+#define my_bcopy(src, dst, n) memcpy ((dst), (src), (n))
+#else
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+char *getenv ();
+
+/* Return a pointer to the first character of STRING that equals CHR,
+   or a null pointer if CHR does not occur before the terminating NUL.
+   The terminator itself is never matched, so searching for 0 fails.  */
+
+static char *
+my_index (string, chr)
+     char *string;
+     int chr;
+{
+  char *cursor;
+
+  for (cursor = string; *cursor != '\0'; cursor++)
+    if (*cursor == chr)
+      return cursor;
+
+  return 0;
+}
+
+/* Copy SIZE bytes from FROM to TO, one byte at a time in ascending
+   address order.  Nothing is copied when SIZE is zero or negative.  */
+
+static void
+my_bcopy (from, to, size)
+     char *from, *to;
+     int size;
+{
+  char *src = from;
+  char *dst = to;
+  int remaining;
+
+  for (remaining = size; remaining > 0; remaining--)
+    *dst++ = *src++;
+}
+#endif /* GNU C library. */
+
+/* Handle permutation of arguments. */
+
+/* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
+ `last_nonopt' is the index after the last of them. */
+
+static int first_nonopt;
+static int last_nonopt;
+
+/* Reverse the order of the elements ARGV[LO] .. ARGV[HI - 1] in place.
+   A range holding fewer than two elements is left untouched.  */
+
+static void
+reverse_args (argv, lo, hi)
+     char **argv;
+     int lo;
+     int hi;
+{
+  while (lo < --hi)
+    {
+      char *tem = argv[lo];
+      argv[lo++] = argv[hi];
+      argv[hi] = tem;
+    }
+}
+
+/* Exchange two adjacent subsequences of ARGV.
+   One subsequence is elements [first_nonopt,last_nonopt)
+   which contains all the non-options that have been skipped so far.
+   The other is elements [last_nonopt,optind), which contains all
+   the options processed since those non-options were skipped.
+   The relative order of the elements inside each subsequence is kept.
+
+   `first_nonopt' and `last_nonopt' are relocated so that they describe
+   the new indices of the non-options in ARGV after they are moved.
+
+   The exchange is a rotation carried out as three in-place reversals
+   (each subsequence, then the whole span).  It needs no scratch storage,
+   so it cannot fail for lack of memory, and it never copies between
+   overlapping regions.  */
+
+static void
+exchange (argv)
+     char **argv;
+{
+  /* Number of option elements that move in front of the non-options.  */
+  int options_len = optind - last_nonopt;
+
+  /* Interchange the two blocks of data in ARGV. */
+
+  reverse_args (argv, first_nonopt, last_nonopt);
+  reverse_args (argv, last_nonopt, optind);
+  reverse_args (argv, first_nonopt, optind);
+
+  /* Update records for the slots the non-options now occupy. */
+
+  first_nonopt += options_len;
+  last_nonopt = optind;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns `EOF'.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+ int argc; /* number of elements in ARGV */
+ char *const *argv; /* permuted in place in PERMUTE mode, despite the const */
+ const char *optstring; /* short option letters; a leading `-' or `+' selects the ordering mode */
+ const struct option *longopts; /* table ended by an entry with a null name, or NULL for no long options */
+ int *longind; /* if non-null, receives the table index of a matched long option */
+ int long_only; /* nonzero: a single `-' may introduce a long option as well */
+{
+ int option_index; /* index into LONGOPTS while its entries are searched */
+
+ optarg = 0; /* cleared on every call; set again below when there is a value to report */
+
+ /* Initialize the internal data when the first call is made.
+ Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ if (optind == 0)
+ {
+ first_nonopt = last_nonopt = optind = 1;
+
+ nextchar = NULL;
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ ordering = RETURN_IN_ORDER;
+ ++optstring; /* step past the mode character; NOTE(review): done on this first call only */
+ }
+ else if (optstring[0] == '+')
+ {
+ ordering = REQUIRE_ORDER;
+ ++optstring; /* step past the mode character; NOTE(review): done on this first call only */
+ }
+ else if (getenv ("POSIXLY_CORRECT") != NULL)
+ ordering = REQUIRE_ORDER;
+ else
+ ordering = PERMUTE;
+ }
+
+ if (nextchar == NULL || *nextchar == '\0') /* nothing left in the current option element: pick the next ARGV-element */
+ {
+ if (ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (last_nonopt != optind)
+ first_nonopt = optind; /* the skipped range was empty here, so restart it at optind */
+
+ /* Now skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (optind < argc
+ && (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#ifdef GETOPT_COMPAT
+ && (longopts == NULL
+ || argv[optind][0] != '+' || argv[optind][1] == '\0')
+#endif /* GETOPT_COMPAT */
+ )
+ optind++;
+ last_nonopt = optind;
+ }
+
+ /* Special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (optind != argc && !strcmp (argv[optind], "--"))
+ {
+ optind++;
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (first_nonopt == last_nonopt)
+ first_nonopt = optind;
+ last_nonopt = argc; /* everything after `--' is treated as a non-option */
+
+ optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (first_nonopt != last_nonopt)
+ optind = first_nonopt;
+ return EOF;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if ((argv[optind][0] != '-' || argv[optind][1] == '\0')
+#ifdef GETOPT_COMPAT
+ && (longopts == NULL
+ || argv[optind][0] != '+' || argv[optind][1] == '\0')
+#endif /* GETOPT_COMPAT */
+ )
+ {
+ if (ordering == REQUIRE_ORDER)
+ return EOF; /* optind is left pointing at this first non-option */
+ optarg = argv[optind++];
+ return 1; /* only RETURN_IN_ORDER gets here: the non-option is reported as the argument of option code 1 */
+ }
+
+ /* We have found another option-ARGV-element.
+ Start decoding its characters. */
+
+ nextchar = (argv[optind] + 1
+ + (longopts != NULL && argv[optind][1] == '-')); /* skip the introducer, plus a second `-' when long options are in use */
+ }
+
+ if (longopts != NULL /* try the element as a long option first */
+ && ((argv[optind][0] == '-'
+ && (argv[optind][1] == '-' || long_only))
+#ifdef GETOPT_COMPAT
+ || argv[optind][0] == '+'
+#endif /* GETOPT_COMPAT */
+ ))
+ {
+ const struct option *p;
+ char *s = nextchar; /* will be advanced to the end of the option name */
+ int exact = 0; /* set when a table name matches the typed name in full */
+ int ambig = 0; /* set when a second abbreviated match is seen */
+ const struct option *pfound = NULL; /* exact match if any, else the first abbreviated match */
+ int indfound = 0; /* table index of PFOUND */
+ extern int strncmp(); /* declared locally; <string.h> is included only when __GNU_LIBRARY__ is defined */
+
+ while (*s && *s != '=') /* the name ends at the NUL or at `=' */
+ s++;
+
+ /* Test all options for either exact match or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name;
+ p++, option_index++)
+ if (!strncmp (p->name, nextchar, s - nextchar)) /* the name as typed is a prefix of this entry */
+ {
+ if (s - nextchar == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break; /* an exact match overrides any ambiguity seen so far */
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' is ambiguous\n",
+ argv[0], argv[optind]);
+ nextchar += strlen (nextchar); /* move to the terminating NUL so the next call starts a new ARGV-element */
+ optind++;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ optind++; /* the option element is consumed; argv[optind - 1] now names it */
+ if (*s) /* the name was followed by `=' */
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = s + 1; /* the text after the `=' */
+ else
+ {
+ if (opterr)
+ {
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ "%s: option `--%s' doesn't allow an argument\n",
+ argv[0], pfound->name);
+ else
+ /* +option or -option */
+ fprintf (stderr,
+ "%s: option `%c%s' doesn't allow an argument\n",
+ argv[0], argv[optind - 1][0], pfound->name);
+ }
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1) /* no `=value' was given: the argument must come from the next ARGV-element */
+ {
+ if (optind < argc)
+ optarg = argv[optind++];
+ else
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' requires an argument\n",
+ argv[0], argv[optind - 1]);
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val; /* store the value through `flag' and report 0 instead */
+ return 0;
+ }
+ return pfound->val;
+ }
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[optind][1] == '-'
+#ifdef GETOPT_COMPAT
+ || argv[optind][0] == '+'
+#endif /* GETOPT_COMPAT */
+ || my_index (optstring, *nextchar) == NULL)
+ {
+ if (opterr)
+ {
+ if (argv[optind][1] == '-')
+ /* --option */
+ fprintf (stderr, "%s: unrecognized option `--%s'\n",
+ argv[0], nextchar);
+ else
+ /* +option or -option */
+ fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+ argv[0], argv[optind][0], nextchar);
+ }
+ nextchar = (char *) ""; /* empty string: the next call moves on to a new ARGV-element */
+ optind++;
+ return '?';
+ }
+ }
+
+ /* Look at and handle the next option-character. */
+
+ {
+ char c = *nextchar++;
+ char *temp = my_index (optstring, c); /* where C occurs in OPTSTRING, or 0 if it is not listed */
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*nextchar == '\0')
+ ++optind;
+
+ if (temp == NULL || c == ':') /* `:' is OPTSTRING punctuation, never an option letter */
+ {
+ if (opterr)
+ {
+ if (c < 040 || c >= 0177) /* outside the printing ASCII range: show the code instead of the character */
+ fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
+ argv[0], c);
+ else
+ fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c);
+ }
+ return '?';
+ }
+ if (temp[1] == ':') /* the option takes an argument */
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ optind++;
+ }
+ else
+ optarg = 0;
+ nextchar = NULL; /* any remaining text was taken as the argument; start a new element next time */
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ optind++;
+ }
+ else if (optind == argc)
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `-%c' requires an argument\n",
+ argv[0], c);
+ c = '?'; /* report the missing argument as an error to the caller */
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ optarg = argv[optind++];
+ nextchar = NULL; /* this ARGV-element is finished in every case above */
+ }
+ }
+ return c;
+ }
+}
+
+/* Standard short-option entry point: scan ARGV for the option characters
+   listed in OPTSTRING.  This runs the common scanner with no long-option
+   table, nowhere to store a long-option index, and LONG_ONLY turned off.  */
+
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  const struct option *no_longopts = 0;
+  int *no_longind = 0;
+  int result;
+
+  result = _getopt_internal (argc, argv, optstring,
+                             no_longopts, no_longind, 0);
+  return result;
+}
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver: run `getopt' over the command line with the option string
+   "abc:d:0123456789", print one line per option returned, then list the
+   ARGV-elements left over once scanning has finished.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  for (;;)
+    {
+      /* `optind' is still 0 before the first getopt call; treat that
+         as 1 when remembering which element this option came from.  */
+      int this_option_optind = optind;
+      if (this_option_optind == 0)
+        this_option_optind = 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == EOF)
+        break;
+
+      if (c >= '0' && c <= '9')
+        {
+          /* Report when digits show up in more than one ARGV-element.  */
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+        }
+      else if (c == 'a')
+        printf ("option a\n");
+      else if (c == 'b')
+        printf ("option b\n");
+      else if (c == 'c')
+        printf ("option c with value `%s'\n", optarg);
+      else if (c != '?')
+        /* Every other return value, including `d', which the option
+           string accepts but which has no branch of its own here.  */
+        printf ("?? getopt returned character code 0%o ??\n", c);
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      for (; optind < argc; optind++)
+        printf ("%s ", argv[optind]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/gnu/usr.bin/awk/getopt.h b/gnu/usr.bin/awk/getopt.h
new file mode 100644
index 000000000000..de027434f7cb
--- /dev/null
+++ b/gnu/usr.bin/awk/getopt.h
@@ -0,0 +1,128 @@
+/* Declarations for getopt.
+ Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+struct option
+{
+#if __STDC__
+ const char *name; /* long option name; a zero name ends the table */
+#else
+ char *name; /* same member for compilers without `const' */
+#endif
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ int has_arg; /* no_argument, required_argument or optional_argument */
+ int *flag; /* if not NULL, variable that is set to `val' when the option is found */
+ int val; /* value stored through `flag', or returned by `getopt' when `flag' is zero */
+};
+
+/* Symbolic values for the `has_arg' member of `struct option'.  The
+   numbers are spelled out because that member is a plain int.  */
+
+enum _argtype
+{
+  no_argument = 0,
+  required_argument = 1,
+  optional_argument = 2
+};
+
+#if __STDC__
+#if defined(__GNU_LIBRARY__)
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* not __GNU_LIBRARY__ */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+ const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind,
+ int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* not __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _GETOPT_H */
diff --git a/gnu/usr.bin/awk/getopt1.c b/gnu/usr.bin/awk/getopt1.c
new file mode 100644
index 000000000000..e2127cd58d42
--- /dev/null
+++ b/gnu/usr.bin/awk/getopt1.c
@@ -0,0 +1,160 @@
+/* Getopt for GNU.
+ Copyright (C) 1987, 88, 89, 90, 91, 1992 Free Software Foundation, Inc.
+
+This file is part of the libiberty library.
+Libiberty is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+
+Libiberty is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with libiberty; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#ifdef LIBC
+/* For when compiled as part of the GNU C library. */
+#include <ansidecl.h>
+#endif
+
+#include "getopt.h"
+
+#ifndef __STDC__
+#define const
+#endif
+
+#if defined(STDC_HEADERS) || defined(__GNU_LIBRARY__) || defined (LIBC)
+#include <stdlib.h>
+#else /* STDC_HEADERS or __GNU_LIBRARY__ */
+char *getenv ();
+#endif /* STDC_HEADERS or __GNU_LIBRARY__ */
+
+#if !defined (NULL)
+#define NULL 0
+#endif
+
+/* Scan ARGV for the short options in OPTIONS and the long options in
+   LONG_OPTIONS.  This is _getopt_internal with its final LONG_ONLY
+   argument off.  */
+
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int result;
+
+  result = _getopt_internal (argc, argv, options, long_options,
+                             opt_index, 0);
+  return result;
+}
+
+/* Variant of getopt_long in which a single '-' may introduce a long
+   option just as '--' does.  An argument that starts with '-' (not
+   '--') and matches no long option, but does match a short option, is
+   parsed as that short option.  This is _getopt_internal with its
+   final LONG_ONLY argument on.  */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int result;
+
+  result = _getopt_internal (argc, argv, options, long_options,
+                             opt_index, 1);
+  return result;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver for getopt_long: report every short or long option as it
+   is recognized, then list the non-option ARGV-elements left over.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  /* Long options accepted by the driver; all of them make getopt_long
+     return 0 and are identified through the reported index.  */
+  static struct option long_options[] =
+    {
+      {"add", required_argument, 0, 0},
+      {"append", no_argument, 0, 0},
+      {"delete", required_argument, 0, 0},
+      {"verbose", no_argument, 0, 0},
+      {"create", no_argument, 0, 0},
+      {"file", required_argument, 0, 0},
+      {0, 0, 0, 0}
+    };
+  int last_digit_elt = 0;
+  int opt;
+
+  for (;;)
+    {
+      int elt_of_this_option = optind ? optind : 1;
+      int option_index = 0;
+
+      opt = getopt_long (argc, argv, "abc:d:0123456789",
+                         long_options, &option_index);
+      if (opt == EOF)
+        break;
+
+      switch (opt)
+        {
+        case 0:
+          printf ("option %s", long_options[option_index].name);
+          if (optarg)
+            printf (" with arg %s", optarg);
+          printf ("\n");
+          break;
+
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+          if (last_digit_elt != 0 && last_digit_elt != elt_of_this_option)
+            printf ("digits occur in two different argv-elements.\n");
+          last_digit_elt = elt_of_this_option;
+          printf ("option %c\n", opt);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", opt);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      for (; optind < argc; optind++)
+        printf ("%s ", argv[optind]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/gnu/usr.bin/awk/io.c b/gnu/usr.bin/awk/io.c
new file mode 100644
index 000000000000..7004aedd519d
--- /dev/null
+++ b/gnu/usr.bin/awk/io.c
@@ -0,0 +1,1207 @@
+/*
+ * io.c --- routines for dealing with input and output and records
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+#ifndef O_RDONLY
+#include <fcntl.h>
+#endif
+
+#if !defined(S_ISDIR) && defined(S_IFDIR)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#endif
+
+#ifndef atarist
+#define INVALID_HANDLE (-1)
+#else
+#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
+#endif
+
+#if defined(MSDOS) || defined(atarist)
+#define PIPES_SIMULATED
+#endif
+
+static IOBUF *nextfile P((int skipping));
+static int inrec P((IOBUF *iop));
+static int iop_close P((IOBUF *iop));
+struct redirect *redirect P((NODE *tree, int *errflg));
+static void close_one P((void));
+static int close_redir P((struct redirect *rp));
+#ifndef PIPES_SIMULATED
+static int wait_any P((int interesting));
+#endif
+static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
+static IOBUF *iop_open P((char *file, char *how));
+static int gawk_pclose P((struct redirect *rp));
+static int do_pathopen P((char *file));
+
+extern FILE *fdopen();
+extern FILE *popen();
+
+static struct redirect *red_head = NULL;
+
+extern int output_is_tty;
+extern NODE *ARGC_node;
+extern NODE *ARGV_node;
+extern NODE *ARGIND_node;
+extern NODE *ERRNO_node;
+extern NODE **fields_arr;
+
+static jmp_buf filebuf; /* for do_nextfile() */
+
+/*
+ * do_nextfile --- gawk's "next file" extension: give up the current
+ * input file, then jump back to the setjmp() point saved in filebuf.
+ */
+
+void
+do_nextfile()
+{
+	/* a nonzero argument makes nextfile() close the current file */
+	(void) nextfile(1);
+	longjmp(filebuf, 1);
+}
+
+static IOBUF *
+nextfile(skipping) /* return the IOBUF to read main input from, or NULL */
+int skipping; /* nonzero: just close the current file and return NULL */
+{
+ static int i = 1; /* next ARGV index to examine; kept across calls */
+ static int files = 0; /* number of inputs opened so far, the stdin fallback included */
+ NODE *arg;
+ int fd = INVALID_HANDLE; /* not referenced again in this function */
+ static IOBUF *curfile = NULL; /* input currently being read, if any */
+
+ if (skipping) {
+ if (curfile != NULL)
+ iop_close(curfile);
+ curfile = NULL;
+ return NULL;
+ }
+ if (curfile != NULL) {
+ if (curfile->cnt == EOF) { /* current input exhausted: close it and look for another */
+ (void) iop_close(curfile);
+ curfile = NULL;
+ } else
+ return curfile;
+ }
+ for (; i < (int) (ARGC_node->lnode->numbr); i++) {
+ arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i));
+ if (arg->stptr[0] == '\0') /* skip empty ARGV elements */
+ continue;
+ arg->stptr[arg->stlen] = '\0';
+ if (! do_unix) {
+ ARGIND_node->var_value->numbr = i;
+ ARGIND_node->var_value->flags = NUM|NUMBER;
+ }
+ if (!arg_assign(arg->stptr)) { /* presumably arg_assign() consumes var=value arguments; anything else is opened as a file -- TODO confirm */
+ files++;
+ curfile = iop_open(arg->stptr, "r");
+ if (curfile == NULL)
+ fatal("cannot open file `%s' for reading (%s)",
+ arg->stptr, strerror(errno));
+ /* NOTREACHED */
+ /* This is a kludge. */
+ unref(FILENAME_node->var_value);
+ FILENAME_node->var_value =
+ dupnode(arg);
+ FNR = 0;
+ i++; /* the next call resumes after this argument */
+ break;
+ }
+ }
+ if (files == 0) { /* nothing has been opened at all */
+ files++;
+ /* no args. -- use stdin */
+ /* FILENAME is init'ed to "-" */
+ /* FNR is init'ed to 0 */
+ curfile = iop_alloc(fileno(stdin));
+ }
+ return curfile;
+}
+
+/* set_FNR --- refresh the C-level FNR from the value held by FNR_node */
+
+void
+set_FNR()
+{
+	NODE *val = FNR_node->var_value;
+
+	FNR = (int) val->numbr;
+}
+
+/* set_NR --- refresh the C-level NR from the value held by NR_node */
+
+void
+set_NR()
+{
+	NODE *val = NR_node->var_value;
+
+	NR = (int) val->numbr;
+}
+
+/*
+ * inrec --- read the next record from iop and pass it to set_record()
+ *
+ * Returns 1 when get_a_record() reports EOF (a zero-length record is
+ * passed on in that case); otherwise bumps NR and FNR and returns 0.
+ */
+static int
+inrec(iop)
+IOBUF *iop;
+{
+	char *rec_start;
+	int len;
+	int hit_eof;
+
+	len = get_a_record(&rec_start, iop, *RS, NULL);
+	hit_eof = (len == EOF);
+	if (hit_eof)
+		len = 0;
+	else {
+		NR++;
+		FNR++;
+	}
+	set_record(rec_start, len, 1);
+
+	return hit_eof;
+}
+
+static int
+iop_close(iop) /* close IOP's descriptor and release its storage; returns 1 if close() failed, else 0 */
+IOBUF *iop;
+{
+ int ret;
+
+ if (iop == NULL)
+ return 0;
+ errno = 0; /* so the warning below reports only close()'s own error */
+
+#ifdef _CRAY
+ /* Work around bug in UNICOS popen */
+ if (iop->fd < 3)
+ ret = 0;
+ else /* with _CRAY defined this `else' governs the IOP_IS_INTERNAL test below */
+#endif
+ /* save these for re-use; don't free the storage */
+ if ((iop->flag & IOP_IS_INTERNAL) != 0) {
+ iop->off = iop->buf; /* rewind the internal buffer so it can be read again */
+ iop->end = iop->buf + strlen(iop->buf);
+ iop->cnt = 0;
+ iop->secsiz = 0;
+ return 0;
+ }
+
+ /* Don't close standard files or else crufty code elsewhere will lose */
+ if (iop->fd == fileno(stdin) ||
+ iop->fd == fileno(stdout) ||
+ iop->fd == fileno(stderr))
+ ret = 0;
+ else
+ ret = close(iop->fd);
+ if (ret == -1)
+ warning("close of fd %d failed (%s)", iop->fd, strerror(errno));
+ if ((iop->flag & IOP_NO_FREE) == 0) { /* IOP_NO_FREE buffers (see specfdopen()) are left allocated */
+ /*
+ * be careful -- $0 may still reference the buffer even though
+ * an explicit close is being done; in the future, maybe we
+ * can do this a bit better
+ */
+ if (iop->buf) {
+ if ((fields_arr[0]->stptr >= iop->buf)
+ && (fields_arr[0]->stptr < iop->end)) { /* fields_arr[0] points into this buffer: copy it out first */
+ NODE *t;
+
+ t = make_string(fields_arr[0]->stptr,
+ fields_arr[0]->stlen);
+ unref(fields_arr[0]);
+ fields_arr [0] = t;
+ reset_record ();
+ }
+ free(iop->buf);
+ }
+ free((char *)iop);
+ }
+ return ret == -1 ? 1 : 0;
+}
+
+/*
+ * do_input --- main input loop
+ *
+ * For each IOBUF handed out by nextfile(), alternate inrec() and
+ * interpret(expression_value) until inrec() reports end of input or
+ * interpret() returns zero.  The loop ends when nextfile() returns NULL
+ * or `exiting' is set.
+ */
+void
+do_input()
+{
+	IOBUF *iop;
+	extern int exiting;
+
+	if (setjmp(filebuf) != 0) {
+		/* do_nextfile() longjmp()s back to here; just carry on below */
+	}
+	for (;;) {
+		iop = nextfile(0);
+		if (iop == NULL)
+			break;
+		if (inrec(iop) == 0) {
+			do {
+				if (! interpret(expression_value))
+					break;
+			} while (inrec(iop) == 0);
+		}
+		if (exiting)
+			break;
+	}
+}
+
+/* Redirection for printf and print commands */
+struct redirect *
+redirect(tree, errflg) /* find or create the redirect record for TREE and make sure it is open */
+NODE *tree; /* redirection node; tree->subnode evaluates to the file name or command */
+int *errflg; /* set to errno when the open fails for a reason other than EMFILE */
+{
+ register NODE *tmp;
+ register struct redirect *rp;
+ register char *str;
+ int tflag = 0; /* RED_* flags describing the requested redirection */
+ int outflag = 0; /* RED_FILE|RED_WRITE for `>' and `>>', otherwise 0 */
+ char *direction = "to";
+ char *mode; /* stdio mode for devopen(); stays NULL for pipes and input files */
+ int fd;
+ char *what = NULL; /* operator text used in diagnostics */
+
+ switch (tree->type) {
+ case Node_redirect_append:
+ tflag = RED_APPEND;
+ /* FALL THROUGH */
+ case Node_redirect_output:
+ outflag = (RED_FILE|RED_WRITE);
+ tflag |= outflag;
+ if (tree->type == Node_redirect_output)
+ what = ">";
+ else
+ what = ">>";
+ break;
+ case Node_redirect_pipe:
+ tflag = (RED_PIPE|RED_WRITE);
+ what = "|";
+ break;
+ case Node_redirect_pipein:
+ tflag = (RED_PIPE|RED_READ);
+ what = "|";
+ break;
+ case Node_redirect_input:
+ tflag = (RED_FILE|RED_READ);
+ what = "<";
+ break;
+ default:
+ fatal ("invalid tree type %d in redirect()", tree->type);
+ break;
+ }
+ tmp = tree_eval(tree->subnode);
+ if (do_lint && ! (tmp->flags & STR))
+ warning("expression in `%s' redirection only has numeric value",
+ what);
+ tmp = force_string(tmp);
+ str = tmp->stptr;
+ if (str == NULL || *str == '\0')
+ fatal("expression for `%s' redirection has null string value",
+ what);
+ if (do_lint
+ && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen)))
+ warning("filename `%s' for `%s' redirection may be result of logical expression", str, what);
+ for (rp = red_head; rp != NULL; rp = rp->next) /* look for an existing record with the same name and matching flags */
+ if (strlen(rp->value) == tmp->stlen
+ && STREQN(rp->value, str, tmp->stlen)
+ && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag
+ || (outflag
+ && (rp->flag & (RED_FILE|RED_WRITE)) == outflag)))
+ break;
+ if (rp == NULL) { /* first use: build a record and link it at the head of the list */
+ emalloc(rp, struct redirect *, sizeof(struct redirect),
+ "redirect");
+ emalloc(str, char *, tmp->stlen+1, "redirect");
+ memcpy(str, tmp->stptr, tmp->stlen);
+ str[tmp->stlen] = '\0';
+ rp->value = str;
+ rp->flag = tflag;
+ rp->fp = NULL;
+ rp->iop = NULL;
+ rp->pid = 0; /* unlikely that we're worried about init */
+ rp->status = 0;
+ /* maintain list in most-recently-used first order */
+ if (red_head)
+ red_head->prev = rp;
+ rp->prev = NULL;
+ rp->next = red_head;
+ red_head = rp;
+ }
+ while (rp->fp == NULL && rp->iop == NULL) { /* not open (yet, or any more): try to open, retrying after EMFILE */
+ if (rp->flag & RED_EOF)
+ /* encountered EOF on file or pipe -- must be cleared
+ * by explicit close() before reading more
+ */
+ return rp;
+ mode = NULL;
+ errno = 0;
+ switch (tree->type) {
+ case Node_redirect_output:
+ mode = "w";
+ if (rp->flag & RED_USED) /* close_one() closed this file earlier: append instead of truncating */
+ mode = "a";
+ break;
+ case Node_redirect_append:
+ mode = "a";
+ break;
+ case Node_redirect_pipe:
+ if ((rp->fp = popen(str, "w")) == NULL)
+ fatal("can't open pipe (\"%s\") for output (%s)",
+ str, strerror(errno));
+ rp->flag |= RED_NOBUF;
+ break;
+ case Node_redirect_pipein:
+ direction = "from";
+ if (gawk_popen(str, rp) == NULL)
+ fatal("can't open pipe (\"%s\") for input (%s)",
+ str, strerror(errno));
+ break;
+ case Node_redirect_input:
+ direction = "from";
+ rp->iop = iop_open(str, "r");
+ break;
+ default:
+ cant_happen();
+ }
+ if (mode != NULL) { /* plain output file: open a descriptor and wrap it in a stdio stream */
+ fd = devopen(str, mode);
+ if (fd > INVALID_HANDLE) {
+ if (fd == fileno(stdin))
+ rp->fp = stdin;
+ else if (fd == fileno(stdout))
+ rp->fp = stdout;
+ else if (fd == fileno(stderr))
+ rp->fp = stderr;
+ else
+ rp->fp = fdopen(fd, mode);
+ if (isatty(fd))
+ rp->flag |= RED_NOBUF;
+ }
+ }
+ if (rp->fp == NULL && rp->iop == NULL) {
+ /* too many files open -- close one and try again */
+ if (errno == EMFILE)
+ close_one();
+ else {
+ /*
+ * Some other reason for failure.
+ *
+ * On redirection of input from a file,
+ * just return an error, so e.g. getline
+ * can return -1. For output to file,
+ * complain. The shell will complain on
+ * a bad command to a pipe.
+ */
+ *errflg = errno;
+ if (tree->type == Node_redirect_output
+ || tree->type == Node_redirect_append)
+ fatal("can't redirect %s `%s' (%s)",
+ direction, str, strerror(errno));
+ else {
+ free_temp(tmp);
+ return NULL;
+ }
+ }
+ }
+ }
+ free_temp(tmp);
+ return rp;
+}
+
+/*
+ * close_one --- free up a descriptor by closing the least recently used
+ * open output file on the redirect list
+ *
+ * The chosen record is marked RED_USED and its fp is cleared.  It is a
+ * fatal error if no record has both an open fp and RED_FILE set.
+ */
+static void
+close_one()
+{
+	struct redirect *victim;
+	struct redirect *tail = NULL;
+
+	/* the list is kept most-recently-used first, so start from its tail */
+	for (victim = red_head; victim != NULL; victim = victim->next)
+		tail = victim;
+
+	/* walk back towards the head until an open file turns up */
+	victim = tail;
+	while (victim != NULL && !(victim->fp && (victim->flag & RED_FILE)))
+		victim = victim->prev;
+
+	if (victim == NULL) {
+		/* surely this is the only reason ??? */
+		fatal("too many pipes or input files open");
+	} else {
+		victim->flag |= RED_USED;
+		errno = 0;
+		if (fclose(victim->fp))
+			warning("close of \"%s\" failed (%s).",
+				victim->value, strerror(errno));
+		victim->fp = NULL;
+	}
+}
+
+/*
+ * do_close --- look up the redirect whose name equals the string value
+ * of tree->subnode and close it
+ *
+ * Returns a temporary number: 0 when no redirect has that name,
+ * otherwise the value returned by close_redir().
+ */
+NODE *
+do_close(tree)
+NODE *tree;
+{
+	NODE *name;
+	struct redirect *rp = red_head;
+
+	name = force_string(tree_eval(tree->subnode));
+	while (rp != NULL) {
+		if (strlen(rp->value) == name->stlen
+		    && STREQN(rp->value, name->stptr, name->stlen))
+			break;
+		rp = rp->next;
+	}
+	free_temp(name);
+
+	if (rp == NULL)		/* no match */
+		return tmp_number((AWKNUM) 0.0);
+
+	fflush(stdout);	/* synchronize regular output */
+	return tmp_number((AWKNUM) close_redir(rp));
+}
+
+static int
+close_redir(rp) /* close RP's stream or IOBUF, unlink RP from the list and free it; returns the close status */
+register struct redirect *rp;
+{
+ int status = 0;
+
+ if (rp == NULL)
+ return 0;
+ if (rp->fp == stdout || rp->fp == stderr) /* never closed here; the record stays on the list */
+ return 0;
+ errno = 0;
+ if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE)) /* output pipe (opened with popen() in redirect()) */
+ status = pclose(rp->fp);
+ else if (rp->fp)
+ status = fclose(rp->fp);
+ else if (rp->iop) {
+ if (rp->flag & RED_PIPE) /* input pipe */
+ status = gawk_pclose(rp);
+ else {
+ status = iop_close(rp->iop);
+ rp->iop = NULL;
+ }
+ }
+ /* SVR4 awk checks and warns about status of close */
+ if (status) {
+ char *s = strerror(errno); /* taken before warning() gets a chance to change errno */
+
+ warning("failure status (%d) on %s close of \"%s\" (%s).",
+ status,
+ (rp->flag & RED_PIPE) ? "pipe" :
+ "file", rp->value, s);
+
+ if (! do_unix) {
+ /* set ERRNO too so that program can get at it */
+ unref(ERRNO_node->var_value);
+ ERRNO_node->var_value = make_string(s, strlen(s));
+ }
+ }
+ if (rp->next) /* unlink rp from the doubly linked list */
+ rp->next->prev = rp->prev;
+ if (rp->prev)
+ rp->prev->next = rp->next;
+ else
+ red_head = rp->next;
+ free(rp->value);
+ free((char *)rp); /* rp must not be used by the caller after this */
+ return status;
+}
+
+/*
+ * flush_io --- flush stdout, stderr and every open output redirection
+ *
+ * Returns the number of streams whose fflush() failed; each failure is
+ * also reported with warning().
+ */
+int
+flush_io ()
+{
+	struct redirect *rp;
+	int nfailed = 0;
+
+	errno = 0;
+	if (fflush(stdout) != 0) {
+		warning("error writing standard output (%s).", strerror(errno));
+		nfailed++;
+	}
+	if (fflush(stderr) != 0) {
+		warning("error writing standard error (%s).", strerror(errno));
+		nfailed++;
+	}
+	/* flush both files and pipes, what the heck */
+	for (rp = red_head; rp != NULL; rp = rp->next) {
+		if ((rp->flag & RED_WRITE) == 0 || rp->fp == NULL)
+			continue;
+		if (fflush(rp->fp) == 0)
+			continue;
+		warning("%s flush of \"%s\" failed (%s).",
+			(rp->flag & RED_PIPE) ? "pipe" :
+			"file", rp->value, strerror(errno));
+		nfailed++;
+	}
+	return nfailed;
+}
+
+/*
+ * close_io --- close stdout, stderr and every redirection on the list
+ *
+ * Returns the number of closes that reported a failure.
+ */
+int
+close_io ()
+{
+	struct redirect *rp;
+	int nfailed = 0;
+
+	errno = 0;
+	if (fclose(stdout) != 0) {
+		warning("error writing standard output (%s).", strerror(errno));
+		nfailed++;
+	}
+	if (fclose(stderr) != 0) {
+		warning("error writing standard error (%s).", strerror(errno));
+		nfailed++;
+	}
+
+	rp = red_head;
+	while (rp != NULL) {
+		/* close_redir() may free rp, so fetch the link first */
+		struct redirect *following = rp->next;
+
+		if (close_redir(rp) != 0)
+			nfailed++;
+		rp = following;
+	}
+	return nfailed;
+}
+
+/*
+ * str2mode --- translate a stdio-style mode string into open() flags
+ *
+ * Only the first character is examined: 'r' reads, 'w' creates or
+ * truncates for writing, 'a' creates or appends.  Anything else goes to
+ * cant_happen().
+ */
+
+static int
+str2mode(mode)
+char *mode;
+{
+	int flags;
+	char kind = mode[0];
+
+	if (kind == 'r')
+		flags = O_RDONLY;
+	else if (kind == 'w')
+		flags = O_WRONLY|O_CREAT|O_TRUNC;
+	else if (kind == 'a')
+		flags = O_WRONLY|O_APPEND|O_CREAT;
+	else
+		cant_happen();
+	return flags;
+}
+
+/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */
+
+/*
+ * This separate version is still needed for output, since file and pipe
+ * output is done with stdio.  iop_open() handles input with IOBUFs of
+ * more "special" files.  Those files are not handled here since it makes
+ * no sense to use them for output.
+ *
+ * name is the file to open and mode a stdio-style mode string ("r", "w"
+ * or "a") that str2mode() turns into open() flags.  The result is an
+ * open descriptor, or the failing result of open().  It is a fatal error
+ * if the descriptor refers to a directory.
+ */
+
+int
+devopen(name, mode)
+char *name, *mode;
+{
+	int openfd = INVALID_HANDLE;
+	char *cp, *ptr;
+	int flag = 0;
+	struct stat buf;
+	extern double strtod();
+
+	flag = str2mode(mode);
+
+	if (do_unix)
+		goto strictopen;
+
+#ifdef VMS
+	if ((openfd = vms_devopen(name, flag)) >= 0)
+		return openfd;
+#endif /* VMS */
+
+	if (STREQ(name, "-"))
+		openfd = fileno(stdin);
+	else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
+		/* emulate a /dev/ name only when stat() cannot find it */
+		cp = name + 5;
+
+		if (STREQ(cp, "stdin") && (flag & O_RDONLY) == O_RDONLY)
+			openfd = fileno(stdin);
+		else if (STREQ(cp, "stdout") && (flag & O_WRONLY) == O_WRONLY)
+			openfd = fileno(stdout);
+		else if (STREQ(cp, "stderr") && (flag & O_WRONLY) == O_WRONLY)
+			openfd = fileno(stderr);
+		else if (STREQN(cp, "fd/", 3)) {
+			/* /dev/fd/N: use descriptor N as it stands */
+			cp += 3;
+			openfd = (int)strtod(cp, &ptr);
+			if (openfd <= INVALID_HANDLE || ptr == cp)
+				openfd = INVALID_HANDLE;
+		}
+	}
+
+strictopen:
+	if (openfd == INVALID_HANDLE)
+		openfd = open(name, flag, 0666);
+	/*
+	 * fstat() returns 0 when it succeeds, so that is the case in which
+	 * buf is valid and the directory test can run.  (Testing the
+	 * result for "> 0" meant the test was never reached.)
+	 */
+	if (openfd != INVALID_HANDLE && fstat(openfd, &buf) == 0)
+		if (S_ISDIR(buf.st_mode))
+			fatal("file `%s' is a directory", name);
+	return openfd;
+}
+
+
+/*
+ * spec_setup --- prepare an IOBUF that serves a special internal file
+ *
+ * With allocate set, a fresh buffer of len+2 bytes is obtained.
+ * Otherwise the text already in iop->buf is reused: its length is
+ * recomputed and the trailing newline is put back.  Either way the
+ * bookkeeping members are reset and the IOBUF is flagged
+ * IOP_IS_INTERNAL with no descriptor.
+ */
+
+void
+spec_setup(iop, len, allocate)
+IOBUF *iop;
+int len;
+int allocate;
+{
+	if (! allocate) {
+		len = strlen(iop->buf);
+		iop->buf[len] = '\n';	/* get_a_record clobbered it */
+		len++;
+		iop->buf[len] = '\0';	/* just in case */
+	} else {
+		char *storage;
+
+		emalloc(storage, char *, len+2, "spec_setup");
+		iop->buf = storage;
+	}
+	iop->fd = -1;
+	iop->flag = IOP_IS_INTERNAL;
+	iop->size = len;
+	iop->cnt = 0;
+	iop->secsiz = 0;
+	iop->off = iop->buf;
+	iop->end = iop->buf + len;
+}
+
+/*
+ * specfdopen --- open a fd special file
+ *
+ * The descriptor comes from devopen().  The IOBUF that iop_alloc()
+ * builds for it is copied into *iop, marked IOP_NO_FREE, and the
+ * temporary is released.  Returns 0 on success and INVALID_HANDLE if
+ * either step fails.
+ */
+
+int
+specfdopen(iop, name, mode)
+IOBUF *iop;
+char *name, *mode;
+{
+	int result = INVALID_HANDLE;
+	int fd = devopen(name, mode);
+
+	if (fd != INVALID_HANDLE) {
+		IOBUF *fresh = iop_alloc(fd);
+
+		if (fresh != NULL) {
+			*iop = *fresh;
+			iop->flag |= IOP_NO_FREE;
+			free(fresh);
+			result = 0;
+		}
+	}
+	return result;
+}
+
+/* pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid */
+
+int
+pidopen(iop, name, mode) /* fill IOP with one line holding a process-related id; returns 0 */
+IOBUF *iop;
+char *name, *mode; /* mode is not referenced in this function */
+{
+ char tbuf[BUFSIZ];
+ int i;
+
+ if (name[6] == 'g') /* "/dev/pgrpid": the seventh character is 'g' */
+/* following #if will improve in 2.16 */
+#if defined(__svr4__) || defined(i860) || defined(_AIX) || defined(BSD4_4) || defined(__386BSD__)
+ sprintf(tbuf, "%d\n", getpgrp());
+#else
+ sprintf(tbuf, "%d\n", getpgrp(getpid()));
+#endif
+ else if (name[6] == 'i') /* "/dev/pid" */
+ sprintf(tbuf, "%d\n", getpid());
+ else /* any other name; "/dev/ppid" is the remaining pidopen entry in iop_open()'s table */
+ sprintf(tbuf, "%d\n", getppid());
+ i = strlen(tbuf);
+ spec_setup(iop, i, 1); /* allocate == 1: obtain a buffer of i+2 bytes */
+ strcpy(iop->buf, tbuf);
+ return 0;
+}
+
+/* useropen --- "open" /dev/user */
+
+/*
+ * /dev/user creates a record as follows:
+ * $1 = getuid()
+ * $2 = geteuid()
+ * $3 = getgid()
+ * $4 = getegid()
+ * If multiple groups are supported, the $5 through $NF are the
+ * supplementary group set.
+ */
+
+int
+useropen(iop, name, mode) /* fill IOP with one line of user and group ids; returns 0 */
+IOBUF *iop;
+char *name, *mode; /* neither is referenced in this function */
+{
+ char tbuf[BUFSIZ], *cp;
+ int i;
+#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
+ int groupset[NGROUPS_MAX];
+ int ngroups;
+#endif
+
+ sprintf(tbuf, "%d %d %d %d", getuid(), geteuid(), getgid(), getegid());
+
+ cp = tbuf + strlen(tbuf); /* cp tracks the end of the text built so far */
+#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
+ ngroups = getgroups(NGROUPS_MAX, groupset);
+ if (ngroups == -1)
+ fatal("could not find groups: %s", strerror(errno));
+
+ for (i = 0; i < ngroups; i++) { /* append " <gid>" for every supplementary group */
+ *cp++ = ' ';
+ sprintf(cp, "%d", groupset[i]);
+ cp += strlen(cp);
+ }
+#endif
+ *cp++ = '\n';
+ *cp++ = '\0';
+
+
+ i = strlen(tbuf);
+ spec_setup(iop, i, 1); /* allocate == 1: obtain a buffer of i+2 bytes */
+ strcpy(iop->buf, tbuf);
+ return 0;
+}
+
+/* iop_open --- handle special and regular files for input */
+
+/*
+ * name is the file to open; mode is a stdio-style mode string that is
+ * handed to str2mode().  Unless do_unix is set, "-" means standard input
+ * and the /dev/ names in the table below are emulated, but only when
+ * stat() cannot find such a file.  Each table entry owns a static IOBUF;
+ * once that IOBUF has a buffer, later opens just rewind it with
+ * spec_setup().
+ *
+ * Returns the IOBUF to read from.  The result is NULL if a special
+ * file's open routine fails; for everything else it is whatever
+ * iop_alloc() returns for the descriptor.  It is a fatal error if the
+ * descriptor refers to a directory.
+ */
+
+static IOBUF *
+iop_open(name, mode)
+char *name, *mode;
+{
+	int openfd = INVALID_HANDLE;
+	int flag = 0;
+	struct stat buf;
+	static struct internal {
+		char *name;	/* prefix to match */
+		int compare;	/* number of characters of name to compare */
+		int (*fp)();	/* open routine; returns 0 on success */
+		IOBUF iob;	/* storage handed back to the caller */
+	} table[] = {
+		{ "/dev/fd/",		8,	specfdopen },
+		{ "/dev/stdin",		10,	specfdopen },
+		{ "/dev/stdout",	11,	specfdopen },
+		{ "/dev/stderr",	11,	specfdopen },
+		{ "/dev/pid",		8,	pidopen },
+		{ "/dev/ppid",		9,	pidopen },
+		{ "/dev/pgrpid",	11,	pidopen },
+		{ "/dev/user",		9,	useropen },
+	};
+	int devcount = sizeof(table) / sizeof(table[0]);
+
+	flag = str2mode(mode);
+
+	if (do_unix)
+		goto strictopen;
+
+	if (STREQ(name, "-"))
+		openfd = fileno(stdin);
+	else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
+		int i;
+
+		for (i = 0; i < devcount; i++) {
+			if (STREQN(name, table[i].name, table[i].compare)) {
+				IOBUF *special = & table[i].iob;
+
+				if (special->buf != NULL) {
+					/* opened before: reuse the saved text */
+					spec_setup(special, 0, 0);
+					return special;
+				} else if ((*table[i].fp)(special, name, mode) == 0)
+					return special;
+				else {
+					warning("could not open %s, mode `%s'",
+						name, mode);
+					return NULL;
+				}
+			}
+		}
+	}
+
+strictopen:
+	if (openfd == INVALID_HANDLE)
+		openfd = open(name, flag, 0666);
+	/*
+	 * fstat() returns 0 when it succeeds, so that is the case in which
+	 * buf is valid and the directory test can run.  (Testing the
+	 * result for "> 0" meant the test was never reached.)
+	 */
+	if (openfd != INVALID_HANDLE && fstat(openfd, &buf) == 0)
+		if (S_ISDIR(buf.st_mode))
+			fatal("file `%s' is a directory", name);
+	return iop_alloc(openfd);
+}
+
+#ifndef PIPES_SIMULATED
+ /* real pipes */
+static int
+wait_any(interesting) /* reap children until INTERESTING exits or none are left; returns the status last stored by wait() */
+int interesting; /* pid of interest, if any */
+{
+ SIGTYPE (*hstat)(), (*istat)(), (*qstat)(); /* previous handlers, put back before returning */
+ int pid;
+ int status = 0;
+ struct redirect *redp;
+ extern int errno;
+
+ hstat = signal(SIGHUP, SIG_IGN); /* HUP, INT and QUIT are ignored while waiting */
+ istat = signal(SIGINT, SIG_IGN);
+ qstat = signal(SIGQUIT, SIG_IGN);
+ for (;;) {
+#ifdef NeXT
+ pid = wait((union wait *)&status);
+#else
+ pid = wait(&status);
+#endif /* NeXT */
+ if (interesting && pid == interesting) {
+ break;
+ } else if (pid != -1) { /* some other child: record its status on the redirect that owns it */
+ for (redp = red_head; redp != NULL; redp = redp->next)
+ if (pid == redp->pid) {
+ redp->pid = -1; /* gawk_pclose() treats -1 as "already reaped" */
+ redp->status = status;
+ if (redp->fp) {
+ pclose(redp->fp);
+ redp->fp = 0;
+ }
+ if (redp->iop) {
+ (void) iop_close(redp->iop);
+ redp->iop = 0;
+ }
+ break;
+ }
+ }
+ if (pid == -1 && errno == ECHILD) /* no children left to wait for */
+ break;
+ }
+ signal(SIGHUP, hstat);
+ signal(SIGINT, istat);
+ signal(SIGQUIT, qstat);
+ return(status);
+}
+
+/*
+ * gawk_popen --- run cmd under /bin/sh with its standard output
+ * connected to a pipe that gawk reads from
+ *
+ * The child's pid is stored in rp->pid and the IOBUF built on the read
+ * end of the pipe in rp->iop; that IOBUF (whatever iop_alloc() returns)
+ * is also the return value.  Failure of pipe(), fork() or the
+ * descriptor shuffling is fatal.
+ */
+static IOBUF *
+gawk_popen(cmd, rp)
+char *cmd;
+struct redirect *rp;
+{
+	int p[2];
+	register int pid;
+
+	/* used to wait for any children to synchronize input and output,
+	 * but this could cause gawk to hang when it is started in a pipeline
+	 * and thus has a child process feeding it input (shell dependent)
+	 */
+	/*(void) wait_any(0);*/	/* wait for outstanding processes */
+
+	if (pipe(p) < 0)
+		fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno));
+	if ((pid = fork()) == 0) {
+		/* child: make the write end of the pipe descriptor 1 */
+		if (close(1) == -1)
+			fatal("close of stdout in child failed (%s)",
+				strerror(errno));
+		if (dup(p[1]) != 1)
+			fatal("dup of pipe failed (%s)", strerror(errno));
+		if (close(p[0]) == -1 || close(p[1]) == -1)
+			fatal("close of pipe failed (%s)", strerror(errno));
+		if (close(0) == -1)
+			fatal("close of stdin in child failed (%s)",
+				strerror(errno));
+		/*
+		 * The argument list has to end with a null *pointer*.  A
+		 * bare 0 is passed as an int, which is not the same thing
+		 * where pointers are wider than int.
+		 */
+		execl("/bin/sh", "sh", "-c", cmd, (char *) 0);
+		_exit(127);	/* only reached if execl() failed */
+	}
+	if (pid == -1)
+		fatal("cannot fork for \"%s\" (%s)", cmd, strerror(errno));
+	rp->pid = pid;
+	/* parent: only the read end is needed from here on */
+	if (close(p[1]) == -1)
+		fatal("close of pipe failed (%s)", strerror(errno));
+	return (rp->iop = iop_alloc(p[0]));
+}
+
+/*
+ * gawk_pclose --- close the read side of an input pipe and return the
+ * child's exit status (bits 8-15 of the saved wait status)
+ *
+ * If the child has not been reaped yet (rp->pid is not -1), wait for it
+ * first and remember its status in rp->status.
+ */
+static int
+gawk_pclose(rp)
+struct redirect *rp;
+{
+	(void) iop_close(rp->iop);
+	rp->iop = NULL;
+
+	if (rp->pid != -1) {
+		/* not found by an earlier wait_any() call: wait for it now */
+		rp->status = wait_any(rp->pid);
+		rp->pid = -1;
+	}
+	return (rp->status >> 8) & 0xFF;
+}
+
+#else /* PIPES_SIMULATED */
+ /* use temporary file rather than pipe */
+
+#ifdef VMS
+/*
+ * gawk_popen --- VMS version: start cmd with popen() and build an IOBUF
+ * on the descriptor of the resulting stream
+ *
+ * Returns NULL if popen() fails, otherwise the IOBUF from iop_alloc(),
+ * which is also stored in rp->iop.
+ */
+static IOBUF *
+gawk_popen(cmd, rp)
+char *cmd;
+struct redirect *rp;
+{
+	FILE *stream = popen(cmd, "r");
+
+	if (stream == NULL)
+		return NULL;
+	rp->iop = iop_alloc(fileno(stream));
+	return rp->iop;
+}
+
+static int
+gawk_pclose(rp) /* VMS version: close the IOBUF, then pclose() a stream built on the original descriptor */
+struct redirect *rp;
+{
+ int rval, aval, fd = rp->iop->fd;
+ FILE *kludge = fdopen(fd, "r"); /* pclose needs FILE* w/ right fileno */
+
+ rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */
+ rval = iop_close(rp->iop); /* acts on the dup()ed descriptor, leaving fd itself for pclose() */
+ rp->iop = NULL;
+ aval = pclose(kludge);
+ return (rval < 0 ? rval : aval); /* NOTE(review): iop_close() as defined in this file returns only 0 or 1, so rval < 0 looks unreachable -- confirm */
+}
+#else /* VMS */
+
+static
+struct {
+ char *command; /* strdup()ed copy of the command that was run */
+ char *name; /* temporary file holding the command's output; NULL when the slot is unused */
+} pipes[_NFILE]; /* indexed by the descriptor opened on the temporary file */
+
+/*
+ * gawk_popen --- simulated input pipe: run cmd with its output
+ * redirected into a temporary file, then read that file
+ *
+ * The temporary file name and a copy of the command are remembered in
+ * pipes[], indexed by the descriptor, so that gawk_pclose() can remove
+ * the file.  Returns the IOBUF built by iop_alloc() (also stored in
+ * rp->iop), or NULL if no temporary name could be had or the file could
+ * not be opened.
+ */
+static IOBUF *
+gawk_popen(cmd, rp)
+char *cmd;
+struct redirect *rp;
+{
+	extern char *strdup(const char *);
+	int current;
+	char *name;
+	char *cmdbuf;
+
+	/* get a name to use. */
+	if ((name = tempnam(".", "pip")) == NULL)
+		return NULL;
+	/* room for "cmd > name": both strings, " > " and the final NUL */
+	emalloc(cmdbuf, char *, strlen(cmd) + strlen(name) + 4, "gawk_popen");
+	sprintf(cmdbuf, "%s > %s", cmd, name);
+	system(cmdbuf);
+	free(cmdbuf);
+	if ((current = open(name, O_RDONLY)) == INVALID_HANDLE) {
+		int save = errno;	/* the caller reports errno */
+
+		free(name);
+		errno = save;
+		return NULL;
+	}
+	pipes[current].name = name;
+	pipes[current].command = strdup(cmd);
+	/* allocate the IOBUF exactly once */
+	return (rp->iop = iop_alloc(current));
+}
+
+/*
+ * gawk_pclose --- simulated input pipe: close the IOBUF, then remove
+ * the temporary file recorded in pipes[] for its descriptor
+ *
+ * Returns -1 if no temporary file is recorded for the descriptor,
+ * otherwise the result of iop_close().
+ */
+static int
+gawk_pclose(rp)
+struct redirect *rp;
+{
+	int slot = rp->iop->fd;
+	int result = iop_close(rp->iop);
+	char *path;
+
+	rp->iop = NULL;
+
+	/* check for an open file */
+	path = pipes[slot].name;
+	if (path == NULL)
+		return -1;
+	unlink(path);
+	free(path);
+	pipes[slot].name = NULL;
+	free(pipes[slot].command);
+	return result;
+}
+#endif /* VMS */
+
+#endif /* PIPES_SIMULATED */
+
+NODE *
+do_getline(tree) /* implement getline; the result is a temporary number: 1 (record read), 0 (end of input) or -1 (error) */
+NODE *tree; /* tree->rnode: optional redirection; tree->lnode: optional variable to assign */
+{
+ struct redirect *rp = NULL;
+ IOBUF *iop;
+ int cnt = EOF;
+ char *s = NULL;
+ int errcode;
+
+ while (cnt == EOF) { /* loops only when an unredirected read hits EOF and another input is tried */
+ if (tree->rnode == NULL) { /* no redirection */
+ iop = nextfile(0);
+ if (iop == NULL) /* end of input */
+ return tmp_number((AWKNUM) 0.0);
+ } else {
+ int redir_error = 0;
+
+ rp = redirect(tree->rnode, &redir_error);
+ if (rp == NULL && redir_error) { /* failed redirect */
+ if (! do_unix) {
+ char *s = strerror(redir_error);
+
+ unref(ERRNO_node->var_value);
+ ERRNO_node->var_value =
+ make_string(s, strlen(s));
+ }
+ return tmp_number((AWKNUM) -1.0);
+ }
+ iop = rp->iop; /* NOTE(review): rp would be NULL here if redirect() failed while errno was 0 -- confirm that cannot happen */
+ if (iop == NULL) /* end of input */
+ return tmp_number((AWKNUM) 0.0);
+ }
+ errcode = 0;
+ cnt = get_a_record(&s, iop, *RS, & errcode);
+ if (! do_unix && errcode != 0) {
+ char *s = strerror(errcode);
+
+ unref(ERRNO_node->var_value);
+ ERRNO_node->var_value = make_string(s, strlen(s));
+ return tmp_number((AWKNUM) -1.0);
+ }
+ if (cnt == EOF) {
+ if (rp) {
+ /*
+ * Don't do iop_close() here if we are
+ * reading from a pipe; otherwise
+ * gawk_pclose will not be called.
+ */
+ if (!(rp->flag & RED_PIPE)) {
+ (void) iop_close(iop);
+ rp->iop = NULL;
+ }
+ rp->flag |= RED_EOF; /* sticky EOF */
+ return tmp_number((AWKNUM) 0.0);
+ } else
+ continue; /* try another file */
+ }
+ if (!rp) { /* only reads from the main input count towards NR and FNR */
+ NR += 1;
+ FNR += 1;
+ }
+ if (tree->lnode == NULL) /* no optional var. */
+ set_record(s, cnt, 1);
+ else { /* assignment to variable */
+ Func_ptr after_assign = NULL;
+ NODE **lhs;
+
+ lhs = get_lhs(tree->lnode, &after_assign);
+ unref(*lhs);
+ *lhs = make_string(s, strlen(s)); /* NOTE(review): length comes from strlen(), not cnt, so a record with an embedded NUL would be cut short -- confirm */
+ (*lhs)->flags |= MAYBE_NUM;
+ /* we may have to regenerate $0 here! */
+ if (after_assign)
+ (*after_assign)();
+ }
+ }
+ return tmp_number((AWKNUM) 1.0);
+}
+
+int
+pathopen (file) /* open FILE via do_pathopen(); where DEFAULT_FILETYPE is defined, retry with it appended */
+char *file;
+{
+ int fd = do_pathopen(file);
+
+#ifdef DEFAULT_FILETYPE
+ if (! do_unix && fd <= INVALID_HANDLE) {
+ char *file_awk;
+ int save = errno; /* errno of the first attempt, restored if the retry fails as well */
+#ifdef VMS
+ int vms_save = vaxc$errno;
+#endif
+
+ /* append ".awk" and try again */
+ emalloc(file_awk, char *, strlen(file) +
+ sizeof(DEFAULT_FILETYPE) + 1, "pathopen");
+ sprintf(file_awk, "%s%s", file, DEFAULT_FILETYPE);
+ fd = do_pathopen(file_awk);
+ free(file_awk);
+ if (fd <= INVALID_HANDLE) {
+ errno = save;
+#ifdef VMS
+ vaxc$errno = vms_save;
+#endif
+ }
+ }
+#endif /*DEFAULT_FILETYPE*/
+
+ return fd; /* descriptor from whichever attempt was made last */
+}
+
+/*
+ * do_pathopen --- open file for reading, searching the awk path
+ *
+ * "-" yields descriptor 0.  With do_unix set, or when file already looks
+ * like a path name, file is opened as given.  Otherwise each element of
+ * the search path (AWKPATH from the environment, or DEFPATH when that is
+ * unset or empty) is tried in turn; an empty element means "file as
+ * given".  Elements that would not fit in the local buffer together
+ * with file are skipped.  If nothing on the path works, file is tried
+ * once more as given.
+ *
+ * Returns an open descriptor, or the failing result of the final
+ * devopen() call.
+ */
+static int
+do_pathopen (file)
+char *file;
+{
+	static char *savepath = DEFPATH;	/* defined in config.h */
+	static int first = 1;
+	char *awkpath, *cp;
+	char trypath[BUFSIZ];
+	int fd;
+
+	if (STREQ(file, "-"))
+		return (0);
+
+	if (do_unix)
+		return (devopen(file, "r"));
+
+	if (first) {
+		first = 0;
+		if ((awkpath = getenv ("AWKPATH")) != NULL && *awkpath)
+			savepath = awkpath;	/* used for restarting */
+	}
+	awkpath = savepath;
+
+	/* some kind of path name, no search */
+#ifdef VMS	/* (strchr not equal implies either or both not NULL) */
+	if (strchr(file, ':') != strchr(file, ']')
+	    || strchr(file, '>') != strchr(file, '/'))
+#else /*!VMS*/
+#ifdef MSDOS
+	if (strchr(file, '/') != strchr(file, '\\')
+	    || strchr(file, ':') != NULL)
+#else
+	if (strchr(file, '/') != NULL)
+#endif	/*MSDOS*/
+#endif	/*VMS*/
+		return (devopen(file, "r"));
+
+	do {
+		int toolong = 0;	/* set when this candidate cannot fit */
+
+		/* copy one path element, never writing past trypath */
+		for (cp = trypath; *awkpath && *awkpath != ENVSEP; awkpath++) {
+			/* keep room for a '/' and the terminating NUL */
+			if (cp < trypath + sizeof(trypath) - 2)
+				*cp++ = *awkpath;
+			else
+				toolong = 1;
+		}
+
+		if (cp != trypath) {	/* non-null element in path */
+			/* add directory punctuation only if needed */
+#ifdef VMS
+			if (strchr(":]>/", *(cp-1)) == NULL)
+#else
+#ifdef MSDOS
+			if (strchr(":\\/", *(cp-1)) == NULL)
+#else
+			if (*(cp-1) != '/')
+#endif
+#endif
+				*cp++ = '/';
+		}
+		if ((int) (cp - trypath) + (int) strlen(file)
+		    >= (int) sizeof(trypath))
+			toolong = 1;
+		if (! toolong) {
+			/* append filename; for an empty element this is just file */
+			strcpy (cp, file);
+			if ((fd = devopen(trypath, "r")) >= 0)
+				return (fd);
+		}
+
+		/*
+		 * no luck, keep going.  The separator is always stepped
+		 * over, including one that ends the path; otherwise the
+		 * scan would stay parked on it and never terminate.  The
+		 * empty element a trailing separator stands for is covered
+		 * by the final attempt below.
+		 */
+		if (*awkpath == ENVSEP)
+			awkpath++;
+	} while (*awkpath);
+	/*
+	 * You might have one of the awk
+	 * paths defined, WITHOUT the current working directory in it.
+	 * Therefore try to open the file in the current directory.
+	 */
+	return (devopen(file, "r"));
+}
diff --git a/gnu/usr.bin/awk/iop.c b/gnu/usr.bin/awk/iop.c
new file mode 100644
index 000000000000..0d7af1213db6
--- /dev/null
+++ b/gnu/usr.bin/awk/iop.c
@@ -0,0 +1,318 @@
+/*
+ * iop.c - do i/o related things.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+#ifndef atarist
+#define INVALID_HANDLE (-1)
+#else
+#include <stddef.h>
+#include <fcntl.h>
+#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
+#endif /* atarist */
+
+
+#ifdef TEST
+int bufsize = 8192;
+
+/* fatal --- stand-alone replacement for TEST builds: print s, then exit(1) */
+void
+fatal(s)
+char *s;
+{
+	puts(s);	/* writes s followed by a newline to stdout */
+	exit(1);
+}
+#endif
+
+/*
+ * optimal_bufsize --- pick a read buffer size for descriptor fd.
+ *
+ * Terminals get BUFSIZ.  Otherwise the size is DEFBLKSIZE (the file
+ * system block size when the stat structure has one, else BUFSIZ), or the
+ * file size itself when the descriptor is seekable and the file is smaller
+ * than that.  A reported size of zero is never used as the buffer size: a
+ * zero-length buffer would make the first read() ask for 0 bytes, which the
+ * reader treats as end of file.
+ */
+int
+optimal_bufsize(fd)
+int fd;
+{
+	struct stat stb;
+
+#ifdef VMS
+	/*
+	 * These values correspond with the RMS multi-block count used by
+	 * vms_open() in vms/vms_misc.c.
+	 */
+	if (isatty(fd) > 0)
+		return BUFSIZ;
+	else if (fstat(fd, &stb) < 0)
+		return 8*512;	/* conservative in case of DECnet access */
+	else
+		return 24*512;
+
+#else
+	/*
+	 * System V doesn't have the file system block size in the
+	 * stat structure. So we have to make some sort of reasonable
+	 * guess. We use stdio's BUFSIZ, since that is what it was
+	 * meant for in the first place.
+	 */
+#ifdef BLKSIZE_MISSING
+#define DEFBLKSIZE BUFSIZ
+#else
+#define DEFBLKSIZE (stb.st_blksize ? stb.st_blksize : BUFSIZ)
+#endif
+
+#ifdef TEST
+	return bufsize;
+#else
+#ifndef atarist
+	if (isatty(fd))
+#else
+	/*
+	 * On ST redirected stdin does not have a name attached
+	 * (this could be hard to do too) and fstat would fail
+	 */
+	if (0 == fd || isatty(fd))
+#endif /*atarist */
+		return BUFSIZ;
+#ifndef BLKSIZE_MISSING
+	/* VMS POSIX 1.0: st_blksize is never assigned a value, so zero it */
+	stb.st_blksize = 0;
+#endif
+	if (fstat(fd, &stb) == -1)
+		fatal("can't stat fd %d (%s)", fd, strerror(errno));
+	if (lseek(fd, (off_t)0, 0) == -1)
+		return DEFBLKSIZE;	/* not seekable */
+	/* small file: one buffer holds it all; ignore a size of zero */
+	if (stb.st_size > 0 && stb.st_size < DEFBLKSIZE)
+		return ((int) stb.st_size);
+	return ((int) DEFBLKSIZE);
+#endif /*! TEST */
+#endif /*! VMS */
+}
+
+/*
+ * iop_alloc --- create the IOBUF bookkeeping record for descriptor fd.
+ *
+ * Returns NULL when fd is INVALID_HANDLE.  No data buffer is allocated
+ * here: buf and off start out NULL and secsiz starts at the marker value -2.
+ */
+IOBUF *
+iop_alloc(fd)
+int fd;
+{
+	IOBUF *ip;
+
+	if (fd == INVALID_HANDLE)
+		return NULL;
+
+	emalloc(ip, IOBUF *, sizeof(IOBUF), "iop_alloc");
+	ip->flag = isatty(fd) ? IOP_IS_TTY : 0;
+	ip->size = optimal_bufsize(fd);
+	ip->secsiz = -2;
+	ip->fd = fd;
+	ip->buf = NULL;
+	ip->off = NULL;
+	ip->cnt = 0;
+	/* clear any errno left behind by the probes above */
+	errno = 0;
+	return ip;
+}
+
+/*
+ * Get the next record. Uses a "split buffer" where the latter part is
+ * the normal read buffer and the head part is an "overflow" area that is used
+ * when a record spans the end of the normal buffer, in which case the first
+ * part of the record is copied into the overflow area just before the
+ * normal buffer. Thus, the eventual full record can be returned as a
+ * contiguous area of memory with a minimum of copying. The overflow area
+ * is expanded as needed, so that records are unlimited in length.
+ * We also mark both the end of the buffer and the end of the read() with
+ * a sentinel character (the current record separator) so that the inside
+ * loop can run as a single test.
+ *
+ * out      receives a pointer to the start of the record; the record is
+ *          NUL-terminated in place inside the iop buffer.
+ * iop      buffer state for the input; buf, off, end, cnt and secsiz are
+ *          updated here.
+ * grRS     record separator character. 0 selects the RS == "" case, where
+ *          '\n' is the separator and a record ends only when a newline
+ *          directly follows a newline.
+ * errcode  if non-NULL and do_unix is not set, a failing read() stores errno
+ *          in *errcode and is treated like EOF; otherwise fatal() is called.
+ *
+ * Returns the number of characters before the terminating NUL, or EOF when
+ * an earlier call already hit end of input or no record data remains.
+ */
+int
+get_a_record(out, iop, grRS, errcode)
+char **out;
+IOBUF *iop;
+register int grRS;
+int *errcode;
+{
+ register char *bp = iop->off;
+ char *bufend;
+ char *start = iop->off; /* beginning of record */
+ int saw_newline;
+ char rs;
+ int eat_whitespace;
+
+ if (iop->cnt == EOF) /* previous read hit EOF */
+ return EOF;
+
+ /* saw_newline and eat_whitespace are only consulted when grRS == 0 */
+ if (grRS == 0) { /* special case: grRS == "" */
+ rs = '\n';
+ eat_whitespace = 0;
+ saw_newline = 0;
+ } else
+ rs = (char) grRS;
+
+ /* set up sentinel */
+ if (iop->buf) {
+ bufend = iop->buf + iop->size + iop->secsiz;
+ *bufend = rs;
+ } else
+ bufend = NULL;
+
+ for (;;) { /* break on end of record, read error or EOF */
+
+ /* Following code is entered on the first call of this routine
+ * for a new iop, or when we scan to the end of the buffer.
+ * In the latter case, we copy the current partial record to
+ * the space preceding the normal read buffer. If necessary,
+ * we expand this space. This is done so that we can return
+ * the record as a contiguous area of memory.
+ */
+ if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
+ char *oldbuf = NULL;
+ char *oldsplit = iop->buf + iop->secsiz;
+ long len; /* record length so far */
+
+ if ((iop->flag & IOP_IS_INTERNAL) != 0)
+ cant_happen();
+
+ len = bp - start;
+ if (len > iop->secsiz) {
+ /* expand secondary buffer */
+ /* -2 is the value iop_alloc() stores in a fresh IOBUF */
+ if (iop->secsiz == -2)
+ iop->secsiz = 256;
+ while (len > iop->secsiz)
+ iop->secsiz *= 2;
+ /* keep the old buffer until the partial record is copied out */
+ oldbuf = iop->buf;
+ emalloc(iop->buf, char *,
+ iop->size+iop->secsiz+2, "get_a_record");
+ bufend = iop->buf + iop->size + iop->secsiz;
+ *bufend = rs;
+ }
+ if (len > 0) {
+ char *newsplit = iop->buf + iop->secsiz;
+
+ /*
+ * A record that began in the old overflow area is moved in
+ * two pieces: the part before the old split point, then the
+ * part after it. Both land so that they end at newsplit.
+ */
+ if (start < oldsplit) {
+ memcpy(newsplit - len, start,
+ oldsplit - start);
+ memcpy(newsplit - (bp - oldsplit),
+ oldsplit, bp - oldsplit);
+ } else
+ memcpy(newsplit - len, start, len);
+ }
+ /* main buffer is now empty; the partial record sits just before it */
+ bp = iop->end = iop->off = iop->buf + iop->secsiz;
+ start = bp - len;
+ if (oldbuf) {
+ free(oldbuf);
+ oldbuf = NULL;
+ }
+ }
+ /* Following code is entered whenever we have no more data to
+ * scan. In most cases this will read into the beginning of
+ * the main buffer, but in some cases (terminal, pipe etc.)
+ * we may be doing smallish reads into more advanced positions.
+ */
+ if (bp >= iop->end) {
+ if ((iop->flag & IOP_IS_INTERNAL) != 0) {
+ iop->cnt = EOF;
+ break;
+ }
+ iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
+ if (iop->cnt == -1) {
+ if (! do_unix && errcode != NULL) {
+ *errcode = errno;
+ iop->cnt = EOF;
+ break;
+ } else
+ fatal("error reading input: %s",
+ strerror(errno));
+ } else if (iop->cnt == 0) {
+ iop->cnt = EOF;
+ break;
+ }
+ iop->end += iop->cnt;
+ /* sentinel after the data just read */
+ *iop->end = rs;
+ }
+ if (grRS == 0) {
+ extern int default_FS;
+
+ /*
+ * With default_FS set, advance bp over whitespace found at
+ * the start of the record; eat_whitespace carries that state
+ * across a refill when the data ran out mid-skip.
+ */
+ if (default_FS && (bp == start || eat_whitespace)) {
+ while (bp < iop->end && isspace(*bp))
+ bp++;
+ if (bp == iop->end) {
+ eat_whitespace = 1;
+ continue;
+ } else
+ eat_whitespace = 0;
+ }
+ /* a newline right after a newline ends the record */
+ if (saw_newline && *bp == rs) {
+ bp++;
+ break;
+ }
+ saw_newline = 0;
+ }
+
+ /* scan to the next separator; the sentinel guarantees a stop */
+ while (*bp++ != rs)
+ ;
+
+ /*
+ * bp <= iop->end: a separator inside the data was consumed.
+ * Otherwise the scan stopped on the sentinel, so back up onto it.
+ */
+ if (bp <= iop->end) {
+ if (grRS == 0)
+ saw_newline = 1;
+ else
+ break;
+ } else
+ bp--;
+
+ if ((iop->flag & IOP_IS_INTERNAL) != 0)
+ iop->cnt = bp - start;
+ }
+ if (iop->cnt == EOF
+ && (((iop->flag & IOP_IS_INTERNAL) != 0) || start == bp))
+ return EOF;
+
+ /* the next call resumes scanning here */
+ iop->off = bp;
+ /* overwrite a trailing separator, if there is one, with the NUL */
+ bp--;
+ if (*bp != rs)
+ bp++;
+ *bp = '\0';
+ if (grRS == 0) {
+ /* RS == "": also drop a second trailing newline */
+ if (*--bp == rs)
+ *bp = '\0';
+ else
+ bp++;
+ }
+
+ *out = start;
+ return bp - start;
+}
+
+#ifdef TEST
+/*
+ * Stand-alone test driver (TEST builds only): copy records from standard
+ * input to standard output, each followed by the separator character.
+ *
+ * argv[1], if given, is the buffer size handed out by optimal_bufsize();
+ * argv[2], if given, supplies the record separator as its first character.
+ */
+int
+main(argc, argv)
+int argc;
+char *argv[];
+{
+	IOBUF *iop;
+	char *out;
+	int cnt;
+	char rs[2];
+
+	rs[0] = 0;
+	if (argc > 1)
+		bufsize = atoi(argv[1]);
+	if (argc > 2)
+		rs[0] = *argv[2];
+	iop = iop_alloc(0);
+	/*
+	 * get_a_record() returns the record length, which is 0 for an
+	 * empty record; only EOF marks the end of the input.
+	 */
+	while ((cnt = get_a_record(&out, iop, rs[0], NULL)) != EOF) {
+		fwrite(out, 1, cnt, stdout);
+		fwrite(rs, 1, 1, stdout);
+	}
+	return 0;
+}
+#endif
diff --git a/gnu/usr.bin/awk/main.c b/gnu/usr.bin/awk/main.c
new file mode 100644
index 000000000000..77d0bf74e143
--- /dev/null
+++ b/gnu/usr.bin/awk/main.c
@@ -0,0 +1,731 @@
+/*
+ * main.c -- Expression tree constructors and main program for gawk.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "getopt.h"
+#include "awk.h"
+#include "patchlevel.h"
+
+static void usage P((int exitval));
+static void copyleft P((void));
+static void cmdline_fs P((char *str));
+static void init_args P((int argc0, int argc, char *argv0, char **argv));
+static void init_vars P((void));
+static void pre_assign P((char *v));
+SIGTYPE catchsig P((int sig, int code));
+static void gawk_option P((char *optstr));
+static void nostalgia P((void));
+static void version P((void));
+char *gawk_name P((char *filespec));
+
+#ifdef MSDOS
+extern int isatty P((int));
+#endif
+
+extern void resetup P((void));
+
+/* These nodes store all the special variables AWK uses */
+NODE *FS_node, *NF_node, *RS_node, *NR_node;
+NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node;
+NODE *CONVFMT_node;
+NODE *ERRNO_node;
+NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node;
+NODE *ENVIRON_node, *IGNORECASE_node;
+NODE *ARGC_node, *ARGV_node, *ARGIND_node;
+NODE *FIELDWIDTHS_node;
+
+int NF;
+int NR;
+int FNR;
+int IGNORECASE;
+char *RS;
+char *OFS;
+char *ORS;
+char *OFMT;
+char *CONVFMT;
+
+/*
+ * The parse tree and field nodes are stored here. Parse_end is a dummy item
+ * used to free up unneeded fields without freeing the program being run
+ */
+int errcount = 0; /* error counter, used by yyerror() */
+
+/* The global null string */
+NODE *Nnull_string;
+
+/* The name the program was invoked under, for error messages */
+const char *myname;
+
+/* A block of AWK code to be run before running the program */
+NODE *begin_block = 0;
+
+/* A block of AWK code to be run after the last input file */
+NODE *end_block = 0;
+
+int exiting = 0; /* Was an "exit" statement executed? */
+int exit_val = 0; /* optional exit value */
+
+#if defined(YYDEBUG) || defined(DEBUG)
+extern int yydebug;
+#endif
+
+struct src *srcfiles = NULL; /* source file name(s) */
+int numfiles = -1; /* how many source files */
+
+int do_unix = 0; /* turn off gnu extensions */
+int do_posix = 0; /* turn off gnu and unix extensions */
+int do_lint = 0; /* provide warnings about questionable stuff */
+int do_nostalgia = 0; /* provide a blast from the past */
+
+int in_begin_rule = 0; /* we're in a BEGIN rule */
+int in_end_rule = 0; /* we're in a END rule */
+
+int output_is_tty = 0; /* control flushing of output */
+
+extern char *version_string; /* current version, for printing */
+
+NODE *expression_value;
+
+/*
+ * Long-option table handed to getopt_long() in main().
+ * Entries with a non-NULL flag pointer store 1 in the named do_* variable;
+ * the others make getopt_long() return the listed letter, which main()'s
+ * switch dispatches on. The all-zero entry terminates the table.
+ */
+static struct option optab[] = {
+ { "compat", no_argument, & do_unix, 1 },
+ { "lint", no_argument, & do_lint, 1 },
+ { "posix", no_argument, & do_posix, 1 },
+ { "nostalgia", no_argument, & do_nostalgia, 1 },
+ { "copyleft", no_argument, NULL, 'C' },
+ { "copyright", no_argument, NULL, 'C' },
+ { "field-separator", required_argument, NULL, 'F' },
+ { "file", required_argument, NULL, 'f' },
+ { "assign", required_argument, NULL, 'v' },
+ { "version", no_argument, NULL, 'V' },
+ { "usage", no_argument, NULL, 'u' },
+ { "help", no_argument, NULL, 'u' },
+ { "source", required_argument, NULL, 's' },
+#ifdef DEBUG
+ { "parsedebug", no_argument, NULL, 'D' },
+#endif
+ { 0, 0, 0, 0 }
+};
+
+/*
+ * main --- gawk entry point.
+ *
+ * Installs signal handlers, initializes the null string and the special
+ * variables, parses the command line, reads the program with yyparse(),
+ * then runs the BEGIN block, the main input loop and the END block.
+ * Exits with exit_val, or 1 if close_io() reports a problem and exit_val
+ * was 0.
+ */
+int
+main(argc, argv)
+int argc;
+char **argv;
+{
+	int c;
+	char *scan;
+	extern int optind;
+	extern int opterr;
+	extern char *optarg;
+
+	(void) signal(SIGFPE, (SIGTYPE (*) P((int))) catchsig);
+	(void) signal(SIGSEGV, (SIGTYPE (*) P((int))) catchsig);
+#ifdef SIGBUS
+	(void) signal(SIGBUS, (SIGTYPE (*) P((int))) catchsig);
+#endif
+
+	myname = gawk_name(argv[0]);
+	argv[0] = (char *)myname;
+#ifdef VMS
+	vms_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
+#endif
+
+	/* remove sccs gunk */
+	if (strncmp(version_string, "@(#)", 4) == 0)
+		version_string += 4;
+
+	if (argc < 2)
+		usage(1);
+
+	/* initialize the null string */
+	Nnull_string = make_string("", 0);
+	Nnull_string->numbr = 0.0;
+	Nnull_string->type = Node_val;
+	Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);
+
+	/* Set up the special variables */
+
+	/*
+	 * Note that this must be done BEFORE arg parsing else -F
+	 * breaks horribly
+	 */
+	init_vars();
+
+	/* worst case */
+	emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main");
+	memset(srcfiles, '\0', argc * sizeof(struct src));
+
+	/* Tell the regex routines how they should work. . . */
+	resetup();
+
+	/* we do error messages ourselves on invalid options */
+	opterr = 0;
+
+	/* the + on the front tells GNU getopt not to rearrange argv */
+	while ((c = getopt_long(argc, argv, "+F:f:v:W:", optab, NULL)) != EOF) {
+		if (do_posix)
+			opterr = 1;
+		switch (c) {
+		case 'F':
+			cmdline_fs(optarg);
+			break;
+
+		case 'f':
+			/*
+			 * a la MKS awk, allow multiple -f options.
+			 * this makes function libraries real easy.
+			 * most of the magic is in the scanner.
+			 */
+			/* The following is to allow for whitespace at the end
+			 * of a #! /bin/gawk line in an executable file
+			 */
+			scan = optarg;
+			/* cast: <ctype.h> functions need an unsigned char value */
+			while (isspace((unsigned char) *scan))
+				scan++;
+			++numfiles;
+			srcfiles[numfiles].stype = SOURCEFILE;
+			if (*scan == '\0') {
+				/*
+				 * The file name is the next argument; without
+				 * one we would store argv[argc], i.e. NULL.
+				 */
+				if (optind >= argc)
+					usage(1);
+				srcfiles[numfiles].val = argv[optind++];
+			} else
+				srcfiles[numfiles].val = optarg;
+			break;
+
+		case 'v':
+			pre_assign(optarg);
+			break;
+
+		case 'W':	/* gawk specific options */
+			gawk_option(optarg);
+			break;
+
+		/* These can only come from long form options */
+		case 'V':
+			version();
+			break;
+
+		case 'C':
+			copyleft();
+			break;
+
+		case 'u':
+			usage(0);
+			break;
+
+		case 's':
+			if (strlen(optarg) == 0)
+				warning("empty argument to --source ignored");
+			else {
+				srcfiles[++numfiles].stype = CMDLINE;
+				srcfiles[numfiles].val = optarg;
+			}
+			break;
+
+#ifdef DEBUG
+		case 'D':
+			yydebug = 2;
+			break;
+#endif
+
+		case '?':
+		default:
+			/*
+			 * New behavior. If not posix, an unrecognized
+			 * option stops argument processing so that it can
+			 * go into ARGV for the awk program to see. This
+			 * makes use of ``#! /bin/gawk -f'' easier.
+			 */
+			if (! do_posix)
+				goto out;
+			/* else
+				let getopt print error message for us */
+			break;
+		}
+	}
+out:
+
+	if (do_nostalgia)
+		nostalgia();
+
+	/* POSIX compliance also implies no Unix extensions either */
+	if (do_posix)
+		do_unix = 1;
+
+#ifdef DEBUG
+	setbuf(stdout, (char *) NULL);	/* make debugging easier */
+#endif
+	if (isatty(fileno(stdout)))
+		output_is_tty = 1;
+	/* No -f or --source options, use next arg */
+	if (numfiles == -1) {
+		if (optind > argc - 1)	/* no args left */
+			usage(1);
+		srcfiles[++numfiles].stype = CMDLINE;
+		srcfiles[numfiles].val = argv[optind];
+		optind++;
+	}
+	init_args(optind, argc, (char *) myname, argv);
+	(void) tokexpand();
+
+	/* Read in the program */
+	if (yyparse() || errcount)
+		exit(1);
+
+	/* Set up the field variables */
+	init_fields();
+
+	if (begin_block) {
+		in_begin_rule = 1;
+		(void) interpret(begin_block);
+	}
+	in_begin_rule = 0;
+	if (!exiting && (expression_value || end_block))
+		do_input();
+	if (end_block) {
+		in_end_rule = 1;
+		(void) interpret(end_block);
+	}
+	in_end_rule = 0;
+	if (close_io() != 0 && exit_val == 0)
+		exit_val = 1;
+	exit(exit_val);		/* more portable */
+	return exit_val;	/* to suppress warnings */
+}
+
+/* usage --- print usage information and exit */
+
+/*
+ * Writes the version banner, the two synopsis lines and the option table
+ * to stderr, then exits with exitval.
+ */
+static void
+usage(exitval)
+int exitval;
+{
+	/* GNU long options info. Gack. */
+	static char *optlines[] = {
+		"\nPOSIX options:\t\tGNU long options:\n",
+		"\t-f progfile\t\t--file=progfile\n",
+		"\t-F fs\t\t\t--field-separator=fs\n",
+		"\t-v var=val\t\t--assign=var=val\n",
+		"\t-W compat\t\t--compat\n",
+		"\t-W copyleft\t\t--copyleft\n",
+		"\t-W copyright\t\t--copyright\n",
+		"\t-W help\t\t\t--help\n",
+		"\t-W lint\t\t\t--lint\n",
+#if 0
+		"\t-W nostalgia\t\t--nostalgia\n",
+#endif
+#ifdef DEBUG
+		"\t-W parsedebug\t\t--parsedebug\n",
+#endif
+		"\t-W posix\t\t--posix\n",
+		"\t-W source=program-text\t--source=program-text\n",
+		"\t-W usage\t\t--usage\n",
+		"\t-W version\t\t--version\n",
+		NULL
+	};
+	char *progfile_form = " -f progfile [--]";
+	char *cmdline_form = " [--] 'program'";
+	char *optsummary = " [POSIX or GNU style options]";
+	char **lp;
+
+	version();
+	fprintf(stderr, "usage: %s%s%s file ...\n %s%s%s file ...\n",
+		myname, optsummary, progfile_form,
+		myname, optsummary, cmdline_form);
+
+	for (lp = optlines; *lp != NULL; lp++)
+		fputs(*lp, stderr);
+	exit(exitval);
+}
+
+/*
+ * copyleft --- print the version banner and the GPL notice on stderr.
+ * The notice is kept as three separate string literals.
+ */
+static void
+copyleft ()
+{
+	static char *notice[] = {
+"Copyright (C) 1989, 1991, 1992, Free Software Foundation.\n\
+\n\
+This program is free software; you can redistribute it and/or modify\n\
+it under the terms of the GNU General Public License as published by\n\
+the Free Software Foundation; either version 2 of the License, or\n\
+(at your option) any later version.\n\
+\n",
+"This program is distributed in the hope that it will be useful,\n\
+but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
+GNU General Public License for more details.\n\
+\n",
+"You should have received a copy of the GNU General Public License\n\
+along with this program; if not, write to the Free Software\n\
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n",
+		NULL
+	};
+	char **part;
+
+	version();
+	for (part = notice; *part != NULL; part++)
+		fputs(*part, stderr);
+	fflush(stderr);
+}
+
+/*
+ * cmdline_fs --- install the argument of -F as the value of FS.
+ *
+ * The value goes through make_str_node() with SCAN, so escape sequences
+ * are processed, and set_FS() is called afterwards.
+ */
+static void
+cmdline_fs(str)
+char *str;
+{
+	register NODE **fsp;
+	int slen;
+
+	slen = strlen(str);
+	fsp = get_lhs(FS_node, (Func_ptr *) 0);
+	unref(*fsp);
+
+	/*
+	 * Historic awk documents -F\t, but the shell hands us -Ft.
+	 * Honour that special case only in compatibility mode without
+	 * POSIX mode; POSIX did not propagate this "feature".
+	 */
+	if (strcmp(str, "t") == 0) {
+		if (do_lint)
+			warning("-Ft does not set FS to tab in POSIX awk");
+		if (do_unix && ! do_posix)
+			str[0] = '\t';
+	}
+
+	*fsp = make_str_node(str, slen, SCAN);	/* do process escapes */
+	set_FS();
+}
+
+/*
+ * init_args --- create the awk-level ARGV array and ARGC.
+ *
+ * ARGV[0] is argv0; argv[argc0] .. argv[argc-1] become ARGV[1], ARGV[2], ...
+ * Every element is flagged MAYBE_NUM.  ARGC is the number of elements
+ * stored, ARGV[0] included.
+ */
+static void
+init_args(argc0, argc, argv0, argv)
+int argc0, argc;
+char *argv0;
+char **argv;
+{
+	int src;	/* index into argv */
+	int nargs;	/* next ARGV subscript, and finally ARGC */
+	NODE **slot;
+
+	ARGV_node = install("ARGV", node(Nnull_string, Node_var, (NODE *)NULL));
+
+	slot = assoc_lookup(ARGV_node, tmp_number(0.0));
+	*slot = make_string(argv0, strlen(argv0));
+	(*slot)->flags |= MAYBE_NUM;
+
+	nargs = 1;
+	for (src = argc0; src < argc; src++, nargs++) {
+		slot = assoc_lookup(ARGV_node, tmp_number((AWKNUM) nargs));
+		*slot = make_string(argv[src], strlen(argv[src]));
+		(*slot)->flags |= MAYBE_NUM;
+	}
+
+	ARGC_node = install("ARGC",
+		node(make_number((AWKNUM) nargs), Node_var, (NODE *) NULL));
+}
+
+/*
+ * Set all the special variables to their initial values.
+ */
+/* One row per special variable; init_vars() walks the table below. */
+struct varinit {
+ NODE **spec; /* where init_vars() stores the installed node */
+ char *name; /* awk-level variable name; 0 ends the table */
+ NODETYPE type; /* node type given to node() */
+ char *strval; /* initial string value; 0 means use numval instead */
+ AWKNUM numval; /* initial numeric value, used when strval is 0 */
+ Func_ptr assign; /* if non-zero, called right after the variable is installed */
+};
+static struct varinit varinit[] = {
+{&NF_node, "NF", Node_NF, 0, -1, set_NF },
+{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, 0 },
+{&NR_node, "NR", Node_NR, 0, 0, set_NR },
+{&FNR_node, "FNR", Node_FNR, 0, 0, set_FNR },
+{&FS_node, "FS", Node_FS, " ", 0, 0 },
+{&RS_node, "RS", Node_RS, "\n", 0, set_RS },
+{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, 0, 0, set_IGNORECASE },
+{&FILENAME_node, "FILENAME", Node_var, "-", 0, 0 },
+{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS },
+{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS },
+{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT },
+{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT },
+{&RLENGTH_node, "RLENGTH", Node_var, 0, 0, 0 },
+{&RSTART_node, "RSTART", Node_var, 0, 0, 0 },
+{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, 0 },
+{&ARGIND_node, "ARGIND", Node_var, 0, 0, 0 },
+{&ERRNO_node, "ERRNO", Node_var, 0, 0, 0 },
+{0, 0, Node_illegal, 0, 0, 0 },
+};
+
+/*
+ * init_vars --- install every variable listed in the varinit table.
+ *
+ * A row with a string value gets a string node, any other row a number
+ * node; the row's assign hook, if any, runs right after installation.
+ */
+static void
+init_vars()
+{
+	register struct varinit *row;
+	NODE *initial;
+
+	for (row = varinit; row->name; row++) {
+		if (row->strval == 0)
+			initial = make_number(row->numval);
+		else
+			initial = make_string(row->strval, strlen(row->strval));
+
+		*(row->spec) = install(row->name,
+				node(initial, row->type, (NODE *) NULL));
+
+		if (row->assign)
+			(*(row->assign))();
+	}
+}
+
+/*
+ * load_environ --- create the ENVIRON array from the process environment.
+ *
+ * Each "name=value" entry is split at the first '=' just long enough to
+ * build the array element, then put back together.  An entry without '='
+ * gets an empty value.  Values are flagged MAYBE_NUM.
+ */
+void
+load_environ()
+{
+#if !defined(MSDOS) && !(defined(VMS) && defined(__DECC))
+	extern char **environ;
+#endif
+	static char nullstr[] = "";
+	register char *name, *value;
+	char *eq;
+	NODE **slot;
+	register int n;
+
+	ENVIRON_node = install("ENVIRON",
+			node(Nnull_string, Node_var, (NODE *) NULL));
+
+	n = 0;
+	while (environ[n]) {
+		name = environ[n];
+		eq = strchr(name, '=');
+		if (eq != NULL) {
+			*eq = '\0';	/* temporarily terminate the name */
+			value = eq + 1;
+		} else
+			value = nullstr;
+
+		slot = assoc_lookup(ENVIRON_node,
+				tmp_string(name, strlen (name)));
+		*slot = make_string(value, strlen (value));
+		(*slot)->flags |= MAYBE_NUM;
+
+		/* restore '=' so that system() gets a valid environment */
+		if (eq != NULL)
+			*eq = '=';
+		n++;
+	}
+}
+
+/* Process a command-line assignment */
+/*
+ * arg_assign --- perform a "var=value" assignment given as an argument.
+ *
+ * Returns NULL when arg contains no '='.  Otherwise assigns the value
+ * (with escape sequences processed, flagged MAYBE_NUM) to the variable,
+ * runs the variable's after-assignment hook if it has one, and returns
+ * a pointer to the '=' inside arg.  arg is left with its original text.
+ */
+char *
+arg_assign(arg)
+char *arg;
+{
+	char *eq;
+	Func_ptr hook = NULL;
+	NODE *target;
+	NODE *value;
+	NODE **lhs;
+
+	eq = strchr(arg, '=');
+	if (eq == NULL)
+		return NULL;
+
+	*eq = '\0';	/* arg is now just the variable name */
+	/*
+	 * Recent versions of nawk expand escapes inside assignments.
+	 * This makes sense, so we do it too.
+	 */
+	value = make_str_node(eq + 1, strlen(eq + 1), SCAN);
+	value->flags |= MAYBE_NUM;
+
+	target = variable(arg, 0);
+	lhs = get_lhs(target, &hook);
+	unref(*lhs);
+	*lhs = value;
+	if (hook)
+		(*hook)();
+
+	*eq = '=';	/* restore original text of ARGV */
+	return eq;
+}
+
+/*
+ * pre_assign --- handle the argument of -v; a malformed assignment is
+ * reported on stderr and ends the run through usage(1).
+ */
+static void
+pre_assign(v)
+char *v;
+{
+	if (arg_assign(v) != NULL)
+		return;
+
+	fprintf (stderr,
+		"%s: '%s' argument to -v not in 'var=value' form\n",
+		myname, v);
+	usage(1);
+}
+
+/*
+ * catchsig --- handler installed by main() for SIGFPE, SIGSEGV and, where
+ * it exists, SIGBUS.  SIGFPE is reported through fatal(); the memory
+ * faults print an internal-error message and abort().
+ */
+SIGTYPE
+catchsig(sig, code)
+int sig, code;
+{
+	int memfault;
+
+#ifdef lint
+	code = 0; sig = code; code = sig;
+#endif
+	memfault = (sig == SIGSEGV);
+#ifdef SIGBUS
+	if (sig == SIGBUS)
+		memfault = 1;
+#endif
+
+	if (sig == SIGFPE)
+		fatal("floating point exception");
+	else if (memfault) {
+		msg("fatal error: internal error");
+		/* fatal won't abort() if not compiled for debugging */
+		abort();
+	} else
+		cant_happen();
+	/* NOTREACHED */
+}
+
+/* gawk_option --- do gawk specific things */
+
+/*
+ * gawk_option --- process the argument of -W.
+ *
+ * optstr holds one or more option words separated by blanks, tabs or
+ * commas; words are matched without regard to case.  After a word is
+ * recognized cp is left on its last character, so that the loop's cp++
+ * moves on to whatever follows.  "source=" takes the rest of the string
+ * as program text, so nothing can follow it.  An unrecognized character
+ * is reported on stderr and skipped.
+ */
+static void
+gawk_option(optstr)
+char *optstr;
+{
+	char *cp;
+
+	for (cp = optstr; *cp; cp++) {
+		switch (*cp) {
+		case ' ':
+		case '\t':
+		case ',':
+			break;
+		case 'v':
+		case 'V':
+			/* print version */
+			if (strncasecmp(cp, "version", 7) != 0)
+				goto unknown;
+			else
+				cp += 6;
+			version();
+			break;
+		case 'c':
+		case 'C':
+			if (strncasecmp(cp, "copyright", 9) == 0) {
+				cp += 8;
+				copyleft();
+			} else if (strncasecmp(cp, "copyleft", 8) == 0) {
+				cp += 7;
+				copyleft();
+			} else if (strncasecmp(cp, "compat", 6) == 0) {
+				cp += 5;
+				do_unix = 1;
+			} else
+				goto unknown;
+			break;
+		case 'n':
+		case 'N':
+			/*
+			 * Undocumented feature,
+			 * inspired by nostalgia, and a T-shirt
+			 */
+			if (strncasecmp(cp, "nostalgia", 9) != 0)
+				goto unknown;
+			nostalgia();
+			break;
+		case 'p':
+		case 'P':
+#ifdef DEBUG
+			if (strncasecmp(cp, "parsedebug", 10) == 0) {
+				cp += 9;
+				yydebug = 2;
+				break;
+			}
+#endif
+			if (strncasecmp(cp, "posix", 5) != 0)
+				goto unknown;
+			cp += 4;
+			do_posix = do_unix = 1;
+			break;
+		case 'l':
+		case 'L':
+			if (strncasecmp(cp, "lint", 4) != 0)
+				goto unknown;
+			cp += 3;
+			do_lint = 1;
+			break;
+		case 'H':
+		case 'h':
+			if (strncasecmp(cp, "help", 4) != 0)
+				goto unknown;
+			cp += 3;
+			usage(0);
+			break;
+		case 'U':
+		case 'u':
+			if (strncasecmp(cp, "usage", 5) != 0)
+				goto unknown;
+			cp += 4;
+			usage(0);
+			break;
+		case 's':
+		case 'S':
+			if (strncasecmp(cp, "source=", 7) != 0)
+				goto unknown;
+			cp += 7;
+			if (strlen(cp) == 0) {
+				warning("empty argument to -Wsource ignored");
+				/*
+				 * cp is on the terminating NUL; going round
+				 * the loop again would step past the end of
+				 * the string.
+				 */
+				return;
+			}
+			srcfiles[++numfiles].stype = CMDLINE;
+			srcfiles[numfiles].val = cp;
+			return;
+		default:
+		unknown:
+			fprintf(stderr, "'%c' -- unknown option, ignored\n",
+				*cp);
+			break;
+		}
+	}
+}
+
+/* nostalgia --- print the famous error message and die */
+
+/* Writes the classic one-line message to stderr, then calls abort(). */
+static void
+nostalgia()
+{
+	fputs("awk: bailing out near line 1\n", stderr);
+	abort();
+}
+
+/* version --- print version message */
+
+/* Writes version_string and PATCHLEVEL to stderr on one line. */
+static void
+version()
+{
+	(void) fprintf(stderr, "%s, patchlevel %d\n",
+			version_string, PATCHLEVEL);
+}
+
+/* static */
+/*
+ * gawk_name --- reduce the invocation name to the bare program name.
+ *
+ * Default case: returns a pointer just past the last '/' in filespec, or
+ * filespec itself when it has no '/'.  The VMS variant returns a strdup()ed
+ * copy with directory and type stripped; the MSDOS/atarist variant strips
+ * directory and extension in place and lower-cases the result.
+ */
+char *
+gawk_name(filespec)
+char *filespec;
+{
+	char *p;
+
+#ifdef VMS	/* "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK" */
+	char *q;
+
+	p = strrchr(filespec, ']');	/* directory punctuation */
+	q = strrchr(filespec, '>');	/* alternate <international> punct */
+
+	if (p == NULL || q > p)
+		p = q;
+	if (p == NULL)
+		p = strdup(filespec);
+	else
+		p = strdup(p + 1);
+	q = strrchr(p, '.');
+	if (q != NULL)
+		*q = '\0';	/* strip .typ;vers */
+
+	return p;
+#endif /*VMS*/
+
+#if defined(MSDOS) || defined(atarist)
+	char *q;
+
+	p = filespec;
+
+	q = strrchr(p, '\\');
+	if (q != NULL)
+		p = q + 1;
+	q = strchr(p, '.');
+	if (q != NULL)
+		*q = '\0';
+	strlwr(p);
+
+	return p;	/* p cannot be NULL at this point */
+#endif /* MSDOS || atarist */
+
+	/* "path/name" -> "name" */
+	p = strrchr(filespec, '/');
+	if (p != NULL)
+		return p + 1;
+	return filespec;
+}
diff --git a/gnu/usr.bin/awk/msg.c b/gnu/usr.bin/awk/msg.c
new file mode 100644
index 000000000000..b60fe9d1e5e9
--- /dev/null
+++ b/gnu/usr.bin/awk/msg.c
@@ -0,0 +1,106 @@
+/*
+ * msg.c - routines for error messages
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+int sourceline = 0;
+char *source = NULL;
+
+/* VARARGS2 */
+/*
+ * err --- common back end for msg(), warning() and fatal().
+ *
+ * Flushes stdout, then writes one line to stderr: the program name, the
+ * source file (or "cmd. line") and line number when sourceline is set, the
+ * current FILENAME and FNR when FNR is non-zero, the prefix s, and finally
+ * emsg formatted with the arguments in argp.
+ *
+ * s     fixed prefix such as "warning: "; written verbatim.
+ * emsg  printf-style format for the message proper.
+ * argp  arguments for emsg.
+ */
+void
+err(s, emsg, argp)
+char *s;
+char *emsg;
+va_list argp;
+{
+	char *file;
+
+	(void) fflush(stdout);
+	(void) fprintf(stderr, "%s: ", myname);
+	if (sourceline) {
+		if (source)
+			(void) fprintf(stderr, "%s:", source);
+		else
+			(void) fprintf(stderr, "cmd. line:");
+
+		(void) fprintf(stderr, "%d: ", sourceline);
+	}
+	if (FNR) {
+		file = FILENAME_node->var_value->stptr;
+		if (file)
+			(void) fprintf(stderr, "(FILENAME=%s ", file);
+		(void) fprintf(stderr, "FNR=%d) ", FNR);
+	}
+	/* the prefix is plain text, never a format string */
+	(void) fputs(s, stderr);
+	vfprintf(stderr, emsg, argp);
+	(void) fprintf(stderr, "\n");
+	(void) fflush(stderr);
+}
+
+/*VARARGS0*/
+/* msg --- print a message through err() with no prefix; first arg is the format */
+void
+msg(va_alist)
+va_dcl
+{
+	va_list ap;
+	char *fmt;
+
+	va_start(ap);
+	fmt = va_arg(ap, char *);	/* the format comes first */
+	err("", fmt, ap);
+	va_end(ap);
+}
+
+/*VARARGS0*/
+/* warning --- like msg(), but the text is prefixed with "warning: " */
+void
+warning(va_alist)
+va_dcl
+{
+	va_list ap;
+	char *fmt;
+
+	va_start(ap);
+	fmt = va_arg(ap, char *);	/* the format comes first */
+	err("warning: ", fmt, ap);
+	va_end(ap);
+}
+
+/*VARARGS0*/
+/*
+ * fatal --- print a message prefixed with "fatal: " and terminate:
+ * abort() when compiled with DEBUG, exit(2) otherwise.
+ */
+void
+fatal(va_alist)
+va_dcl
+{
+	va_list ap;
+	char *fmt;
+
+	va_start(ap);
+	fmt = va_arg(ap, char *);	/* the format comes first */
+	err("fatal: ", fmt, ap);
+	va_end(ap);
+#ifdef DEBUG
+	abort();
+#endif
+	exit(2);
+}
diff --git a/gnu/usr.bin/awk/node.c b/gnu/usr.bin/awk/node.c
new file mode 100644
index 000000000000..65ecb0ed1723
--- /dev/null
+++ b/gnu/usr.bin/awk/node.c
@@ -0,0 +1,429 @@
+/*
+ * node.c -- routines for node management
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+extern double strtod();
+
+/*
+ * r_force_number --- compute and cache the numeric value of string node N.
+ *
+ * Sets the NUM flag and stores the result in n->numbr, which is also the
+ * return value.  Empty strings, strings that start with a letter (before
+ * or after leading white space) and all-blank strings yield 0.0 without
+ * calling strtod().  A single remaining character is converted directly
+ * when it is a digit.  Everything else goes through strtod().
+ *
+ * If the node carries MAYBE_NUM, that flag is cleared; NUMBER is then set
+ * only when the text is a single digit, or when strtod() consumed the
+ * whole string (trailing white space allowed) without setting errno.
+ *
+ * The byte at n->stptr[n->stlen] is temporarily overwritten with '\0'
+ * around the strtod() call and restored afterwards.
+ *
+ * The <ctype.h> classifiers are only defined for EOF and values
+ * representable as unsigned char, so every character is cast before
+ * being tested; a plain char with the high bit set would otherwise be
+ * passed as a negative int.
+ */
+AWKNUM
+r_force_number(n)
+register NODE *n;
+{
+	register char *cp;
+	register char *cpend;
+	char save;
+	char *ptr;
+	unsigned int newflags = 0;
+
+#ifdef DEBUG
+	if (n == NULL)
+		cant_happen();
+	if (n->type != Node_val)
+		cant_happen();
+	if(n->flags == 0)
+		cant_happen();
+	if (n->flags & NUM)
+		return n->numbr;
+#endif
+
+	/* all the conditionals are an attempt to avoid the expensive strtod */
+
+	n->numbr = 0.0;
+	n->flags |= NUM;
+
+	if (n->stlen == 0)
+		return 0.0;
+
+	cp = n->stptr;
+	if (isalpha((unsigned char) *cp))
+		return 0.0;
+
+	cpend = cp + n->stlen;
+	while (cp < cpend && isspace((unsigned char) *cp))
+		cp++;
+	if (cp == cpend || isalpha((unsigned char) *cp))
+		return 0.0;
+
+	if (n->flags & MAYBE_NUM) {
+		newflags = NUMBER;
+		n->flags &= ~MAYBE_NUM;
+	}
+	if (cpend - cp == 1) {
+		/* one character left: either a digit or not a number at all */
+		if (isdigit((unsigned char) *cp)) {
+			n->numbr = (AWKNUM)(*cp - '0');
+			n->flags |= newflags;
+		}
+		return n->numbr;
+	}
+
+	errno = 0;
+	/* terminate the text in place for strtod; undone below */
+	save = *cpend;
+	*cpend = '\0';
+	n->numbr = (AWKNUM) strtod((const char *)cp, &ptr);
+
+	/* POSIX says trailing space is OK for NUMBER */
+	while (isspace((unsigned char) *ptr))
+		ptr++;
+	*cpend = save;
+	/* the >= should be ==, but for SunOS 3.5 strtod() */
+	if (errno == 0 && ptr >= cpend)
+		n->flags |= newflags;
+	else
+		errno = 0;
+
+	return n->numbr;
+}
+
+/*
+ * the following lookup table is used as an optimization in force_string
+ * (more complicated) variations on this theme didn't seem to pay off, but
+ * systematic testing might be in order at some point
+ */
+/* values[i] is the one-character decimal string for the integer i */
+static char *values[] = {
+ "0",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ "6",
+ "7",
+ "8",
+ "9",
+};
+/* number of entries in values[] */
+#define NVAL (sizeof(values)/sizeof(values[0]))
+
+/*
+ * r_force_string --- build and cache the string form of numeric node S.
+ *
+ * Integral values that fit in a long are formatted with "%ld" (or taken
+ * from the values[] table for 0 through 9) and get stfmt = -1.  Any other
+ * value is formatted with CONVFMT and gets stfmt = CONVFMTidx.  The text
+ * is copied into a fresh buffer of stlen + 2 bytes, stref is set to 1 and
+ * the STR flag is added.  Returns S.
+ */
+NODE *
+r_force_string(s)
+register NODE *s;
+{
+ /* scratch space for the formatted number */
+ char buf[128];
+ register char *sp = buf;
+ register long num = 0;
+
+#ifdef DEBUG
+ if (s == NULL) cant_happen();
+ if (s->type != Node_val) cant_happen();
+ if (s->flags & STR) return s;
+ if (!(s->flags & NUM)) cant_happen();
+ if (s->stref != 0) ; /*cant_happen();*/
+#endif
+
+ /* avoids floating point exception in DOS*/
+ /* num stays 0 when the value is outside the range of a long */
+ if ( s->numbr <= LONG_MAX && s->numbr >= -LONG_MAX)
+ num = (long)s->numbr;
+ if ((AWKNUM) num == s->numbr) { /* integral value */
+ if (num < NVAL && num >= 0) {
+ /* single digit: use the precomputed string */
+ sp = values[num];
+ s->stlen = 1;
+ } else {
+ (void) sprintf(sp, "%ld", num);
+ s->stlen = strlen(sp);
+ }
+ s->stfmt = -1;
+ } else {
+ /*
+  * NOTE(review): nothing in this function bounds the output of
+  * sprintf(CONVFMT) to sizeof(buf); confirm CONVFMT is validated
+  * elsewhere.
+  */
+ (void) sprintf(sp, CONVFMT, s->numbr);
+ s->stlen = strlen(sp);
+ s->stfmt = (char)CONVFMTidx;
+ }
+ s->stref = 1;
+ /* allocate stlen + 2 bytes, then copy the text and its terminating NUL */
+ emalloc(s->stptr, char *, s->stlen + 2, "force_string");
+ memcpy(s->stptr, sp, s->stlen+1);
+ s->flags |= STR;
+ return s;
+}
+
+/*
+ * Duplicate a node. (For strings, "duplicate" means crank up the
+ * reference count.)
+ */
+/*
+ * dupnode --- return a node the caller may keep, equivalent to N.
+ *
+ * Three cases, tried in order:
+ *   - N is TEMP: N itself is returned with TEMP cleared and MALLOC set.
+ *   - N is a MALLOC string: its reference count is bumped (it stops
+ *     increasing once it reaches 255) and N itself is returned.
+ *   - otherwise: a new node is obtained with getnode(), N is copied into
+ *     it, PERM and TEMP are cleared, MALLOC is set, and for string values
+ *     the text is copied into a new buffer with stref = 1.
+ */
+NODE *
+dupnode(n)
+NODE *n;
+{
+ register NODE *r;
+
+ if (n->flags & TEMP) {
+ /* take over the temporary instead of copying it */
+ n->flags &= ~TEMP;
+ n->flags |= MALLOC;
+ return n;
+ }
+ if ((n->flags & (MALLOC|STR)) == (MALLOC|STR)) {
+ /* the count is never raised above 255 */
+ if (n->stref < 255)
+ n->stref++;
+ return n;
+ }
+ getnode(r);
+ *r = *n;
+ r->flags &= ~(PERM|TEMP);
+ r->flags |= MALLOC;
+ if (n->type == Node_val && (n->flags & STR)) {
+ r->stref = 1;
+ /* private copy of the text, including the byte at stptr[stlen] */
+ emalloc(r->stptr, char *, r->stlen + 2, "dupnode");
+ memcpy(r->stptr, n->stptr, r->stlen+1);
+ }
+ return r;
+}
+
+/*
+ * mk_number --- obtain a fresh value node holding the number X.
+ * FLAGS becomes the node's complete flag word.  The string fields are
+ * only initialized in DEBUG builds.
+ */
+NODE *
+mk_number(x, flags)
+AWKNUM x;
+unsigned int flags;
+{
+	register NODE *node;
+
+	getnode(node);
+	node->flags = flags;
+	node->numbr = x;
+	node->type = Node_val;
+#ifdef DEBUG
+	node->stlen = 0;
+	node->stptr = 0;
+	node->stref = 1;
+#endif
+	return node;
+}
+
+/*
+ * Make a string node.
+ *
+ * S points to LEN bytes of text.  FLAGS may contain:
+ *	ALREADY_MALLOCED	S is adopted as the node's buffer instead of
+ *				being copied.
+ *	SCAN			backslash escape sequences in the text are
+ *				translated in place with parse_escape(),
+ *				the buffer is resized to the new length and
+ *				the node is additionally marked PERM.
+ *
+ * The returned node has flags STRING|STR|MALLOC (plus PERM for SCAN),
+ * stref = 1 and stfmt = -1, and its text is NUL terminated.
+ */
+NODE *
+make_str_node(s, len, flags)
+char *s;
+size_t len;
+int flags;
+{
+	register NODE *r;
+
+	getnode(r);
+	r->type = Node_val;
+	r->flags = (STRING|STR|MALLOC);
+	if (flags & ALREADY_MALLOCED)
+		r->stptr = s;
+	else {
+		/*
+		 * The last emalloc argument is the label used in the
+		 * allocation-failure diagnostic; pass this function's name,
+		 * as every other caller in this file does, rather than the
+		 * string data itself.
+		 */
+		emalloc(r->stptr, char *, len + 2, "make_str_node");
+		memcpy(r->stptr, s, len);
+	}
+	r->stptr[len] = '\0';
+
+	if (flags & SCAN) {	/* scan for escape sequences */
+		char *pf;		/* read position */
+		register char *ptm;	/* write position, never ahead of pf */
+		register int c;
+		register char *end;
+
+		end = &(r->stptr[len]);
+		for (pf = ptm = r->stptr; pf < end;) {
+			c = *pf++;
+			if (c == '\\') {
+				c = parse_escape(&pf);
+				if (c < 0) {
+					if (do_lint)
+						warning("backslash at end of string");
+					c = '\\';
+				}
+				*ptm++ = c;
+			} else
+				*ptm++ = c;
+		}
+		len = ptm - r->stptr;
+		/*
+		 * Keep the same len + 2 sizing used by the other string
+		 * allocations in this file, so the byte after the
+		 * terminator stays addressable.
+		 */
+		erealloc(r->stptr, char *, len + 2, "make_str_node");
+		r->stptr[len] = '\0';
+		r->flags |= PERM;
+	}
+	r->stlen = len;
+	r->stref = 1;
+	r->stfmt = -1;
+
+	return r;
+}
+
+/*
+ * tmp_string --- make a string node from the LEN bytes at S and mark it
+ * with the TEMP flag.
+ */
+NODE *
+tmp_string(s, len)
+char *s;
+size_t len;
+{
+	register NODE *node = make_string(s, len);
+
+	node->flags |= TEMP;
+	return node;
+}
+
+
+/* number of NODEs obtained from the allocator in one request */
+#define NODECHUNK 100
+
+/* head of the singly linked (through nextp) list of free nodes */
+NODE *nextfree = NULL;
+
+/*
+ * more_nodes --- allocate a new chunk of NODECHUNK nodes.
+ *
+ * The nodes of the chunk are chained through nextp, the last one ending
+ * the list with NULL.  The first node of the chunk is returned to the
+ * caller and nextfree is left pointing at the second.  nextfree is
+ * overwritten unconditionally, so any previous list contents are dropped.
+ */
+NODE *
+more_nodes()
+{
+ register NODE *np;
+
+ /* get more nodes and initialize list */
+ emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "newnode");
+ for (np = nextfree; np < &nextfree[NODECHUNK - 1]; np++)
+ np->nextp = np + 1;
+ /* np now addresses the last node of the chunk */
+ np->nextp = NULL;
+ np = nextfree;
+ nextfree = nextfree->nextp;
+ return np;
+}
+
+#ifdef DEBUG
+/*
+ * freenode --- release node IT (function form, compiled only with DEBUG).
+ * With MPROF defined the node is handed to free(); otherwise it is pushed
+ * onto the front of the nextfree list for reuse.
+ */
+void
+freenode(it)
+NODE *it;
+{
+#ifdef MPROF
+ it->stref = 0;
+ free((char *) it);
+#else /* not MPROF */
+ /* add it to head of freelist */
+ it->nextp = nextfree;
+ nextfree = it;
+#endif /* not MPROF */
+}
+#endif /* DEBUG */
+
+/*
+ * unref --- drop one reference to node TMP.
+ *
+ * NULL and PERM nodes are ignored, as are nodes that are neither MALLOC
+ * nor TEMP.  For the rest TEMP is cleared; a string whose reference
+ * count is above 1 just has the count lowered (a count of exactly 255 is
+ * left untouched), otherwise the string buffer is freed and the node is
+ * released with freenode().
+ */
+void
+unref(tmp)
+register NODE *tmp;
+{
+	if (tmp == NULL || (tmp->flags & PERM))
+		return;
+	if (!(tmp->flags & (MALLOC|TEMP)))
+		return;
+
+	tmp->flags &= ~TEMP;
+	if (tmp->flags & STR) {
+		if (tmp->stref > 1) {
+			/* still shared: only adjust the count */
+			if (tmp->stref != 255)
+				tmp->stref--;
+			return;
+		}
+		free(tmp->stptr);
+	}
+	freenode(tmp);
+}
+
+/*
+ * Parse a C escape sequence. STRING_PTR points to a variable containing a
+ * pointer to the string to parse, positioned on the character that follows
+ * the backslash. That pointer is updated past the characters we use. The
+ * value of the escape sequence is returned.
+ *
+ * Return values:
+ *	-2	the sequence \ newline was seen, which is supposed to be
+ *		equivalent to nothing at all.
+ *	-1	\ was followed by a null character; the string pointer is
+ *		left pointing at the null character.
+ *	other	the value of the escape.
+ *
+ * If \ is followed by 000, we return 0 and leave the string pointer after the
+ * zeros. A value of 0 does not mean end of string.  An octal escape uses at
+ * most three digits; a hexadecimal escape uses every hex digit that follows.
+ *
+ * Posix doesn't allow \x: with do_posix set, \x yields the letter 'x' and
+ * the digits are left unread; with do_lint set a warning is issued once.
+ *
+ * Any character not listed in the switch is returned unchanged.
+ *
+ * Characters are cast to unsigned char before being handed to the
+ * <ctype.h> classifiers, which are undefined for negative arguments
+ * other than EOF.
+ */
+
+int
+parse_escape(string_ptr)
+char **string_ptr;
+{
+	register int c = *(*string_ptr)++;
+	register int i;
+	register int count;
+
+	switch (c) {
+	case 'a':
+		return BELL;
+	case 'b':
+		return '\b';
+	case 'f':
+		return '\f';
+	case 'n':
+		return '\n';
+	case 'r':
+		return '\r';
+	case 't':
+		return '\t';
+	case 'v':
+		return '\v';
+	case '\n':
+		return -2;
+	case 0:
+		/* do not step over the terminator */
+		(*string_ptr)--;
+		return -1;
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		i = c - '0';
+		count = 0;
+		/* up to two more octal digits */
+		while (++count < 3) {
+			if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
+				i *= 8;
+				i += c - '0';
+			} else {
+				(*string_ptr)--;
+				break;
+			}
+		}
+		return i;
+	case 'x':
+		if (do_lint) {
+			static int didwarn;
+
+			if (! didwarn) {
+				didwarn = 1;
+				warning("Posix does not allow \"\\x\" escapes");
+			}
+		}
+		if (do_posix)
+			return ('x');
+		i = 0;
+		while (1) {
+			c = *(*string_ptr)++;
+			if (isxdigit((unsigned char) c)) {
+				i *= 16;
+				if (isdigit((unsigned char) c))
+					i += c - '0';
+				else if (isupper((unsigned char) c))
+					i += c - 'A' + 10;
+				else
+					i += c - 'a' + 10;
+			} else {
+				/* not part of the escape: give it back */
+				(*string_ptr)--;
+				break;
+			}
+		}
+		return i;
+	default:
+		return c;
+	}
+}
diff --git a/gnu/usr.bin/awk/patchlevel.h b/gnu/usr.bin/awk/patchlevel.h
new file mode 100644
index 000000000000..c6161a1f274c
--- /dev/null
+++ b/gnu/usr.bin/awk/patchlevel.h
@@ -0,0 +1 @@
+#define PATCHLEVEL 2
diff --git a/gnu/usr.bin/awk/protos.h b/gnu/usr.bin/awk/protos.h
new file mode 100644
index 000000000000..25af32165b02
--- /dev/null
+++ b/gnu/usr.bin/awk/protos.h
@@ -0,0 +1,115 @@
+/*
+ * protos.h -- function prototypes for when the headers don't have them.
+ */
+
+/*
+ * Copyright (C) 1991, 1992, the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef __STDC__
+#define aptr_t void * /* arbitrary pointer type */
+#else
+#define aptr_t char *
+#endif
+extern aptr_t malloc P((MALLOC_ARG_T));
+extern aptr_t realloc P((aptr_t, MALLOC_ARG_T));
+extern aptr_t calloc P((MALLOC_ARG_T, MALLOC_ARG_T));
+
+extern void free P((aptr_t));
+extern char *getenv P((char *));
+
+extern char *strcpy P((char *, const char *));
+extern char *strcat P((char *, const char *));
+extern char *strncpy P((char *, const char *, int));
+extern int strcmp P((const char *, const char *));
+extern int strncmp P((const char *, const char *, int));
+#ifndef VMS
+extern char *strerror P((int));
+#else
+extern char *strerror P((int,...));
+#endif
+extern char *strchr P((const char *, int));
+extern char *strrchr P((const char *, int));
+extern char *strstr P((const char *s1, const char *s2));
+extern int strlen P((const char *));
+extern long strtol P((const char *, char **, int));
+#if !defined(_MSC_VER) && !defined(__GNU_LIBRARY__)
+extern int strftime P((char *, int, const char *, const struct tm *));
+#endif
+extern time_t time P((time_t *));
+extern aptr_t memset P((aptr_t, int, size_t));
+extern aptr_t memcpy P((aptr_t, const aptr_t, size_t));
+extern aptr_t memmove P((aptr_t, const aptr_t, size_t));
+extern aptr_t memchr P((const aptr_t, int, size_t));
+extern int memcmp P((const aptr_t, const aptr_t, size_t));
+
+/* extern int fprintf P((FILE *, char *, ...)); */
+extern int fprintf P(());
+#if !defined(MSDOS) && !defined(__GNU_LIBRARY__)
+extern int fwrite P((const char *, int, int, FILE *));
+extern int fputs P((const char *, FILE *));
+extern int unlink P((const char *));
+#endif
+extern int fflush P((FILE *));
+extern int fclose P((FILE *));
+extern FILE *popen P((const char *, const char *));
+extern int pclose P((FILE *));
+extern void abort P(());
+extern int isatty P((int));
+extern void exit P((int));
+extern int system P((const char *));
+extern int sscanf P((/* char *, char *, ... */));
+#ifndef toupper
+extern int toupper P((int));
+#endif
+#ifndef tolower
+extern int tolower P((int));
+#endif
+
+extern double pow P((double x, double y));
+extern double atof P((char *));
+extern double strtod P((const char *, char **));
+extern int fstat P((int, struct stat *));
+extern int stat P((const char *, struct stat *));
+extern off_t lseek P((int, off_t, int));
+extern int fseek P((FILE *, long, int));
+extern int close P((int));
+extern int creat P(());
+extern int open P(());
+extern int pipe P((int *));
+extern int dup P((int));
+extern int dup2 P((int,int));
+extern int fork P(());
+extern int execl P((/* char *, char *, ... */));
+extern int read P((int, char *, int));
+extern int wait P((int *));
+extern void _exit P((int));
+
+/*
+ * NOTE(review): time is also declared earlier in this header as
+ * returning time_t; on a non-__STDC__ compiler where time_t is not long
+ * the two declarations would disagree -- confirm.
+ */
+#ifndef __STDC__
+extern long time P((long *));
+#endif
+
+/* the declared return type of sprintf is selected by NON_STD_SPRINTF */
+#ifdef NON_STD_SPRINTF
+extern char *sprintf();
+#else
+extern int sprintf();
+#endif /* NON_STD_SPRINTF */
+
+#undef aptr_t
diff --git a/gnu/usr.bin/awk/re.c b/gnu/usr.bin/awk/re.c
new file mode 100644
index 000000000000..495b0963cadb
--- /dev/null
+++ b/gnu/usr.bin/awk/re.c
@@ -0,0 +1,208 @@
+/*
+ * re.c - compile regular expressions.
+ */
+
+/*
+ * Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+/*
+ * make_regexp --- generate a compiled regular expression.
+ *
+ * S points to LEN bytes of pattern text.  A translated copy is built in
+ * which the escapes \a \b \f \n \r \t \v \x and \0 through \7 are replaced
+ * by the value parse_escape() returns; every other backslash sequence is
+ * copied through with its backslash.  The copy is compiled with
+ * re_compile_pattern(); a compile error is reported through fatal().
+ *
+ * IGNORECASE nonzero installs casetable as the translate table.
+ * DFA nonzero (and IGNORECASE zero) additionally compiles the pattern
+ * with regcompile() and sets rp->dfa.
+ *
+ * Returns a newly allocated Regexp; the translated copy is freed before
+ * returning.
+ */
+
+Regexp *
+make_regexp(s, len, ignorecase, dfa)
+char *s;
+int len;
+int ignorecase;
+int dfa;
+{
+ Regexp *rp;
+ char *err;
+ char *src = s;
+ char *temp;
+ char *end = s + len;
+ register char *dest;
+ register int c;
+
+ /* Handle escaped characters first. */
+
+ /* Build a copy of the string (in dest) with the
+ escaped characters translated, and generate the regex
+ from that.
+ */
+ emalloc(dest, char *, len + 2, "make_regexp");
+ /* temp remembers the start of the copy; dest is the write position */
+ temp = dest;
+
+ while (src < end) {
+ if (*src == '\\') {
+ /*
+  * When the backslash is the last pattern byte this reads s[len];
+  * assumes that byte is readable -- TODO confirm against callers.
+  */
+ c = *++src;
+ switch (c) {
+ case 'a':
+ case 'b':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'v':
+ case 'x':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ /* src is on the escape letter; parse_escape advances it */
+ c = parse_escape(&src);
+ if (c < 0)
+ cant_happen();
+ *dest++ = (char)c;
+ break;
+ default:
+ /* leave the sequence for the regex compiler to interpret */
+ *dest++ = '\\';
+ *dest++ = (char)c;
+ src++;
+ break;
+ } /* switch */
+ } else {
+ *dest++ = *src++; /* not '\\' */
+ }
+ } /* while */
+
+ *dest = '\0' ; /* Only necessary if we print dest ? */
+ emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
+ memset((char *) rp, 0, sizeof(*rp));
+ /* initial pattern buffer of 16 bytes and a 256-byte fastmap */
+ emalloc(rp->pat.buffer, char *, 16, "make_regexp");
+ rp->pat.allocated = 16;
+ emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
+
+ if (ignorecase)
+ rp->pat.translate = casetable;
+ else
+ rp->pat.translate = NULL;
+ /* length of the translated pattern */
+ len = dest - temp;
+ if ((err = re_compile_pattern(temp, (size_t) len, &(rp->pat))) != NULL)
+ fatal("%s: /%s/", err, temp);
+ if (dfa && !ignorecase) {
+ regcompile(temp, len, &(rp->dfareg), 1);
+ rp->dfa = 1;
+ } else
+ rp->dfa = 0;
+ free(temp);
+ return rp;
+}
+
+/*
+ * research --- search for the compiled regexp RP in the LEN bytes of STR
+ * that begin at offset START.
+ *
+ * When RP has a dfa matcher it is run first: a NULL result from
+ * regexecute() means no match and -1 is returned.  If it reports a match
+ * and the caller does not need the match position (NEED_START zero), 1 is
+ * returned.  In every other case the result of re_search() is returned.
+ *
+ * The dfa path temporarily stores '\n' at str[start+len] and also reads
+ * and rewrites str[start+len+1], so STR must have at least two
+ * addressable, writable bytes after the searched text.
+ */
+int
+research(rp, str, start, len, need_start)
+Regexp *rp;
+register char *str;
+int start;
+register int len;
+int need_start;
+{
+ /* non-NULL so that the re_search() path is taken when there is no dfa */
+ char *ret = str;
+
+ if (rp->dfa) {
+ char save1;
+ char save2;
+ int count = 0;
+ int try_backref;
+
+ /* save the two bytes after the text; both are put back below */
+ save1 = str[start+len];
+ str[start+len] = '\n';
+ save2 = str[start+len+1];
+ ret = regexecute(&(rp->dfareg), str+start, str+start+len+1, 1,
+ &count, &try_backref);
+ str[start+len] = save1;
+ str[start+len+1] = save2;
+ }
+ if (ret) {
+ if (need_start || rp->dfa == 0)
+ return re_search(&(rp->pat), str, start+len, start,
+ len, &(rp->regs));
+ else
+ return 1;
+ } else
+ return -1;
+}
+
+/*
+ * refree --- release a Regexp: its fastmap, its pattern buffer, the dfa
+ * data when rp->dfa is set, and finally the structure itself.
+ */
+void
+refree(rp)
+Regexp *rp;
+{
+	free(rp->pat.fastmap);
+	free(rp->pat.buffer);
+	if (rp->dfa)
+		reg_free(&(rp->dfareg));
+	free(rp);
+}
+
+/*
+ * reg_error --- report the error message S through fatal().
+ *
+ * fatal() treats its first argument as a printf-style format, so S is
+ * passed as the operand of "%s" rather than as the format itself; a '%'
+ * inside the message is then printed literally instead of being taken as
+ * a conversion specification.
+ */
+void
+reg_error(s)
+const char *s;
+{
+	fatal("%s", s);
+}
+
+/*
+ * re_update --- return an up-to-date compiled regexp for regexp node T.
+ *
+ * The CASE bit of t->re_flags records the IGNORECASE value in effect when
+ * t->re_reg was last compiled (this function stores it there before
+ * returning).  While that still equals the current IGNORECASE, a CONST
+ * regexp, or a dynamic one whose text is unchanged, is served from the
+ * cached t->re_reg.  Otherwise the old regexp is freed and the text in
+ * t->re_text is compiled again.
+ *
+ * NOTE(review): the comparison with the masked flag assumes IGNORECASE
+ * only ever holds 0 or 1 -- confirm.
+ */
+Regexp *
+re_update(t)
+NODE *t;
+{
+ NODE *t1;
+
+# define CASE 1
+ if ((t->re_flags & CASE) == IGNORECASE) {
+ /* case sensitivity is unchanged since the last compile */
+ if (t->re_flags & CONST)
+ return t->re_reg;
+ t1 = force_string(tree_eval(t->re_exp));
+ if (t->re_text) {
+ if (cmp_nodes(t->re_text, t1) == 0) {
+ /* same pattern text as last time */
+ free_temp(t1);
+ return t->re_reg;
+ }
+ unref(t->re_text);
+ }
+ t->re_text = dupnode(t1);
+ free_temp(t1);
+ }
+ if (t->re_reg)
+ refree(t->re_reg);
+ /*
+  * re_cnt is passed below as the dfa argument of make_regexp().  A
+  * nonzero count is advanced on every recompilation and reset to 0 once
+  * it passes 10; from then on it stays 0 and no dfa is requested.
+  */
+ if (t->re_cnt)
+ t->re_cnt++;
+ if (t->re_cnt > 10)
+ t->re_cnt = 0;
+ if (!t->re_text) {
+ t1 = force_string(tree_eval(t->re_exp));
+ t->re_text = dupnode(t1);
+ free_temp(t1);
+ }
+ t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen, IGNORECASE, t->re_cnt);
+ /* remember which IGNORECASE value this regexp was compiled with */
+ t->re_flags &= ~CASE;
+ t->re_flags |= IGNORECASE;
+ return t->re_reg;
+}
+
+/*
+ * resetup --- select the awk regexp syntax (RE_SYNTAX_AWK) in both
+ * matchers: re_set_syntax() for the regex code, regsyntax() for the dfa
+ * code.  The previous syntax returned by re_set_syntax() is discarded.
+ */
+void
+resetup()
+{
+ (void) re_set_syntax(RE_SYNTAX_AWK);
+ regsyntax(RE_SYNTAX_AWK, 0);
+}
diff --git a/gnu/usr.bin/awk/regex.c b/gnu/usr.bin/awk/regex.c
new file mode 100644
index 000000000000..f4dd4c2cd24d
--- /dev/null
+++ b/gnu/usr.bin/awk/regex.c
@@ -0,0 +1,2854 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+
+/* To test, compile with -Dtest. This Dtestable feature turns this into
+ a self-contained program which reads a pattern, describes how it
+ compiles, then reads a string and searches for it.
+
+ On the other hand, if you compile with both -Dtest and -Dcanned you
+ can run some tests we've already thought of. */
+
+
+#ifdef emacs
+
+/* The `emacs' switch turns on certain special matching commands
+ that make sense only in emacs. */
+
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+
+/* We write fatal error messages on standard error. */
+#include <stdio.h>
+
+/* isalpha(3) etc. are used for the character classes. */
+#include <ctype.h>
+
+#else /* not emacs */
+
+#include "awk.h"
+
+#define NO_ALLOCA /* try it out for now */
+#ifndef NO_ALLOCA
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#ifndef atarist
+#ifndef alloca
+#define alloca __builtin_alloca
+#endif
+#endif /* atarist */
+#else
+#if defined(sparc) && !defined(__GNUC__)
+#include <alloca.h>
+#else
+char *alloca ();
+#endif
+#endif /* __GNUC__ */
+
+#define FREE_AND_RETURN_VOID(stackb) return
+#define FREE_AND_RETURN(stackb,val) return(val)
+#define DOUBLE_STACK(stackx,stackb,len) \
+ (stackx = (unsigned char **) alloca (2 * len \
+ * sizeof (unsigned char *)),\
+ /* Only copy what is in use. */ \
+ (unsigned char **) memcpy (stackx, stackb, len * sizeof (char *)))
+#else /* NO_ALLOCA defined */
+/*
+ * Variants used when NO_ALLOCA is defined: the stack is released with
+ * free() before returning.  Each of the next two macros expands to TWO
+ * statements, so when used as the unbraced body of an if/else only the
+ * free() would be conditional.
+ * NOTE(review): the call sites are outside this view -- confirm that
+ * every use is enclosed in braces.
+ */
+#define FREE_AND_RETURN_VOID(stackb) free(stackb);return
+#define FREE_AND_RETURN(stackb,val) free(stackb);return(val)
+/*
+ * Evaluates to the result of realloc()ing stackb to room for 2 * len
+ * pointers; stackx is not used by this variant and len is expanded
+ * without parentheses.
+ */
+#define DOUBLE_STACK(stackx,stackb,len) \
+ (unsigned char **) realloc (stackb, 2 * len * sizeof (unsigned char *))
+#endif /* NO_ALLOCA */
+
+static void store_jump P((char *, int, char *));
+static void insert_jump P((int, char *, char *, char *));
+static void store_jump_n P((char *, int, char *, unsigned));
+static void insert_jump_n P((int, char *, char *, char *, unsigned));
+static void insert_op_2 P((int, char *, char *, int, int ));
+static int memcmp_translate P((unsigned char *, unsigned char *,
+ int, unsigned char *));
+long re_set_syntax P((long));
+
+/* Define the syntax stuff, so we can do the \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#define SYNTAX(c) re_syntax_table[c]
+
+
+#ifdef SYNTAX_TABLE
+
+char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+static char re_syntax_table[256];
+static void init_syntax_once P((void));
+
+
+/*
+ * init_syntax_once --- fill in re_syntax_table on the first call; later
+ * calls return immediately.
+ *
+ * An entry is Sword for the character codes 'a'..'z', 'A'..'Z', '0'..'9'
+ * and 0300..0377 (ISO Latin-1) except 0327 and 0367; every other entry
+ * is 0.
+ */
+static void
+init_syntax_once ()
+{
+	static int done = 0;
+	register int c;
+
+	if (done)
+		return;
+
+	for (c = 0; c < (int) sizeof re_syntax_table; c++)
+	  {
+	    int is_word = (c >= 'a' && c <= 'z')
+			  || (c >= 'A' && c <= 'Z')
+			  || (c >= '0' && c <= '9')
+			  /* Specific syntax for ISO Latin-1. */
+			  || (c >= 0300 && c <= 0377
+			      && c != 0327 && c != 0367);
+
+	    re_syntax_table[c] = is_word ? Sword : 0;
+	  }
+
+	done = 1;
+}
+
+#endif /* SYNTAX_TABLE */
+#undef P
+#endif /* emacs */
+
+
+/* Sequents are missing isgraph. */
+#ifndef isgraph
+#define isgraph(c) (isprint((c)) && !isspace((c)))
+#endif
+
+/* Get the interface, including the syntax bits. */
+#include "regex.h"
+
+
+/* These are the command codes that appear in compiled regular
+ expressions, one per byte. Some command codes are followed by
+ argument bytes. A command code can specify any interpretation
+ whatsoever for its arguments. Zero-bytes may appear in the compiled
+ regular expression.
+
+ The value of `exactn' is needed in search.c (search_buffer) in emacs.
+ So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+ `exactn' we use here must also be 1. */
+
+enum regexpcode
+ {
+ unused=0,
+ exactn=1, /* Followed by one byte giving n, then by n literal bytes. */
+ begline, /* Fail unless at beginning of line. */
+ endline, /* Fail unless at end of line. */
+ jump, /* Followed by two bytes giving relative address to jump to. */
+ on_failure_jump, /* Followed by two bytes giving relative address of
+ place to resume at in case of failure. */
+ finalize_jump, /* Throw away latest failure point and then jump to
+ address. */
+ maybe_finalize_jump, /* Like jump but finalize if safe to do so.
+ This is used to jump back to the beginning
+ of a repeat. If the command that follows
+ this jump is clearly incompatible with the
+ one at the beginning of the repeat, such that
+ we can be sure that there is no use backtracking
+ out of repetitions already completed,
+ then we finalize. */
+ dummy_failure_jump, /* Jump, and push a dummy failure point. This
+ failure point will be thrown away if an attempt
+ is made to use it for a failure. A + construct
+ makes this before the first repeat. Also
+ use it as an intermediary kind of jump when
+ compiling an or construct. */
+ succeed_n, /* Used like on_failure_jump except has to succeed n times;
+ then gets turned into an on_failure_jump. The relative
+ address following it is useless until then. The
+ address is followed by two bytes containing n. */
+ jump_n, /* Similar to jump, but jump n times only; also the relative
+ address following is in turn followed by yet two more bytes
+ containing n. */
+ set_number_at, /* Set the following relative location to the
+ subsequent number. */
+ anychar, /* Matches any (more or less) one character. */
+ charset, /* Matches any one char belonging to specified set.
+ First following byte is number of bitmap bytes.
+ Then come bytes for a bitmap saying which chars are in.
+ Bits in each byte are ordered low-bit-first.
+ A character is in the set if its bit is 1.
+ A character too large to have a bit in the map
+ is automatically not in the set. */
+ charset_not, /* Same parameters as charset, but match any character
+ that is not one of those specified. */
+ start_memory, /* Start remembering the text that is matched, for
+ storing in a memory register. Followed by one
+ byte containing the register number. Register numbers
+ must be in the range 0 through RE_NREGS. */
+ stop_memory, /* Stop remembering the text that is matched
+ and store it in a memory register. Followed by
+ one byte containing the register number. Register
+ numbers must be in the range 0 through RE_NREGS. */
+ duplicate, /* Match a duplicate of something remembered.
+ Followed by one byte containing the index of the memory
+ register. */
+ before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+ begbuf, /* Succeeds if at beginning of buffer. */
+ endbuf, /* Succeeds if at end of buffer. */
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound,/* Succeeds if not at a word boundary. */
+ syntaxspec, /* Matches any character whose syntax is specified.
+ followed by a byte which contains a syntax code,
+ e.g., Sword. */
+ notsyntaxspec /* Matches any character whose syntax differs from
+ that specified. */
+ };
+
+
+/* Number of failure points to allocate space for initially,
+ when matching. If this number is exceeded, more space is allocated,
+ so it is not a hard limit. */
+
+#ifndef NFAILURES
+#define NFAILURES 80
+#endif
+
+#ifdef CHAR_UNSIGNED
+#define SIGN_EXTEND_CHAR(c) ((c)>(char)127?(c)-256:(c)) /* for IBM RT */
+#endif
+#ifndef SIGN_EXTEND_CHAR
+#define SIGN_EXTEND_CHAR(x) (x)
+#endif
+
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+#define STORE_NUMBER(destination, number) \
+ { (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; }
+
+/* Same as STORE_NUMBER, except increment the destination pointer to
+ the byte after where the number is stored. Watch out that values for
+ DESTINATION such as p + 1 won't work, whereas p will. */
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ { STORE_NUMBER(destination, number); \
+ (destination) += 2; }
+
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+#define EXTRACT_NUMBER(destination, source) \
+ { (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
+
+/* Same as EXTRACT_NUMBER, except increment the pointer for source to
+ point to second byte of SOURCE. Note that SOURCE has to be a value
+ such as p, not, e.g., p + 1. */
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ { EXTRACT_NUMBER (destination, source); \
+ (source) += 2; }
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit-mask comprised of the various bits
+ defined in regex.h. */
+
+long
+re_set_syntax (syntax)
+	long syntax;
+{
+	/* Remember the syntax in effect so it can be handed back.  */
+	long previous = obscure_syntax;
+
+	obscure_syntax = syntax;
+	return previous;
+}
+
+/* Set by re_set_syntax to the current regexp syntax to recognize. */
+long obscure_syntax = 0;
+
+
+
+/* Macros for re_compile_pattern, which is found below these definitions. */
+
+#define CHAR_CLASS_MAX_LENGTH 6
+
+/* Fetch the next character in the uncompiled pattern, translating it if
+ necessary. */
+#define PATFETCH(c) \
+ {if (p == pend) goto end_of_pattern; \
+ c = * (unsigned char *) p++; \
+ if (translate) c = translate[c]; }
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. */
+#define PATFETCH_RAW(c) \
+ {if (p == pend) goto end_of_pattern; \
+ c = * (unsigned char *) p++; }
+
+#define PATUNFETCH p--
+
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 28
+
+/* Make sure we have at least N more bytes of space in buffer. */
+#define GET_BUFFER_SPACE(n) \
+ { \
+ while (b - bufp->buffer + (n) >= bufp->allocated) \
+ EXTEND_BUFFER; \
+ }
+
+/* Make sure we have one more byte of buffer space and then add CH to it. */
+#define BUFPUSH(ch) \
+ { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (char) (ch); \
+ }
+
+/* Extend the buffer by twice its current size via reallocation and
+ reset the pointers that pointed into the old allocation to point to
+ the correct places in the new allocation. If extending the buffer
+ results in it being larger than 1 << 16, then flag memory exhausted. */
+#define EXTEND_BUFFER \
+ { char *old_buffer = bufp->buffer; \
+ if (bufp->allocated == (1L<<16)) goto too_big; \
+ bufp->allocated *= 2; \
+ if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
+ bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \
+ if (bufp->buffer == 0) \
+ goto memory_exhausted; \
+ b = (b - old_buffer) + bufp->buffer; \
+ if (fixup_jump) \
+ fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ }
+
+/* Set the bit for character C in a character set list. */
+#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
+
+/* Get the next unsigned number in the uncompiled pattern. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (isdigit (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+/* Subroutines for re_compile_pattern. */
+/* static void store_jump (), insert_jump (), store_jump_n (),
+ insert_jump_n (), insert_op_2 (); */
+
+
+/* re_compile_pattern takes a regular-expression string
+ and converts it into a buffer full of byte commands for matching.
+
+ PATTERN is the address of the pattern string
+ SIZE is the length of it.
+ BUFP is a struct re_pattern_buffer * which points to the info
+ on where to store the byte commands.
+ This structure contains a char * which points to the
+ actual space, which should have been obtained with malloc.
+ re_compile_pattern may use realloc to grow the buffer space.
+
+ The number of bytes of commands can be found out by looking in
+ the `struct re_pattern_buffer' that bufp pointed to, after
+ re_compile_pattern returns. */
+
+/* Compile the SIZE bytes at PATTERN into the command buffer of *BUFP,
+ honouring the syntax bits in `obscure_syntax'.
+
+ Returns 0 on success (bufp->used then holds the number of command
+ bytes); otherwise returns a pointer to a constant string describing
+ the error. bufp->fastmap_accurate is cleared on entry, so any
+ fastmap must be recomputed after a successful compile. */
+
+char *
+re_compile_pattern (pattern, size, bufp)
+ char *pattern;
+ size_t size;
+ struct re_pattern_buffer *bufp;
+{
+ register char *b = bufp->buffer;
+ register char *p = pattern;
+ char *pend = pattern + size;
+ register unsigned c, c1;
+ char *p0;
+ unsigned char *translate = (unsigned char *) bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell whether a new exact-match
+ character can be added to that command or requires a new `exactn'
+ command. */
+
+ char *pending_exact = 0;
+
+ /* Address of the place where a forward-jump should go to the end of
+ the containing expression. Each alternative of an `or', except the
+ last, ends with a forward-jump of this sort. */
+
+ char *fixup_jump = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells postfix * where to find the start of its operand. */
+
+ char *laststart = 0;
+
+ /* In processing a repeat, 1 means zero matches is allowed. */
+
+ char zero_times_ok;
+
+ /* In processing a repeat, 1 means many matches is allowed. */
+
+ char many_times_ok;
+
+ /* Address of beginning of regexp, or inside of last \(. */
+
+ char *begalt = b;
+
+ /* In processing an interval, at least this many matches must be made. */
+ int lower_bound;
+
+ /* In processing an interval, at most this many matches can be made. */
+ int upper_bound;
+
+ /* Place in pattern (i.e., the {) to which to go back if the interval
+ is invalid. */
+ char *beg_interval = 0;
+
+ /* Stack of information saved by \( and restored by \).
+ Four stack elements are pushed by each \(:
+ First, the value of b.
+ Second, the value of fixup_jump.
+ Third, the value of regnum.
+ Fourth, the value of begalt. */
+
+ int stackb[40];
+ int *stackp = stackb;
+ int *stacke = stackb + 40;
+ int *stackt;
+
+ /* Counts \('s as they are encountered. Remembered for the matching \),
+ where it becomes the register number to put in the stop_memory
+ command. */
+
+ int regnum = 1;
+
+ bufp->fastmap_accurate = 0;
+
+#ifndef emacs
+#ifndef SYNTAX_TABLE
+ /* Initialize the syntax table. */
+ init_syntax_once();
+#endif
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ bufp->allocated = INIT_BUF_SIZE;
+ if (bufp->buffer)
+ /* EXTEND_BUFFER loses when bufp->allocated is 0. */
+ bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
+ else
+ /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
+ if (!bufp->buffer) goto memory_exhausted;
+ begalt = b = bufp->buffer;
+ }
+
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '$':
+ {
+ char *p1 = p;
+ /* When testing what follows the $,
+ look past the \-constructs that don't consume anything. */
+ if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ while (p1 != pend)
+ {
+ if (*p1 == '\\' && p1 + 1 != pend
+ && (p1[1] == '<' || p1[1] == '>'
+ || p1[1] == '`' || p1[1] == '\''
+#ifdef emacs
+ || p1[1] == '='
+#endif
+ || p1[1] == 'b' || p1[1] == 'B'))
+ p1 += 2;
+ else
+ break;
+ }
+ if (obscure_syntax & RE_TIGHT_VBAR)
+ {
+ if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
+ goto normal_char;
+ /* Make operand of last vbar end before this `$'. */
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
+ fixup_jump = 0;
+ BUFPUSH (endline);
+ break;
+ }
+ /* $ means succeed if at end of line, but only in special contexts.
+ If validly in the middle of a pattern, it is a normal character. */
+
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
+ goto invalid_pattern;
+ if (p1 == pend || *p1 == '\n'
+ || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
+ || (obscure_syntax & RE_NO_BK_PARENS
+ ? *p1 == ')'
+ : *p1 == '\\' && p1[1] == ')')
+ || (obscure_syntax & RE_NO_BK_VBAR
+ ? *p1 == '|'
+ : *p1 == '\\' && p1[1] == '|'))
+ {
+ BUFPUSH (endline);
+ break;
+ }
+ goto normal_char;
+ }
+ case '^':
+ /* ^ means succeed if at beg of line, but only if no preceding
+ pattern. */
+
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
+ goto invalid_pattern;
+ if (laststart && p - 2 >= pattern && p[-2] != '\n'
+ && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ if (obscure_syntax & RE_TIGHT_VBAR)
+ {
+ if (p != pattern + 1
+ && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ BUFPUSH (begline);
+ begalt = b;
+ }
+ else
+ BUFPUSH (begline);
+ break;
+
+ case '+':
+ case '?':
+ if ((obscure_syntax & RE_BK_PLUS_QM)
+ || (obscure_syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern, char not special. */
+ if (!laststart)
+ {
+ if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ goto invalid_pattern;
+ else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+ /* If there is a sequence of repetition chars,
+ collapse it down to just one. */
+ zero_times_ok = 0;
+ many_times_ok = 0;
+ while (1)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+ if (p == pend)
+ break;
+ PATFETCH (c);
+ if (c == '*')
+ ;
+ else if (!(obscure_syntax & RE_BK_PLUS_QM)
+ && (c == '+' || c == '?'))
+ ;
+ else if ((obscure_syntax & RE_BK_PLUS_QM)
+ && c == '\\')
+ {
+ /* int c1; */
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ {
+ /* If more than one repetition is allowed, put in at the
+ end a backward relative jump from b to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump). */
+ GET_BUFFER_SPACE (3);
+ store_jump (b, maybe_finalize_jump, laststart - 3);
+ b += 3; /* Because store_jump put stuff here. */
+ }
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ insert_jump (on_failure_jump, laststart, b + 3, b);
+ pending_exact = 0;
+ b += 3;
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ dummy-failure before the initial on-failure-jump
+ instruction of the loop. This effects a skip over that
+ instruction the first time we hit that loop. */
+ GET_BUFFER_SPACE (6);
+ insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
+ b += 3;
+ }
+ break;
+
+ case '.':
+ laststart = b;
+ BUFPUSH (anychar);
+ break;
+
+ case '[':
+ if (p == pend)
+ goto invalid_pattern;
+ while (b - bufp->buffer
+ > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
+ EXTEND_BUFFER;
+
+ laststart = b;
+ if (*p == '^')
+ {
+ BUFPUSH (charset_not);
+ p++;
+ }
+ else
+ BUFPUSH (charset);
+ p0 = p;
+
+ BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+ /* Clear the whole map */
+ memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
+ SET_LIST_BIT ('\n');
+
+
+ /* Read in characters and ranges, setting map bits. */
+ while (1)
+ {
+ /* Don't translate while fetching, in case it's a range bound.
+ When we set the bit for the character, we translate it. */
+ PATFETCH_RAW (c);
+
+ /* If set, \ escapes characters when inside [...]. */
+ if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
+ {
+ PATFETCH(c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+ if (c == ']')
+ {
+ if (p == p0 + 1)
+ {
+ /* If this is an empty bracket expression. */
+ if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)
+ && p == pend)
+ goto invalid_pattern;
+ }
+ else
+ /* Stop if this isn't merely a ] inside a bracket
+ expression, but rather the end of a bracket
+ expression. */
+ break;
+ }
+ /* Get a range. */
+ if (p[0] == '-' && p[1] != ']')
+ {
+ PATFETCH (c1);
+ /* Don't translate the range bounds while fetching them. */
+ PATFETCH_RAW (c1);
+
+ if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
+ goto invalid_pattern;
+
+ if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)
+ && c1 == '-' && *p != ']')
+ goto invalid_pattern;
+
+ while (c <= c1)
+ {
+ /* Translate each char that's in the range. */
+ if (translate)
+ SET_LIST_BIT (translate[c]);
+ else
+ SET_LIST_BIT (c);
+ c++;
+ }
+ }
+ else if ((obscure_syntax & RE_CHAR_CLASSES)
+ && c == '[' && p[0] == ':')
+ {
+ /* The copy loop below stores up to CHAR_CLASS_MAX_LENGTH
+ characters, and a terminating NUL is then written at
+ str[c1], so the array needs one extra byte. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+ PATFETCH (c);
+ c1 = 0;
+ /* If no ] at end. */
+ if (p == pend)
+ goto invalid_pattern;
+ while (1)
+ {
+ /* Don't translate the ``character class'' characters. */
+ PATFETCH_RAW (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+ if (p == pend
+ || c == ']' /* End of the bracket expression. */
+ || p[0] != ']'
+ || p + 1 == pend
+ || (strcmp (str, "alpha") != 0
+ && strcmp (str, "upper") != 0
+ && strcmp (str, "lower") != 0
+ && strcmp (str, "digit") != 0
+ && strcmp (str, "alnum") != 0
+ && strcmp (str, "xdigit") != 0
+ && strcmp (str, "space") != 0
+ && strcmp (str, "print") != 0
+ && strcmp (str, "punct") != 0
+ && strcmp (str, "graph") != 0
+ && strcmp (str, "cntrl") != 0))
+ {
+ /* Undo the ending character, the letters, and leave
+ the leading : and [ (but set bits for them). */
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ }
+ else
+ {
+ /* The ] at the end of the character class. */
+ PATFETCH (c);
+ if (c != ']')
+ goto invalid_pattern;
+ for (c = 0; c < (1 << BYTEWIDTH); c++)
+ {
+ if ((strcmp (str, "alpha") == 0 && isalpha (c))
+ || (strcmp (str, "upper") == 0 && isupper (c))
+ || (strcmp (str, "lower") == 0 && islower (c))
+ || (strcmp (str, "digit") == 0 && isdigit (c))
+ || (strcmp (str, "alnum") == 0 && isalnum (c))
+ || (strcmp (str, "xdigit") == 0 && isxdigit (c))
+ || (strcmp (str, "space") == 0 && isspace (c))
+ || (strcmp (str, "print") == 0 && isprint (c))
+ || (strcmp (str, "punct") == 0 && ispunct (c))
+ || (strcmp (str, "graph") == 0 && isgraph (c))
+ || (strcmp (str, "cntrl") == 0 && iscntrl (c)))
+ SET_LIST_BIT (c);
+ }
+ }
+ }
+ else if (translate)
+ SET_LIST_BIT (translate[c]);
+ else
+ SET_LIST_BIT (c);
+ }
+
+ /* Discard any character set/class bitmap bytes that are all
+ 0 at the end of the map. Decrement the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ break;
+
+ case '(':
+ if (! (obscure_syntax & RE_NO_BK_PARENS))
+ goto normal_char;
+ else
+ goto handle_open;
+
+ case ')':
+ if (! (obscure_syntax & RE_NO_BK_PARENS))
+ goto normal_char;
+ else
+ goto handle_close;
+
+ case '\n':
+ if (! (obscure_syntax & RE_NEWLINE_OR))
+ goto normal_char;
+ else
+ goto handle_bar;
+
+ case '|':
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ && (! laststart || p == pend))
+ goto invalid_pattern;
+ else if (! (obscure_syntax & RE_NO_BK_VBAR))
+ goto normal_char;
+ else
+ goto handle_bar;
+
+ case '{':
+ if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ && (obscure_syntax & RE_INTERVALS)))
+ goto normal_char;
+ else
+ goto handle_interval;
+
+ case '\\':
+ if (p == pend) goto invalid_pattern;
+ PATFETCH_RAW (c);
+ switch (c)
+ {
+ case '(':
+ if (obscure_syntax & RE_NO_BK_PARENS)
+ goto normal_backsl;
+ handle_open:
+ if (stackp == stacke) goto nesting_too_deep;
+
+ /* Laststart should point to the start_memory that we are about
+ to push (unless the pattern has RE_NREGS or more ('s). */
+ *stackp++ = b - bufp->buffer;
+ if (regnum < RE_NREGS)
+ {
+ BUFPUSH (start_memory);
+ BUFPUSH (regnum);
+ }
+ *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
+ *stackp++ = regnum++;
+ *stackp++ = begalt - bufp->buffer;
+ fixup_jump = 0;
+ laststart = 0;
+ begalt = b;
+ break;
+
+ case ')':
+ if (obscure_syntax & RE_NO_BK_PARENS)
+ goto normal_backsl;
+ handle_close:
+ if (stackp == stackb) goto unmatched_close;
+ begalt = *--stackp + bufp->buffer;
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
+ if (stackp[-1] < RE_NREGS)
+ {
+ BUFPUSH (stop_memory);
+ BUFPUSH (stackp[-1]);
+ }
+ stackp -= 2;
+ fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
+ laststart = *--stackp + bufp->buffer;
+ break;
+
+ case '|':
+ if ((obscure_syntax & RE_LIMITED_OPS)
+ || (obscure_syntax & RE_NO_BK_VBAR))
+ goto normal_backsl;
+ handle_bar:
+ if (obscure_syntax & RE_LIMITED_OPS)
+ goto normal_char;
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (6);
+ insert_jump (on_failure_jump, begalt, b + 6, b);
+ pending_exact = 0;
+ b += 3;
+ /* The alternative before the previous alternative has a
+ jump after it which gets executed if it gets matched.
+ Adjust that jump so it will jump to the previous
+ alternative's analogous jump (put in below, which in
+ turn will jump to the next (if any) alternative's such
+ jump, etc.). The last such jump jumps to the correct
+ final destination. */
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
+
+ /* Leave space for a jump after previous alternative---to be
+ filled in later. */
+ fixup_jump = b;
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+ case '{':
+ if (! (obscure_syntax & RE_INTERVALS)
+ /* Let \{ be a literal. */
+ || ((obscure_syntax & RE_INTERVALS)
+ && (obscure_syntax & RE_NO_BK_CURLY_BRACES))
+ /* If it's the string "\{". */
+ || (p - 2 == pattern && p == pend))
+ goto normal_backsl;
+ handle_interval:
+ beg_interval = p - 1; /* The {. */
+ /* If there is no previous pattern, this isn't an interval. */
+ if (!laststart)
+ {
+ if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ goto invalid_pattern;
+ else
+ goto normal_backsl;
+ }
+ /* It also isn't an interval if not preceded by an re
+ matching a single character or subexpression, or if
+ the current type of intervals can't handle back
+ references and the previous thing is a back reference. */
+ if (! (*laststart == anychar
+ || *laststart == charset
+ || *laststart == charset_not
+ || *laststart == start_memory
+ || (*laststart == exactn && laststart[1] == 1)
+ || (! (obscure_syntax & RE_NO_BK_REFS)
+ && *laststart == duplicate)))
+ {
+ if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ goto normal_char;
+
+ /* Posix extended syntax is handled in previous
+ statement; this is for Posix basic syntax. */
+ if (obscure_syntax & RE_INTERVALS)
+ goto invalid_pattern;
+
+ goto normal_backsl;
+ }
+ lower_bound = -1; /* So can see if are set. */
+ upper_bound = -1;
+ GET_UNSIGNED_NUMBER (lower_bound);
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0)
+ upper_bound = RE_DUP_MAX;
+ }
+ if (upper_bound < 0)
+ upper_bound = lower_bound;
+ if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES))
+ {
+ if (c != '\\')
+ goto invalid_pattern;
+ PATFETCH (c);
+ }
+ if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound
+ || ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ && p != pend && *p == '{'))
+ {
+ if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ goto unfetch_interval;
+ else
+ goto invalid_pattern;
+ }
+
+ /* If upper_bound is zero, don't want to succeed at all;
+ jump from laststart to b + 3, which will be the end of
+ the buffer after this jump is inserted. */
+
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ insert_jump (jump, laststart, b + 3, b);
+ b += 3;
+ }
+
+ /* Otherwise, after lower_bound number of succeeds, jump
+ to after the jump_n which will be inserted at the end
+ of the buffer, and insert that jump_n. */
+ else
+ { /* Set to 5 if only one repetition is allowed and
+ hence no jump_n is inserted at the current end of
+ the buffer; then only space for the succeed_n is
+ needed. Otherwise, need space for both the
+ succeed_n and the jump_n. */
+
+ unsigned slots_needed = upper_bound == 1 ? 5 : 10;
+
+ GET_BUFFER_SPACE (slots_needed);
+ /* Initialize the succeed_n to n, even though it will
+ be set by its attendant set_number_at, because
+ re_compile_fastmap will need to know it. Jump to
+ what the end of buffer will be after inserting
+ this succeed_n and possibly appending a jump_n. */
+ insert_jump_n (succeed_n, laststart, b + slots_needed,
+ b, lower_bound);
+ b += 5; /* Just increment for the succeed_n here. */
+
+ /* More than one repetition is allowed, so put in at
+ the end of the buffer a backward jump from b to the
+ succeed_n we put in above. By the time we've gotten
+ to this jump when matching, we'll have matched once
+ already, so jump back only upper_bound - 1 times. */
+
+ if (upper_bound > 1)
+ {
+ store_jump_n (b, jump_n, laststart, upper_bound - 1);
+ b += 5;
+ /* When hit this when matching, reset the
+ preceding jump_n's n to upper_bound - 1. */
+ BUFPUSH (set_number_at);
+ GET_BUFFER_SPACE (2);
+ STORE_NUMBER_AND_INCR (b, -5);
+ STORE_NUMBER_AND_INCR (b, upper_bound - 1);
+ }
+ /* When hit this when matching, set the succeed_n's n. */
+ GET_BUFFER_SPACE (5);
+ insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
+ b += 5;
+ }
+ pending_exact = 0;
+ beg_interval = 0;
+ break;
+
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ if (beg_interval)
+ p = beg_interval;
+ else
+ {
+ fprintf (stderr,
+ "regex: no interval beginning to which to backtrack.\n");
+ exit (1);
+ }
+
+ beg_interval = 0;
+ PATFETCH (c); /* normal_char expects char in `c'. */
+ goto normal_char;
+ break;
+
+#ifdef emacs
+ case '=':
+ BUFPUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ BUFPUSH (syntaxspec);
+ PATFETCH (c);
+ BUFPUSH (syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ BUFPUSH (notsyntaxspec);
+ PATFETCH (c);
+ BUFPUSH (syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+ case 'w':
+ laststart = b;
+ BUFPUSH (wordchar);
+ break;
+
+ case 'W':
+ laststart = b;
+ BUFPUSH (notwordchar);
+ break;
+
+ case '<':
+ BUFPUSH (wordbeg);
+ break;
+
+ case '>':
+ BUFPUSH (wordend);
+ break;
+
+ case 'b':
+ BUFPUSH (wordbound);
+ break;
+
+ case 'B':
+ BUFPUSH (notwordbound);
+ break;
+
+ case '`':
+ BUFPUSH (begbuf);
+ break;
+
+ case '\'':
+ BUFPUSH (endbuf);
+ break;
+
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (obscure_syntax & RE_NO_BK_REFS)
+ goto normal_char;
+ c1 = c - '0';
+ if (c1 >= regnum)
+ {
+ if (obscure_syntax & RE_NO_EMPTY_BK_REF)
+ goto invalid_pattern;
+ else
+ goto normal_char;
+ }
+ /* Can't back reference to a subexpression if inside of it. */
+ for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
+ if (*stackt == c1)
+ goto normal_char;
+ laststart = b;
+ BUFPUSH (duplicate);
+ BUFPUSH (c1);
+ break;
+
+ case '+':
+ case '?':
+ if (obscure_syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backsl;
+ break;
+
+ default:
+ normal_backsl:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ if (translate) c = translate[c];
+ goto normal_char;
+ }
+ break;
+
+ default:
+ normal_char: /* Expects the character in `c'. */
+ if (!pending_exact || pending_exact + *pending_exact + 1 != b
+ || *pending_exact == 0177 || *p == '*' || *p == '^'
+ || ((obscure_syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : (*p == '+' || *p == '?'))
+ || ((obscure_syntax & RE_INTERVALS)
+ && ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
+ {
+ laststart = b;
+ BUFPUSH (exactn);
+ pending_exact = b;
+ BUFPUSH (0);
+ }
+ BUFPUSH (c);
+ (*pending_exact)++;
+ }
+ }
+
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
+
+ if (stackp != stackb) goto unmatched_open;
+
+ bufp->used = b - bufp->buffer;
+ return 0;
+
+ invalid_pattern:
+ return "Invalid regular expression";
+
+ unmatched_open:
+ return "Unmatched \\(";
+
+ unmatched_close:
+ return "Unmatched \\)";
+
+ end_of_pattern:
+ return "Premature end of regular expression";
+
+ nesting_too_deep:
+ return "Nesting too deep";
+
+ too_big:
+ return "Regular expression too big";
+
+ memory_exhausted:
+ return "Memory exhausted";
+}
+
+
+/* Write a jump instruction at FROM: the opcode byte OPCODE followed by
+   a number (stored with STORE_NUMBER) holding the distance to TO,
+   measured from FROM + 3, i.e. from just past this instruction.  */
+
+static void
+store_jump (from, opcode, to)
+     char *from, *to;
+     int opcode;
+{
+  *from = (char) opcode;
+  STORE_NUMBER (from + 1, to - from - 3);
+}
+
+
+/* Make a three-byte hole at FROM and fill it with a jump to TO whose
+   opcode is OP.  CURRENT_END is the end of the bytes currently in
+   use; everything in [FROM, CURRENT_END) is moved up by three bytes.
+
+   Callers must zero out pending_exact after calling this.  */
+
+static void
+insert_jump (op, from, to, current_end)
+     int op;
+     char *from, *to, *current_end;
+{
+  register char *src;
+  register char *dst = current_end + 3;
+
+  /* Copy from the top down, since source and destination overlap.  */
+  for (src = current_end; src != from; )
+    {
+      --src;
+      --dst;
+      *dst = *src;
+    }
+
+  store_jump (from, op, to);
+}
+
+
+/* Write a counted jump at FROM: the opcode byte OPCODE, then the
+   distance to TO measured from FROM + 3, then the number N (both
+   stored with STORE_NUMBER, at FROM + 1 and FROM + 3 respectively).
+   N is the count the jump uses, say, to decide how many times to
+   jump.
+
+   Callers must zero out pending_exact after calling this.  */
+
+static void
+store_jump_n (from, opcode, to, n)
+     char *from, *to;
+     int opcode;
+     unsigned n;
+{
+  char *count_field = from + 3;
+
+  *from = (char) opcode;
+  STORE_NUMBER (from + 1, to - count_field);
+  STORE_NUMBER (count_field, n);
+}
+
+
+/* Counterpart of insert_jump for jumps that carry an extra number N:
+   make a five-byte hole at FROM and fill it with a jump to TO whose
+   opcode is OP and whose count is N.  CURRENT_END is the end of the
+   bytes currently in use; everything in [FROM, CURRENT_END) is moved
+   up by five bytes.
+
+   Callers must zero out pending_exact after calling this.  */
+
+static void
+insert_jump_n (op, from, to, current_end, n)
+     int op;
+     char *from, *to, *current_end;
+     unsigned n;
+{
+  register char *src;
+  register char *dst = current_end + 5;
+
+  /* Copy from the top down, since source and destination overlap.  */
+  for (src = current_end; src != from; )
+    {
+      --src;
+      --dst;
+      *dst = *src;
+    }
+
+  store_jump_n (from, op, to, n);
+}
+
+
+/* Make a five-byte hole at THERE and fill it with the operation OP
+   followed by the two numbers NUM_1 and NUM_2 (stored with
+   STORE_NUMBER at THERE + 1 and THERE + 3).  CURRENT_END is the end
+   of the bytes currently in use; everything in [THERE, CURRENT_END)
+   is moved up by five bytes.
+
+   Callers must zero out pending_exact after calling this.  */
+
+static void
+insert_op_2 (op, there, current_end, num_1, num_2)
+     int op;
+     char *there, *current_end;
+     int num_1, num_2;
+{
+  register char *src;
+  register char *dst = current_end + 5;
+
+  /* Copy from the top down, since source and destination overlap.  */
+  for (src = current_end; src != there; )
+    {
+      --src;
+      --dst;
+      *dst = *src;
+    }
+
+  *there = (char) op;
+  STORE_NUMBER (there + 1, num_1);
+  STORE_NUMBER (there + 3, num_2);
+}
+
+
+
+/* Given a pattern, compute a fastmap from it. The fastmap records
+ which of the (1 << BYTEWIDTH) possible characters can start a string
+ that matches the pattern. This fastmap is used by re_search to skip
+ quickly over totally implausible text.
+
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+ area as bufp->fastmap.
+ The other components of bufp describe the pattern to be used.
+
+ On return bufp->fastmap_accurate is 1 and bufp->can_be_null is:
+ 1 if some path reaches the end of the pattern without consuming a
+ character, or a `duplicate' (back reference) is encountered;
+ 2 if an `endline' is encountered and it was not already 1;
+ 0 otherwise.
+
+ The compiled pattern is walked one path at a time; the targets of
+ on_failure_jump (and of succeed_n with a zero count) are remembered
+ on a stack of NFAILURES pointers and explored afterwards. Note that
+ pushes onto that stack are not checked against NFAILURES, and that
+ in the NO_ALLOCA configuration the result of malloc is not checked. */
+
+void
+re_compile_fastmap (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ unsigned char *pattern = (unsigned char *) bufp->buffer;
+ int size = bufp->used;
+ register char *fastmap = bufp->fastmap;
+ register unsigned char *p = pattern;
+ register unsigned char *pend = pattern + size;
+ register int j, k;
+ unsigned char *translate = (unsigned char *) bufp->translate;
+ unsigned is_a_succeed_n; /* Set while a succeed_n is handled as an on_failure_jump. */
+
+#ifndef NO_ALLOCA
+ unsigned char *stackb[NFAILURES];
+ unsigned char **stackp = stackb;
+
+#else
+ unsigned char **stackb;
+ unsigned char **stackp;
+ stackb = (unsigned char **) malloc (NFAILURES * sizeof (unsigned char *));
+ stackp = stackb;
+
+#endif /* NO_ALLOCA */
+ memset (fastmap, 0, (1 << BYTEWIDTH));
+ bufp->fastmap_accurate = 1;
+ bufp->can_be_null = 0;
+
+ while (p)
+ {
+ is_a_succeed_n = 0;
+ if (p == pend) /* Reached the end without consuming a character. */
+ {
+ bufp->can_be_null = 1;
+ break;
+ }
+#ifdef SWITCH_ENUM_BUG
+ switch ((int) ((enum regexpcode) *p++))
+#else
+ switch ((enum regexpcode) *p++)
+#endif
+ {
+ case exactn: /* p[0] is the count byte, p[1] the first literal character. */
+ if (translate)
+ fastmap[translate[p[1]]] = 1;
+ else
+ fastmap[p[1]] = 1;
+ break;
+
+ case begline:
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ case begbuf:
+ case endbuf:
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ continue; /* Contributes no starting character; look at the next opcode. */
+
+ case endline:
+ if (translate)
+ fastmap[translate['\n']] = 1;
+ else
+ fastmap['\n'] = 1;
+
+ if (bufp->can_be_null != 1)
+ bufp->can_be_null = 2;
+ break;
+
+ case jump_n:
+ case finalize_jump:
+ case maybe_finalize_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+ if (j > 0)
+ continue;
+ /* Jump backward reached implies we just went through
+ the body of a loop and matched nothing.
+ Opcode jumped to should be an on_failure_jump.
+ Just treat it like an ordinary jump.
+ For a * loop, it has pushed its failure point already;
+ If so, discard that as redundant. */
+
+ if ((enum regexpcode) *p != on_failure_jump
+ && (enum regexpcode) *p != succeed_n)
+ continue;
+ p++;
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+ if (stackp != stackb && *stackp == p)
+ stackp--;
+ continue;
+
+ case on_failure_jump:
+ handle_on_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ *++stackp = p + j; /* Remember the failure target as another path to explore; not bounds-checked. */
+ if (is_a_succeed_n)
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ continue;
+
+ case succeed_n:
+ is_a_succeed_n = 1;
+ /* Get to the number of times to succeed. */
+ p += 2;
+ /* Increment p past the n for when k != 0. */
+ EXTRACT_NUMBER_AND_INCR (k, p);
+ if (k == 0)
+ {
+ p -= 4; /* Back to just after the opcode, so it is re-read as an on_failure_jump. */
+ goto handle_on_failure_jump;
+ }
+ continue;
+
+ case set_number_at:
+ p += 4; /* Skip the two numbers that follow the opcode. */
+ continue;
+
+ case start_memory:
+ case stop_memory:
+ p++; /* Skip the register number. */
+ continue;
+
+ case duplicate:
+ bufp->can_be_null = 1;
+ fastmap['\n'] = 1; /* Falls through to anychar to set every other entry. */
+ case anychar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (j != '\n')
+ fastmap[j] = 1;
+ if (bufp->can_be_null)
+ {
+ FREE_AND_RETURN_VOID(stackb);
+ }
+ /* Don't return; check the alternative paths
+ so we can set can_be_null if appropriate. */
+ break;
+
+ case wordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == Sword)
+ fastmap[j] = 1;
+ break;
+
+ case notwordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != Sword)
+ fastmap[j] = 1;
+ break;
+
+#ifdef emacs
+ case syntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+ case notsyntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+#else /* not emacs */
+ case syntaxspec:
+ case notsyntaxspec:
+ break;
+#endif /* not emacs */
+
+ case charset: /* *p is the bitmap length in bytes; the bitmap follows it. */
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+ {
+ if (translate)
+ fastmap[translate[j]] = 1;
+ else
+ fastmap[j] = 1;
+ }
+ break;
+
+ case charset_not:
+ /* Chars beyond end of map must be allowed */
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+ if (translate)
+ fastmap[translate[j]] = 1;
+ else
+ fastmap[j] = 1;
+
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+ {
+ if (translate)
+ fastmap[translate[j]] = 1;
+ else
+ fastmap[j] = 1;
+ }
+ break;
+
+ case unused: /* pacify gcc -Wall */
+ break;
+ }
+
+ /* Get here means we have successfully found the possible starting
+ characters of one path of the pattern. We need not follow this
+ path any farther. Instead, look at the next alternative
+ remembered in the stack. */
+ if (stackp != stackb)
+ p = *stackp--;
+ else
+ break;
+ }
+ FREE_AND_RETURN_VOID(stackb);
+}
+
+
+
+/* Single-string front end to re_search_2: searches the SIZE bytes at
+   STRING, with no first string, and with SIZE passed as the stopping
+   index, so the caller cannot say where to stop matching.  */
+
+int
+re_search (pbufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *pbufp;
+     char *string;
+     int size, startpos, range;
+     struct re_registers *regs;
+{
+  int found;
+
+  /* The whole text is handed over as re_search_2's second string.  */
+  found = re_search_2 (pbufp, (char *) 0, 0, string, size,
+                       startpos, range, regs, size);
+  return found;
+}
+
+
+/* Using the compiled pattern in PBUFP->buffer, first tries to match the
+ virtual concatenation of STRING1 and STRING2, starting first at index
+ STARTPOS, then at STARTPOS + 1, and so on. RANGE is the number of
+ places to try before giving up. If RANGE is negative, it searches
+ backwards, i.e., the starting positions tried are STARTPOS, STARTPOS
+ - 1, etc. STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.
+ In REGS, return the indices of the virtual concatenation of STRING1
+ and STRING2 that matched the entire PBUFP->buffer and its contained
+ subexpressions. Do not consider matching one past the index MSTOP in
+ the virtual concatenation of STRING1 and STRING2.
+
+ If PBUFP->fastmap is non-null and not marked accurate, it is
+ recomputed with re_compile_fastmap before searching.
+
+ The value returned is the position in the strings at which the match
+ was found, or -1 if no match was found (also when STARTPOS lies
+ outside the virtual concatenation), or -2 if error (such as
+ failure stack overflow). */
+
+int
+re_search_2 (pbufp, string1, size1, string2, size2, startpos, range,
+ regs, mstop)
+ struct re_pattern_buffer *pbufp;
+ char *string1, *string2;
+ int size1, size2;
+ int startpos;
+ register int range;
+ struct re_registers *regs;
+ int mstop;
+{
+ register char *fastmap = pbufp->fastmap;
+ register unsigned char *translate = (unsigned char *) pbufp->translate;
+ int total_size = size1 + size2;
+ int endpos = startpos + range; /* Last starting position RANGE would reach. */
+ int val;
+
+ /* Check for out-of-range starting position. */
+ if (startpos < 0 || startpos > total_size)
+ return -1;
+
+ /* Fix up range if it would eventually take startpos outside of the
+ virtual concatenation of string1 and string2. */
+ if (endpos < -1)
+ range = -1 - startpos;
+ else if (endpos > total_size)
+ range = total_size - startpos;
+
+ /* Update the fastmap now if not correct already. */
+ if (fastmap && !pbufp->fastmap_accurate)
+ re_compile_fastmap (pbufp);
+
+ /* If the search isn't to be a backwards one, don't waste time in a
+ long search for a pattern that says it is anchored. */
+ if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
+ && range > 0)
+ {
+ if (startpos > 0)
+ return -1;
+ else
+ range = 1;
+ }
+
+ while (1)
+ {
+ /* If a fastmap is supplied, skip quickly over characters that
+ cannot possibly be the start of a match. Note, however, that
+ if the pattern can possibly match the null string, we must
+ test it at each starting point so that we take the first null
+ string we get. */
+
+ if (fastmap && startpos < total_size && pbufp->can_be_null != 1)
+ {
+ if (range > 0) /* Searching forwards. */
+ {
+ register int lim = 0; /* The scan stops once RANGE has dropped to this value. */
+ register unsigned char *p;
+ int irange = range;
+ if (startpos < size1 && startpos + range >= size1)
+ lim = range - (size1 - startpos); /* Do not scan past the end of string1. */
+
+ p = ((unsigned char *)
+ &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
+
+ while (range > lim && !fastmap[translate
+ ? translate[*p++]
+ : *p++])
+ range--;
+ startpos += irange - range; /* Advance by the number of characters skipped. */
+ }
+ else /* Searching backwards. */
+ {
+ register unsigned char c;
+
+ if (string1 == 0 || startpos >= size1)
+ c = string2[startpos - size1];
+ else
+ c = string1[startpos];
+
+ c &= 0xff;
+ if (translate ? !fastmap[translate[c]] : !fastmap[c])
+ goto advance; /* Cannot start a match here; try the next position. */
+ }
+ }
+
+ if (range >= 0 && startpos == total_size
+ && fastmap && pbufp->can_be_null == 0)
+ return -1; /* At the very end and the pattern cannot match the empty string. */
+
+ val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
+ regs, mstop);
+ if (val >= 0)
+ return startpos;
+ if (val == -2)
+ return -2;
+
+#ifndef NO_ALLOCA
+#ifdef C_ALLOCA
+ alloca (0);
+#endif /* C_ALLOCA */
+
+#endif /* NO_ALLOCA */
+ advance:
+ if (!range) /* Every permitted starting position has been tried. */
+ break;
+ else if (range > 0)
+ {
+ range--;
+ startpos++;
+ }
+ else
+ {
+ range++;
+ startpos--;
+ }
+ }
+ return -1;
+}
+
+
+
+#ifndef emacs /* emacs never uses this. */
+/* Single-string front end to re_match_2: matches at POS within the
+   SIZE bytes at STRING, with no first string and with SIZE as the
+   stopping index.  Returns whatever re_match_2 returns.  */
+int
+re_match (pbufp, string, size, pos, regs)
+     struct re_pattern_buffer *pbufp;
+     char *string;
+     int size, pos;
+     struct re_registers *regs;
+{
+  int result;
+
+  /* The whole text is handed over as re_match_2's second string.  */
+  result = re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size);
+  return result;
+}
+#endif /* not emacs */
+
+
+/* The following are used for re_match_2, defined below: */
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed.
+ PUSH_FAILURE_POINT refuses to grow the failure stack once it holds
+ more than re_max_failures * MAX_NUM_FAILURE_ITEMS slots. */
+
+int re_max_failures = 2000;
+
+/* Routine used by re_match_2. */
+/* static int memcmp_translate (); *//* already declared */
+
+
+/* Structure and accessing macros used in re_match_2: */
+
+struct register_info /* Two one-bit flags; PUSH_FAILURE_POINT indexes an array of these (reg_info) by register number. */
+{
+ unsigned is_active : 1; /* Read and written through IS_ACTIVE; maintained by code outside this section. */
+ unsigned matched_something : 1; /* Read and written through MATCHED_SOMETHING; maintained by code outside this section. */
+};
+
+#define IS_ACTIVE(R) ((R).is_active)
+#define MATCHED_SOMETHING(R) ((R).matched_something)
+
+
+/* Macros used by re_match_2: */
+
+
+/* Number of stack slots saved per register, i.e., regstart, regend,
+ and reg_info. */
+
+#define NUM_REG_ITEMS 3
+
+/* We push at most this many things on the stack whenever we
+ fail. The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
+ arguments to the PUSH_FAILURE_POINT macro. This is used as an upper
+ bound: PUSH_FAILURE_POINT saves at most RE_NREGS - 1 registers, so
+ the bound also covers the extra slot it uses for the register count. */
+
+#define MAX_NUM_FAILURE_ITEMS (RE_NREGS * NUM_REG_ITEMS + 2)
+
+
+/* Slots needed for the saved registers plus PATTERN_PLACE and
+ STRING_PLACE. Expands in terms of `last_used_reg', so it is only
+ usable where that name is in scope (inside PUSH_FAILURE_POINT).
+ Note that it does not count the slot PUSH_FAILURE_POINT uses for the
+ number of registers saved. */
+
+#define NUM_FAILURE_ITEMS (last_used_reg * NUM_REG_ITEMS + 2)
+
+
+/* This pushes most of the information about the current state we will want
+ if we ever fail back to it.
+
+ Pushed, in order: for each register 1..last_used_reg its regstart,
+ its regend and the address of its reg_info entry; then last_used_reg
+ itself; then PATTERN_PLACE and STRING_PLACE. That is
+ last_used_reg * NUM_REG_ITEMS + 3 slots, one more than
+ NUM_FAILURE_ITEMS, so the room check below asks for
+ NUM_FAILURE_ITEMS + 1.
+
+ Uses regstart, regend, reg_info, stackb, stackp and stacke from the
+ expanding function. If the stack already holds more than
+ re_max_failures * MAX_NUM_FAILURE_ITEMS slots, FREE_AND_RETURN is
+ invoked with -2 instead of growing it. */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place) \
+ { \
+ long last_used_reg, this_reg; \
+ \
+ /* Find out how many registers are active or have been matched. \
+ (Aside from register zero, which is only set at the end.) */ \
+ for (last_used_reg = RE_NREGS - 1; last_used_reg > 0; last_used_reg--)\
+ if (regstart[last_used_reg] != (unsigned char *)(-1L)) \
+ break; \
+ \
+ /* Need room for the register count slot as well. */ \
+ if (stacke - stackp < NUM_FAILURE_ITEMS + 1) \
+ { \
+ unsigned char **stackx; \
+ unsigned int len = stacke - stackb; \
+ if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS) \
+ { \
+ FREE_AND_RETURN(stackb,(-2)); \
+ } \
+ \
+ /* Roughly double the size of the stack. */ \
+ stackx = DOUBLE_STACK(stackx,stackb,len); \
+ /* Rearrange the pointers. */ \
+ stackp = stackx + (stackp - stackb); \
+ stackb = stackx; \
+ stacke = stackb + 2 * len; \
+ } \
+ \
+ /* Now push the info for each of those registers. */ \
+ for (this_reg = 1; this_reg <= last_used_reg; this_reg++) \
+ { \
+ *stackp++ = regstart[this_reg]; \
+ *stackp++ = regend[this_reg]; \
+ *stackp++ = (unsigned char *) &reg_info[this_reg]; \
+ } \
+ \
+ /* Push how many registers we saved. */ \
+ *stackp++ = (unsigned char *) last_used_reg; \
+ \
+ *stackp++ = pattern_place; \
+ *stackp++ = string_place; \
+ }
+
+
+/* This pops what PUSH_FAILURE_POINT pushes: the string and pattern
+   places, the saved-register count, and NUM_REG_ITEMS slots for each
+   saved register.  Nothing is restored; the entries are just discarded.
+
+   The count is stored on the stack as a `long' disguised as a pointer,
+   so it is read back through `long' too; converting the pointer to
+   `int' is not portable where pointers are wider than `int'.  Uses
+   `stackp' from the scope in which it is expanded.  */
+
+#define POP_FAILURE_POINT() \
+ { \
+ long temp; \
+ stackp -= 2; /* Remove failure points. */ \
+ temp = (long) *--stackp; /* How many regs pushed. */ \
+ temp *= NUM_REG_ITEMS; /* How much to take off the stack. */ \
+ stackp -= temp; /* Remove the register info. */ \
+ }
+
+
+/* Nonzero when the end of the segment being scanned, `dend', is
+   end_match_1 -- which is how the matcher records that it is still
+   working in string1.  */
+#define MATCHING_IN_FIRST_STRING (end_match_1 == dend)
+
+/* Nonzero if a first string exists and PTR lies within it, the position
+   just past its last character included.  */
+
+#define IS_IN_FIRST_STRING(ptr) \
+  (size1 != 0 && (ptr) >= string1 && (ptr) <= string1 + size1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary. */
+
+#define PREFETCH \
+ while (d == dend) \
+ { \
+ /* end of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* end of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Call this when something has been matched.  Every register's
+   matched_something flag is made equal to its is_active flag: the
+   subexpressions we are currently inside get marked as having matched,
+   and the flag is cleared for all the others.  Uses `reg_info' from the
+   scope in which it is expanded.  */
+#define SET_REGS_MATCHED \
+  { \
+    unsigned reg_idx = 0; \
+    while (reg_idx < RE_NREGS) \
+      { \
+        MATCHED_SOMETHING (reg_info[reg_idx]) \
+          = IS_ACTIVE (reg_info[reg_idx]) ? 1 : 0; \
+        reg_idx++; \
+      } \
+  }
+
+/* Position tests on the virtual concatenation of string1 and string2.
+   When there is only one string it has been put in string2, so the
+   beginning is string1 only if size1 is nonzero; an empty string2 means
+   there is no text at all, which counts as being at the beginning.  */
+
+#define AT_STRINGS_BEG (!size2 || d == (size1 ? string1 : string2))
+#define AT_STRINGS_END (end2 == d)
+
+#define AT_WORD_BOUNDARY \
+  (AT_STRINGS_BEG || AT_STRINGS_END || IS_A_LETTER (d) != IS_A_LETTER (d - 1))
+
+/* Nonzero if the character at D has word syntax.  Two positions need
+   special care:
+   1) just past the end of string1, where the character to look at is the
+      first one of string2;
+   2) just before the beginning of string2, where it is the last one of
+      string1; this assumes there is a string1, so use it together with
+      AT_STRINGS_BEG.  */
+#define IS_A_LETTER(d) \
+  (Sword == SYNTAX ((d) == end1 ? *string2 \
+                    : (d) == string2 - 1 ? *(end1 - 1) \
+                    : *(d)))
+
+
+/* Match the pattern described by PBUFP against the virtual
+ concatenation of STRING1 and STRING2, which are of SIZE1 and SIZE2,
+ respectively. Start the match at index POS in the virtual
+ concatenation of STRING1 and STRING2. In REGS, return the indices of
+ the virtual concatenation of STRING1 and STRING2 that matched the
+ entire PBUFP->buffer and its contained subexpressions. Do not
+ consider matching one past the index MSTOP in the virtual
+ concatenation of STRING1 and STRING2.
+
+ If pbufp->fastmap is nonzero, then it had better be up to date.
+
+ The reason that the data to match are specified as two components
+ which are to be regarded as concatenated is so this function can be
+ used directly on the contents of an Emacs buffer.
+
+ REGS may be null, in which case no register indices are stored.
+
+ -1 is returned if there is no match. -2 is returned if there is an
+ error (such as match stack overflow, or no memory for the match stack
+ when NO_ALLOCA is defined). Otherwise the value is the length of the
+ substring which was matched.
+
+ Every pattern command that matches a character advances past it; this
+ includes the non-Emacs `wordchar' and `notwordchar' commands. */
+
+int
+re_match_2 (pbufp, string1_arg, size1, string2_arg, size2, pos, regs, mstop)
+ struct re_pattern_buffer *pbufp;
+ char *string1_arg, *string2_arg;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int mstop;
+{
+ register unsigned char *p = (unsigned char *) pbufp->buffer;
+
+ /* Pointer to beyond end of buffer. */
+ register unsigned char *pend = p + pbufp->used;
+
+ unsigned char *string1 = (unsigned char *) string1_arg;
+ unsigned char *string2 = (unsigned char *) string2_arg;
+ unsigned char *end1; /* Just past end of first string. */
+ unsigned char *end2; /* Just past end of second string. */
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ unsigned char *end_match_1, *end_match_2;
+
+ register unsigned char *d, *dend;
+ register int mcnt; /* Multipurpose. */
+ unsigned char *translate = (unsigned char *) pbufp->translate;
+ unsigned is_a_jump_n = 0;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ restart, regend, and reg_info for all registers corresponding to the
+ subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where to
+ resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is a
+ ``dummy''; if a failure happens and the failure point is a dummy, it
+ gets discarded and the next next one is tried. */
+
+#ifndef NO_ALLOCA
+ unsigned char *initial_stack[MAX_NUM_FAILURE_ITEMS * NFAILURES];
+#endif
+ unsigned char **stackb;
+ unsigned char **stackp;
+ unsigned char **stacke;
+
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the pattern we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+
+ unsigned char *regstart[RE_NREGS];
+ unsigned char *regend[RE_NREGS];
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+
+ struct register_info reg_info[RE_NREGS];
+
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+
+ unsigned best_regs_set = 0;
+ unsigned char *best_regstart[RE_NREGS];
+ unsigned char *best_regend[RE_NREGS];
+
+ /* Initialize the stack. */
+#ifdef NO_ALLOCA
+ stackb = (unsigned char **) malloc (MAX_NUM_FAILURE_ITEMS * NFAILURES * sizeof (char *));
+ /* Without a failure stack nothing can be matched. -2 is the error
+ return; nothing has been allocated, so there is nothing to free. */
+ if (stackb == 0)
+ return -2;
+#else
+ stackb = initial_stack;
+#endif
+ stackp = stackb;
+ stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
+
+#ifdef DEBUG_REGEX
+ /* The strings need not be null-terminated, and STRING1_ARG is a null
+ pointer when called from re_match, so print by length. */
+ fprintf (stderr, "Entering re_match_2(%.*s%.*s)\n",
+ size1, string1_arg ? string1_arg : "",
+ size2, string2_arg ? string2_arg : "");
+#endif
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ \( or ( and \) or ) has been seen for. Also set all registers to
+ inactive and mark them as not having matched anything or ever
+ failed. */
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt] = (unsigned char *) (-1L);
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ if (regs)
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+
+ /* Set up pointers to ends of strings.
+ Don't allow the second string to be empty unless both are empty. */
+ if (size2 == 0)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (mstop <= size1)
+ {
+ end_match_1 = string1 + mstop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + mstop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data. `dend'
+ is the end of the input string that `d' points within. `d' is
+ advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal string2. */
+
+ if (size1 != 0 && pos <= size1)
+ d = string1 + pos, dend = end_match_1;
+ else
+ d = string2 + pos - size1, dend = end_match_2;
+
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if match is complete, or it drops through if match fails
+ at this starting point in the input data. */
+
+ while (1)
+ {
+#ifdef DEBUG_REGEX
+ /* `p' may equal `pend' here, in which case there is no command byte
+ to show; the pointer difference is narrowed to match `%d'. */
+ fprintf (stderr,
+ "regex loop(%d): matching 0x%02x\n",
+ (int) (p - (unsigned char *) pbufp->buffer),
+ p < pend ? *p : 0);
+#endif
+ is_a_jump_n = 0;
+ /* End of pattern means we might have succeeded. */
+ if (p == pend)
+ {
+ /* If not end of string, try backtracking. Otherwise done. */
+ if (d != end_match_2)
+ {
+ if (stackp != stackb)
+ {
+ /* More failure points to try. */
+
+ unsigned in_same_string =
+ IS_IN_FIRST_STRING (best_regend[0])
+ == MATCHING_IN_FIRST_STRING;
+
+ /* If exceeds best match so far, save it. */
+ if (! best_regs_set
+ || (in_same_string && d > best_regend[0])
+ || (! in_same_string && ! MATCHING_IN_FIRST_STRING))
+ {
+ best_regs_set = 1;
+ best_regend[0] = d; /* Never use regstart[0]. */
+
+ for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+ /* If no failure points, don't restore garbage. */
+ else if (best_regs_set)
+ {
+ restore_best_regs:
+ /* Restore best match. */
+ d = best_regend[0];
+
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ }
+
+ /* If caller wants register contents data back, convert it
+ to indices. */
+ if (regs)
+ {
+ regs->start[0] = pos;
+ if (MATCHING_IN_FIRST_STRING)
+ regs->end[0] = d - string1;
+ else
+ regs->end[0] = d - string2 + size1;
+ for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
+ {
+ if (regend[mcnt] == (unsigned char *)(-1L))
+ {
+ regs->start[mcnt] = -1;
+ regs->end[mcnt] = -1;
+ continue;
+ }
+ if (IS_IN_FIRST_STRING (regstart[mcnt]))
+ regs->start[mcnt] = regstart[mcnt] - string1;
+ else
+ regs->start[mcnt] = regstart[mcnt] - string2 + size1;
+
+ if (IS_IN_FIRST_STRING (regend[mcnt]))
+ regs->end[mcnt] = regend[mcnt] - string1;
+ else
+ regs->end[mcnt] = regend[mcnt] - string2 + size1;
+ }
+ }
+ FREE_AND_RETURN(stackb,
+ (d - pos - (MATCHING_IN_FIRST_STRING ?
+ string1 :
+ string2 - size1)));
+ }
+
+ /* Otherwise match next pattern command. */
+#ifdef SWITCH_ENUM_BUG
+ switch ((int) ((enum regexpcode) *p++))
+#else
+ switch ((enum regexpcode) *p++)
+#endif
+ {
+
+ /* \( [or `(', as appropriate] is represented by start_memory,
+ \) by stop_memory. Both of those commands are followed by
+ a register number in the next byte. The text matched
+ within the \( and \) is recorded under that number. */
+ case start_memory:
+ regstart[*p] = d;
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+ p++;
+ break;
+
+ case stop_memory:
+ regend[*p] = d;
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* If just failed to match something this time around with a sub-
+ expression that's in a loop, try to force exit from the loop. */
+ if ((! MATCHED_SOMETHING (reg_info[*p])
+ || (enum regexpcode) p[-3] == start_memory)
+ && (p + 1) != pend)
+ {
+ register unsigned char *p2 = p + 1;
+ mcnt = 0;
+ switch (*p2++)
+ {
+ case jump_n:
+ is_a_jump_n = 1;
+ case finalize_jump:
+ case maybe_finalize_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p2);
+ if (is_a_jump_n)
+ p2 += 2;
+ break;
+ }
+ p2 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump, exit from the loop by forcing a
+ failure after pushing on the stack the on_failure_jump's
+ jump in the pattern, and d. */
+ if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump)
+ {
+ EXTRACT_NUMBER_AND_INCR (mcnt, p2);
+ PUSH_FAILURE_POINT (p2 + mcnt, d);
+ goto fail;
+ }
+ }
+ p++;
+ break;
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ int regno = *p++; /* Get which register to match against */
+ register unsigned char *d2, *dend2;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((IS_IN_FIRST_STRING (regstart[regno])
+ == IS_IN_FIRST_STRING (regend[regno]))
+ ? regend[regno] : end_match_1);
+ while (1)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+ d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH;
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? memcmp_translate (d, d2, mcnt, translate)
+ : memcmp ((char *)d, (char *)d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+ }
+ }
+ break;
+
+ case anychar:
+ PREFETCH; /* Fetch a data character. */
+ /* Match anything but a newline, maybe even a null. */
+ if ((translate ? translate[*d] : *d) == '\n'
+ || ((obscure_syntax & RE_DOT_NOT_NULL)
+ && (translate ? translate[*d] : *d) == '\000'))
+ goto fail;
+ SET_REGS_MATCHED;
+ d++;
+ break;
+
+ case charset:
+ case charset_not:
+ {
+ int not = 0; /* Nonzero for charset_not. */
+ register int c;
+ if (*(p - 1) == (unsigned char) charset_not)
+ not = 1;
+
+ PREFETCH; /* Fetch a data character. */
+
+ if (translate)
+ c = translate[*d];
+ else
+ c = *d;
+
+ if (c < *p * BYTEWIDTH
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+ SET_REGS_MATCHED;
+ d++;
+ break;
+ }
+
+ case begline:
+ if ((size1 != 0 && d == string1)
+ || (size1 == 0 && size2 != 0 && d == string2)
+ || (d && d[-1] == '\n')
+ || (size1 == 0 && size2 == 0))
+ break;
+ else
+ goto fail;
+
+ case endline:
+ if (d == end2
+ || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
+ break;
+ goto fail;
+
+ /* `or' constructs are handled by starting each alternative with
+ an on_failure_jump that points to the start of the next
+ alternative. Each alternative except the last ends with a
+ jump to the joining point. (Actually, each jump except for
+ the last one really jumps to the following jump, because
+ tensioning the jumps is a hassle.) */
+
+ /* The start of a stupid repeat has an on_failure_jump that points
+ past the end of the repeat text. This makes a failure point so
+ that on failure to match a repetition, matching restarts past
+ as many repetitions have been found with no way to fail and
+ look for another one. */
+
+ /* A smart repeat is similar but loops back to the on_failure_jump
+ so that each repetition makes another failure point. */
+
+ case on_failure_jump:
+ on_failure:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ PUSH_FAILURE_POINT (p + mcnt, d);
+ break;
+
+ /* The end of a smart repeat has a maybe_finalize_jump back.
+ Change it either to a finalize_jump or an ordinary jump. */
+ case maybe_finalize_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ {
+ register unsigned char *p2 = p;
+ /* Compare what follows with the beginning of the repeat.
+ If we can establish that there is nothing that they would
+ both match, we can change to finalize_jump. */
+ while (p2 + 1 != pend
+ && (*p2 == (unsigned char) stop_memory
+ || *p2 == (unsigned char) start_memory))
+ p2 += 2; /* Skip over reg number. */
+ if (p2 == pend)
+ p[-3] = (unsigned char) finalize_jump;
+ else if (*p2 == (unsigned char) exactn
+ || *p2 == (unsigned char) endline)
+ {
+ register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
+ register unsigned char *p1 = p + mcnt;
+ /* p1[0] ... p1[2] are an on_failure_jump.
+ Examine what follows that. */
+ if (p1[3] == (unsigned char) exactn && p1[5] != c)
+ p[-3] = (unsigned char) finalize_jump;
+ else if (p1[3] == (unsigned char) charset
+ || p1[3] == (unsigned char) charset_not)
+ {
+ int not = p1[3] == (unsigned char) charset_not;
+ if (c < p1[4] * BYTEWIDTH
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+ /* `not' is 1 if c would match. */
+ /* That means it is not safe to finalize. */
+ if (!not)
+ p[-3] = (unsigned char) finalize_jump;
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if (p[-1] != (unsigned char) finalize_jump)
+ {
+ p[-1] = (unsigned char) jump;
+ goto nofinalize;
+ }
+ /* Note fall through. */
+
+ /* The end of a stupid repeat has a finalize_jump back to the
+ start, where another failure point will be made which will
+ point to after all the repetitions found so far. */
+
+ /* Take off failure points put on by matching on_failure_jump
+ because didn't fail. Also remove the register information
+ put on by the on_failure_jump. */
+ case finalize_jump:
+ POP_FAILURE_POINT ();
+ /* Note fall through. */
+
+ /* Jump without taking off any failure points. */
+ case jump:
+ nofinalize:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p += mcnt;
+ break;
+
+ case dummy_failure_jump:
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at finalize_jump. We will end up at
+ finalize_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for finalize_jump to pop. */
+ PUSH_FAILURE_POINT (0, 0);
+ goto nofinalize;
+
+
+ /* Have to succeed matching what follows at least n times. Then
+ just handle like an on_failure_jump. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+ }
+ else
+ {
+ /* The count has run out: blank it and behave like a plain
+ on_failure_jump from now on. */
+ p[2] = unused;
+ p[3] = unused;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER(p + 2, mcnt);
+ goto nofinalize; /* Do the jump without taking off
+ any failure points. */
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ register unsigned char *p1;
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case unused:
+ break;
+
+ case wordbound:
+ if (AT_WORD_BOUNDARY)
+ break;
+ goto fail;
+
+ case notwordbound:
+ if (AT_WORD_BOUNDARY)
+ goto fail;
+ break;
+
+ case wordbeg:
+ if (IS_A_LETTER (d) && (!IS_A_LETTER (d - 1) || AT_STRINGS_BEG))
+ break;
+ goto fail;
+
+ case wordend:
+ /* Have to check if AT_STRINGS_BEG before looking at d - 1. */
+ if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1)
+ && (!IS_A_LETTER (d) || AT_STRINGS_END))
+ break;
+ goto fail;
+
+#ifdef emacs
+ case before_dot:
+ if (PTR_CHAR_POS (d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ if (PTR_CHAR_POS (d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ if (PTR_CHAR_POS (d) <= point)
+ goto fail;
+ break;
+
+ case wordchar:
+ mcnt = (int) Sword;
+ goto matchsyntax;
+
+ case syntaxspec:
+ mcnt = *p++;
+ matchsyntax:
+ PREFETCH;
+ if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
+ SET_REGS_MATCHED;
+ break;
+
+ case notwordchar:
+ mcnt = (int) Sword;
+ goto matchnotsyntax;
+
+ case notsyntaxspec:
+ mcnt = *p++;
+ matchnotsyntax:
+ PREFETCH;
+ if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
+ SET_REGS_MATCHED;
+ break;
+
+#else /* not emacs */
+
+ /* These two match exactly one character, so, like the Emacs
+ versions above, they must step past it; otherwise a repeated
+ \w or \W would keep matching at the same position. */
+ case wordchar:
+ PREFETCH;
+ if (!IS_A_LETTER (d))
+ goto fail;
+ SET_REGS_MATCHED;
+ d++;
+ break;
+
+ case notwordchar:
+ PREFETCH;
+ if (IS_A_LETTER (d))
+ goto fail;
+ SET_REGS_MATCHED;
+ d++;
+ break;
+
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ case syntaxspec:
+ case notsyntaxspec:
+ break;
+
+#endif /* not emacs */
+
+ case begbuf:
+ if (AT_STRINGS_BEG)
+ break;
+ goto fail;
+
+ case endbuf:
+ if (AT_STRINGS_END)
+ break;
+ goto fail;
+
+ case exactn:
+ /* Match the next few pattern characters exactly.
+ mcnt is how many characters to match. */
+ mcnt = *p++;
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH;
+ if (translate[*d++] != *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH;
+ if (*d++ != *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED;
+ break;
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+ /* Jump here if any matching operation fails. */
+ fail:
+ if (stackp != stackb)
+ /* A restart point is known. Restart there and pop it. */
+ {
+ short last_used_reg, this_reg;
+
+ /* If this failure point is from a dummy_failure_point, just
+ skip it. */
+ if (!stackp[-2])
+ {
+ POP_FAILURE_POINT ();
+ goto fail;
+ }
+
+ d = *--stackp;
+ p = *--stackp;
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ /* Restore register info. */
+ last_used_reg = (long) *--stackp;
+
+ /* Make the ones that weren't saved -1 or 0 again. */
+ for (this_reg = RE_NREGS - 1; this_reg > last_used_reg; this_reg--)
+ {
+ regend[this_reg] = (unsigned char *) (-1L);
+ regstart[this_reg] = (unsigned char *) (-1L);
+ IS_ACTIVE (reg_info[this_reg]) = 0;
+ MATCHED_SOMETHING (reg_info[this_reg]) = 0;
+ }
+
+ /* And restore the rest from the stack. */
+ for ( ; this_reg > 0; this_reg--)
+ {
+ reg_info[this_reg] = *(struct register_info *) *--stackp;
+ regend[this_reg] = *--stackp;
+ regstart[this_reg] = *--stackp;
+ }
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ }
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_AND_RETURN(stackb,(-1)); /* Failure to match. */
+}
+
+
+/* Compare the first LEN bytes of S1 and S2 after mapping each byte
+   through TRANSLATE.  Returns 0 if all of them are equal under the
+   mapping (in particular when LEN is 0) and 1 at the first pair that
+   differs; unlike memcmp, the result says nothing about ordering.  */
+static int
+memcmp_translate (s1, s2, len, translate)
+     unsigned char *s1, *s2;
+     register int len;
+     unsigned char *translate;
+{
+  register int i;
+
+  for (i = 0; i != len; i++)
+    {
+      if (translate[s1[i]] != translate[s2[i]])
+        return 1;
+    }
+  return 0;
+}
+
+
+
+/* Entry points compatible with 4.2 BSD regex library. */
+
+#if !defined(emacs) && !defined(GAWK)
+
+static struct re_pattern_buffer re_comp_buf;
+
+/* Compile S into the file-local pattern buffer used by re_exec.
+   With a null S nothing is compiled: the result is 0 if a pattern buffer
+   has been set up by an earlier call and an error string otherwise.
+   On the first real call the buffer (200 bytes) and a fastmap of
+   1 << BYTEWIDTH bytes are allocated.  Returns 0 on success, else a
+   string describing the problem; for compilation this is whatever
+   re_compile_pattern returns.  */
+char *
+re_comp (s)
+     char *s;
+{
+  if (s == 0)
+    return re_comp_buf.buffer ? (char *) 0 : "No previous regular expression";
+
+  if (re_comp_buf.buffer == 0)
+    {
+      re_comp_buf.buffer = (char *) malloc (200);
+      if (re_comp_buf.buffer == 0)
+        return "Memory exhausted";
+      re_comp_buf.allocated = 200;
+
+      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (re_comp_buf.fastmap == 0)
+        return "Memory exhausted";
+    }
+
+  return re_compile_pattern (s, strlen (s), &re_comp_buf);
+}
+
+/* Search the whole of S for the pattern last compiled by re_comp.
+   Returns 1 if re_search reports a non-negative result, 0 otherwise.
+   No register information is requested.  */
+int
+re_exec (s)
+     char *s;
+{
+  int len = strlen (s);
+  int where;
+
+  where = re_search (&re_comp_buf, s, len, 0, len,
+                     (struct re_registers *) 0);
+  return where >= 0;
+}
+#endif /* not emacs && not GAWK */
+
+
+
+#ifdef test
+
+#ifdef atarist
+long _stksize = 2L; /* reserve memory for stack */
+#endif
+#include <stdio.h>
+
+/* Indexed by a character, gives the upper case equivalent of the
+ character. Only the entries for the ASCII lower case letters
+ (0141-0172) differ from their own index; every other entry maps a
+ character to itself. */
+
+char upcase[0400] =
+ { 000, 001, 002, 003, 004, 005, 006, 007,
+ 010, 011, 012, 013, 014, 015, 016, 017,
+ 020, 021, 022, 023, 024, 025, 026, 027,
+ 030, 031, 032, 033, 034, 035, 036, 037,
+ 040, 041, 042, 043, 044, 045, 046, 047,
+ 050, 051, 052, 053, 054, 055, 056, 057,
+ 060, 061, 062, 063, 064, 065, 066, 067,
+ 070, 071, 072, 073, 074, 075, 076, 077,
+ 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
+ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
+ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
+ 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
+ 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, /* ` then a-g as A-G */
+ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, /* h-o as H-O */
+ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, /* p-w as P-W */
+ 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, /* x-z as X-Z, rest unchanged */
+ 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
+ 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
+ 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
+ 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
+ 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
+ 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
+ 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
+ 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
+ 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
+ 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
+ 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
+ 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
+ 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
+ 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
+ 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
+ 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
+ };
+
+#ifdef canned
+
+#include "tests.h"
+
+typedef enum { extended_test, basic_test } test_type;
+
+/* Use this to run the tests we've thought of.  Which suite runs is fixed
+   at compile time by the initial value of `t' below.  `main' is declared
+   to return `int' and returns 0, so the program's exit status is well
+   defined.  */
+
+int
+main ()
+{
+  test_type t = extended_test;
+
+  if (t == basic_test)
+    {
+      printf ("Running basic tests:\n\n");
+      test_posix_basic ();
+    }
+  else if (t == extended_test)
+    {
+      printf ("Running extended tests:\n\n");
+      test_posix_extended ();
+    }
+
+  return 0;
+}
+
+#else /* not canned */
+
+/* Read one line from standard input into LINE, which has room for SIZE
+   bytes, and drop the trailing newline if there is one.  Returns 1 if a
+   line was read and 0 at end of input or on a read error.  An input line
+   longer than SIZE - 1 characters is delivered in pieces instead of
+   overrunning LINE.  */
+
+static int
+read_test_line (line, size)
+     char *line;
+     int size;
+{
+  int len;
+
+  if (fgets (line, size, stdin) == 0)
+    return 0;
+
+  len = strlen (line);
+  if (len > 0 && line[len - 1] == '\n')
+    line[len - 1] = '\0';
+  return 1;
+}
+
+/* Use this to run interactive tests.  Input comes in pairs of lines: a
+   pattern, which is compiled and dumped unless the line is empty (the
+   previous pattern is then kept), and a string to match against it.
+   The optional first argument is the numeric value to store in
+   obscure_syntax.  Returns 0 when the input runs out and 1 if the
+   pattern buffer cannot be allocated.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  char pat[80];
+  struct re_pattern_buffer buf;
+  int i;
+  char fastmap[(1 << BYTEWIDTH)];
+
+  /* Allow a command argument to specify the style of syntax. */
+  if (argc > 1)
+    obscure_syntax = atol (argv[1]);
+
+  buf.allocated = 40;
+  buf.buffer = (char *) malloc (buf.allocated);
+  if (buf.buffer == 0)
+    {
+      fprintf (stderr, "Memory exhausted\n");
+      return 1;
+    }
+  buf.fastmap = fastmap;
+  buf.translate = upcase;
+
+  while (1)
+    {
+      /* Stop at end of input rather than reusing the previous line
+         for ever.  */
+      if (!read_test_line (pat, (int) sizeof pat))
+        break;
+
+      if (*pat)
+        {
+          re_compile_pattern (pat, strlen(pat), &buf);
+
+          for (i = 0; i < buf.used; i++)
+            printchar (buf.buffer[i]);
+
+          putchar ('\n');
+
+          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
+
+          re_compile_fastmap (&buf);
+          printf ("Allowed by fastmap: ");
+          for (i = 0; i < (1 << BYTEWIDTH); i++)
+            if (fastmap[i]) printchar (i);
+          putchar ('\n');
+        }
+
+      /* Now read the string to match against */
+      if (!read_test_line (pat, (int) sizeof pat))
+        break;
+
+      /* The last argument is a pointer; say so explicitly, since the
+         call may be made without a prototype in scope.  */
+      i = re_match (&buf, pat, strlen (pat), 0, (struct re_registers *) 0);
+      printf ("Match value %d.\n", i);
+    }
+
+  return 0;
+}
+
+#endif
+
+
+#ifdef NOTDEF
+/* Dump BUFP on standard output: the compiled pattern bytes, the
+   allocated and used sizes, every character whose fastmap entry is
+   nonzero, every character whose translate entry is nonzero (if there is
+   a translate table), and the fastmap_accurate and can_be_null flags.  */
+print_buf (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  int idx;
+  char *accuracy = bufp->fastmap_accurate ? "" : "n't";
+  char *nullness = bufp->can_be_null ? "" : "not";
+
+  printf ("buf is :\n----------------\n");
+  idx = 0;
+  while (idx < bufp->used)
+    {
+      printchar (bufp->buffer[idx]);
+      idx++;
+    }
+
+  printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
+
+  printf ("Allowed by fastmap: ");
+  for (idx = 0; idx < (1 << BYTEWIDTH); idx++)
+    {
+      if (bufp->fastmap[idx])
+        printchar (idx);
+    }
+
+  printf ("\nAllowed by translate: ");
+  if (bufp->translate)
+    {
+      for (idx = 0; idx < (1 << BYTEWIDTH); idx++)
+        {
+          if (bufp->translate[idx])
+            printchar (idx);
+        }
+    }
+
+  printf ("\nfastmap is%s accurate\n", accuracy);
+  printf ("can %s be null\n----------", nullness);
+}
+#endif /* NOTDEF */
+
+/* Write C on standard output: as itself when it lies in the range 040
+   through 0176, otherwise as a backslash followed by three octal digits
+   taken from its low eight bits.  */
+printchar (c)
+     char c;
+{
+  if (c >= 040 && c < 0177)
+    putchar (c);
+  else
+    {
+      int high = (c >> 6) & 3;
+      int middle = (c >> 3) & 7;
+      int low = c & 7;
+
+      putchar ('\\');
+      putchar (high + '0');
+      putchar (middle + '0');
+      putchar (low + '0');
+    }
+}
+
+/* Print STRING and a newline on standard output, then exit with
+   status 1.  */
+error (string)
+     char *string;
+{
+  fputs (string, stdout);
+  putchar ('\n');
+  exit (1);
+}
+#endif /* test */
diff --git a/gnu/usr.bin/awk/regex.h b/gnu/usr.bin/awk/regex.h
new file mode 100644
index 000000000000..fce11c3a97dd
--- /dev/null
+++ b/gnu/usr.bin/awk/regex.h
@@ -0,0 +1,260 @@
+/* Definitions for data structures callers pass the regex library.
+
+ Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+
+#ifndef __REGEXP_LIBRARY
+#define __REGEXP_LIBRARY
+
+/* Define number of parens for which we record the beginnings and ends.
+ This affects how much space the `struct re_registers' type takes up. */
+#ifndef RE_NREGS
+#define RE_NREGS 10
+#endif
+
+#define BYTEWIDTH 8
+
+
+/* Maximum number of duplicates an interval can allow. */
+#ifndef RE_DUP_MAX
+#define RE_DUP_MAX ((1 << 15) - 1)
+#endif
+
+
+/* This defines the various regexp syntaxes. */
+extern long obscure_syntax;
+
+
+/* The following bits are used in the obscure_syntax variable to choose among
+ alternative regexp syntaxes. */
+
+/* If this bit is set, plain parentheses serve as grouping, and backslash
+ parentheses are needed for literal searching.
+ If not set, backslash-parentheses are grouping, and plain parentheses
+ are for literal searching. */
+#define RE_NO_BK_PARENS 1L
+
+/* If this bit is set, plain | serves as the `or'-operator, and \| is a
+ literal.
+ If not set, \| serves as the `or'-operator, and | is a literal. */
+#define RE_NO_BK_VBAR (1L << 1)
+
+/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
+ literals.
+ If set, \+, \? are operators and plain +, ? are literals. */
+#define RE_BK_PLUS_QM (1L << 2)
+
+/* If this bit is set, | binds tighter than ^ or $.
+ If not set, the contrary. */
+#define RE_TIGHT_VBAR (1L << 3)
+
+/* If this bit is set, then treat newline as an OR operator.
+ If not set, treat it as a normal character. */
+#define RE_NEWLINE_OR (1L << 4)
+
+/* If this bit is set, then special characters may act as normal
+ characters in some contexts. Specifically, this applies to:
+ ^ -- only special at the beginning, or after ( or |;
+ $ -- only special at the end, or before ) or |;
+ *, +, ? -- only special when not after the beginning, (, or |.
+ If this bit is not set, special characters (such as *, ^, and $)
+ always have their special meaning regardless of the surrounding
+ context. */
+#define RE_CONTEXT_INDEP_OPS (1L << 5)
+
+/* If this bit is not set, then \ before anything inside [ and ] is taken as
+ a real \.
+ If set, then such a \ escapes the following character. This is a
+ special case for awk. */
+#define RE_AWK_CLASS_HACK (1L << 6)
+
+/* If this bit is set, then \{ and \} or { and } serve as interval operators.
+ If not set, then \{ and \} and { and } are treated as literals. */
+#define RE_INTERVALS (1L << 7)
+
+/* If this bit is not set, then \{ and \} serve as interval operators and
+ { and } are literals.
+ If set, then { and } serve as interval operators and \{ and \} are
+ literals. */
+#define RE_NO_BK_CURLY_BRACES (1L << 8)
+
+/* If this bit is set, then character classes are supported; they are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (1L << 9)
+
+/* If this bit is set, then the dot re doesn't match a null byte.
+ If not set, it does. */
+#define RE_DOT_NOT_NULL (1L << 10)
+
+/* If this bit is set, then [^...] doesn't match a newline.
+ If not set, it does. */
+#define RE_HAT_NOT_NEWLINE (1L << 11)
+
+/* If this bit is set, back references are not recognized.
+ If not set, they are. */
+#define RE_NO_BK_REFS (1L << 12)
+
+/* If this bit is set, back references must refer to a preceding
+ subexpression. If not set, a back reference to a nonexistent
+ subexpression is treated as literal characters. */
+#define RE_NO_EMPTY_BK_REF (1L << 13)
+
+/* If this bit is set, bracket expressions can't be empty.
+ If it is not set, they can be empty. */
+#define RE_NO_EMPTY_BRACKETS (1L << 14)
+
+/* If this bit is set, then *, +, ? and { cannot be first in an re or
+ immediately after a |, or a (. Furthermore, a | cannot be first or
+ last in an re, or immediately follow another | or a (. Also, a ^
+ cannot appear in a nonleading position and a $ cannot appear in a
+ nontrailing position (outside of bracket expressions, that is). */
+#define RE_CONTEXTUAL_INVALID_OPS (1L << 15)
+
+/* If this bit is set, then +, ? and | aren't recognized as operators.
+ If it's not, they are. */
+#define RE_LIMITED_OPS (1L << 16)
+
+/* If this bit is set, then an ending range point has to collate higher
+ or equal to the starting range point.
+ If it's not set, then when the ending range point collates lower
+ than the starting range point, the range is just considered empty. */
+#define RE_NO_EMPTY_RANGES (1L << 17)
+
+/* If this bit is set, then a hyphen (-) can't be an ending range point.
+ If it isn't, then it can. */
+#define RE_NO_HYPHEN_RANGE_END (1L << 18)
+
+
+/* Define combinations of bits for the standard possibilities. */
+#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INDEP_OPS)
+#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK)
+#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
+#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
+#define RE_SYNTAX_EMACS 0
+#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \
+ | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \
+ | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \
+ | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \
+ | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)
+
+#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \
+ | RE_NO_BK_VBAR | RE_NO_BK_PARENS \
+ | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \
+ | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
+ | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \
+ | RE_NO_HYPHEN_RANGE_END)
+
+
+/* This data structure is used to represent a compiled pattern. */
+
+struct re_pattern_buffer
+ {
+ char *buffer; /* Space holding the compiled pattern commands. */
+ long allocated; /* Size of space that `buffer' points to. */
+ long used; /* Length of portion of buffer actually occupied */
+ char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
+ /* re_search uses the fastmap, if there is one,
+ to skip over totally implausible characters. */
+ char *translate; /* Translate table to apply to all characters before
+ comparing, or zero for no translation.
+ The translation is applied to a pattern when it is
+ compiled and to data when it is matched. */
+ char fastmap_accurate;
+ /* Set to zero when a new pattern is stored,
+ set to one when the fastmap is updated from it. */
+ char can_be_null; /* Set to one by compiling fastmap
+ if this pattern might match the null string.
+ It does not necessarily match the null string
+ in that case, but if this is zero, it cannot.
+ 2 as value means can match null string
+ but at end of range or before a character
+ listed in the fastmap. */
+ };
+
+
+/* search.c (search_buffer) needs this one value. It is defined both in
+ regex.c and here. */
+#define RE_EXACTN_VALUE 1
+
+
+/* Structure to store register contents data in.
+
+ Pass the address of such a structure as an argument to re_match, etc.,
+ if you want this information back.
+
+ For i from 1 to RE_NREGS - 1, start[i] records the starting index in
+ the string of where the ith subexpression matched, and end[i] records
+ one after the ending index. start[0] and end[0] are analogous, for
+ the entire pattern. */
+
+struct re_registers
+ {
+ int start[RE_NREGS];
+ int end[RE_NREGS];
+ };
+
+
+
+#ifdef __STDC__
+
+extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *);
+/* Is this really advertised? */
+extern void re_compile_fastmap (struct re_pattern_buffer *);
+extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
+ struct re_registers *);
+extern int re_search_2 (struct re_pattern_buffer *, char *, int,
+ char *, int, int, int,
+ struct re_registers *, int);
+extern int re_match (struct re_pattern_buffer *, char *, int, int,
+ struct re_registers *);
+extern int re_match_2 (struct re_pattern_buffer *, char *, int,
+ char *, int, int, struct re_registers *, int);
+extern long re_set_syntax (long syntax);
+
+#ifndef GAWK
+/* 4.2 bsd compatibility. */
+extern char *re_comp (char *);
+extern int re_exec (char *);
+#endif
+
+#else /* !__STDC__ */
+
+extern char *re_compile_pattern ();
+/* Is this really advertised? */
+extern void re_compile_fastmap ();
+extern int re_search (), re_search_2 ();
+extern int re_match (), re_match_2 ();
+extern long re_set_syntax();
+
+#ifndef GAWK
+/* 4.2 bsd compatibility. */
+extern char *re_comp ();
+extern int re_exec ();
+#endif
+
+#endif /* __STDC__ */
+
+
+#ifdef SYNTAX_TABLE
+extern char *re_syntax_table;
+#endif
+
+#endif /* !__REGEXP_LIBRARY */
diff --git a/gnu/usr.bin/awk/version.c b/gnu/usr.bin/awk/version.c
new file mode 100644
index 000000000000..adea5fafacfb
--- /dev/null
+++ b/gnu/usr.bin/awk/version.c
@@ -0,0 +1,46 @@
+char *version_string = "@(#)Gnu Awk (gawk) 2.15";
+
+/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead
+ of the Right Hand Side */
+
+/* 1.03 Fixed split() to treat strings of space and tab as FS if
+ the split char is ' '.
+
+ Added -v option to print version number
+
+ Fixed bug that caused rounding when printing large numbers */
+
+/* 2.00beta Incorporated the functionality of the "new" awk as described
+ in the book (reference not handy). Extensively tested, but no
+ doubt still buggy. Badly needs tuning and cleanup, in
+ particular in memory management which is currently almost
+ non-existent. */
+
+/* 2.01 JF: Modified to compile under GCC, and fixed a few
+ bugs while I was at it. I hope I didn't add any more.
+ I modified parse.y to reduce the number of reduce/reduce
+ conflicts. There are still a few left. */
+
+/* 2.02 Fixed JF's bugs; improved memory management, still needs
+ lots of work. */
+
+/* 2.10 Major grammar rework and lots of bug fixes from David.
+ Major changes for performance enhancements from David.
+ A number of minor bug fixes and new features from Arnold.
+ Changes for MSDOS from Conrad Kwok and Scott Garfinkle.
+ The gawk.texinfo and info files included! */
+
+/* 2.11 Bug fix release to 2.10. Lots of changes for portability,
+ speed, and configurability. */
+
+/* 2.12 Lots of changes for portability, speed, and configurability.
+ Several bugs fixed. POSIX compliance. Removal of last set
+ of hard-wired limits. Atari and VMS ports added. */
+
+/* 2.13 Public release of 2.12 */
+
+/* 2.14 Mostly bug fixes. */
+
+/* 2.15 Bug fixes plus intermixing of command-line source and files,
+ GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files. */
+
diff --git a/gnu/usr.bin/rcs/Makefile b/gnu/usr.bin/rcs/Makefile
new file mode 100644
index 000000000000..21818151d975
--- /dev/null
+++ b/gnu/usr.bin/rcs/Makefile
@@ -0,0 +1,3 @@
+SUBDIR= lib ci co ident merge rcs rcsdiff rcsmerge rlog rcsfreeze
+
+.include <bsd.subdir.mk>
diff --git a/gnu/usr.bin/rcs/Makefile.inc b/gnu/usr.bin/rcs/Makefile.inc
new file mode 100644
index 000000000000..b9eca7d52190
--- /dev/null
+++ b/gnu/usr.bin/rcs/Makefile.inc
@@ -0,0 +1,3 @@
+# @(#)Makefile.inc 5.1 (Berkeley) 5/11/90
+
+BINDIR?= /usr/bin
diff --git a/gnu/usr.bin/rcs/ci/Makefile b/gnu/usr.bin/rcs/ci/Makefile
new file mode 100644
index 000000000000..9b64e0848aac
--- /dev/null
+++ b/gnu/usr.bin/rcs/ci/Makefile
@@ -0,0 +1,7 @@
+PROG= ci
+
+SRCS= ci.c
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/ci/ci.1 b/gnu/usr.bin/rcs/ci/ci.1
new file mode 100644
index 000000000000..5736dc95a001
--- /dev/null
+++ b/gnu/usr.bin/rcs/ci/ci.1
@@ -0,0 +1,772 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: ci.1,v 5.9 1991/10/07 17:32:46 eggert Exp $
+.ds r \&\s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH CI 1 \*(Dt GNU
+.SH NAME
+ci \- check in RCS revisions
+.SH SYNOPSIS
+.B ci
+.RI [ options ] " file " .\|.\|.
+.SH DESCRIPTION
+.B ci
+stores new revisions into \*r files.
+Each pathname matching an \*r suffix
+is taken to be an \*r file.
+All others
+are assumed to be working files containing new revisions.
+.B ci
+deposits the contents of each working file
+into the corresponding \*r file.
+If only a working file is given,
+.B ci
+tries to find the corresponding \*r file in an \*r subdirectory
+and then in the working file's directory.
+For more details, see
+.SM "FILE NAMING"
+below.
+.PP
+For
+.B ci
+to work, the caller's login must be on the access list,
+except if the access list is empty or the caller is the superuser or the
+owner of the file.
+To append a new revision to an existing branch, the tip revision on
+that branch must be locked by the caller. Otherwise, only a
+new branch can be created. This restriction is not enforced
+for the owner of the file if non-strict locking is used
+(see
+.BR rcs (1)).
+A lock held by someone else may be broken with the
+.B rcs
+command.
+.PP
+Unless the
+.B \-f
+option is given,
+.B ci
+checks whether the revision to be deposited differs from the preceding one.
+If not, instead of creating a new revision
+.B ci
+reverts to the preceding one.
+To revert, ordinary
+.B ci
+removes the working file and any lock;
+.B "ci\ \-l"
+keeps and
+.B "ci\ \-u"
+removes any lock, and then they both generate a new working file much as if
+.B "co\ \-l"
+or
+.B "co\ \-u"
+had been applied to the preceding revision.
+When reverting, any
+.B \-n
+and
+.B \-s
+options apply to the preceding revision.
+.PP
+For each revision deposited,
+.B ci
+prompts for a log message.
+The log message should summarize the change and must be terminated by
+end-of-file or by a line containing
+.BR \&. "\ by"
+itself.
+If several files are checked in
+.B ci
+asks whether to reuse the
+previous log message.
+If the standard input is not a terminal,
+.B ci
+suppresses the prompt
+and uses the same log message for all files.
+See also
+.BR \-m .
+.PP
+If the \*r file does not exist,
+.B ci
+creates it and
+deposits the contents of the working file as the initial revision
+(default number:
+.BR 1.1 ).
+The access list is initialized to empty.
+Instead of the log message,
+.B ci
+requests descriptive text (see
+.B \-t
+below).
+.PP
+The number
+.I rev
+of the deposited revision can be given by any of the options
+.BR \-f ,
+.BR \-I ,
+.BR \-k ,
+.BR \-l ,
+.BR \-M ,
+.BR \-q ,
+.BR \-r ,
+or
+.BR \-u .
+.I rev
+may be symbolic, numeric, or mixed.
+If
+.I rev
+is
+.BR $ ,
+.B ci
+determines the revision number from keyword values in the working file.
+.PP
+If
+.I rev
+is a revision number, it must be higher than the latest
+one on the branch to which
+.I rev
+belongs, or must start a new branch.
+.PP
+If
+.I rev
+is a branch rather than a revision number,
+the new revision is appended to that branch. The level number is obtained
+by incrementing the tip revision number of that branch.
+If
+.I rev
+indicates a non-existing branch,
+that branch is created with the initial revision numbered
+.IB rev .1\f1.\fP
+.br
+.ne 8
+.PP
+If
+.I rev
+is omitted,
+.B ci
+tries to derive the new revision number from
+the caller's last lock. If the caller has locked the tip revision of a branch,
+the new revision is appended to that branch.
+The new revision number is obtained
+by incrementing the tip revision number.
+If the caller locked a non-tip revision, a new branch is started at
+that revision by incrementing the highest branch number at that revision.
+The default initial branch and level numbers are
+.BR 1 .
+.PP
+If
+.I rev
+is omitted and the caller has no lock, but owns
+the file and locking
+is not set to
+.IR strict ,
+then the revision is appended to the
+default branch (normally the trunk; see the
+.B \-b
+option of
+.BR rcs (1)).
+.PP
+Exception: On the trunk, revisions can be appended to the end, but
+not inserted.
+.SH OPTIONS
+.TP
+.BR \-r [\f2rev\fP]
+checks in a revision, releases the corresponding lock, and
+removes the working file. This is the default.
+.RS
+.PP
+The
+.B \-r
+option has an unusual meaning in
+.BR ci .
+In other \*r commands,
+.B \-r
+merely specifies a revision number,
+but in
+.B ci
+it also releases a lock and removes the working file.
+See
+.B \-u
+for a tricky example.
+.RE
+.TP
+.BR \-l [\f2rev\fP]
+works like
+.BR \-r ,
+except it performs an additional
+.B "co\ \-l"
+for the
+deposited revision. Thus, the deposited revision is immediately
+checked out again and locked.
+This is useful for saving a revision although one wants to continue
+editing it after the checkin.
+.TP
+.BR \-u [\f2rev\fP]
+works like
+.BR \-l ,
+except that the deposited revision is not locked.
+This lets one read the working file
+immediately after checkin.
+.RS
+.PP
+The
+.BR \-l ,
+.BR \-r ,
+and
+.B \-u
+options are mutually exclusive and silently override each other.
+For example,
+.B "ci\ \-u\ \-r"
+is equivalent to
+.B "ci\ \-r"
+because
+.B \-r
+overrides
+.BR \-u .
+.RE
+.TP
+.BR \-f [\f2rev\fP]
+forces a deposit; the new revision is deposited even if it is not different
+from the preceding one.
+.TP
+.BR \-k [\f2rev\fP]
+searches the working file for keyword values to determine its revision number,
+creation date, state, and author (see
+.BR co (1)),
+and assigns these
+values to the deposited revision, rather than computing them locally.
+It also generates a default log message noting the login of the caller
+and the actual checkin date.
+This option is useful for software distribution. A revision that is sent to
+several sites should be checked in with the
+.B \-k
+option at these sites to
+preserve the original number, date, author, and state.
+The extracted keyword values and the default log message may be overridden
+with the options
+.BR \-d ,
+.BR \-m ,
+.BR \-s ,
+.BR \-w ,
+and any option that carries a revision number.
+.TP
+.BR \-q [\f2rev\fP]
+quiet mode; diagnostic output is not printed.
+A revision that is not different from the preceding one is not deposited,
+unless
+.B \-f
+is given.
+.TP
+.BR \-I [\f2rev\fP]
+interactive mode;
+the user is prompted and questioned
+even if the standard input is not a terminal.
+.TP
+.BR \-d "[\f2date\fP]"
+uses
+.I date
+for the checkin date and time.
+The
+.I date
+is specified in free format as explained in
+.BR co (1).
+This is useful for lying about the checkin date, and for
+.B \-k
+if no date is available.
+If
+.I date
+is empty, the working file's time of last modification is used.
+.TP
+.BR \-M [\f2rev\fP]
+Set the modification time on any new working file
+to be the date of the retrieved revision.
+For example,
+.BI "ci\ \-d\ \-M\ \-u" "\ f"
+does not alter
+.IR f 's
+modification time, even if
+.IR f 's
+contents change due to keyword substitution.
+Use this option with care; it can confuse
+.BR make (1).
+.TP
+.BI \-m "msg"
+uses the string
+.I msg
+as the log message for all revisions checked in.
+.TP
+.BI \-n "name"
+assigns the symbolic name
+.I name
+to the number of the checked-in revision.
+.B ci
+prints an error message if
+.I name
+is already assigned to another
+number.
+.TP
+.BI \-N "name"
+same as
+.BR \-n ,
+except that it overrides a previous assignment of
+.IR name .
+.TP
+.BI \-s "state"
+sets the state of the checked-in revision to the identifier
+.IR state .
+The default state is
+.BR Exp .
+.TP
+.BI \-t file
+writes descriptive text from the contents of the named
+.I file
+into the \*r file,
+deleting the existing text.
+The
+.I file
+may not begin with
+.BR \- .
+.TP
+.BI \-t\- string
+Write descriptive text from the
+.I string
+into the \*r file, deleting the existing text.
+.RS
+.PP
+The
+.B \-t
+option, in both its forms, has effect only during an initial checkin;
+it is silently ignored otherwise.
+.PP
+During the initial checkin, if
+.B \-t
+is not given,
+.B ci
+obtains the text from standard input,
+terminated by end-of-file or by a line containing
+.BR \&. "\ by"
+itself.
+The user is prompted for the text if interaction is possible; see
+.BR \-I .
+.PP
+For backward compatibility with older versions of \*r, a bare
+.B \-t
+option is ignored.
+.RE
+.TP
+.BI \-w "login"
+uses
+.I login
+for the author field of the deposited revision.
+Useful for lying about the author, and for
+.B \-k
+if no author is available.
+.TP
+.BI \-V n
+Emulate \*r version
+.IR n .
+See
+.BR co (1)
+for details.
+.TP
+.BI \-x "suffixes"
+specifies the suffixes for \*r files.
+A nonempty suffix matches any pathname ending in the suffix.
+An empty suffix matches any pathname of the form
+.BI RCS/ file
+or
+.IB path /RCS/ file.
+The
+.B \-x
+option can specify a list of suffixes
+separated by
+.BR / .
+For example,
+.B \-x,v/
+specifies two suffixes:
+.B ,v
+and the empty suffix.
+If two or more suffixes are specified,
+they are tried in order when looking for an \*r file;
+the first one that works is used for that file.
+If no \*r file is found but an \*r file can be created,
+the suffixes are tried in order
+to determine the new \*r file's name.
+The default for
+.IR suffixes
+is installation-dependent; normally it is
+.B ,v/
+for hosts like Unix that permit commas in file names,
+and is empty (i.e. just the empty suffix) for other hosts.
+.SH "FILE NAMING"
+Pairs of \*r files and working files may be specified in three ways
+(see also the
+example section).
+.PP
+1) Both the \*r file and the working file are given. The \*r pathname is of
+the form
+.IB path1 / workfileX
+and the working pathname is of the form
+.IB path2 / workfile
+where
+.IB path1 /
+and
+.IB path2 /
+are (possibly different or empty) paths,
+.I workfile
+is a filename, and
+.I X
+is an \*r suffix.
+If
+.I X
+is empty,
+.IB path1 /
+must be
+.B RCS/
+or must end in
+.BR /RCS/ .
+.PP
+2) Only the \*r file is given. Then the working file is created in the current
+directory and its name is derived from the name of the \*r file
+by removing
+.IB path1 /
+and the suffix
+.IR X .
+.PP
+3) Only the working file is given.
+Then
+.B ci
+considers each \*r suffix
+.I X
+in turn, looking for an \*r file of the form
+.IB path2 /RCS/ workfileX
+or (if the former is not found and
+.I X
+is nonempty)
+.IB path2 / workfileX.
+.PP
+If the \*r file is specified without a path in 1) and 2),
+.B ci
+looks for the \*r file first in the directory
+.B ./RCS
+and then in the current
+directory.
+.PP
+.B ci
+reports an error if an attempt to open an \*r file fails for an unusual reason,
+even if the \*r file's pathname is just one of several possibilities.
+For example, to suppress use of \*r commands in a directory
+.IR d ,
+create a regular file named
+.IB d /RCS
+so that casual attempts to use \*r commands in
+.I d
+fail because
+.IB d /RCS
+is not a directory.
+.SH EXAMPLES
+Suppose
+.B ,v
+is an \*r suffix and the current directory contains a subdirectory
+.B RCS
+with an \*r file
+.BR io.c,v .
+Then each of the following commands checks in a copy of
+.B io.c
+into
+.B RCS/io.c,v
+as the latest revision, removing
+.BR io.c .
+.LP
+.RS
+.nf
+.ft 3
+ci io.c; ci RCS/io.c,v; ci io.c,v;
+ci io.c RCS/io.c,v; ci io.c io.c,v;
+ci RCS/io.c,v io.c; ci io.c,v io.c;
+.ft
+.fi
+.RE
+.PP
+Suppose instead that the empty suffix
+is an \*r suffix and the current directory contains a subdirectory
+.B RCS
+with an \*r file
+.BR io.c .
+Then each of the following commands checks in a new revision.
+.LP
+.RS
+.nf
+.ft 3
+ci io.c; ci RCS/io.c;
+ci io.c RCS/io.c;
+ci RCS/io.c io.c;
+.ft
+.fi
+.RE
+.SH "FILE MODES"
+An \*r file created by
+.B ci
+inherits the read and execute permissions
+from the working file. If the \*r file exists already,
+.B ci
+preserves its read and execute permissions.
+.B ci
+always turns off all write permissions of \*r files.
+.SH FILES
+Several temporary files may be created in the directory containing
+the working file, and also in the temporary directory (see
+.B \s-1TMPDIR\s0
+under
+.BR \s-1ENVIRONMENT\s0 ).
+A semaphore file or files are created in the directory containing the \*r file.
+With a nonempty suffix, the semaphore names begin with
+the first character of the suffix; therefore, do not specify a suffix
+whose first character could be that of a working filename.
+With an empty suffix, the semaphore names end with
+.B _
+so working filenames should not end in
+.BR _ .
+.PP
+.B ci
+never changes an \*r or working file.
+Normally,
+.B ci
+unlinks the file and creates a new one;
+but instead of breaking a chain of one or more symbolic links to an \*r file,
+it unlinks the destination file.
+Therefore,
+.B ci
+breaks any hard or symbolic links to any working file it changes;
+and hard links to \*r files are ineffective,
+but symbolic links to \*r files are preserved.
+.PP
+The effective user must be able to
+search and write the directory containing the \*r file.
+Normally, the real user must be able to
+read the \*r and working files
+and to search and write the directory containing the working file;
+however, some older hosts
+cannot easily switch between real and effective users,
+so on these hosts the effective user is used for all accesses.
+The effective user is the same as the real user
+unless your copies of
+.B ci
+and
+.B co
+have setuid privileges.
+As described in the next section,
+these privileges yield extra security if
+the effective user owns all \*r files and directories,
+and if only the effective user can write \*r directories.
+.PP
+Users can control access to \*r files by setting the permissions
+of the directory containing the files; only users with write access
+to the directory can use \*r commands to change its \*r files.
+For example, in hosts that allow a user to belong to several groups,
+one can make a group's \*r directories writable to that group only.
+This approach suffices for informal projects,
+but it means that any group member can arbitrarily change the group's \*r files,
+and can even remove them entirely.
+Hence more formal projects sometimes distinguish between an \*r administrator,
+who can change the \*r files at will, and other project members,
+who can check in new revisions but cannot otherwise change the \*r files.
+.SH "SETUID USE"
+To prevent anybody but their \*r administrator from deleting revisions,
+a set of users can employ setuid privileges as follows.
+.nr n \w'\(bu '+1n-1/1n
+.IP \(bu \nn
+Check that the host supports \*r setuid use.
+Consult a trustworthy expert if there are any doubts.
+It is best if the
+.B seteuid()
+system call works as described in Posix 1003.1a Draft 5,
+because \*r can switch back and forth easily
+between real and effective users, even if the real user is
+.BR root .
+If not, the second best is if the
+.B setuid()
+system call supports saved setuid
+(the {\s-1_POSIX_SAVED_IDS\s0} behavior of Posix 1003.1-1990);
+this fails only if the real user is
+.BR root .
+If \*r detects any failure in setuid, it quits immediately.
+.IP \(bu \nn
+Choose a user
+.I A
+to serve as \*r administrator for the set of users.
+Only
+.I A
+will be able to invoke the
+.B rcs
+command on the users' \*r files.
+.I A
+should not be
+.B root
+or any other user with special powers.
+Mutually suspicious sets of users should use different administrators.
+.IP \(bu \nn
+Choose a path name
+.I B
+that will be a directory of files to be executed by the users.
+.IP \(bu \nn
+Have
+.I A
+set up
+.I B
+to contain copies of
+.B ci
+and
+.B co
+that are setuid to
+.I A
+by copying the commands from their standard installation directory
+.I D
+as follows:
+.LP
+.RS
+.nf
+.ne 3
+\f3mkdir\fP \f2B\fP
+\f3cp\fP \f2D\fP\^\f3/c[io]\fP \f2B\fP
+\f3chmod go\-w,u+s\fP \f2B\fP\f3/c[io]\fP
+.fi
+.RE
+.IP \(bu \nn
+Have each user prepend
+.I B
+to their path as follows:
+.LP
+.RS
+.nf
+.ne 2
+\f3PATH=\fP\f2B\fP\f3:$PATH; export PATH\fP # ordinary shell
+\f3set path=(\fP\f2B\fP \f3$path)\fP # C shell
+.fi
+.RE
+.IP \(bu \nn
+Have
+.I A
+create each \*r directory
+.I R
+with write access only to
+.I A
+as follows:
+.LP
+.RS
+.nf
+.ne 2
+\f3mkdir\fP \f2R\fP
+\f3chmod go\-w\fP \f2R\fP
+.fi
+.RE
+.IP \(bu \nn
+If you want to let only certain users read the \*r files,
+put the users into a group
+.IR G ,
+and have
+.I A
+further protect the \*r directory as follows:
+.LP
+.RS
+.nf
+.ne 2
+\f3chgrp\fP \f2G R\fP
+\f3chmod g\-w,o\-rwx\fP \f2R\fP
+.fi
+.RE
+.IP \(bu \nn
+Have
+.I A
+copy old \*r files (if any) into
+.IR R ,
+to ensure that
+.I A
+owns them.
+.IP \(bu \nn
+An \*r file's access list limits who can check in and lock revisions.
+The default access list is empty,
+which grants checkin access to anyone who can read the \*r file.
+If you want to limit checkin access,
+have
+.I A
+invoke
+.B "rcs\ \-a"
+on the file; see
+.BR rcs (1).
+In particular,
+.BI "rcs\ \-e\ \-a" A
+limits access to just
+.IR A .
+.IP \(bu \nn
+Have
+.I A
+initialize any new \*r files with
+.B "rcs\ \-i"
+before initial checkin, adding the
+.B \-a
+option if you want to limit checkin access.
+.IP \(bu \nn
+Give setuid privileges only to
+.BR ci ,
+.BR co ,
+and
+.BR rcsclean ;
+do not give them to
+.B rcs
+or to any other command.
+.IP \(bu \nn
+Do not use other setuid commands to invoke \*r commands;
+setuid is trickier than you think!
+.SH ENVIRONMENT
+.TP
+.B \s-1RCSINIT\s0
+options prepended to the argument list, separated by spaces.
+A backslash escapes spaces within an option.
+The
+.B \s-1RCSINIT\s0
+options are prepended to the argument lists of most \*r commands.
+Useful
+.B \s-1RCSINIT\s0
+options include
+.BR \-q ,
+.BR \-V ,
+and
+.BR \-x .
+.TP
+.B \s-1TMPDIR\s0
+Name of the temporary directory.
+If not set, the environment variables
+.B \s-1TMP\s0
+and
+.B \s-1TEMP\s0
+are inspected instead and the first value found is taken;
+if none of them are set,
+a host-dependent default is used, typically
+.BR /tmp .
+.SH DIAGNOSTICS
+For each revision,
+.B ci
+prints the \*r file, the working file, and the number
+of both the deposited and the preceding revision.
+The exit status is zero if and only if all operations were successful.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH "SEE ALSO"
+co(1), ident(1), make(1), rcs(1), rcsclean(1), rcsdiff(1),
+rcsintro(1), rcsmerge(1), rlog(1), rcsfile(5)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
+.br
diff --git a/gnu/usr.bin/rcs/ci/ci.c b/gnu/usr.bin/rcs/ci/ci.c
new file mode 100644
index 000000000000..566747e139a9
--- /dev/null
+++ b/gnu/usr.bin/rcs/ci/ci.c
@@ -0,0 +1,1165 @@
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+/*
+ * RCS checkin operation
+ */
+/*******************************************************************
+ * check revisions into RCS files
+ *******************************************************************
+ */
+
+
+
+/* $Log: ci.c,v $
+ * Revision 5.21 1991/11/20 17:58:07 eggert
+ * Don't read the delta tree from a nonexistent RCS file.
+ *
+ * Revision 5.20 1991/10/07 17:32:46 eggert
+ * Fix log bugs. Remove lint.
+ *
+ * Revision 5.19 1991/09/26 23:10:30 eggert
+ * Plug file descriptor leak.
+ *
+ * Revision 5.18 1991/09/18 07:29:10 eggert
+ * Work around a common ftruncate() bug.
+ *
+ * Revision 5.17 1991/09/10 22:15:46 eggert
+ * Fix test for redirected stdin.
+ *
+ * Revision 5.16 1991/08/19 23:17:54 eggert
+ * When there are no changes, revert to previous revision instead of aborting.
+ * Add piece tables, -M, -r$. Tune.
+ *
+ * Revision 5.15 1991/04/21 11:58:14 eggert
+ * Ensure that working file is newer than RCS file after ci -[lu].
+ * Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.14 1991/02/28 19:18:47 eggert
+ * Don't let a setuid ci create a new RCS file; rcs -i -a must be run first.
+ * Fix ci -ko -l mode bug. Open work file at most once.
+ *
+ * Revision 5.13 1991/02/25 07:12:33 eggert
+ * getdate -> getcurdate (SVR4 name clash)
+ *
+ * Revision 5.12 1990/12/31 01:00:12 eggert
+ * Don't use uninitialized storage when handling -{N,n}.
+ *
+ * Revision 5.11 1990/12/04 05:18:36 eggert
+ * Use -I for prompts and -q for diagnostics.
+ *
+ * Revision 5.10 1990/11/05 20:30:10 eggert
+ * Don't remove working file when aborting due to no changes.
+ *
+ * Revision 5.9 1990/11/01 05:03:23 eggert
+ * Add -I and new -t behavior. Permit arbitrary data in logs.
+ *
+ * Revision 5.8 1990/10/04 06:30:09 eggert
+ * Accumulate exit status across files.
+ *
+ * Revision 5.7 1990/09/25 20:11:46 hammer
+ * fixed another small typo
+ *
+ * Revision 5.6 1990/09/24 21:48:50 hammer
+ * added cleanups from Paul Eggert.
+ *
+ * Revision 5.5 1990/09/21 06:16:38 hammer
+ * made it handle multiple -{N,n}'s. Also, made it treat re-directed stdin
+ * the same as the terminal
+ *
+ * Revision 5.4 1990/09/20 02:38:51 eggert
+ * ci -k now checks dates more thoroughly.
+ *
+ * Revision 5.3 1990/09/11 02:41:07 eggert
+ * Fix revision bug with `ci -k file1 file2'.
+ *
+ * Revision 5.2 1990/09/04 08:02:10 eggert
+ * Permit adjacent revisions with identical time stamps (possible on fast hosts).
+ * Improve incomplete line handling. Standardize yes-or-no procedure.
+ *
+ * Revision 5.1 1990/08/29 07:13:44 eggert
+ * Expand locker value like co. Clean old log messages too.
+ *
+ * Revision 5.0 1990/08/22 08:10:00 eggert
+ * Don't require a final newline.
+ * Make lock and temp files faster and safer.
+ * Remove compile-time limits; use malloc instead.
+ * Permit dates past 1999/12/31. Switch to GMT.
+ * Add setuid support. Don't pass +args to diff. Check diff's output.
+ * Ansify and Posixate. Add -k, -V. Remove snooping. Tune.
+ * Check diff's output.
+ *
+ * Revision 4.9 89/05/01 15:10:54 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.8 88/11/08 13:38:23 narten
+ * changes from root@seismo.CSS.GOV (Super User)
+ * -d with no arguments uses the mod time of the file it is checking in
+ *
+ * Revision 4.7 88/08/09 19:12:07 eggert
+ * Make sure workfile is a regular file; use its mode if RCSfile doesn't have one.
+ * Use execv(), not system(); allow cc -R; remove lint.
+ * isatty(fileno(stdin)) -> ttystdin()
+ *
+ * Revision 4.6 87/12/18 11:34:41 narten
+ * lint cleanups (from Guy Harris)
+ *
+ * Revision 4.5 87/10/18 10:18:48 narten
+ * Updating version numbers. Changes relative to revision 1.1 are actually
+ * relative to 4.3
+ *
+ * Revision 1.3 87/09/24 13:57:19 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:21:33 jenkins
+ * Port to suns
+ *
+ * Revision 4.3 83/12/15 12:28:54 wft
+ * ci -u and ci -l now set mode of working file properly.
+ *
+ * Revision 4.2 83/12/05 13:40:54 wft
+ * Merged with 3.9.1.1: added calls to clearerr(stdin).
+ * made rewriteflag external.
+ *
+ * Revision 4.1 83/05/10 17:03:06 wft
+ * Added option -d and -w, and updated assignment of date, etc. to new delta.
+ * Added handling of default branches.
+ * Option -k generates std. log message; fixed undef. pointer in reading of log.
+ * Replaced getlock() with findlock(), link--unlink with rename(),
+ * getpwuid() with getcaller().
+ * Moved all revision number generation to new routine addelta().
+ * Removed calls to stat(); now done by pairfilenames().
+ * Changed most calls to catchints() with restoreints().
+ * Directed all interactive messages to stderr.
+ *
+ * Revision 3.9.1.1 83/10/19 04:21:03 lepreau
+ * Added clearerr(stdin) to getlogmsg() for re-reading stdin.
+ *
+ * Revision 3.9 83/02/15 15:25:44 wft
+ * 4.2 prerelease
+ *
+ * Revision 3.9 83/02/15 15:25:44 wft
+ * Added call to fastcopy() to copy remainder of RCS file.
+ *
+ * Revision 3.8 83/01/14 15:34:05 wft
+ * Added ignoring of interrupts while new RCS file is renamed;
+ * Avoids deletion of RCS files by interrupts.
+ *
+ * Revision 3.7 82/12/10 16:09:20 wft
+ * Corrected checking of return code from diff.
+ *
+ * Revision 3.6 82/12/08 21:34:49 wft
+ * Using DATEFORM to prepare date of checked-in revision;
+ * Fixed return from addbranch().
+ *
+ * Revision 3.5 82/12/04 18:32:42 wft
+ * Replaced getdelta() with gettree(), SNOOPDIR with SNOOPFILE. Updated
+ * field lockedby in removelock(), moved getlogmsg() before calling diff.
+ *
+ * Revision 3.4 82/12/02 13:27:13 wft
+ * added option -k.
+ *
+ * Revision 3.3 82/11/28 20:53:31 wft
+ * Added mustcheckin() to check for redundant checkins.
+ * Added xpandfile() to do keyword expansion for -u and -l;
+ * -m appends linefeed to log message if necessary.
+ * getlogmsg() suppresses prompt if stdin is not a terminal.
+ * Replaced keeplock with lockflag, fclose() with ffclose(),
+ * %02d with %.2d, getlogin() with getpwuid().
+ *
+ * Revision 3.2 82/10/18 20:57:23 wft
+ * An RCS file inherits its mode during the first ci from the working file,
+ * otherwise it stays the same, except that write permission is removed.
+ * Fixed ci -l, added ci -u (both do an implicit co after the ci).
+ * Fixed call to getlogin(), added call to getfullRCSname(), added check
+ * for write error.
+ * Changed conflicting identifiers.
+ *
+ * Revision 3.1 82/10/13 16:04:59 wft
+ * fixed type of variables receiving from getc() (char -> int).
+ * added include file dbm.h for getting BYTESIZ. This is used
+ * to check the return code from diff portably.
+ */
+
+#include "rcsbase.h"
+
+struct Symrev { /* one symbolic name requested with -n or -N */
+ char const *ssymbol; /* the symbolic name itself */
+ int override; /* true for -N, false for -n; handed to addsymbol() by addsyms() */
+ struct Symrev * nextsym; /* next entry of the list, nil at the end */
+};
+
+static char const *getcurdate P((void));
+static int addbranch P((struct hshentry*,struct buf*));
+static int addelta P((void));
+static int addsyms P((char const*));
+static int fixwork P((mode_t,char const*));
+static int removelock P((struct hshentry*));
+static int xpandfile P((RILE*,char const*,struct hshentry const*,char const**));
+static struct cbuf getlogmsg P((void));
+static void cleanup P((void));
+static void incnum P((char const*,struct buf*));
+static void addassoclst P((int, char *));
+
+static FILE *exfile; /* stream opened by xpandfile() for the expanded working file */
+static RILE *workptr; /* working file pointer */
+static struct buf newdelnum; /* new revision number */
+static struct cbuf msg; /* log message given with -m; getlogmsg() returns it when its size is nonzero */
+static int exitstatus; /* set to EXIT_FAILURE by cleanup() once nerror is nonzero */
+static int forceciflag; /* forces check in */
+static int keepflag, keepworkingfile, rcsinitflag; /* -k given; -l or -u given; RCS file is new or has no Head */
+static struct hshentries *gendeltas; /* deltas to be generated */
+static struct hshentry *targetdelta; /* old delta to be generated */
+static struct hshentry newdelta; /* new delta to be inserted */
+static struct stat workstat; /* passed to Iopen() when the working file is opened */
+static struct Symrev *assoclst, *lastassoc; /* head and tail of the -n/-N name list */
+
+mainProg(ciId, "ci", "$Id: ci.c,v 5.21 1991/11/20 17:58:07 eggert Exp $")
+{
+ static char const cmdusage[] =
+ "\nci usage: ci -{fklqru}[rev] -mmsg -{nN}name -sstate -t[textfile] -Vn file ...";
+ static char const default_state[] = DEFAULTSTATE;
+
+ char altdate[datesize];
+ char olddate[datesize];
+ char newdatebuf[datesize], targetdatebuf[datesize];
+ char *a, **newargv, *textfile;
+ char const *author, *krev, *rev, *state;
+ char const *diffilename, *expfilename;
+ char const *workdiffname, *newworkfilename;
+ char const *mtime;
+ int lockflag, lockthis, mtimeflag, removedlock;
+ int r;
+ int changedRCS, changework, newhead;
+ int usestatdate; /* Use mod time of file for -d. */
+ mode_t newworkmode; /* mode for working file */
+ struct hshentry *workdelta;
+
+ setrid();
+
+ author = rev = state = textfile = nil;
+ lockflag = false;
+ mtimeflag = false;
+ altdate[0]= '\0'; /* empty alternate date for -d */
+ usestatdate=false;
+ suffixes = X_DEFAULT;
+
+ argc = getRCSINIT(argc, argv, &newargv); /* continue with the argument vector returned in newargv */
+ argv = newargv;
+ while (a = *++argv, 0<--argc && *a++=='-') {
+ switch (*a++) { /* a now points just past the option letter */
+
+ case 'r':
+ keepworkingfile = lockflag = false;
+ revno: /* shared tail: these option letters may carry a revision number */
+ if (*a) {
+ if (rev) warn("redefinition of revision number");
+ rev = a;
+ }
+ break;
+
+ case 'l':
+ keepworkingfile=lockflag=true;
+ goto revno;
+
+ case 'u':
+ keepworkingfile=true; lockflag=false;
+ goto revno;
+
+ case 'I':
+ interactiveflag = true;
+ goto revno;
+
+ case 'q':
+ quietflag=true;
+ goto revno;
+
+ case 'f':
+ forceciflag=true;
+ goto revno;
+
+ case 'k':
+ keepflag=true;
+ goto revno;
+
+ case 'm':
+ if (msg.size) redefined('m');
+ msg = cleanlogmsg(a, strlen(a));
+ if (!msg.size)
+ warn("missing message for -m option");
+ break;
+
+ case 'n':
+ if (!*a) {
+ error("missing symbolic name after -n");
+ break;
+ }
+ checksid(a);
+ addassoclst(false, a);
+ break;
+
+ case 'N':
+ if (!*a) {
+ error("missing symbolic name after -N");
+ break;
+ }
+ checksid(a);
+ addassoclst(true, a);
+ break;
+
+ case 's':
+ if (*a) {
+ if (state) redefined('s');
+ checksid(a);
+ state = a;
+ } else
+ warn("missing state for -s option");
+ break;
+
+ case 't':
+ if (*a) {
+ if (textfile) redefined('t');
+ textfile = a;
+ }
+ break;
+
+ case 'd':
+ if (altdate[0] || usestatdate)
+ redefined('d');
+ altdate[0] = 0;
+ if (!(usestatdate = !*a)) /* a bare -d selects the working file's modification time instead */
+ str2date(a, altdate);
+ break;
+
+ case 'M':
+ mtimeflag = true;
+ goto revno;
+
+ case 'w':
+ if (*a) {
+ if (author) redefined('w');
+ checksid(a);
+ author = a;
+ } else
+ warn("missing author for -w option");
+ break;
+
+ case 'x':
+ suffixes = a;
+ break;
+
+ case 'V':
+ setRCSversion(*argv);
+ break;
+
+
+
+ default:
+ faterror("unknown option: %s%s", *argv, cmdusage);
+ };
+ } /* end processing of options */
+
+ if (argc<1) faterror("no input file%s", cmdusage);
+
+ /* now handle all filenames */
+ do {
+ targetdelta=nil;
+ ffree();
+
+ switch (pairfilenames(argc, argv, rcswriteopen, false, false)) {
+
+ case -1: /* New RCS file */
+# if has_setuid && has_getuid
+ if (euid() != ruid()) {
+ error("setuid initial checkin prohibited; use `rcs -i -a' first");
+ continue;
+ }
+# endif
+ rcsinitflag = true;
+ break;
+
+ case 0: /* Error */
+ continue;
+
+ case 1: /* Normal checkin with previous RCS file */
+ rcsinitflag = !Head;
+ }
+
+ /* now RCSfilename contains the name of the RCS file, and
+ * workfilename contains the name of the working file.
+ * If the RCS file exists, finptr contains the file descriptor for the
+ * RCS file. The admin node is initialized.
+ * RCSstat is set.
+ */
+
+ diagnose("%s <-- %s\n", RCSfilename,workfilename);
+
+ if (!(workptr = Iopen(workfilename, FOPEN_R_WORK, &workstat))) {
+ eerror(workfilename);
+ continue;
+ }
+ if (finptr && !checkaccesslist()) continue; /* give up */
+
+ krev = rev;
+ if (keepflag) {
+ /* get keyword values from working file */
+ if (!getoldkeys(workptr)) continue;
+ if (!rev && !*(krev = prevrev.string)) {
+ error("can't find a revision number in %s",workfilename);
+ continue;
+ }
+ if (!*prevdate.string && *altdate=='\0' && usestatdate==false)
+ warn("can't find a date in %s", workfilename);
+ if (!*prevauthor.string && !author)
+ warn("can't find an author in %s", workfilename);
+ if (!*prevstate.string && !state)
+ warn("can't find a state in %s", workfilename);
+ } /* end processing keepflag */
+
+ /* Read the delta tree. */
+ if (finptr)
+ gettree();
+
+ /* expand symbolic revision number */
+ if (!fexpandsym(krev, &newdelnum, workptr))
+ continue;
+
+ /* splice new delta into tree */
+ if ((removedlock = addelta()) < 0)
+ continue;
+
+ newdelta.num = newdelnum.string;
+ newdelta.branches=nil;
+ newdelta.lockedby=nil; /*might be changed by addlock() */
+ newdelta.selector = true;
+ /* set author */
+ if (author!=nil)
+ newdelta.author=author; /* set author given by -w */
+ else if (keepflag && *prevauthor.string)
+ newdelta.author=prevauthor.string; /* preserve old author if possible*/
+ else newdelta.author=getcaller();/* otherwise use caller's id */
+ newdelta.state = default_state;
+ if (state!=nil)
+ newdelta.state=state; /* set state given by -s */
+ else if (keepflag && *prevstate.string)
+ newdelta.state=prevstate.string; /* preserve old state if possible */
+ if (usestatdate) {
+ time2date(workstat.st_mtime, altdate);
+ }
+ if (*altdate!='\0')
+ newdelta.date=altdate; /* set date given by -d */
+ else if (keepflag && *prevdate.string) {
+ /* Preserve old date if possible. */
+ str2date(prevdate.string, olddate);
+ newdelta.date = olddate;
+ } else
+ newdelta.date = getcurdate(); /* use current date */
+ /* now check validity of date -- needed because of -d and -k */
+ if (targetdelta!=nil &&
+ cmpnum(newdelta.date,targetdelta->date) < 0) {
+ error("Date %s precedes %s in existing revision %s.",
+ date2str(newdelta.date, newdatebuf),
+ date2str(targetdelta->date, targetdatebuf),
+ targetdelta->num
+ );
+ continue;
+ }
+
+
+ if (lockflag && addlock(&newdelta) < 0) continue;
+ if (!addsyms(newdelta.num))
+ continue;
+
+
+ putadmin(frewrite);
+ puttree(Head,frewrite);
+ putdesc(false,textfile);
+
+ changework = Expand != OLD_EXPAND;
+ lockthis = lockflag;
+ workdelta = &newdelta;
+
+ /* build rest of file */
+ if (rcsinitflag) {
+ diagnose("initial revision: %s\n", newdelnum.string);
+ /* get logmessage */
+ newdelta.log=getlogmsg();
+ if (!putdftext(newdelnum.string,newdelta.log,workptr,frewrite,false)) continue;
+ RCSstat.st_mode = workstat.st_mode;
+ changedRCS = true;
+ } else {
+ diffilename = maketemp(0);
+ workdiffname = workfilename;
+ if (workdiffname[0] == '+') {
+ /* Some diffs have options with leading '+'. */
+ char *dp = ftnalloc(char, strlen(workfilename)+3);
+ workdiffname = dp;
+ *dp++ = '.';
+ *dp++ = SLASH;
+ VOID strcpy(dp, workfilename);
+ }
+ newhead = Head == &newdelta;
+ if (!newhead)
+ foutptr = frewrite;
+ expfilename = buildrevision(
+ gendeltas, targetdelta, (FILE*)0, false
+ );
+ if (
+ !forceciflag &&
+ (changework = rcsfcmp(
+ workptr, &workstat, expfilename, targetdelta
+ )) <= 0
+ ) {
+ diagnose("file is unchanged; reverting to previous revision %s\n",
+ targetdelta->num
+ );
+ if (removedlock < lockflag) { /* -l was given but no lock was removed */
+ diagnose("previous revision was not locked; ignoring -l option\n");
+ lockthis = 0;
+ }
+ if (!(changedRCS =
+ lockflag < removedlock /* a lock was removed and -l does not ask for a new one */
+ || assoclst
+ || newdelta.state != default_state
+ && strcmp(newdelta.state, targetdelta->state) != 0
+ ))
+ workdelta = targetdelta;
+ else {
+ /*
+ * We have started to build the wrong new RCS file.
+ * Start over from the beginning.
+ */
+ long hwm = ftell(frewrite);
+ int bad_truncate;
+ if (fseek(frewrite, 0L, SEEK_SET) != 0)
+ Oerror();
+# if !has_ftruncate
+ bad_truncate = 1;
+# else
+ /*
+ * Work around a common ftruncate() bug.
+ * We can't rely on has_truncate, because we might
+ * be using a filesystem exported to us via NFS.
+ */
+ bad_truncate = ftruncate(fileno(frewrite),(off_t)0);
+ if (bad_truncate && errno != EACCES)
+ Oerror();
+# endif
+ Irewind(finptr);
+ Lexinit();
+ getadmin();
+ gettree();
+ if (!(workdelta = genrevs(
+ targetdelta->num, (char*)0, (char*)0, (char*)0,
+ &gendeltas
+ )))
+ continue;
+ workdelta->log = targetdelta->log;
+ if (newdelta.state != default_state)
+ workdelta->state = newdelta.state;
+ if (removedlock && removelock(workdelta)<0)
+ continue;
+ if (!addsyms(workdelta->num))
+ continue;
+ if (!dorewrite(true, true))
+ continue;
+ fastcopy(finptr, frewrite);
+ if (bad_truncate)
+ while (ftell(frewrite) < hwm)
+ /* White out any earlier mistake with '\n's. */
+ /* This is unlikely. */
+ afputc('\n', frewrite);
+ }
+ } else {
+ diagnose("new revision: %s; previous revision: %s\n",
+ newdelnum.string, targetdelta->num
+ );
+ newdelta.log = getlogmsg();
+ switch (run((char*)0, diffilename,
+ DIFF DIFF_FLAGS,
+ newhead ? workdiffname : expfilename,
+ newhead ? expfilename : workdiffname,
+ (char*)0
+ )) {
+ case DIFF_FAILURE: case DIFF_SUCCESS: break;
+ default: faterror("diff failed");
+ }
+ if (newhead) {
+ Irewind(workptr);
+ if (!putdftext(newdelnum.string,newdelta.log,workptr,frewrite,false)) continue;
+ if (!putdtext(targetdelta->num,targetdelta->log,diffilename,frewrite,true)) continue;
+ } else
+ if (!putdtext(newdelnum.string,newdelta.log,diffilename,frewrite,true)) continue;
+ changedRCS = true;
+ }
+ }
+ if (!donerewrite(changedRCS))
+ continue;
+
+ if (!keepworkingfile) {
+ Izclose(&workptr);
+ r = un_link(workfilename); /* Get rid of old file */
+ } else {
+ newworkmode = WORKMODE(RCSstat.st_mode,
+ ! (Expand==VAL_EXPAND || lockthis < StrictLocks)
+ );
+ mtime = mtimeflag ? workdelta->date : (char const*)0;
+
+ /* Expand if it might change or if we can't fix mode, time. */
+ if (changework || (r=fixwork(newworkmode,mtime)) != 0) {
+ Irewind(workptr);
+ /* Expand keywords in file. */
+ locker_expansion = lockthis;
+ switch (xpandfile(
+ workptr, workfilename,
+ workdelta, &newworkfilename
+ )) {
+ default:
+ continue;
+
+ case 0:
+ /*
+ * No expansion occurred; try to reuse working file
+ * unless we already tried and failed.
+ */
+ if (changework)
+ if ((r=fixwork(newworkmode,mtime)) == 0)
+ break;
+ /* fall into */
+ case 1:
+ if (!(r = setfiledate(newworkfilename,mtime))) {
+ Izclose(&workptr);
+ ignoreints();
+ r = chnamemod(&exfile, newworkfilename, workfilename, newworkmode);
+ keepdirtemp(newworkfilename);
+ restoreints();
+ }
+ }
+ }
+ }
+ if (r != 0) {
+ eerror(workfilename);
+ continue;
+ }
+ diagnose("done\n");
+
+ } while (cleanup(), /* every `continue' above also lands here, so cleanup() runs once per file */
+ ++argv, --argc >=1);
+
+ tempunlink();
+ exitmain(exitstatus);
+} /* end of main (ci) */
+
+ static void
+cleanup() /* Per-file teardown; evaluated in the condition of main's do-while loop. */
+{
+ if (nerror) exitstatus = EXIT_FAILURE; /* latch any reported error into the final exit status */
+ Izclose(&finptr);
+ Izclose(&workptr);
+ Ozclose(&exfile);
+ Ozclose(&fcopy);
+ Ozclose(&frewrite);
+ dirtempunlink(); /* presumably removes the temporaries made by makedirtemp() -- confirm in the RCS library */
+}
+
+#if lint
+# define exiterr ciExit
+#endif
+ exiting void
+exiterr() /* Call dirtempunlink() and tempunlink(), then leave at once via _exit(EXIT_FAILURE). */
+{
+ dirtempunlink();
+ tempunlink();
+ _exit(EXIT_FAILURE);
+}
+
+/*****************************************************************/
+/* the rest are auxiliary routines */
+
+
+ static int
+addelta()
+/* Function: Appends a delta to the delta tree, whose number is
+ * given by newdelnum. Updates Head, newdelnum, targetdelta, gendeltas,
+ * and the links in newdelta.
+ * Return -1 on error, 1 if a lock is removed, 0 otherwise.
+ */
+{
+ register char *tp;
+ register unsigned i;
+ int removedlock;
+ unsigned newdnumlength; /* actual length of new rev. num. */
+
+ newdnumlength = countnumflds(newdelnum.string);
+
+ if (rcsinitflag) {
+ /* this covers non-existing RCS file and a file initialized with rcs -i */
+ if ((newdnumlength==0)&&(Dbranch!=nil)) {
+ bufscpy(&newdelnum, Dbranch);
+ newdnumlength = countnumflds(Dbranch);
+ }
+ if (newdnumlength==0) bufscpy(&newdelnum, "1.1");
+ else if (newdnumlength==1) bufscat(&newdelnum, ".1");
+ else if (newdnumlength>2) {
+ error("Branch point doesn't exist for %s.",newdelnum.string);
+ return -1;
+ } /* newdnumlength == 2 is OK; */
+ Head = &newdelta;
+ newdelta.next=nil;
+ return 0;
+ }
+ if (newdnumlength==0) {
+ /* derive new revision number from locks */
+ switch (findlock(true, &targetdelta)) {
+
+ default:
+ /* found two or more old locks */
+ return -1;
+
+ case 1:
+ /* found an old lock */
+ /* check whether locked revision exists */
+ if (!genrevs(targetdelta->num,(char*)0,(char*)0,(char*)0,&gendeltas))
+ return -1;
+ if (targetdelta==Head) {
+ /* make new head */
+ newdelta.next=Head;
+ Head= &newdelta;
+ } else if (!targetdelta->next && countnumflds(targetdelta->num)>2) {
+ /* new tip revision on side branch */
+ targetdelta->next= &newdelta;
+ newdelta.next = nil;
+ } else {
+ /* middle revision; start a new branch */
+ bufscpy(&newdelnum, "");
+ return addbranch(targetdelta,&newdelnum);
+ }
+ incnum(targetdelta->num, &newdelnum);
+ return 1; /* successful use of existing lock */
+
+ case 0:
+ /* no existing lock; try Dbranch */
+ /* update newdelnum */
+ if (StrictLocks || !myself(RCSstat.st_uid)) {
+ error("no lock set by %s",getcaller());
+ return -1;
+ }
+ if (Dbranch) {
+ bufscpy(&newdelnum, Dbranch);
+ } else {
+ incnum(Head->num, &newdelnum);
+ }
+ newdnumlength = countnumflds(newdelnum.string);
+ /* now fall into next statement */
+ }
+ }
+ if (newdnumlength<=2) {
+ /* add new head per given number */
+ if(newdnumlength==1) {
+ /* make a two-field number out of it*/
+ if (cmpnumfld(newdelnum.string,Head->num,1)==0)
+ incnum(Head->num, &newdelnum);
+ else
+ bufscat(&newdelnum, ".1");
+ }
+ if (cmpnum(newdelnum.string,Head->num) <= 0) {
+ error("deltanumber %s too low; must be higher than %s",
+ newdelnum.string, Head->num);
+ return -1;
+ }
+ targetdelta = Head;
+ if (0 <= (removedlock = removelock(Head))) {
+ if (!genrevs(Head->num,(char*)0,(char*)0,(char*)0,&gendeltas))
+ return -1;
+ newdelta.next = Head;
+ Head = &newdelta;
+ }
+ return removedlock;
+ } else {
+ /* put new revision on side branch */
+ /*first, get branch point */
+ tp = newdelnum.string;
+ for (i = newdnumlength - (newdnumlength&1 ^ 1); (--i); ) /* step tp past one dot per iteration, ending just after the branch point's last field */
+ while (*tp++ != '.')
+ ;
+ *--tp = 0; /* Kill final dot to get old delta temporarily. */
+ if (!(targetdelta=genrevs(newdelnum.string,(char*)nil,(char*)nil,(char*)nil,&gendeltas)))
+ return -1;
+ if (cmpnum(targetdelta->num, newdelnum.string) != 0) {
+ error("can't find branchpoint %s", newdelnum.string);
+ return -1;
+ }
+ *tp = '.'; /* Restore final dot. */
+ return addbranch(targetdelta,&newdelnum);
+ }
+}
+
+
+
+ static int
+addbranch(branchpoint,num)
+ struct hshentry *branchpoint;
+ struct buf *num;
+/* adds a new branch and branch delta at branchpoint.
+ * If num is the null string, appends the new branch, incrementing
+ * the highest branch number (initially 1), and setting the level number to 1.
+ * the new delta is the file-scope newdelta; the new branchhead is the function-static newbranch.
+ * the new number is placed into num.
+ * Return -1 on error, 1 if a lock is removed, 0 otherwise.
+ */
+{
+ struct branchhead *bhead, **btrail;
+ struct buf branchnum;
+ int removedlock, result;
+ unsigned field, numlength;
+ static struct branchhead newbranch; /* new branch to be inserted */
+
+ numlength = countnumflds(num->string);
+
+ if (branchpoint->branches==nil) {
+ /* start first branch */
+ branchpoint->branches = &newbranch;
+ if (numlength==0) {
+ bufscpy(num, branchpoint->num);
+ bufscat(num, ".1.1");
+ } else if (numlength&1) /* odd field count: only a branch number was given */
+ bufscat(num, ".1");
+ newbranch.nextbranch=nil;
+
+ } else if (numlength==0) {
+ /* append new branch to the end */
+ bhead=branchpoint->branches;
+ while (bhead->nextbranch) bhead=bhead->nextbranch;
+ bhead->nextbranch = &newbranch;
+ bufautobegin(&branchnum);
+ getbranchno(bhead->hsh->num, &branchnum);
+ incnum(branchnum.string, num);
+ bufautoend(&branchnum);
+ bufscat(num, ".1");
+ newbranch.nextbranch=nil;
+ } else {
+ /* place the branch properly */
+ field = numlength - (numlength&1 ^ 1);
+ /* field of branch number */
+ btrail = &branchpoint->branches;
+ while (0 < (result=cmpnumfld(num->string,(*btrail)->hsh->num,field))) {
+ btrail = &(*btrail)->nextbranch;
+ if (!*btrail) { /* ran off the end of the list: force the insert/append path below */
+ result = -1;
+ break;
+ }
+ }
+ if (result < 0) {
+ /* insert/append new branchhead */
+ newbranch.nextbranch = *btrail;
+ *btrail = &newbranch;
+ if (numlength&1) bufscat(num, ".1");
+ } else {
+ /* branch exists; append to end */
+ bufautobegin(&branchnum);
+ getbranchno(num->string, &branchnum);
+ targetdelta=genrevs(branchnum.string,(char*)nil,
+ (char*)nil,(char*)nil,&gendeltas);
+ bufautoend(&branchnum);
+ if (!targetdelta)
+ return -1;
+ if (cmpnum(num->string,targetdelta->num) <= 0) {
+ error("deltanumber %s too low; must be higher than %s",
+ num->string,targetdelta->num);
+ return -1;
+ }
+ if (0 <= (removedlock = removelock(targetdelta))) {
+ if (numlength&1)
+ incnum(targetdelta->num,num);
+ targetdelta->next = &newdelta;
+ newdelta.next = 0;
+ }
+ return removedlock;
+ /* Don't do anything to newbranch. */
+ }
+ }
+ newbranch.hsh = &newdelta; /* reached only when a new branch head was linked in above */
+ newdelta.next=nil;
+ return 0;
+}
+
+ static int
+addsyms(num) /* Call addsymbol() for every -n/-N name on assoclst, associating it with revision num. */
+ char const *num;
+{
+ register struct Symrev *p;
+
+ for (p = assoclst; p; p = p->nextsym)
+ if (!addsymbol(num, p->ssymbol, p->override))
+ return false; /* stop at the first name addsymbol() rejects */
+ return true; /* also the result when the list is empty */
+}
+
+
+ static void
+incnum(onum,nnum)
+ char const *onum;
+ struct buf *nnum;
+/* Increment the last field of revision number onum by one and
+ * place the result into nnum.
+ */
+{
+ register char *tp, *np;
+ register size_t l;
+
+ l = strlen(onum);
+ bufalloc(nnum, l+2); /* l+2: the result can be one digit longer than onum, plus the terminating NUL */
+ np = tp = nnum->string;
+ VOID strcpy(np, onum);
+ for (tp = np + l; np != tp; ) /* walk the last field right to left, carrying while digits are '9' */
+ if (isdigit(*--tp)) {
+ if (*tp != '9') {
+ ++*tp;
+ return; /* no further carry needed */
+ }
+ *tp = '0';
+ } else {
+ tp++; /* hit a non-digit: step back onto the first digit of the last field */
+ break;
+ }
+ /* We changed 999 to 000; now change it to 1000. */
+ *tp = '1';
+ tp = np + l;
+ *tp++ = '0';
+ *tp = 0;
+}
+
+
+
+ static int
+removelock(delta)
+struct hshentry * delta;
+/* function: Finds the lock held by caller on delta,
+ * removes it, and returns nonzero if successful.
+ * Print an error message and return -1 if there is no such lock.
+ * An exception is if !StrictLocks, and caller is the owner of
+ * the RCS file. If caller does not have a lock in this case,
+ * return 0; return 1 if a lock is actually removed.
+ */
+{
+ register struct lock *next, **trail;
+ char const *num;
+
+ num=delta->num;
+ for (trail = &Locks; (next = *trail); trail = &next->nextlock) /* trail addresses the link that points at next */
+ if (next->delta == delta)
+ if (strcmp(getcaller(), next->login) == 0) {
+ /* We found a lock on delta by caller; delete it. */
+ *trail = next->nextlock;
+ delta->lockedby = 0;
+ return 1;
+ } else {
+ error("revision %s locked by %s",num,next->login); /* the first lock found on delta has a different login */
+ return -1;
+ }
+ if (!StrictLocks && myself(RCSstat.st_uid)) /* the non-strict, owner-of-RCS-file exception described above */
+ return 0;
+ error("no lock set by %s for revision %s", getcaller(), num);
+ return -1;
+}
+
+
+
+ static char const *
+getcurdate()
+/* Return a pointer to the current date. */
+{
+ static char buffer[datesize]; /* date buffer */
+ time_t t;
+
+ if (!buffer[0]) { /* convert only while the static buffer is still empty; later calls reuse it */
+ t = time((time_t *)0);
+ if (t == -1)
+ faterror("time not available");
+ time2date(t, buffer);
+ }
+ return buffer;
+}
+
+ static int
+#if has_prototypes
+fixwork(mode_t newworkmode, char const *mtime)
+ /* The `#if has_prototypes' is needed because mode_t might promote to int. */
+#else
+ fixwork(newworkmode, mtime)
+ mode_t newworkmode;
+ char const *mtime;
+#endif
+{ /* Try to give the existing working file mode newworkmode and date mtime; main treats a nonzero result as failure. */
+ int r;
+ return
+ 1 < workstat.st_nlink /* refuse when the working file has more than one link */
+ || newworkmode&S_IWUSR && !myself(workstat.st_uid) /* or when it must be owner-writable and myself() rejects its owner */
+ ? -1
+ :
+ workstat.st_mode != newworkmode /* change the mode only when it differs */
+ &&
+ (r =
+# if has_fchmod
+ fchmod(Ifileno(workptr), newworkmode)
+# else
+ chmod(workfilename, newworkmode)
+# endif
+ ) != 0
+ ? r /* the mode change failed: hand back its result */
+ :
+ setfiledate(workfilename, mtime); /* otherwise the result is whatever setfiledate() returns */
+}
+
+ static int
+xpandfile(unexfile, dir, delta, exfilename)
+ RILE *unexfile;
+ char const *dir;
+ struct hshentry const *delta;
+ char const **exfilename;
+/*
+ * Read unexfile and copy it to a file whose name comes from makedirtemp(dir, 1),
+ * performing keyword substitution with data from delta (plain fastcopy if Expand == OLD_EXPAND).
+ * Return -1 if the output file cannot be opened, 1 if expansion occurred, 0 otherwise.
+ * On success the output stream is left in the file-scope variable exfile
+ * and its name is stored into *exfilename.
+ */
+{
+ char const *targetfname;
+ int e, r;
+
+ targetfname = makedirtemp(dir, 1);
+ if (!(exfile = fopen(targetfname, FOPEN_W_WORK))) {
+ eerror(targetfname);
+ error("can't expand working file");
+ return -1;
+ }
+ r = 0;
+ if (Expand == OLD_EXPAND)
+ fastcopy(unexfile,exfile);
+ else {
+ for (;;) {
+ e = expandline(unexfile,exfile,delta,false,(FILE*)nil);
+ if (e < 0)
+ break; /* NOTE(review): a negative expandline() result ends the loop but is not turned into -1 below */
+ r |= e; /* accumulate the per-call results; only bit 0 is returned */
+ if (e <= 1)
+ break; /* results of 0 or 1 end the loop; larger values continue it */
+ }
+ }
+ *exfilename = targetfname;
+ aflush(exfile);
+ return r & 1;
+}
+
+
+
+
+/* --------------------- G E T L O G M S G --------------------------------*/
+
+
+ static struct cbuf
+getlogmsg()
+/* Obtain and yield a log message.
+ * If a log message is given with -m, yield that message.
+ * If this is the initial revision, yield a standard log message.
+ * Otherwise, reads a character string from the terminal.
+ * Stops after reading EOF or a single '.' on a
+ * line. getlogmsg prompts the first time it is called for the
+ * log message; during all later calls it asks whether the previous
+ * log message can be reused.
+ */
+{
+ static char const
+ emptych[] = EMPTYLOG,
+ initialch[] = "Initial revision";
+ static struct cbuf const
+ emptylog = { emptych, sizeof(emptych)-sizeof(char) },
+ initiallog = { initialch, sizeof(initialch)-sizeof(char) };
+ static struct buf logbuf;
+ static struct cbuf logmsg; /* static, so the message survives for the reuse prompt on later files */
+
+ register char *tp;
+ register size_t i;
+ char const *caller;
+
+ if (msg.size) return msg; /* -m message takes precedence over everything below */
+
+ if (keepflag) {
+ /* generate std. log message */
+ caller = getcaller();
+ i = sizeof(ciklog)+strlen(caller)+3; /* presumably the length of the text sprintf writes below, assuming ciklog is a char array -- confirm */
+ bufalloc(&logbuf, i+datesize);
+ tp = logbuf.string;
+ VOID sprintf(tp, "%s%s at ", ciklog, caller);
+ VOID date2str(getcurdate(), tp+i); /* the date string is written at offset i */
+ logmsg.string = tp;
+ logmsg.size = strlen(tp);
+ return logmsg;
+ }
+
+ if (!targetdelta && (
+ cmpnum(newdelnum.string,"1.1")==0 ||
+ cmpnum(newdelnum.string,"1.0")==0
+ ))
+ return initiallog;
+
+ if (logmsg.size) {
+ /*previous log available*/
+ if (yesorno(true, "reuse log message of previous file? [yn](y): "))
+ return logmsg;
+ }
+
+ /* now read string from stdin */
+ logmsg = getsstdin("m", "log message", "", &logbuf);
+
+ /* now check whether the log message is not empty */
+ if (logmsg.size)
+ return logmsg;
+ return emptylog;
+}
+
+/* Append symbolic name sp to the end of the assoclst list; flag becomes its override value (true for -N, false for -n). */
+
+ static void
+addassoclst(flag, sp)
+int flag;
+char * sp;
+{
+ struct Symrev *pt;
+
+ pt = talloc(struct Symrev);
+ pt->ssymbol = sp; /* the pointer is stored, not a copy of the string */
+ pt->override = flag;
+ pt->nextsym = nil;
+ if (lastassoc)
+ lastassoc->nextsym = pt;
+ else
+ assoclst = pt; /* first entry: it becomes the head of the list */
+ lastassoc = pt;
+ return;
+}
diff --git a/gnu/usr.bin/rcs/co/Makefile b/gnu/usr.bin/rcs/co/Makefile
new file mode 100644
index 000000000000..e9de8da3c013
--- /dev/null
+++ b/gnu/usr.bin/rcs/co/Makefile
@@ -0,0 +1,7 @@
+PROG= co
+
+SRCS= co.c
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/co/co.1 b/gnu/usr.bin/rcs/co/co.1
new file mode 100644
index 000000000000..d9ce65e3d17e
--- /dev/null
+++ b/gnu/usr.bin/rcs/co/co.1
@@ -0,0 +1,569 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: co.1,v 5.7 1991/08/19 03:13:55 eggert Exp $
+.ds g \&\s-1UTC\s0
+.ds r \&\s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH CO 1 \*(Dt GNU
+.SH NAME
+co \- check out RCS revisions
+.SH SYNOPSIS
+.B co
+.RI [ options ] " file " .\|.\|.
+.SH DESCRIPTION
+.B co
+retrieves a revision from each \*r file and stores it into
+the corresponding working file.
+.PP
+Pathnames matching an \*r suffix denote \*r files;
+all others denote working files.
+Names are paired as explained in
+.BR ci (1).
+.PP
+Revisions of an \*r file may be checked out locked or unlocked. Locking a
+revision prevents overlapping updates. A revision checked out for reading or
+processing (e.g., compiling) need not be locked. A revision checked out
+for editing and later checkin must normally be locked. Checkout with locking
+fails if the revision to be checked out is currently locked by another user.
+(A lock may be broken with
+.BR rcs "(1).)\ \&"
+Checkout with locking also requires the caller to be on the access list of
+the \*r file, unless he is the owner of the
+file or the superuser, or the access list is empty.
+Checkout without locking is not subject to access list restrictions, and is
+not affected by the presence of locks.
+.PP
+A revision is selected by options for revision or branch number,
+checkin date/time, author, or state.
+When the selection options
+are applied in combination,
+.B co
+retrieves the latest revision
+that satisfies all of them.
+If none of the selection options
+is specified,
+.B co
+retrieves the latest revision
+on the default branch (normally the trunk, see the
+.B \-b
+option of
+.BR rcs (1)).
+A revision or branch number may be attached
+to any of the options
+.BR \-f ,
+.BR \-I ,
+.BR \-l ,
+.BR \-M ,
+.BR \-p ,
+.BR \-q ,
+.BR \-r ,
+or
+.BR \-u .
+The options
+.B \-d
+(date),
+.B \-s
+(state), and
+.B \-w
+(author)
+retrieve from a single branch, the
+.I selected
+branch,
+which is either specified by one of
+.BR \-f ,
+\&.\|.\|.,
+.BR \-u ,
+or the default branch.
+.PP
+A
+.B co
+command applied to an \*r
+file with no revisions creates a zero-length working file.
+.B co
+always performs keyword substitution (see below).
+.SH OPTIONS
+.TP
+.BR \-r [\f2rev\fP]
+retrieves the latest revision whose number is less than or equal to
+.I rev.
+If
+.I rev
+indicates a branch rather than a revision,
+the latest revision on that branch is retrieved.
+If
+.I rev
+is omitted, the latest revision on the default branch
+(see the
+.B \-b
+option of
+.BR rcs (1))
+is retrieved.
+If
+.I rev
+is
+.BR $ ,
+.B co
+determines the revision number from keyword values in the working file.
+Otherwise, a revision is composed of one or more numeric or symbolic fields
+separated by periods. The numeric equivalent of a symbolic field
+is specified with the
+.B \-n
+option of the commands
+.BR ci (1)
+and
+.BR rcs (1).
+.TP
+.BR \-l [\f2rev\fP]
+same as
+.BR \-r ,
+except that it also locks the retrieved revision for
+the caller.
+.TP
+.BR \-u [\f2rev\fP]
+same as
+.BR \-r ,
+except that it unlocks the retrieved revision if it was
+locked by the caller. If
+.I rev
+is omitted,
+.B \-u
+retrieves the revision locked by the caller, if there is one; otherwise,
+it retrieves the latest revision on the default branch.
+.TP
+.BR \-f [\f2rev\fP]
+forces the overwriting of the working file;
+useful in connection with
+.BR \-q .
+See also
+.SM "FILE MODES"
+below.
+.TP
+.B \-kkv
+Generate keyword strings using the default form, e.g.\&
+.B "$\&Revision: \*(Rv $"
+for the
+.B Revision
+keyword.
+A locker's name is inserted in the value of the
+.BR Header ,
+.BR Id ,
+and
+.B Locker
+keyword strings
+only as a file is being locked,
+i.e. by
+.B "ci\ \-l"
+and
+.BR "co\ \-l".
+This is the default.
+.TP
+.B \-kkvl
+Like
+.BR \-kkv ,
+except that a locker's name is always inserted
+if the given revision is currently locked.
+.TP
+.BR \-kk
+Generate only keyword names in keyword strings; omit their values.
+See
+.SM "KEYWORD SUBSTITUTION"
+below.
+For example, for the
+.B Revision
+keyword, generate the string
+.B $\&Revision$
+instead of
+.BR "$\&Revision: \*(Rv $".
+This option is useful to ignore differences due to keyword substitution
+when comparing different revisions of a file.
+.TP
+.BR \-ko
+Generate the old keyword string,
+present in the working file just before it was checked in.
+For example, for the
+.B Revision
+keyword, generate the string
+.B "$\&Revision: 1.1 $"
+instead of
+.B "$\&Revision: \*(Rv $"
+if that is how the string appeared when the file was checked in.
+This can be useful for binary file formats
+that cannot tolerate any changes to substrings
+that happen to take the form of keyword strings.
+.TP
+.BR \-kv
+Generate only keyword values for keyword strings.
+For example, for the
+.B Revision
+keyword, generate the string
+.B \*(Rv
+instead of
+.BR "$\&Revision: \*(Rv $".
+This can help generate files in programming languages where it is hard to
+strip keyword delimiters like
+.B "$\&Revision:\ $"
+from a string.
+However, further keyword substitution cannot be performed once the
+keyword names are removed, so this option should be used with care.
+Because of this danger of losing keywords,
+this option cannot be combined with
+.BR \-l ,
+and the owner write permission of the working file is turned off;
+to edit the file later, check it out again without
+.BR \-kv .
+.TP
+.BR \-p [\f2rev\fP]
+prints the retrieved revision on the standard output rather than storing it
+in the working file.
+This option is useful when
+.B co
+is part of a pipe.
+.TP
+.BR \-q [\f2rev\fP]
+quiet mode; diagnostics are not printed.
+.TP
+.BR \-I [\f2rev\fP]
+interactive mode;
+the user is prompted and questioned
+even if the standard input is not a terminal.
+.TP
+.BI \-d date
+retrieves the latest revision on the selected branch whose checkin date/time is
+less than or equal to
+.I date.
+The date and time may be given in free format.
+The time zone
+.B LT
+stands for local time;
+other common time zone names are understood.
+For example, the following
+.IR date s
+are equivalent
+if local time is January 11, 1990, 8pm Pacific Standard Time,
+eight hours west of Coordinated Universal Time (\*g):
+.RS
+.LP
+.RS
+.nf
+.ta \w'\f3Thu, 11 Jan 1990 20:00:00 \-0800\fP 'u
+.ne 9
+\f38:00 pm lt\fP
+\f34:00 AM, Jan. 12, 1990\fP note: default is \*g
+\f31990/01/12 04:00:00\fP \*r date format
+\f3Thu Jan 11 20:00:00 1990 LT\fP output of \f3ctime\fP(3) + \f3LT\fP
+\f3Thu Jan 11 20:00:00 PST 1990\fP output of \f3date\fP(1)
+\f3Fri Jan 12 04:00:00 GMT 1990\fP
+\f3Thu, 11 Jan 1990 20:00:00 \-0800\fP
+\f3Fri-JST, 1990, 1pm Jan 12\fP
+\f312-January-1990, 04:00-WET\fP
+.ta 4n +4n +4n +4n
+.fi
+.RE
+.LP
+Most fields in the date and time may be defaulted.
+The default time zone is \*g.
+The other defaults are determined in the order year, month, day,
+hour, minute, and second (most to least significant). At least one of these
+fields must be provided. For omitted fields that are of higher significance
+than the highest provided field, the time zone's current values are assumed.
+For all other omitted fields,
+the lowest possible values are assumed.
+For example, the date
+.B "20, 10:30"
+defaults to
+10:30:00 \*g of the 20th of the \*g time zone's current month and year.
+The date/time must be quoted if it contains spaces.
+.RE
+.TP
+.BR \-M [\f2rev\fP]
+Set the modification time on the new working file
+to be the date of the retrieved revision.
+Use this option with care; it can confuse
+.BR make (1).
+.TP
+.BI \-s state
+retrieves the latest revision on the selected branch whose state is set to
+.I state.
+.TP
+.BR \-w [\f2login\fP]
+retrieves the latest revision on the selected branch which was checked in
+by the user with login name
+.I login.
+If the argument
+.I login
+is
+omitted, the caller's login is assumed.
+.TP
+.BI \-j joinlist
+generates a new revision which is the join of the revisions on
+.I joinlist.
+This option is largely obsoleted by
+.BR rcsmerge (1)
+but is retained for backwards compatibility.
+.RS
+.PP
+The
+.I joinlist
+is a comma-separated list of pairs of the form
+.IB rev2 : rev3,
+where
+.I rev2
+and
+.I rev3
+are (symbolic or numeric)
+revision numbers.
+For the initial such pair,
+.I rev1
+denotes the revision selected
+by the above options
+.BR \-f ,
+\&.\|.\|.,
+.BR \-w .
+For all other pairs,
+.I rev1
+denotes the revision generated by the previous pair.
+(Thus, the output
+of one join becomes the input to the next.)
+.PP
+For each pair,
+.B co
+joins revisions
+.I rev1
+and
+.I rev3
+with respect to
+.I rev2.
+This means that all changes that transform
+.I rev2
+into
+.I rev1
+are applied to a copy of
+.I rev3.
+This is particularly useful if
+.I rev1
+and
+.I rev3
+are the ends of two branches that have
+.I rev2
+as a common ancestor. If
+.IR rev1 < rev2 < rev3
+on the same branch,
+joining generates a new revision which is like
+.I rev3,
+but with all changes that lead from
+.I rev1
+to
+.I rev2
+undone.
+If changes from
+.I rev2
+to
+.I rev1
+overlap with changes from
+.I rev2
+to
+.I rev3,
+.B co
+reports overlaps as described in
+.BR merge (1).
+.PP
+For the initial pair,
+.I rev2
+may be omitted. The default is the common
+ancestor.
+If any of the arguments indicate branches, the latest revisions
+on those branches are assumed.
+The options
+.B \-l
+and
+.B \-u
+lock or unlock
+.I rev1.
+.RE
+.TP
+.BI \-V n
+Emulate \*r version
+.I n,
+where
+.I n
+may be
+.BR 3 ,
+.BR 4 ,
+or
+.BR 5 .
+This may be useful when interchanging \*r files with others who are
+running older versions of \*r.
+To see which version of \*r your correspondents are running, have them invoke
+.B rlog
+on an \*r file;
+if none of the first few lines of output contain the string
+.B branch:
+it is version 3;
+if the dates' years have just two digits, it is version 4;
+otherwise, it is version 5.
+An \*r file generated while emulating version 3 will lose its default branch.
+An \*r revision generated while emulating version 4 or earlier will have
+a timestamp that is off by up to 13 hours.
+A revision extracted while emulating version 4 or earlier will contain
+dates of the form
+.IB yy / mm / dd
+instead of
+.IB yyyy / mm / dd
+and may also contain different white space in the substitution for
+.BR $\&Log$ .
+.TP
+.BI \-x "suffixes"
+Use
+.I suffixes
+to characterize \*r files.
+See
+.BR ci (1)
+for details.
+.SH "KEYWORD SUBSTITUTION"
+Strings of the form
+.BI $ keyword $
+and
+.BI $ keyword : .\|.\|. $
+embedded in
+the text are replaced
+with strings of the form
+.BI $ keyword : value $
+where
+.I keyword
+and
+.I value
+are pairs listed below.
+Keywords may be embedded in literal strings
+or comments to identify a revision.
+.PP
+Initially, the user enters strings of the form
+.BI $ keyword $ .
+On checkout,
+.B co
+replaces these strings with strings of the form
+.BI $ keyword : value $ .
+If a revision containing strings of the latter form
+is checked back in, the value fields will be replaced during the next
+checkout.
+Thus, the keyword values are automatically updated on checkout.
+This automatic substitution can be modified by the
+.B \-k
+options.
+.PP
+Keywords and their corresponding values:
+.TP
+.B $\&Author$
+The login name of the user who checked in the revision.
+.TP
+.B $\&Date$
+The date and time (\*g) the revision was checked in.
+.TP
+.B $\&Header$
+A standard header containing the full pathname of the \*r file, the
+revision number, the date (\*g), the author, the state,
+and the locker (if locked).
+.TP
+.B $\&Id$
+Same as
+.BR $\&Header$ ,
+except that the \*r filename is without a path.
+.TP
+.B $\&Locker$
+The login name of the user who locked the revision (empty if not locked).
+.TP
+.B $\&Log$
+The log message supplied during checkin, preceded by a header
+containing the \*r filename, the revision number, the author, and the date
+(\*g).
+Existing log messages are
+.I not
+replaced.
+Instead, the new log message is inserted after
+.BR $\&Log: .\|.\|. $ .
+This is useful for
+accumulating a complete change log in a source file.
+.TP
+.B $\&RCSfile$
+The name of the \*r file without a path.
+.TP
+.B $\&Revision$
+The revision number assigned to the revision.
+.TP
+.B $\&Source$
+The full pathname of the \*r file.
+.TP
+.B $\&State$
+The state assigned to the revision with the
+.B \-s
+option of
+.BR rcs (1)
+or
+.BR ci (1).
+.SH "FILE MODES"
+The working file inherits the read and execute permissions from the \*r
+file. In addition, the owner write permission is turned on, unless
+.B \-kv
+is set or the file
+is checked out unlocked and locking is set to strict (see
+.BR rcs (1)).
+.PP
+If a file with the name of the working file exists already and has write
+permission,
+.B co
+aborts the checkout,
+asking beforehand if possible.
+If the existing working file is
+not writable or
+.B \-f
+is given, the working file is deleted without asking.
+.SH FILES
+.B co
+accesses files much as
+.BR ci (1)
+does, except that it does not need to read the working file.
+.SH ENVIRONMENT
+.TP
+.B \s-1RCSINIT\s0
+options prepended to the argument list, separated by spaces.
+See
+.BR ci (1)
+for details.
+.SH DIAGNOSTICS
+The \*r pathname, the working pathname,
+and the revision number retrieved are
+written to the diagnostic output.
+The exit status is zero if and only if all operations were successful.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH "SEE ALSO"
+ci(1), ctime(3), date(1), ident(1), make(1),
+rcs(1), rcsdiff(1), rcsintro(1), rcsmerge(1), rlog(1),
+rcsfile(5)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
+.SH LIMITS
+Links to the \*r and working files are not preserved.
+.PP
+There is no way to selectively suppress the expansion of keywords, except
+by writing them differently. In nroff and troff, this is done by embedding the
+null-character
+.B \e&
+into the keyword.
+.SH BUGS
+The
+.B \-d
+option sometimes gets confused, and accepts no date before 1970.
+.br
diff --git a/gnu/usr.bin/rcs/co/co.c b/gnu/usr.bin/rcs/co/co.c
new file mode 100644
index 000000000000..9435574e7d05
--- /dev/null
+++ b/gnu/usr.bin/rcs/co/co.c
@@ -0,0 +1,769 @@
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+/*
+ * RCS checkout operation
+ */
+/*****************************************************************************
+ * check out revisions from RCS files
+ *****************************************************************************
+ */
+
+
+/* $Log: co.c,v $
+ * Revision 5.9 1991/10/07 17:32:46 eggert
+ * ci -u src/RCS/co.c,v src/co.c <<\.
+ * -k affects just working file, not RCS file.
+ *
+ * Revision 5.8 1991/08/19 03:13:55 eggert
+ * Warn before removing somebody else's file.
+ * Add -M. Fix co -j bugs. Tune.
+ *
+ * Revision 5.7 1991/04/21 11:58:15 eggert
+ * Ensure that working file is newer than RCS file after co -[lu].
+ * Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.6 1990/12/04 05:18:38 eggert
+ * Don't checkaccesslist() unless necessary.
+ * Use -I for prompts and -q for diagnostics.
+ *
+ * Revision 5.5 1990/11/01 05:03:26 eggert
+ * Fix -j. Add -I.
+ *
+ * Revision 5.4 1990/10/04 06:30:11 eggert
+ * Accumulate exit status across files.
+ *
+ * Revision 5.3 1990/09/11 02:41:09 eggert
+ * co -kv yields a readonly working file.
+ *
+ * Revision 5.2 1990/09/04 08:02:13 eggert
+ * Standardize yes-or-no procedure.
+ *
+ * Revision 5.0 1990/08/22 08:10:02 eggert
+ * Permit multiple locks by same user. Add setuid support.
+ * Remove compile-time limits; use malloc instead.
+ * Permit dates past 1999/12/31. Switch to GMT.
+ * Make lock and temp files faster and safer.
+ * Ansify and Posixate. Add -k, -V. Remove snooping. Tune.
+ *
+ * Revision 4.7 89/05/01 15:11:41 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.6 88/08/09 19:12:15 eggert
+ * Fix "co -d" core dump; rawdate wasn't always initialized.
+ * Use execv(), not system(); fix putchar('\0') and diagnose() botches; remove lint
+ *
+ * Revision 4.5 87/12/18 11:35:40 narten
+ * lint cleanups (from Guy Harris)
+ *
+ * Revision 4.4 87/10/18 10:20:53 narten
+ * Updating version numbers changes relative to 1.1, are actually
+ * relative to 4.2
+ *
+ * Revision 1.3 87/09/24 13:58:30 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:21:38 jenkins
+ * Port to suns
+ *
+ * Revision 4.2 83/12/05 13:39:48 wft
+ * made rewriteflag external.
+ *
+ * Revision 4.1 83/05/10 16:52:55 wft
+ * Added option -u and -f.
+ * Added handling of default branch.
+ * Replaced getpwuid() with getcaller().
+ * Removed calls to stat(); now done by pairfilenames().
+ * Changed and renamed rmoldfile() to rmworkfile().
+ * Replaced catchints() calls with restoreints(), unlink()--link() with rename();
+ *
+ * Revision 3.7 83/02/15 15:27:07 wft
+ * Added call to fastcopy() to copy remainder of RCS file.
+ *
+ * Revision 3.6 83/01/15 14:37:50 wft
+ * Added ignoring of interrupts while RCS file is renamed; this avoids
+ * deletion of RCS files during the unlink/link window.
+ *
+ * Revision 3.5 82/12/08 21:40:11 wft
+ * changed processing of -d to use DATEFORM; removed actual from
+ * call to preparejoin; re-fixed printing of done at the end.
+ *
+ * Revision 3.4 82/12/04 18:40:00 wft
+ * Replaced getdelta() with gettree(), SNOOPDIR with SNOOPFILE.
+ * Fixed printing of "done".
+ *
+ * Revision 3.3 82/11/28 22:23:11 wft
+ * Replaced getlogin() with getpwuid(), flcose() with ffclose(),
+ * %02d with %.2d, mode generation for working file with WORKMODE.
+ * Fixed nil printing. Fixed -j combined with -l and -p, and exit
+ * for non-existing revisions in preparejoin().
+ *
+ * Revision 3.2 82/10/18 20:47:21 wft
+ * Mode of working file is now maintained even for co -l, but write permission
+ * is removed.
+ * The working file inherits its mode from the RCS file, plus write permission
+ * for the owner. The write permission is not given if locking is strict and
+ * co does not lock.
+ * An existing working file without write permission is deleted automatically.
+ * Otherwise, co asks (empty answer: abort co).
+ * Call to getfullRCSname() added, check for write error added, call
+ * for getlogin() fixed.
+ *
+ * Revision 3.1 82/10/13 16:01:30 wft
+ * fixed type of variables receiving from getc() (char -> int).
+ * removed unused variables.
+ */
+
+
+
+
+#include "rcsbase.h"
+
+static char const *getancestor P((char const*,char const*));
+static int buildjoin P((char const*));
+static int preparejoin P((void));
+static int rmlock P((struct hshentry const*));
+static int rmworkfile P((void));
+static void cleanup P((void));
+
+static char const quietarg[] = "-q";
+
+static char const *expandarg, *join, *suffixarg, *versionarg;
+static char const *joinlist[joinlength]; /* revisions to be joined */
+static FILE *neworkptr;
+static int exitstatus;
+static int forceflag;
+static int lastjoin; /* index of last element in joinlist */
+static int lockflag; /* -1 -> unlock, 0 -> do nothing, 1 -> lock */
+static int mtimeflag;
+static struct hshentries *gendeltas; /* deltas to be generated */
+static struct hshentry *targetdelta; /* final delta to be generated */
+static struct stat workstat;
+
+/*
+ * Main program of co.
+ * Parses the options (those found via getRCSINIT() as well as the command
+ * line), then processes each file argument in turn: the file pair is opened
+ * with pairfilenames(), the delta tree is read with gettree(), the target
+ * revision is selected with genrevs(), the lock is added or removed as
+ * requested by lockflag, the revision is generated with buildrevision()
+ * (and joined if -j was given), and finally, unless output goes to stdout,
+ * the new working file is moved into place with chnamemod().
+ * A failure in any step abandons the current file with `continue';
+ * cleanup() then runs from the loop condition before the next file.
+ */
+mainProg(coId, "co", "$Id: co.c,v 5.9 1991/10/07 17:32:46 eggert Exp $")
+{
+	static char const cmdusage[] =
+		"\nco usage: co -{fIlMpqru}[rev] -ddate -jjoinlist -ksubst -sstate -w[login] -Vn -xsuffixes file ...";
+
+	char *a, **newargv;
+	char const *author, *date, *rev, *state;
+	char const *joinfilename, *newdate, *neworkfilename;
+	int changelock; /* 1 if a lock has been changed, -1 if error */
+	int expmode, r, tostdout, workstatstat;
+	struct buf numericrev; /* expanded revision number */
+	char finaldate[datesize];
+
+	setrid();
+	author = date = rev = state = nil;
+	bufautobegin(&numericrev);
+	expmode = -1;		/* negative: no -k option seen yet */
+	suffixes = X_DEFAULT;
+	tostdout = false;
+
+	argc = getRCSINIT(argc, argv, &newargv);
+	argv = newargv;
+	while (a = *++argv, 0<--argc && *a++=='-') {
+		switch (*a++) {
+
+		case 'r':
+		revno:
+			/* Shared by every option that accepts an attached [rev]. */
+			if (*a) {
+				if (rev) warn("redefinition of revision number");
+				rev = a;
+			}
+			break;
+
+		case 'f':
+			forceflag=true;
+			goto revno;
+
+		case 'l':
+			if (lockflag < 0) {
+				warn("-l overrides -u.");
+			}
+			lockflag = 1;
+			goto revno;
+
+		case 'u':
+			/* The later option wins, so here it is -u that prevails. */
+			if (0 < lockflag) {
+				warn("-u overrides -l.");
+			}
+			lockflag = -1;
+			goto revno;
+
+		case 'p':
+			tostdout = true;
+			goto revno;
+
+		case 'I':
+			interactiveflag = true;
+			goto revno;
+
+		case 'q':
+			quietflag=true;
+			goto revno;
+
+		case 'd':
+			if (date)
+				redefined('d');
+			str2date(a, finaldate);
+			date=finaldate;
+			break;
+
+		case 'j':
+			if (*a) {
+				if (join) redefined('j');
+				join = a;
+			}
+			break;
+
+		case 'M':
+			mtimeflag = true;
+			goto revno;
+
+		case 's':
+			if (*a) {
+				if (state) redefined('s');
+				state = a;
+			}
+			break;
+
+		case 'w':
+			if (author) redefined('w');
+			if (*a)
+				author = a;
+			else
+				author = getcaller();
+			break;
+
+		case 'x':
+			/* The whole argument is kept so buildjoin() can pass it on. */
+			suffixarg = *argv;
+			suffixes = a;
+			break;
+
+		case 'V':
+			versionarg = *argv;
+			setRCSversion(versionarg);
+			break;
+
+		case 'k': /* set keyword expand mode */
+			expandarg = *argv;
+			if (0 <= expmode) redefined('k');
+			if (0 <= (expmode = str2expmode(a)))
+				break;
+			/* fall into */
+		default:
+			faterror("unknown option: %s%s", *argv, cmdusage);
+
+		};
+	} /* end of option processing */
+
+	if (argc<1) faterror("no input file%s", cmdusage);
+	if (tostdout)
+# if text_equals_binary_stdio || text_work_stdio
+		workstdout = stdout;
+# else
+		if (!(workstdout = fdopen(STDOUT_FILENO, FOPEN_W_WORK)))
+			efaterror("stdout");
+# endif
+
+	/* now handle all filenames */
+	do {
+		ffree();
+
+		if (pairfilenames(argc, argv, lockflag?rcswriteopen:rcsreadopen, true, false) <= 0)
+			continue;
+
+		/* now RCSfilename contains the name of the RCS file, and finptr
+		 * points at it. workfilename contains the name of the working file.
+		 * Also, RCSstat has been set.
+		 */
+		diagnose("%s --> %s\n", RCSfilename,tostdout?"stdout":workfilename);
+
+		workstatstat = -1;
+		if (tostdout) {
+			neworkfilename = 0;
+			neworkptr = workstdout;
+		} else {
+			/* workstatstat is 0 only if a working file already exists. */
+			workstatstat = stat(workfilename, &workstat);
+			neworkfilename = makedirtemp(workfilename, 1);
+			if (!(neworkptr = fopen(neworkfilename, FOPEN_W_WORK))) {
+				if (errno == EACCES)
+					error("%s: parent directory isn't writable",
+						workfilename
+					);
+				else
+					eerror(neworkfilename);
+				continue;
+			}
+		}
+
+		gettree(); /* reads in the delta tree */
+
+		if (Head==nil) {
+			/* no revisions; create empty file */
+			diagnose("no revisions present; generating empty revision 0.0\n");
+			Ozclose(&fcopy);
+			if (workstatstat == 0)
+				if (!rmworkfile()) continue;
+			changelock = 0;
+			newdate = 0;
+			/* Can't reserve a delta, so don't call addlock */
+		} else {
+			if (rev!=nil) {
+				/* expand symbolic revision number */
+				if (!expandsym(rev, &numericrev))
+					continue;
+			} else
+				/* No revision given: with -u look for a lock held by
+				 * the caller, otherwise use the default branch.
+				 */
+				switch (lockflag<0 ? findlock(false,&targetdelta) : 0) {
+				default:
+					continue;
+				case 0:
+					bufscpy(&numericrev, Dbranch?Dbranch:"");
+					break;
+				case 1:
+					bufscpy(&numericrev, targetdelta->num);
+					break;
+				}
+			/* get numbers of deltas to be generated */
+			if (!(targetdelta=genrevs(numericrev.string,date,author,state,&gendeltas)))
+				continue;
+			/* check reservations */
+			changelock =
+				lockflag < 0 ?
+					rmlock(targetdelta)
+				: lockflag == 0 ?
+					0
+				:
+					addlock(targetdelta);
+
+			if (
+				changelock < 0 ||
+				changelock && !checkaccesslist() ||
+				!dorewrite(lockflag, changelock)
+			)
+				continue;
+
+			if (0 <= expmode)
+				Expand = expmode;
+			if (0 < lockflag && Expand == VAL_EXPAND) {
+				error("cannot combine -kv and -l");
+				continue;
+			}
+
+			if (join && !preparejoin()) continue;
+
+			diagnose("revision %s%s\n",targetdelta->num,
+				0<lockflag ? " (locked)" :
+				lockflag<0 ? " (unlocked)" : "");
+
+			/* Prepare to remove old working file if necessary. */
+			if (workstatstat == 0)
+				if (!rmworkfile()) continue;
+
+			/* skip description */
+			getdesc(false); /* don't echo*/
+
+			locker_expansion = 0 < lockflag;
+			joinfilename = buildrevision(
+				gendeltas, targetdelta,
+				join&&tostdout ? (FILE*)0 : neworkptr,
+				Expand!=OLD_EXPAND
+			);
+# if !large_memory
+			if (fcopy == neworkptr)
+				fcopy = 0; /* Don't close it twice. */
+# endif
+			if_advise_access(changelock && gendeltas->first!=targetdelta,
+				finptr, MADV_SEQUENTIAL
+			);
+
+			if (!donerewrite(changelock))
+				continue;
+
+			newdate = targetdelta->date;
+			if (join) {
+				/* A joined file gets no revision date (see -M below). */
+				newdate = 0;
+				if (!joinfilename) {
+					aflush(neworkptr);
+					joinfilename = neworkfilename;
+				}
+				if (!buildjoin(joinfilename))
+					continue;
+			}
+		}
+		if (!tostdout) {
+			r = 0;
+			if (mtimeflag && newdate) {
+				if (!join)
+					aflush(neworkptr);
+				r = setfiledate(neworkfilename, newdate);
+			}
+			if (r == 0) {
+				ignoreints();
+				r = chnamemod(&neworkptr, neworkfilename, workfilename,
+					WORKMODE(RCSstat.st_mode,
+						!(Expand==VAL_EXPAND || lockflag<=0&&StrictLocks)
+					)
+				);
+				keepdirtemp(neworkfilename);
+				restoreints();
+			}
+			if (r != 0) {
+				eerror(workfilename);
+				error("see %s", neworkfilename);
+				continue;
+			}
+			diagnose("done\n");
+		}
+	} while (cleanup(),
+		++argv, --argc >=1);
+
+	tempunlink();
+	Ofclose(workstdout);
+	exitmain(exitstatus);
+
+} /* end of main (co) */
+
+	static void
+cleanup()
+/* Per-file cleanup: set exitstatus to EXIT_FAILURE if nerror is nonzero,
+ * close finptr, frewrite, fcopy (when !large_memory) and neworkptr,
+ * then call dirtempunlink().
+ * A stream that is the same as workstdout is not closed here.
+ */
+{
+	if (nerror != 0)
+		exitstatus = EXIT_FAILURE;
+
+	Izclose(&finptr);
+	Ozclose(&frewrite);
+# if !large_memory
+	if (workstdout != fcopy)
+		Ozclose(&fcopy);
+# endif
+	if (workstdout != neworkptr)
+		Ozclose(&neworkptr);
+
+	dirtempunlink();
+}
+
+#if lint
+# define exiterr coExit
+#endif
+/* Error exit: call dirtempunlink() and tempunlink() (presumably to remove
+ * any temporary files still registered), then terminate the process at
+ * once with _exit(EXIT_FAILURE).  This function does not return.
+ */
+ exiting void
+exiterr()
+{
+ dirtempunlink();
+ tempunlink();
+ _exit(EXIT_FAILURE);
+}
+
+
+/*****************************************************************
+ * The following routines are auxiliary routines
+ *****************************************************************/
+
+	static int
+rmworkfile()
+/* Decide whether the existing working file (described by workstat) may be
+ * replaced.
+ * If the file has no write permission bit set, or forceflag is set,
+ * permission is granted without asking.
+ * Otherwise yesorno() is consulted with a default answer of "no";
+ * if the answer is no, an error is reported.
+ * Yield true if the file may be replaced, false otherwise.
+ * Nothing is unlinked here; the caller does that later.
+ */
+{
+	int writable = (workstat.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH)) != 0;
+
+	if (!writable || forceflag)
+		return true;
+
+	/* File is writable and -f was not given: ask first. */
+	if (yesorno(false, "writable %s exists%s; remove it? [ny](n): ",
+		workfilename,
+		myself(workstat.st_uid) ? "" : ", and you do not own it"
+	))
+		return true;
+
+	error(!quietflag && ttystdin()
+		? "checkout aborted"
+		: "writable %s exists; checkout aborted", workfilename);
+	return false;
+}
+
+
+	static int
+rmlock(delta)
+	struct hshentry const *delta;
+/* Remove the caller's lock on delta from the Locks list.
+ * Yield -1 (after reporting an error) if the first lock found on delta
+ * belongs to someone else, 0 if delta is not locked at all,
+ * and 1 if the caller's lock was found and removed.
+ */
+{
+	register struct lock *cur, *prev;
+	struct lock dummy;	/* list header, so the first lock needs no special case */
+	char const *num;
+	int whomatch, nummatch;
+
+	num = delta->num;
+	dummy.nextlock = Locks;
+	prev = &dummy;
+	for (cur = Locks;  cur != nil;  prev = cur, cur = cur->nextlock) {
+		whomatch = strcmp(getcaller(), cur->login);
+		nummatch = strcmp(num, cur->delta->num);
+		if (nummatch != 0)
+			continue;	/* lock is on some other revision */
+		if (whomatch != 0) {
+			error("revision %s locked by %s; use co -r or rcs -u",num,cur->login);
+			return -1;
+		}
+		/* found a lock on delta by caller; delete it */
+		prev->nextlock = cur->nextlock;
+		Locks = dummy.nextlock;
+		cur->delta->lockedby = nil;	/* reset locked-by */
+		return 1;	/* success */
+	}
+	return 0;	/* no lock on delta */
+}
+
+
+
+
+/*****************************************************************
+ * The rest of the routines are for handling joins
+ *****************************************************************/
+
+
+	static char const *
+addjoin(joinrev)
+	char *joinrev;
+/* Resolve the revision that starts at joinrev and append its number to
+ * joinlist.  The revision text ends at the first NUL, blank, tab, newline,
+ * colon, comma or semicolon; that character is overwritten with NUL while
+ * the revision is looked up and is restored afterwards.
+ * Yield the address of the terminating character,
+ * or nil if no such revision exists.
+ */
+{
+	register char *end;
+	register struct hshentry const *d;
+	char terminator;
+	struct buf numrev;
+	struct hshentries *joindeltas;
+
+	/* Find the end of the revision text. */
+	end = joinrev;
+	while (
+		*end != 0 &&
+		*end != ' ' && *end != '\t' && *end != '\n' &&
+		*end != ':' && *end != ',' && *end != ';'
+	)
+		end++;
+
+	terminator = *end;
+	*end = 0;
+	bufautobegin(&numrev);
+	d = expandsym(joinrev, &numrev)
+		? genrevs(numrev.string,(char*)nil,(char*)nil,(char*)nil,&joindeltas)
+		: 0;
+	bufautoend(&numrev);
+	*end = terminator;
+
+	if (!d)
+		return nil;
+	joinlist[++lastjoin] = d->num;
+	return end;
+}
+
+	static int
+preparejoin()
+/* Parse the join list that join points to, storing the revision numbers
+ * into joinlist and the index of the last one into lastjoin.
+ * Each element is either rev2:rev3 or, for the first element only, a
+ * single revision whose partner is derived with getancestor().
+ * Yield true if successful, false (after reporting an error) otherwise.
+ */
+{
+	register char const *s;
+
+	lastjoin = -1;
+	s = join;
+	for (;;) {
+		/* skip separators before the next pair */
+		while (*s==' ' || *s=='\t' || *s==',')
+			s++;
+		if (!*s)
+			break;
+		if (joinlength-2 <= lastjoin) {
+			error("too many joins");
+			return false;
+		}
+		if (!(s = addjoin(s)))
+			return false;
+		while (*s==' ' || *s=='\t')
+			s++;
+
+		if (*s == ':') {
+			/* explicit second revision of the pair */
+			do s++;
+			while (*s==' ' || *s=='\t');
+			if (!*s) {
+				error("join pair incomplete");
+				return false;
+			}
+			if (!(s = addjoin(s)))
+				return false;
+			continue;
+		}
+
+		/* Only the first pair may omit the common ancestor. */
+		if (lastjoin != 0) {
+			error("join pair incomplete");
+			return false;
+		}
+		joinlist[1] = joinlist[0];
+		lastjoin = 1;
+		/* derive common ancestor */
+		if (!(joinlist[0] = getancestor(targetdelta->num,joinlist[1])))
+			return false;
+	}
+
+	if (lastjoin < 1) {
+		error("empty join");
+		return false;
+	}
+	return true;
+}
+
+
+
+	static char const *
+getancestor(r1, r2)
+	char const *r1, *r2;
+/* Yield the common ancestor of r1 and r2 if successful; otherwise report
+ * an error and yield nil.
+ * Work reliably only if r1 and r2 are not branch numbers.
+ * The result points into static storage.
+ */
+{
+	static struct buf t1, t2;
+
+	unsigned n1, n2, common;
+	char const *low;
+
+	n1 = countnumflds(r1);
+	n2 = countnumflds(r2);
+
+	/* Both on the main trunk, or identical: no ancestor is defined. */
+	if ((n1 <= 2 && n2 <= 2) || cmpnum(r1,r2) == 0)
+		goto undefined;
+
+	/* Count the leading fields, taken two at a time, that r1 and r2 share.
+	 * This terminates since r1 and r2 are not the same; see above.
+	 */
+	common = 0;
+	while (cmpnumfld(r1, r2, common+1)==0 && cmpnumfld(r1, r2, common+2)==0)
+		common += 2;
+
+	if (common != 0) {
+		if (cmpnumfld(r1, r2, common+1) != 0)
+			return partialno(&t1,r1,common);
+		goto undefined;
+	}
+
+	/* no common prefix; common ancestor on main trunk */
+	VOID partialno(&t1, r1, n1>2 ? (unsigned)2 : n1);
+	VOID partialno(&t2, r2, n2>2 ? (unsigned)2 : n2);
+	low = cmpnum(t1.string,t2.string)<0 ? t1.string : t2.string;
+	if (cmpnum(low,r1)!=0 && cmpnum(low,r2)!=0)
+		return low;
+
+    undefined:
+	error("common ancestor of %s and %s undefined", r1, r2);
+	return nil;
+}
+
+
+
+	static int
+buildjoin(initialfile)
+	char const *initialfile;
+/* Function: merge pairs of elements in joinlist into initialfile
+ * If workstdout is set, copy result to stdout.
+ * All unlinking of initialfile, rev2, and rev3 should be done by tempunlink().
+ * For each pair, both revisions are checked out into temporary files by
+ * running CO with -p, and MERGE is then run on initialfile and those files.
+ * Yield true if successful; otherwise increment nerror and yield false.
+ */
+{
+	struct buf commarg;
+	struct buf subs;
+	char const *rev2, *rev3;
+	int i;
+	/* Worst-case argument vectors, including the nil terminator:
+	 * cov:    4 fixed slots + 3 optional args + quietarg + RCSfilename + nil = 10
+	 * mergev: 7 fixed slots + quietarg + "-p" + 3 file names + nil = 13
+	 * (mergev used to have only 12 slots, so the terminator was stored
+	 * past the end of the array when both -q and -p were in effect).
+	 */
+	char const *cov[10], *mergev[13];
+	char const **p;
+
+	bufautobegin(&commarg);
+	bufautobegin(&subs);
+	rev2 = maketemp(0);
+	rev3 = maketemp(3); /* buildrevision() may use 1 and 2 */
+
+	/* The leading nil slots are left for runv() -- presumably it fills them in. */
+	cov[0] = nil;
+	/* cov[1] setup below */
+	cov[2] = CO;
+	/* cov[3] setup below */
+	p = &cov[4];
+	if (expandarg) *p++ = expandarg;
+	if (suffixarg) *p++ = suffixarg;
+	if (versionarg) *p++ = versionarg;
+	*p++ = quietarg;
+	*p++ = RCSfilename;
+	*p = nil;
+
+	mergev[0] = nil;
+	mergev[1] = nil;
+	mergev[2] = MERGE;
+	mergev[3] = mergev[5] = "-L";
+	/* rest of mergev setup below */
+
+	i=0;
+	while (i<lastjoin) {
+		/*prepare marker for merge*/
+		if (i==0)
+			bufscpy(&subs, targetdelta->num);
+		else {
+			bufscat(&subs, ",");
+			bufscat(&subs, joinlist[i-2]);
+			bufscat(&subs, ":");
+			bufscat(&subs, joinlist[i-1]);
+		}
+		/* check out the first revision of the pair into rev2 */
+		diagnose("revision %s\n",joinlist[i]);
+		bufscpy(&commarg, "-p");
+		bufscat(&commarg, joinlist[i]);
+		cov[1] = rev2;
+		cov[3] = commarg.string;
+		if (runv(cov))
+			goto badmerge;
+		/* check out the second revision of the pair into rev3 */
+		diagnose("revision %s\n",joinlist[i+1]);
+		bufscpy(&commarg, "-p");
+		bufscat(&commarg, joinlist[i+1]);
+		cov[1] = rev3;
+		cov[3] = commarg.string;
+		if (runv(cov))
+			goto badmerge;
+		diagnose("merging...\n");
+		mergev[4] = subs.string;
+		mergev[6] = joinlist[i+1];
+		p = &mergev[7];
+		if (quietflag) *p++ = quietarg;
+		/* only the last merge writes to stdout */
+		if (lastjoin<=i+2 && workstdout) *p++ = "-p";
+		*p++ = initialfile;
+		*p++ = rev2;
+		*p++ = rev3;
+		*p = nil;
+		switch (runv(mergev)) {
+		case DIFF_FAILURE: case DIFF_SUCCESS:
+			break;
+		default:
+			goto badmerge;
+		}
+		i=i+2;
+	}
+	bufautoend(&commarg);
+	bufautoend(&subs);
+	return true;
+
+    badmerge:
+	nerror++;
+	bufautoend(&commarg);
+	bufautoend(&subs);
+	return false;
+}
diff --git a/gnu/usr.bin/rcs/doc/rcs.ms b/gnu/usr.bin/rcs/doc/rcs.ms
new file mode 100644
index 000000000000..7b3f80772699
--- /dev/null
+++ b/gnu/usr.bin/rcs/doc/rcs.ms
@@ -0,0 +1,1524 @@
+.\" Format this file with:
+.\" pic file | tbl | troff -ms
+.\"
+.\" \*s stands for $, and avoids problems when this file is checked in.
+.ds s $
+.\" PS and PE center pic diagrams. (The corresponding ms-macros may not.)
+.de PS
+.nr pE (\\n(.lu-\\$2u)/2u
+.in +\\n(pEu
+.ne \\$1u
+..
+.de PE
+.in -\\n(pEu
+..
+.de D(
+.DS
+.nr VS 12p
+.vs 12p
+.I
+..
+.de D)
+.DE
+.nr VS 18p
+.vs 18p
+.R
+..
+.de Id
+.ND \\$4
+..
+.Id $Id: rcs.ms,v 5.2 1991/01/03 10:57:28 eggert Exp $
+.RP
+.TL
+RCS\*-A System for Version Control
+.sp
+.AU
+Walter F. Tichy
+.AI
+Department of Computer Sciences
+Purdue University
+West Lafayette, Indiana 47907
+.sp
+.AB
+An important problem in program development and maintenance is version control,
+i.e., the task of keeping a software system consisting of many versions and
+configurations well organized.
+The Revision Control System (RCS)
+is a software tool that assists with that task.
+RCS manages revisions of text documents, in particular source programs,
+documentation, and test data.
+It automates the storing, retrieval, logging and identification of revisions,
+and it provides selection mechanisms for composing configurations.
+This paper introduces basic version control concepts and
+discusses the practice of version control
+using RCS.
+For conserving space, RCS stores deltas, i.e., differences between
+successive revisions. Several delta storage methods are discussed.
+Usage statistics show that RCS's delta storage method is
+space and time efficient.
+The paper concludes with a detailed survey of version control tools.
+.sp
+\fBKeywords\fR: configuration management, history management,
+version control, revisions, deltas.
+.AE
+.FS
+An earlier version of this paper was published in
+.I "Software\*-Practice & Experience"
+.B 15 ,
+7 (July 1985), 637-654.
+.FE
+.nr VS 18p
+.LP
+.NH
+Introduction
+.PP
+Version control is the task of keeping software
+systems consisting of many versions and configurations well organized.
+The Revision Control System (RCS) is a set of UNIX
+commands that assist with that task.
+.PP
+RCS's primary function is to manage \fIrevision groups\fR.
+A revision group is a set of text documents, called \fIrevisions\fR,
+that evolved from each other. A new revision is
+created by manually editing an existing one.
+RCS organizes the revisions into an ancestral tree. The initial revision
+is the root of the tree, and the tree edges indicate
+from which revision a given one evolved.
+Besides managing individual revision groups, RCS provides
+flexible selection functions for composing configurations.
+RCS may be combined with MAKE\u1\d,
+resulting in a powerful package for version control.
+.PP
+RCS also offers facilities for
+merging updates with customer modifications,
+for distributed software development, and
+for automatic identification.
+Identification is the `stamping'
+of revisions and configurations with unique markers.
+These markers are akin to serial numbers,
+telling software maintainers unambiguously which configuration
+is before them.
+.PP
+RCS is designed for both production and experimental
+environments.
+In production environments,
+access controls detect update conflicts and prevent overlapping changes.
+In experimental environments, where strong controls are
+counterproductive, it is possible to loosen the controls.
+.PP
+Although RCS was originally intended for programs, it is useful for any
+text that is revised frequently and whose previous revisions must be
+preserved. RCS has been applied successfully to store the source
+text for drawings, VLSI layouts, documentation, specifications,
+test data, form letters and articles.
+.PP
+This paper discusses the practice of
+version control using RCS.
+It also introduces basic version control concepts,
+useful for clarifying current practice and designing similar systems.
+Revision groups of individual components are treated in the next three sections,
+and the extensions to configurations follow.
+Because of its size, a survey of version control tools
+appears at the end of the paper.
+.NH
+Getting started with RCS
+.PP
+Suppose a text file \fIf.c\fR is to be placed under control of RCS.
+Invoking the check-in command
+.D(
+ci f.c
+.D)
+creates a new revision group with the contents of
+\fIf.c\fR as the initial
+revision (numbered 1.1)
+and stores the group into the file \fIf.c,v\fR.
+Unless told otherwise, the command deletes \fIf.c\fR.
+It also asks for a description of the group.
+The description should state the common purpose of all revisions in the group,
+and becomes part of the group's documentation.
+All later check-in commands will ask for a log entry,
+which should summarize the changes made.
+(The first revision is assigned a default log message,
+which just records the fact that it is the initial revision.)
+.PP
+Files ending in \fI,v\fR
+are called \fIRCS files\fR (\fIv\fR stands for \fIv\fRersions);
+the others are called working files.
+To get back the working file \fIf.c\fR in the previous example,
+execute the check-out command:
+.D(
+co f.c
+.D)
+.R
+This command extracts the latest revision from
+the revision group \fIf.c,v\fR and writes
+it into \fIf.c\fR.
+The file \fIf.c\fR can now be edited and, when finished,
+checked back in with \fIci\fR:
+.D(
+ci f.c
+.D)
+\fICi\fR assigns number 1.2 to
+the new revision.
+If \fIci\fR complains with the message
+.D(
+ci error: no lock set by <login>
+.D)
+then the system administrator has decided to configure RCS for a
+production environment by enabling the `strict locking feature'.
+If this feature is enabled, all RCS files are initialized
+such that check-in operations require a lock on the previous revision
+(the one from which the current one evolved).
+Locking prevents overlapping modifications if several people work on the same file.
+If locking is required, the revision should
+have been locked during the check-out by using
+the option \fI\-l\fR:
+.D(
+co \-l f.c
+.D)
+Of course it is too late now for the check-out with locking, because
+\fIf.c\fR has already been changed; checking out the file again
+would overwrite the modifications.
+(To prevent accidental overwrites, \fIco\fR senses the presence
+of a working file and asks whether the user really intended to overwrite it.
+The overwriting check-out is sometimes useful for
+backing up to the previous revision.)
+To be able to proceed with the check-in in the present case, first execute
+.D(
+rcs \-l f.c
+.D)
+This command retroactively locks the latest revision, unless someone
+else locked it in the meantime. In that case, the two programmers
+involved have to negotiate whose
+modifications should take precedence.
+.PP
+If an RCS file is private, i.e., if only the owner of the file is expected
+to deposit revisions into it, the strict locking feature is unnecessary and
+may be disabled.
+If strict locking is disabled,
+the owner of the RCS file need not have a lock for check-in.
+For safety reasons, all others
+still do. Turning strict locking off and on is done with the commands:
+.D(
+rcs \-U f.c \fRand\fP rcs \-L f.c
+.D)
+These commands disable and enable, respectively, the strict locking feature
+for each RCS file individually.
+The system administrator only decides whether strict locking is
+enabled initially.
+.PP
+To reduce the clutter in a working directory, all RCS files can be moved
+to a subdirectory with the name \fIRCS\fR.
+RCS commands look first into that directory for RCS files.
+All the commands presented above work
+with the \fIRCS\fR subdirectory without change.\(dg
+.FS \(dg
+Pairs of RCS and working files can actually be specified in 3 ways:
+a) both are given, b) only the working file is given, c) only the
+RCS file is given.
+If a pair is given, both files may have arbitrary path prefixes;
+RCS commands pair them up intelligently.
+.FE
+.PP
+It may be undesirable that \fIci\fR deletes the working file.
+For instance, sometimes one would like to save the current revision,
+but continue editing.
+Invoking
+.D(
+ci \-l f.c
+.D)
+checks in \fIf.c\fR as usual, but performs an additional
+check-out with locking afterwards. Thus, the working file does
+not disappear after the check-in.
+Similarly, the option
+\fI\-u\fR does a check-in followed by a check-out without
+locking. This option is useful if the file is needed for compilation after the check-in.
+Both options update the identification markers in the working file
+(see below).
+.PP
+Besides the operations \fIci\fR and \fIco\fR, RCS provides the following
+commands:
+.sp 0
+.nr VS 12p
+.vs 12p
+.TS
+tab(%);
+li l.
+ident%extract identification markers
+rcs%change RCS file attributes
+rcsclean%remove unchanged working files (optional)
+rcsdiff%compare revisions
+rcsfreeze%record a configuration (optional)
+rcsmerge%merge revisions
+rlog%read log messages and other information in RCS files
+.TE
+A synopsis of these commands appears in the Appendix.
+.NH 2
+Automatic Identification
+.PP
+RCS can stamp source and object code with special identification strings,
+similar to product and serial numbers.
+To obtain such identification, place the marker
+.D(
+\*sId\*s
+.D)
+into the text of a revision, for instance inside a comment.
+The check-out operation will replace this marker with a string of the form
+.D(
+\*sId: filename revisionnumber date time author state locker \*s
+.D)
+This string need never be touched, because \fIco\fR keeps it
+up to date automatically.
+To propagate the marker into object code, simply put
+it into a literal character string. In C, this is done as follows:
+.D(
+static char rcsid[] = \&"\*sId\*s\&";
+.D)
+The command \fIident\fR extracts such markers from any file, in particular from
+object code.
+\fIIdent\fR helps to find out
+which revisions of which modules were used in a given program.
+It returns a complete and unambiguous component list,
+from which a copy of the program can be reconstructed.
+This facility is invaluable for program maintenance.
+.PP
+There are several additional identification markers, one for each component
+of \*sId\*s.
+The marker
+.D(
+\*sLog\*s
+.D)
+has a similar function. It accumulates
+the log messages that are requested during check-in.
+Thus, one can maintain the complete history of a revision directly inside it,
+by enclosing it in a comment.
+Figure 1 is a partial reproduction of a log contained in revision 4.1 of
+the file \fIci.c\fR. The log appears at the beginning of the file,
+and makes it easy to determine what the recent modifications were.
+.sp
+.nr VS 12p
+.vs 12p
+.ne 18
+.nf
+.in +0.5i
+/* \*sLog: ci.c,v \*s
+ * Revision 4.1 1983/05/10 17:03:06 wft
+ * Added option \-d and \-w, and updated assignment of date, etc. to new delta.
+ * Added handling of default branches.
+ *
+ * Revision 3.9 1983/02/15 15:25:44 wft
+ * Added call to fastcopy() to copy remainder of RCS file.
+ *
+ * Revision 3.8 1983/01/14 15:34:05 wft
+ * Added ignoring of interrupts while new RCS file is renamed;
+ * avoids deletion of RCS files by interrupts.
+ *
+ * Revision 3.7 1982/12/10 16:09:20 wft
+ * Corrected checking of return code from diff.
+ * An RCS file now inherits its mode during the first ci from the working file,
+ * except that write permission is removed.
+ */
+.in 0
+.ce 1
+Figure 1. Log entries produced by the marker \*sLog\*s.
+.fi
+.nr VS 18p
+.vs 18p
+.sp 0
+.LP
+Since revisions are stored in the form of differences,
+each log message is
+physically stored once,
+independent of the number of revisions present.
+Thus, the \*sLog\*s marker incurs negligible space overhead.
+.NH
+The RCS Revision Tree
+.PP
+RCS arranges revisions in an ancestral tree.
+The \fIci\fR command builds this tree; the auxiliary command \fIrcs\fR
+prunes it.
+The tree has a root revision, normally numbered 1.1, and successive revisions
+are numbered 1.2, 1.3, etc. The first field of a revision number
+is called the \fIrelease number\fR and the second one
+the \fIlevel number\fR. Unless given explicitly,
+the \fIci\fR command assigns a new revision number
+by incrementing the level number of the previous revision.
+The release number must be incremented explicitly, using the
+\fI\-r\fR option of \fIci\fR.
+Assuming there are revisions 1.1, 1.2, and 1.3 in the RCS file f.c,v, the command
+.D(
+ci \-r2.1 f.c \fRor\fP ci \-r2 f.c
+.D)
+assigns the number 2.1 to the new revision.
+Later check-ins without the \fI\-r\fR option will assign the numbers 2.2, 2.3,
+and so on.
+The release number should be incremented only at major transition points
+in the development, for instance when a new release of a software product has
+been completed.
+.NH 2
+When are branches needed?
+.PP
+A young revision tree is slender:
+It consists of only one branch, called the trunk.
+As the tree ages, side branches may form.
+Branches are needed in the following 4 situations.
+.IP "\fITemporary fixes\fR"
+.sp 0
+Suppose a tree has 5 revisions grouped in 2 releases,
+as illustrated in Figure 2.
+Revision 1.3, the last one of release 1, is in operation at customer sites,
+while release 2 is in active development.
+.ne 4
+.PS 4i
+.ps -2
+box "1.1"
+arrow
+box "1.2"
+arrow
+box "1.3"
+arrow
+box "2.1"
+arrow
+box "2.2"
+arrow dashed
+.ps +2
+.PE
+.ce 1
+Figure 2. A slender revision tree.
+.sp 0
+Now imagine a customer requesting a fix of
+a problem in revision 1.3, although actual development has moved on
+to release 2. RCS does not permit an extra
+revision to be spliced in between 1.3 and 2.1, since that would not reflect
+the actual development history. Instead, create a branch
+at revision 1.3, and check in the fix on that branch.
+The first branch starting at 1.3 has number 1.3.1, and
+the revisions on that branch are numbered 1.3.1.1, 1.3.1.2, etc.
+The double numbering is needed to allow for another
+branch at 1.3, say 1.3.2.
+Revisions on the second branch would be numbered
+1.3.2.1, 1.3.2.2, and so on.
+The following steps create
+branch 1.3.1 and add revision 1.3.1.1:
+.sp 0
+.I
+.nr VS 12p
+.vs 12p
+.TS
+tab(%);
+l l l.
+ %co \-r1.3 f.c% \*- check out revision 1.3
+ %edit f.c% \*- change it
+ %ci \-r1.3.1 f.c% \*- check it in on branch 1.3.1
+.TE
+.nr VS 18p
+.vs 18p
+.R
+This sequence of commands transforms the tree of Figure 2 into
+the one in Figure 3.
+Note that it may be necessary to incorporate the differences
+between 1.3 and 1.3.1.1
+into a revision at level 2. The operation \fIrcsmerge\fR automates this
+process (see the Appendix).
+.ne 7
+.PS 4i
+.ps -2
+ box "1.1"
+ arrow
+ box "1.2"
+ arrow
+R13: box "1.3"
+ arrow
+R21: box "2.1"
+ arrow
+R22: box "2.2"
+ arrow dashed
+ line invis down from R21.s
+RB1: box "1.3.1.1"
+ arrow dashed right from RB1.e
+ arrow from R13.s to RB1.w
+.ps +2
+.PE
+.ce 1
+Figure 3. A revision tree with one side branch.
+.sp
+.IP "\fIDistributed development and customer modifications\fR"
+.sp 0
+Assume a situation as in Figure 2, where revision 1.3 is in operation
+at several customer sites,
+while release 2 is in development.
+Customer sites should use RCS to store the distributed software.
+However, customer modifications should not be placed on the same branch
+as the distributed source; instead, they should be placed on a side branch.
+When the next software distribution arrives,
+it should be appended to the trunk of
+the customer's RCS file, and the customer
+can then merge the local modifications back into the new release.
+In the above example, a
+customer's RCS file would contain the following tree, assuming
+that the customer has received revision 1.3, added his local modifications
+as revision 1.3.1.1, then received revision 2.4, and merged
+2.4 and 1.3.1.1, resulting in 2.4.1.1.
+.ne 7
+.PS 4i
+.ps -2
+R13: box "1.3"
+ line invis
+R21: box invis
+ line invis
+R22: box invis
+ line invis
+R24: box "2.4"
+ line invis
+R25: box invis
+ line invis
+ arrow from R13.e to R24.w
+ line invis down from R21.s
+RB1: box "1.3.1.1"
+ arrow from R13.s to RB1.w
+ right
+ line invis down from R25.s
+RB2: box "2.4.1.1"
+ arrow from R24.s to RB2.w
+.ps +2
+.PE
+.ce 1
+Figure 4. A customer's revision tree with local modifications.
+.sp 1
+This approach is actually practiced in the CSNET project,
+where several universities and a company cooperate
+in developing a national computer network.
+.IP "\fIParallel development\fR"
+.sp 0
+Sometimes it is desirable to explore an alternate design or
+a different implementation technique in parallel with the
+main line development. Such development
+should be carried out on a side branch.
+The experimental changes may later be moved into the main line, or abandoned.
+.IP "\fIConflicting updates\fR"
+.sp 0
+A common occurrence is that one programmer
+has checked out a revision, but cannot complete the assignment
+for some reason. In the meantime, another person
+must perform another modification
+immediately. In that case, the second person should check out the same revision,
+modify it, and check it in on a side branch, for later merging.
+.PP
+Every node in a revision tree consists of the following attributes:
+a revision number, a check-in date and time, the author's identification,
+a log entry, a state and the actual text. All these attributes
+are determined at the time the revision is checked in.
+The state attribute indicates the status of a revision.
+It is set automatically to `experimental' during check-in.
+A revision can later be promoted to a higher status, for example
+`stable' or `released'. The set of states is user-defined.
+.NH 2
+Revisions are represented as deltas
+.PP
+For conserving space, RCS stores revisions in the form
+of deltas, i.e., as differences between revisions.
+The user interface completely hides this fact.
+.PP
+A delta is a sequence of edit commands that transforms one string
+into another. The deltas employed by RCS are line-based, which means
+that the only edit commands allowed are insertion and deletion of lines.
+If a single character in a line is changed, the
+edit scripts consider the entire line changed.
+The program \fIdiff\fR\u2\d
+produces a small, line-based delta between pairs of text files.
+A character-based edit script would take much longer to compute,
+and would not be significantly shorter.
+.PP
+Using deltas is a classical space-time tradeoff: deltas reduce the
+space consumed, but increase access time.
+However, a version control tool should impose as little delay
+as possible on programmers.
+Excessive delays discourage the use of version controls,
+or induce programmers to take shortcuts that compromise system integrity.
+To gain reasonably fast access time for both editing and compiling,
+RCS arranges deltas in the following way.
+The most recent revision on the trunk is stored intact.
+All other revisions on the trunk are stored as reverse deltas.
+A reverse delta describes how to go backward in the development history:
+it produces the desired revision if applied to the successor of that revision.
+This implementation has the advantage
+that extraction of the latest revision is a simple and fast copy
+operation.
+Adding a new revision to the trunk is also fast: \fIci\fR simply
+adds the new revision intact, replaces the previous
+revision with a reverse delta, and keeps the rest of the old deltas.
+Thus, \fIci\fR requires the computation
+of only one new delta.
+.PP
+Branches need special treatment. The naive solution would be to
+store complete copies for the tips of all branches.
+Clearly, this approach would cost too much space. Instead,
+RCS uses \fIforward\fR deltas for branches. Regenerating a revision
+on a side branch proceeds as follows. First, extract the latest revision
+on the trunk; secondly, apply reverse deltas until the fork revision for
+the branch is obtained; thirdly, apply forward deltas until the desired
+branch revision is reached. Figure 5 illustrates a tree with
+one side branch. Triangles pointing to the left and right represent
+reverse and forward deltas, respectively.
+.ne 8
+.PS 4i
+.ps -2
+define BD X [line invis $1 right .5;
+line up .3 then left .5 down .3 then right .5 down .3 then up .3] X
+
+define FD X [line invis $1 right .5;
+line left .5 down .3 then up .6 then right .5 down .3;] X
+
+right
+D11: BD(" 1.1")
+ arrow right from D11.e
+D12: BD(" 1.2")
+ arrow right from D12.e
+D13: BD(" 1.3")
+ arrow right from D13.e
+D21: BD(" 2.1")
+ arrow right from D21.e
+D22: box "2.2"
+ line invis down from D21.s
+F1: FD("1.3.1.1 ")
+ arrow from D13.se to F1.w
+ arrow from F1.e right
+ right
+F2: FD("1.3.1.2 ")
+.ps +2
+.PE
+.ce 1
+Figure 5. A revision tree with reverse and forward deltas.
+.sp 0
+.PP
+Although implementing fast check-out for the latest trunk revision,
+this arrangement has the disadvantage that generation of other revisions
+takes time proportional to the number of deltas applied. For example,
+regenerating the branch tip in Figure 5 requires application of five
+deltas (including the initial one). Since usage statistics show that
+the latest trunk revision is the one that is retrieved in 95 per cent
+of all cases (see the section on usage statistics), biasing check-out time
+in favor of that revision results in significant savings.
+However, careful implementation of the delta application process is
+necessary to provide low retrieval overhead for other revisions, in
+particular for branch tips.
+.PP
+There are several techniques for delta application.
+The naive one is to pass each delta to a general-purpose text editor.
+A prototype of RCS invoked the UNIX editor \fIed\fR both
+for applying deltas and for expanding the identification markers.
+Although this approach was easy to implement, its performance was poor, owing to the
+high start-up costs and excess generality of \fIed\fR. An intermediate
+version of RCS used a special-purpose, stream-oriented editor.
+This technique reduced the cost of applying a delta to the cost of
+checking out the latest trunk revision. The reason for this behavior
+is that each delta application involves a complete pass over
+the preceding revision.
+.PP
+However, there is a much better algorithm. Note that the deltas are
+line oriented and that most of the work of a stream editor involves
+copying unchanged lines from one revision to the next. A faster
+algorithm avoids unnecessary copying of character strings by using
+a \fIpiece table\fR.
+A piece table is a one-dimensional array, specifying how a given
+revision is `pieced together' from lines in the RCS file.
+Suppose piece table \fIPT\dr\u\fR represents revision \fIr\fR.
+Then \fIPT\dr\u[i]\fR contains the starting position of line \fIi\fR
+of revision \fIr\fR.
+Application of the next delta transforms piece table \fIPT\dr\u\fR
+into \fIPT\dr+1\u\fR. For instance, a delete command removes a
+series of entries from the piece table. An insertion command inserts
+new entries, moving the entries following the insertion point further down the
+array. The inserted entries point to the text lines in the delta.
+Thus, no I/O is involved except for reading the delta itself. When all
+deltas have been applied to the piece table, a sequential pass
+through the table looks up each line in the RCS file and copies it to
+the output file, updating identification markers at the same time.
+Of course, the RCS file must permit random access, since the copied
+lines are scattered throughout that file. Figure 6 illustrates an
+RCS file with two revisions and the corresponding piece tables.
+.ne 13
+.sp 6
+.ce 1
+\fIFigure 6 is not available.\fP
+.sp 5
+.ce 1
+Figure 6. An RCS file and its piece tables.
+.sp 0
+.PP
+The piece table approach has the property that the time for applying a single
+delta is roughly determined by the size of the delta, and not by the
+size of the revision. For example, if a delta is
+10 per cent of the size of a revision, then applying it takes only
+10 per cent of the time to generate the latest trunk revision. (The stream
+editor would take 100 per cent.)
+.PP
+There is an important alternative for representing deltas that affects
+performance. SCCS\u3\d,
+a precursor of RCS, uses \fIinterleaved\fR deltas.
+A file containing interleaved deltas is partitioned into blocks of lines.
+Each block has a header that specifies to which revision(s) the block
+belongs. The blocks are sorted out in such a way that a single
+pass over the file can pick up all the lines belonging to a given
+revision. Thus, the regeneration time for all revisions is the same:
+all headers must be inspected, and the associated blocks either copied
+or skipped. As the number of revisions increases, the cost of retrieving
+any revision is much higher than the cost of checking out the
+latest trunk revision with reverse deltas. A detailed comparison
+of SCCS's interleaved deltas and RCS's reverse deltas can be found
+in Reference 4.
+This reference considers the version of RCS with the
+stream editor only. The piece table method improves performance
+further, so that RCS is always faster than SCCS, except if 10
+or more deltas are applied.
+.PP
+Additional speed-up for both delta methods can be obtained by caching
+the most recently generated revision, as has been implemented in DSEE.\u5\d
+With caching, access time to frequently used revisions can approach normal file
+access time, at the cost of some additional space.
+.NH
+Locking: A Controversial Issue
+.PP
+The locking mechanism for RCS was difficult to design.
+The problem and its solution are first presented in their `pure' form,
+followed by a discussion of the complications
+caused by `real-world' considerations.
+.PP
+RCS must prevent two or more persons from depositing competing changes of the
+same revision.
+Suppose two programmers check out revision 2.4 and
+modify it. Programmer A checks in a revision before programmer B\&.
+Unfortunately, programmer B has not seen A's
+changes, so the effect is that A's changes are covered up by B's deposit.
+A's changes are not lost since all revisions
+are saved, but they are confined to a single revision.\(dd
+.FS \(dd
+Note that this problem is entirely different from the atomicity problem.
+Atomicity means that
+concurrent update operations on the same RCS file cannot be permitted,
+because that may result in inconsistent data.
+Atomic updates are essential (and implemented in RCS),
+but do not solve the conflict discussed here.
+.FE
+.PP
+This conflict is prevented in RCS by locking.
+Whenever someone intends to edit a revision (as opposed
+to reading or compiling it), the revision should be checked out
+and locked,
+using the \fI\-l\fR option on \fIco\fR. On subsequent check-in,
+\fIci\fR tests the lock and then removes it.
+At most one programmer at a time may
+lock a particular revision, and only this programmer may check in
+the succeeding revision.
+Thus, while a revision is locked, it is the exclusive responsibility
+of the locker.
+.PP
+An important maxim for software tools like RCS is that they must
+not stand in the way of making progress with a project.
+This consideration leads to several weakenings of the locking mechanism.
+First of all, even if a revision is locked, it can
+still be checked out. This is necessary if other people
+wish to compile or inspect the locked revision
+while the next one is in preparation. The only operations they
+cannot do are to lock the revision or to check in the succeeding one. Secondly,
+check-in operations on other branches in the RCS file are still possible; the
+locking of one revision does not affect any other revision.
+Thirdly, revisions are occasionally locked for a long period of time
+because a programmer is absent or otherwise unable to complete
+the assignment. If another programmer has to make a pressing change,
+there are the following three alternatives for making progress:
+a) find out who is holding the lock and ask that person to release it;
+b) check out the locked revision, modify it, check it
+in on a branch, and merge the changes later;
+c) break the lock. Breaking a lock leaves a highly visible
+trace, namely an electronic mail message that is sent automatically to the
+holder of the lock, recording the breaker and a commentary requested from him.
+Thus, breaking locks is tolerated under certain circumstances,
+but will not go unnoticed.
+Experience has shown that the automatic mail message attaches a high enough
+stigma to lock breaking,
+such that programmers break locks only in real emergencies,
+or when a co-worker resigns and leaves locked revisions behind.
+.PP
+If an RCS file is private, i.e., when a programmer owns an RCS file
+and does not expect anyone else to perform check-in operations,
+locking is an unnecessary nuisance.
+In this case,
+the `strict locking feature' discussed earlier may be disabled,
+provided that file protection
+is set such that only the owner may write the RCS file.
+This has the effect that only the owner can check in revisions,
+and that no lock is needed for doing so.
+.PP
+As added protection,
+each RCS file contains an access list that specifies the users
+who may execute update operations. If an access list is empty,
+only normal UNIX file protection applies. Thus, the access list is
+useful for restricting the set of people who would otherwise have update
+permission. Just as with locking, the access list
+has no effect on read-only operations such as \fIco\fR. This approach
+is consistent with the UNIX philosophy of openness, which contributes
+to a productive software development environment.
+.NH
+Configuration Management
+.PP
+The preceding sections described how RCS deals with revisions of individual
+components; this section discusses how to handle configurations.
+A configuration is a set of revisions, where each revision comes
+from a different revision group, and the revisions are selected
+according to a certain criterion.
+For example,
+in order to build a functioning compiler, the `right'
+revisions from the scanner, the parser, the optimizer
+and the code generator must be combined.
+RCS, in conjunction with MAKE,
+provides a number of facilities to effect a smooth selection.
+.NH 2
+RCS Selection Functions
+.PP
+.IP "\fIDefault selection\fR"
+.sp 0
+During development, the usual selection criterion is to choose
+the latest revision of all components. The \fIco\fR command
+makes this selection by default. For example, the command
+.D(
+co *,v
+.D)
+retrieves the latest revision on the default branch of each RCS file
+in the current directory.
+The default branch is usually the trunk, but may be
+set to be a side branch.
+Side branches as defaults are needed in distributed software development,
+as discussed in the section on the RCS revision tree.
+.sp
+.IP "\fIRelease based selection\fR"
+.sp 0
+Specifying a release or branch number selects the latest revision in
+that release or branch.
+For instance,
+.D(
+co \-r2 *,v
+.D)
+retrieves the latest revision with release number 2 from each RCS file.
+This selection is convenient if a release has been completed and
+development has moved on to the next release.
+.sp
+.IP "\fIState and author based selection\fR"
+.sp 0
+If the highest level number within a given release number
+is not the desired one,
+the state attribute can help. For example,
+.D(
+co \-r2 \-sReleased *,v
+.D)
+retrieves the latest revision with release number 2 whose state attribute
+is `Released'.
+Of course, the state attribute has to be set appropriately, using the
+\fIci\fR or \fIrcs\fR commands.
+Another alternative is to select a revision by its author,
+using the \fI\-w\fR option.
+.sp
+.IP "\fIDate based selection\fR"
+.sp 0
+Revisions may also be selected by date.
+Suppose a release of an entire system was
+completed and current on March 4, at 1:00 p.m. local time. Then the command
+.D(
+co \-d'March 4, 1:00 pm LT' *,v
+.D)
+checks out all the components of that release, independent of the numbering.
+The \fI\-d\fR option specifies a `cutoff date', i.e.,
+the revision selected has a check-in date that
+is closest to, but not after, the date given.
+.IP "\fIName based selection\fR"
+.sp 0
+The most powerful selection function is based on assigning symbolic
+names to revisions and branches.
+In large systems, a single release number or date is not sufficient
+to collect the appropriate revisions from all groups.
+For example, suppose one wishes to combine release 2
+of one subsystem and release 15 of another.
+Most likely, the creation dates of those releases differ also.
+Thus, a single revision number or date passed to the \fIco\fR command
+will not suffice to select the right revisions.
+Symbolic revision numbers solve this problem.
+Each RCS file may contain a set of symbolic names that are mapped
+to numeric revision numbers. For example, assume
+the symbol \fIV3\fR is bound to release number 2 in file \fIs,v\fR, and to
+revision number 15.9 in \fIt,v\fR.
+Then the single command
+.D(
+co \-rV3 s,v t,v
+.D)
+retrieves the latest revision of release 2 from \fIs,v\fR,
+and revision 15.9 from \fIt,v\fR.
+In a large system with many modules, checking out all
+revisions with one command greatly simplifies configuration management.
+.PP
+Judicious use of symbolic revision numbers helps with organizing
+large configurations.
+A special command, \fIrcsfreeze\fR,
+assigns a symbolic revision number to a selected revision
+in every RCS file.
+\fIRcsfreeze\fR effectively freezes a configuration.
+The assigned symbolic revision number selects all components
+of the configuration.
+If necessary, symbolic numbers
+may even be intermixed with numeric ones. Thus, \fIV3.5\fR in the
+above example
+would select revision 2.5 in \fIs,v\fR and branch 15.9.5 in \fIt,v\fR.
+.PP
+The options \fI\-r\fR, \fI\-s\fR, \fI\-w\fR and \fI\-d\fR
+may be combined. If a branch is given, the latest revision
+on that branch satisfying all conditions is retrieved;
+otherwise, the default branch is used.
+.NH 2
+Combining MAKE and RCS
+.PP
+MAKE\u1\d
+is a program that processes configurations.
+It is driven by configuration specifications
+recorded in a special file, called a `Makefile'.
+MAKE avoids redundant processing steps
+by comparing creation dates of source and processed objects.
+For example, when instructed to compile all
+modules of a given system, it only recompiles
+those source modules that were changed
+since they were processed last.
+.PP
+MAKE has been extended with an auto-checkout feature for RCS.*
+.FS *
+This auto-checkout extension is available only in some versions of MAKE,
+e.g. GNU MAKE.
+.FE
+When a certain file to be processed is not present,
+MAKE attempts a check-out operation.
+If successful, MAKE performs the required processing, and then deletes
+the checked out file to conserve space.
+The selection parameters discussed above can be passed to MAKE
+either as parameters, or directly embedded in the Makefile.
+MAKE has also been extended to search the subdirectory named \fIRCS\fR
+for needed files, rather than just the current working directory.
+However, if a working file is present, MAKE totally ignores the corresponding
+RCS file and uses the working file.
+(In newer versions of MAKE distributed by AT&T and others,
+auto-checkout can be
+achieved with the rule DEFAULT, instead of a special extension of MAKE.
+However, a file checked out by the rule DEFAULT
+will not be deleted after processing. \fIRcsclean\fR can be
+used for that purpose.)
+.PP
+With auto-checkout, RCS/MAKE can effect a selection rule
+especially tuned for multi-person software development and maintenance.
+In these situations,
+programmers should obtain configurations that consist of
+the revisions they have personally checked out plus the latest
+checked in revision of all other revision groups.
+This schema can be set up as follows.
+.PP
+Each programmer chooses a working directory
+and places into it a symbolic link, named \fIRCS\fR,
+to the directory containing the relevant RCS files.
+The symbolic link makes sure that \fIco\fR and \fIci\fR
+operations need only specify the working files, and that
+the Makefile need not be changed.
+The programmer then checks out the needed files and modifies them.
+If MAKE is invoked,
+it composes configurations by selecting those
+revisions that are checked out, and the rest from the
+subdirectory \fIRCS\fR.
+The latter selection may be controlled by a symbolic
+revision number or any of the other selection criteria.
+If there are several programmers editing in separate working directories,
+they are insulated from each other's changes until checking in their
+modifications.
+.PP
+Similarly, a maintainer can recreate an older configuration
+by starting to work in an empty working directory.
+During the initial MAKE invocation, all revisions are selected from RCS files.
+As the maintainer checks out files and modifies them,
+a new configuration is gradually built up.
+Every time MAKE is invoked, it substitutes the modified revisions
+into the configuration being manipulated.
+.PP
+A final application of RCS is to use it for storing Makefiles.
+Revision groups of Makefiles represent
+multiple versions of configurations.
+Whenever a configuration is baselined or distributed,
+the best approach is to unambiguously fix
+the configuration with a symbolic revision number by calling
+\fIrcsfreeze\fR,
+to embed that symbol into the Makefile, and to
+check in the Makefile (using the same symbolic revision number).
+With this approach, old configurations
+can be regenerated easily and reliably.
+.NH
+Usage Statistics
+.PP
+The following usage statistics were collected on two DEC VAX-11/780
+computers of the Purdue Computer Science Department. Both machines
+are mainly used for research purposes. Thus, the data
+reflect an environment in which the majority of projects
+involve prototyping and advanced software development,
+but relatively little long-term maintenance.
+.PP
+For the first experiment,
+the \fIci\fR and \fIco\fR operations were instrumented
+to log the number of backward and forward deltas applied.
+The data were collected during a 13 month period
+from Dec. 1982 to Dec. 1983.
+Table I summarizes the results.
+.sp 0
+.nr VS 12p
+.vs 12p
+.TS
+center,box,tab(#);
+c|c|c|c|c s|c s
+c|c|c|c|c s|c s
+l|n|n|n|n n|n n.
+Operation#Total#Total deltas#Mean deltas#Operations#Branch
+ #operations #applied#applied#with >1 delta#operations
+_
+co # 7867# 9320#1.18#509#(6%)#203#(3%)
+ci # 3468# 2207#0.64# 85#(2%)# 75#(2%)
+ci & co#11335#11527#1.02#594#(5%)#278#(2%)
+.TE
+.ce 1
+Table I. Statistics for \fIco\fR and \fIci\fR operations.
+.nr VS 18p
+.vs 18p
+.PP
+The first two lines show statistics for check-out and check-in;
+the third line shows the combination.
+Recall that \fIci\fR performs an implicit check-out to obtain
+a revision for computing the delta.
+In all measures presented, the most recent revision (stored intact)
+counts as one delta. The number of deltas applied represents
+the number of passes necessary, where the first `pass' is a copying step.
+.PP
+Note that the check-out operation is executed more than
+twice as frequently as the check-in operation.
+The fourth column gives the mean number of deltas
+applied in all three cases.
+For \fIci\fR, the mean number of deltas applied is less
+than one.
+The reasons are that the initial check-in requires no delta at all, and that
+the only time \fIci\fR requires more than one delta is for branches.
+Column 5 shows the actual number of operations that applied more than one
+delta.
+The last column indicates that branches were not used often.
+.PP
+The last three columns demonstrate that the most recent trunk revision
+is by far the most frequently accessed.
+For RCS, check-out of
+this revision is a simple copy operation, which is the absolute minimum
+given the copy-semantics of \fIco\fR.
+Access to older revisions and branches
+is more common in non-academic environments,
+yet even if access to older deltas were an order
+of magnitude more frequent,
+the combined average number of deltas applied would still be below 1.2.
+Since RCS is faster than SCCS for up to 10 delta applications,
+reverse deltas are clearly the method of choice.
+.PP
+The second experiment, conducted in March of 1984,
+involved surveying the existing RCS files
+on our two machines. The goal was to determine the mean number of
+revisions per RCS file, as well as the space consumed by them.
+Table II shows the results. (Tables I and II were produced at different
+times and are unrelated.)
+.sp 0
+.nr VS 12p
+.vs 12p
+.TS
+center,box,tab(#);
+c | c | c | c | c | c | c
+c | c | c | c | c | c | c
+l | n | n | n | n | n | n.
+ #Total RCS#Total#Mean#Mean size of#Mean size of#Overhead
+ #files#revisions#revisions#RCS files#revisions
+_
+All files #8033#11133#1.39#6156#5585#1.10
+Files with#1477# 4578#3.10#8074#6041#1.34
+\(>= 2 deltas
+.TE
+.ce 1
+Table II. Statistics for RCS files.
+.nr VS 18p
+.vs 18p
+.PP
+The mean number of revisions per RCS file is 1.39.
+Columns 5 and 6 show the mean sizes (in bytes) of an RCS file
+and of the latest revision of each RCS file, respectively.
+The `overhead' column contains the ratio of the mean sizes.
+Assuming that all revisions in an RCS file are approximately the same size,
+this ratio gives a measure of the space consumed by the extra revisions.
+.PP
+In our sample, over 80 per cent of the RCS files contained only a single revision.
+The reason is that our
+systems programmers routinely check in all source files
+on the distribution tapes, even though they may never touch them again.
+To get a better indication of how much space savings are possible
+with deltas, all measures with those files
+that contained 2 or more revisions were recomputed. Only for those files
+is RCS necessary.
+As shown in the second line, the average number of revisions for those files is
+3.10, with an overhead of 1.34. This means that the extra 2.10 deltas
+require 34 per cent extra space, or
+16 per cent per extra revision.
+Rochkind\u3\d
+measured the space consumed by SCCS, and
+reported an average of 5 revisions per group
+and an overhead of 1.37 (or about 9 per cent per extra revision).
+In a later paper, Glasser\u6\d
+observed an average of 7 revisions per group in a single, large project,
+but provided no overhead figure.
+In his paper on DSEE\u5\d,
+Leblang reported that delta storage combined with blank compression
+results in an overhead of a mere 1\-2 per cent per revision.
+Since leading blanks accounted for about 20 per cent of the surveyed Pascal
+programs, a revision group with 5\-10 members was smaller
+than a single cleartext copy.
+.PP
+The above observations demonstrate clearly that the space needed
+for extra revisions is small. With delta storage, the luxury of
+keeping multiple revisions online is certainly affordable.
+In fact, introducing a system with delta storage may reduce
+storage requirements, because programmers often save back-up copies
+anyway. Since back-up copies are stored much more efficiently with deltas,
+introducing a system such as RCS may
+actually free a considerable amount of space.
+.NH
+Survey of Version Control Tools
+.PP
+The need to keep back-up copies of software arose when
+programs and data were no longer stored on paper media, but were entered
+from terminals and stored on disk.
+Back-up copies are desirable for reliability, and many modern editors
+automatically save a back-up copy for every file touched.
+This strategy
+is valuable for short-term back-ups, but not suitable for long-term
+version control, since an existing back-up copy is overwritten whenever the
+corresponding file is edited.
+.PP
+Tape archives are suitable for long-term, offline storage.
+If all changed files are dumped on a back-up tape once per day, old revisions
+remain accessible. However, tape archives are unsatisfactory
+for version control in several ways. First, backing up the file
+system every 24 hours does not capture intermediate revisions.
+Secondly, the old revisions are not online,
+and accessing them is tedious and time-consuming.
+In particular, it is impractical to
+compare several old revisions of a group,
+because that may require mounting and searching several tapes.
+Tape archives are important fail-safe tools in the
+event of catastrophic disk failures or accidental deletions,
+but they are ill-suited for version control.
+Conversely, version control tools do not obviate the
+need for tape archives.
+.PP
+A natural technique for keeping several old revisions online is
+to never delete a file.
+Editing a file
+simply creates a new file with the same
+name, but with a different sequence number.
+This technique, available as an option in DEC's VMS operating system,
+turns out to be inadequate for version control.
+First, it is prohibitively expensive in terms of storage costs,
+especially since no data compression techniques are employed.
+Secondly, indiscriminately storing every change produces too many
+revisions, and programmers have difficulties distinguishing them.
+The proliferation of revisions forces programmers to spend much time on
+finding and deleting useless files.
+Thirdly, most of the support functions like locking, logging,
+revision selection,
+and identification described in this paper are not available.
+.PP
+An alternative approach is to separate editing from revision control.
+The user may repeatedly edit a given revision,
+until freezing it with an explicit command.
+Once a revision is frozen, it is stored permanently and can no longer be modified.
+(In RCS, freezing a revision is done with \fIci\fR.)
+Editing a frozen revision implicitly creates a new one, which
+can again be changed repeatedly until it is frozen itself.
+This approach saves exactly those revisions that the user
+considers important, and keeps the number of revisions manageable.
+IBM's CLEAR/CASTER\u7\d,
+AT&T's SCCS\u3\d,
+CMU's SDC\u8\d
+and DEC's CMS\u9\d
+are examples of version control systems using this approach.
+CLEAR/CASTER maintains a data base of programs, specifications,
+documentation and messages, using deltas.
+Its goal is to provide control over the development process from a
+management viewpoint.
+SCCS stores multiple revisions of source text in an ancestral tree,
+records a log entry for each revision,
+provides access control, and has facilities
+for uniquely identifying each revision.
+An efficient delta technique
+reduces the space consumed by each revision group.
+SDC is much simpler than SCCS because it stores not more than
+two revisions. However, it maintains a complete log for all old
+revisions, some of which may be on back-up tape.
+CMS, like SCCS, manages tree-structured revision groups,
+but offers no identification mechanism.
+.PP
+Tools for dealing with configurations are still in a state of flux.
+SCCS, SDC and CMS can be combined with MAKE or MAKE-like programs.
+Since flexible selection rules are missing from all these tools,
+it is sometimes difficult
+to specify precisely which revision of each group
+should be passed to MAKE for building a desired configuration.
+The Xerox Cedar system\u10\d
+provides a `System Modeller' that can rebuild
+a configuration from an arbitrary set of module revisions.
+The revisions of a module are only distinguished by creation time,
+and there is no tool for managing groups.
+Since the selection rules are primitive,
+the System Modeller appears to be somewhat tedious to use.
+Apollo's DSEE\u5\d
+is a sophisticated software engineering environment.
+It manages revision groups in a way similar to SCCS and CMS. Configurations
+are built using `configuration threads'.
+A configuration thread states which revision of each group
+named in a configuration should be chosen.
+A configuration thread may contain dynamic specifiers
+(e.g., `choose the revisions I am currently working on,
+and the most recent revisions otherwise'), which are bound
+automatically at build time.
+It also provides a notification mechanism for alerting
+maintainers about the need to rebuild a system after a change.
+.PP
+RCS is based on a general model for describing
+multi-version/multi-configuration systems\u11\d.
+The model describes systems using AND/OR graphs, where AND nodes represent
+configurations, and OR nodes represent version groups.
+The model gives rise to a suite of selection rules for
+composing configurations, almost all of which are implemented in RCS.
+The revisions selected by RCS are passed to MAKE for configuration building.
+Revision group management is modelled after SCCS.
+RCS retains SCCS's best features,
+but offers a significantly simpler user interface,
+flexible selection rules, adequate integration with MAKE
+and improved identification.
+A detailed comparison of RCS and SCCS appears in Reference 4.
+.PP
+An important component of all revision control systems
+is a program for computing deltas.
+SCCS and RCS use the program \fIdiff\fR\u2\d,
+which first computes the longest common subsequence of lines of two
+revisions, and then produces the delta from that subsequence.
+The delta is simply an edit script consisting of deletion and
+insertion commands that generate one revision from the other.
+.PP
+A delta based on a longest common subsequence is not necessarily minimal,
+because it does not take advantage of crossing block moves.
+Crossing block moves arise if two or more blocks of lines
+(e.g., procedures)
+appear in a different order in two revisions.
+An edit script derived from a longest common subsequence
+first deletes the shorter of the two blocks, and then reinserts it.
+Heckel\u12\d
+proposed an algorithm for detecting block moves, but
+since the algorithm is based on heuristics,
+there are conditions
+under which the generated delta is far from minimal.
+DSEE uses this algorithm combined with blank compression,
+apparently with satisfactory overall results.
+A new algorithm that is guaranteed to produce a minimal delta based on
+block moves appears in Reference 13.
+A future release of RCS will use this algorithm.
+.PP
+\fIAcknowledgements\fR:
+Many people have helped make RCS a success by contributing criticisms, suggestions,
+corrections, and even whole new commands (including manual pages).
+The list of people is too long to be
+reproduced here, but my sincere thanks for their help and
+goodwill go to all of them.
+.sp
+.nr VS 12p
+.vs 12p
+.SH
+Appendix: Synopsis of RCS Operations
+.LP
+.IP "\fIci\fP \fB\- check in revisions\fP"
+.sp 0
+\fICi\fR stores the contents of a working file into the
+corresponding RCS file as a new revision.
+If the RCS file doesn't exist, \fIci\fR creates it.
+\fICi\fR removes the working file, unless one of the options
+\fI\-u\fR or \fI\-l\fR is present.
+For each check-in, \fIci\fR asks for a commentary
+describing the changes relative to the previous revision.
+.sp 1
+\fICi\fR assigns the revision number given by the \fI\-r\fR option;
+if that option is missing, it derives the number from the
+lock held by the user; if there is no lock and locking is not strict,
+\fIci\fR increments the number of the latest revision on the trunk.
+A side branch can only be started by explicitly specifying its
+number with the \fI\-r\fR option during check-in.
+.sp 1
+\fICi\fR also determines
+whether the revision to be checked in is different from the
+previous one, and asks whether to proceed if not.
+This facility simplifies check-in operations for large systems,
+because one need not remember which files were changed.
+.sp 1
+The option \fI\-k\fR searches the checked in file for identification
+markers containing
+the attributes
+revision number, check-in date, author and state, and assigns these
+to the new revision rather than computing them. This option is
+useful for software distribution: Recipients of distributed software
+using RCS should check in updates with the \fI\-k\fR option.
+This convention guarantees that revision numbers, check-in dates,
+etc., are the same at all sites.
+.IP "\fIco\fP \fB\- check out revisions\fP"
+.sp 0
+\fICo\fR retrieves revisions according to revision number,
+date, author and state attributes. It either places the revision
+into the working file, or prints it on the standard output.
+\fICo\fR always expands the identification markers.
+.IP "\fIident\fP \fB\- extract identification markers\fP"
+.sp 0
+\fIIdent\fR extracts the identification markers expanded by \fIco\fR
+from any file and prints them.
+.IP "\fIrcs\fP \fB\- change RCS file attributes\fP"
+.sp 0
+\fIRcs\fR is an administrative operation that changes access lists,
+locks, unlocks, breaks locks, toggles the strict-locking feature,
+sets state attributes and symbolic revision numbers, changes the
+description, and deletes revisions. A revision can
+only be deleted if it is not the fork of a side branch.
+.IP "\fIrcsclean\fP \fB\- clean working directory\fP"
+.sp 0
+.ne 10
+\fIRcsclean\fR removes working files that were checked out but never changed.*
+.FS *
+The \fIrcsclean\fP and \fIrcsfreeze\fP commands
+are optional and are not always installed.
+.FE
+.IP "\fIrcsdiff\fP \fB\- compare revisions\fP"
+.sp 0
+\fIRcsdiff\fR compares two revisions and prints their
+difference, using the UNIX tool \fIdiff\fR.
+One of the revisions compared may be checked out.
+This command is useful for finding out about changes.
+.IP "\fIrcsfreeze\fP \fB\- freeze a configuration\fP"
+.sp 0
+\fIRcsfreeze\fR assigns the same symbolic revision number
+to a given revision in all RCS files.
+This command is useful for accurately recording a configuration.*
+.IP "\fIrcsmerge\fP \fB\- merge revisions\fP"
+.sp 0
+\fIRcsmerge\fR merges two revisions, \fIrev1\fR and \fIrev2\fR,
+with respect to a common ancestor.
+A 3-way file comparison determines the segments of lines that
+are (a) the same in all three revisions, or (b) the same in 2 revisions,
+or (c) different in all three. For all segments of type (b) where
+\fIrev1\fR is the differing revision,
+the segment in \fIrev1\fR replaces the corresponding segment of \fIrev2\fR.
+Type (c) indicates an overlapping change, is flagged as an error, and requires user
+intervention to select the correct alternative.
+.IP "\fIrlog\fP \fB\- read log messages\fP"
+.sp 0
+\fIRlog\fR prints the log messages and other information in an RCS file.
+.bp
+.LP
+.nr VS 12p
+.vs 12p
+.]<
+.ds [F 1
+.]-
+.ds [K FELD02
+.ds [K MakeArticle
+.ds [A Feldman, Stuart I.
+.ds [D March 1979
+.ds [T Make\*-A Program for Maintaining Computer Programs
+.ds [J Software\*-Practice & Experience
+.ds [V 9
+.ds [N 3
+.ds [P 255-265
+.nr [P 1
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 1 journal-article
+.ds [F 2
+.]-
+.ds [K HUNT01
+.ds [T An Algorithm for Differential File Comparison
+.ds [A Hunt, James W.
+.as [A " and McIlroy, M. D.
+.ds [I Computing Science Technical Report, Bell Laboratories
+.ds [R 41
+.ds [D June 1976
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 4 tech-report
+.ds [F 3
+.]-
+.ds [K SCCS
+.ds [A Rochkind, Marc J.
+.ds [D Dec. 1975
+.ds [T The Source Code Control System
+.ds [J IEEE Transactions on Software Engineering
+.ds [V SE-1
+.ds [N 4
+.ds [P 364-370
+.nr [P 1
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 1 journal-article
+.ds [F 4
+.]-
+.ds [K TICH08
+.ds [T Design, Implementation, and Evaluation of a Revision Control System
+.ds [A Tichy, Walter F.
+.ds [B Proceedings of the 6th International Conference on Software Engineering
+.ds [I ACM, IEEE, IPS, NBS
+.ds [D September 1982
+.ds [P 58-67
+.nr [P 1
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 3 article-in-book
+.ds [F 5
+.]-
+.ds [K LEBL01
+.ds [A Leblang, David B.
+.as [A " and Chase, Robert P.
+.ds [T Computer-Aided Software Engineering in a Distributed Workstation Environment
+.ds [O Proceedings of the ACM SIGSOFT/SIGPLAN Software Engineering Symposium
+.as [O " on Practical Software Development Environments.
+.ds [J SIGPLAN Notices
+.ds [V 19
+.ds [N 5
+.ds [D May 1984
+.ds [P 104-112
+.nr [P 1
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 1 journal-article
+.ds [F 1
+.ds [F 3
+.ds [F 6
+.]-
+.ds [K SCCSEval
+.ds [A Glasser, Alan L.
+.ds [D Nov. 1978
+.ds [T The Evolution of a Source Code Control System
+.ds [J Software Engineering Notes
+.ds [V 3
+.ds [N 5
+.ds [P 122-125
+.nr [P 1
+.ds [O Proceedings of the Software Quality and Assurance Workshop.
+.nr [T 0
+.nr [A 1
+.nr [O 1
+.][ 1 journal-article
+.ds [F 5
+.ds [F 7
+.]-
+.ds [K IBMClearCaster
+.ds [A Brown, H.B.
+.ds [D 1970
+.ds [T The Clear/Caster System
+.ds [J Nato Conference on Software Engineering, Rome
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 1 journal-article
+.ds [F 3
+.ds [F 8
+.]-
+.ds [K HabermannSDC
+.ds [A Habermann, A. Nico
+.ds [D Jan. 1979
+.ds [T A Software Development Control System
+.ds [I Technical Report, Carnegie-Mellon University, Department of Computer Science
+.nr [T 0
+.nr [A 0
+.nr [O 0
+.][ 2 book
+.ds [F 9
+.]-
+.ds [K CMS
+.ds [A DEC
+.ds [T Code Management System
+.ds [I Digital Equipment Corporation
+.ds [O Document No.\ EA-23134-82
+.ds [D 1982
+.nr [T 0
+.nr [A 0
+.nr [O 0
+.][ 2 book
+.ds [F 10
+.]-
+.ds [K LAMP01
+.ds [A Lampson, Butler W.
+.as [A " and Schmidt, Eric E.
+.ds [T Practical Use of a Polymorphic Applicative Language
+.ds [B Proceedings of the 10th Symposium on Principles of Programming Languages
+.ds [I ACM
+.ds [P 237-255
+.nr [P 1
+.ds [D January 1983
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 3 article-in-book
+.ds [F 5
+.ds [F 11
+.]-
+.ds [K TICH07
+.ds [T A Data Model for Programming Support Environments and its Application
+.ds [A Tichy, Walter F.
+.ds [B Automated Tools for Information System Design and Development
+.ds [E Hans-Jochen Schneider and Anthony I. Wasserman
+.ds [C Amsterdam
+.ds [I North-Holland Publishing Company
+.ds [D 1982
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 3 article-in-book
+.ds [F 4
+.ds [F 2
+.ds [F 12
+.]-
+.ds [K HECK01
+.ds [T A Technique for Isolating Differences Between Files
+.ds [A Heckel, Paul
+.ds [J Communications of the ACM
+.ds [D April 1978
+.ds [V 21
+.ds [N 4
+.ds [P 264-268
+.nr [P 1
+.nr [T 0
+.nr [A 0
+.nr [O 0
+.][ 1 journal-article
+.ds [F 13
+.]-
+.ds [K TICH11
+.ds [T The String-to-String Correction Problem with Block Moves
+.ds [A Tichy, Walter F.
+.ds [D Nov. 1984
+.ds [J ACM Transactions on Computer Systems
+.ds [V 2
+.ds [N 4
+.ds [P 309-321
+.nr [P 1
+.nr [T 0
+.nr [A 1
+.nr [O 0
+.][ 1 journal-article
+.]>
diff --git a/gnu/usr.bin/rcs/doc/rcs_func.ms b/gnu/usr.bin/rcs/doc/rcs_func.ms
new file mode 100644
index 000000000000..9818086c3de4
--- /dev/null
+++ b/gnu/usr.bin/rcs/doc/rcs_func.ms
@@ -0,0 +1,95 @@
+.SH
+Functions of RCS (Revision Control System)
+.PP
+RCS manages software libraries. It greatly increases programmer productivity
+by providing the following functions.
+.IP 1.
+RCS stores and retrieves multiple revisions of program and other text.
+Thus, one can maintain one or more releases while developing the next
+release, with a minimum of space overhead. Changes no longer destroy the
+original -- previous revisions remain accessible.
+.RS
+.IP a.
+Maintains each module as a tree of revisions.
+.IP b.
+Project libraries can
+be organized centrally, decentralized, or any way you like.
+.IP c.
+RCS works for any type of text: programs, documentation, memos, papers,
+graphics, VLSI layouts, form letters, etc.
+.RE
+.IP 2.
+RCS maintains a complete history of changes.
+Thus, one can find out what happened to a module easily
+and quickly, without having to compare source listings or
+having to track down colleagues.
+.RS
+.IP a.
+RCS performs automatic record keeping.
+.IP b.
+RCS logs all changes automatically.
+.IP c.
+RCS guarantees project continuity.
+.RE
+.IP 3.
+RCS manages multiple lines of development.
+.IP 4.
+RCS can merge multiple lines of development.
+Thus, when several parallel lines of development must be consolidated
+into one line, the merging of changes is automatic.
+.IP 5.
+RCS flags coding conflicts.
+If two or more lines of development modify the same section of code,
+RCS can alert programmers about overlapping changes.
+.IP 6.
+RCS resolves access conflicts.
+When two or more programmers wish to modify the same revision,
+RCS alerts the programmers and makes sure that one modification won't wipe
+out the other one.
+.IP 7.
+RCS provides high-level retrieval functions.
+Revisions can be retrieved according to ranges of revision numbers,
+symbolic names, dates, authors, and states.
+.IP 8.
+RCS provides release and configuration control.
+Revisions can be marked as released, stable, experimental, etc.
+Configurations of modules can be described simply and directly.
+.IP 9.
+RCS performs automatic identification of modules with name, revision
+number, creation time, author, etc.
+Thus, it is always possible to determine which revisions of which
+modules make up a given configuration.
+.IP 10.
+RCS provides high-level management visibility.
+Thus, it is easy to track the status of a software project.
+.RS
+.IP a.
+RCS provides a complete change history.
+.IP b.
+RCS records who did what when to which revision of which module.
+.RE
+.IP 11.
+RCS is fully compatible with existing software development tools.
+RCS is unobtrusive -- its interface to the file system is such that
+all your existing software tools can be used as before.
+.IP 12.
+RCS' basic user interface is extremely simple. The novice needs to learn
+only two commands. Its more sophisticated features have been
+tuned towards advanced software development environments and the
+experienced software professional.
+.IP 13.
+RCS simplifies software distribution if customers
+maintain sources with RCS also. This technique assures proper
+identification of versions and configurations, and tracking of customer
+modifications. Customer modifications can be merged into distributed
+versions locally or by the development group.
+.IP 14.
+RCS needs little extra space for the revisions (only the differences).
+If intermediate revisions are deleted, the corresponding
+differences are compressed into the shortest possible form.
+.IP 15.
+RCS is implemented with reverse deltas. This means that
+the latest revision, which is the one that is accessed most often,
+is stored intact. All others are regenerated from the latest one
+by applying reverse deltas (backward differences). This
+results in fast access time for the revision needed most often.
diff --git a/gnu/usr.bin/rcs/ident/Makefile b/gnu/usr.bin/rcs/ident/Makefile
new file mode 100644
index 000000000000..1a618e529b33
--- /dev/null
+++ b/gnu/usr.bin/rcs/ident/Makefile
@@ -0,0 +1,7 @@
+PROG= ident
+
+SRCS= ident.c
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/ident/ident.1 b/gnu/usr.bin/rcs/ident/ident.1
new file mode 100644
index 000000000000..37c8eda202a3
--- /dev/null
+++ b/gnu/usr.bin/rcs/ident/ident.1
@@ -0,0 +1,76 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+.ds iD \\$3 \\$4 \\$5 \\$6 \\$7
+..
+.Id $Id: ident.1,v 5.0 1990/08/22 09:09:36 eggert Exp $
+.ds r \s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH IDENT 1 \*(Dt GNU
+.SH NAME
+ident \- identify files
+.SH SYNOPSIS
+.B ident
+[
+.B \-q
+] [
+.I file
+\&.\|.\|. ]
+.SH DESCRIPTION
+.B ident
+searches for all occurrences of the pattern
+.BI $ keyword : .\|.\|. $
+in the named files or, if no file name appears, the standard input.
+.PP
+These patterns are normally inserted automatically by the \*r command
+.BR co (1),
+but can also be inserted manually.
+The option
+.B \-q
+suppresses
+the warning given if there are no patterns in a file.
+.PP
+.B ident
+works on text files as well as object files and dumps.
+For example, if the C program in
+.B f.c
+contains
+.IP
+\f3char rcsid[] = \&"$\&Id: f.c,v \*(iD $\&";\fP
+.LP
+and
+.B f.c
+is compiled into
+.BR f.o ,
+then the command
+.IP
+.B "ident f.c f.o"
+.LP
+will output
+.nf
+.IP
+.ft 3
+f.c:
+ $\&Id: f.c,v \*(iD $
+f.o:
+ $\&Id: f.c,v \*(iD $
+.ft
+.fi
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990 by Paul Eggert.
+.SH "SEE ALSO"
+ci(1), co(1), rcs(1), rcsdiff(1), rcsintro(1), rcsmerge(1), rlog(1),
+rcsfile(5)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
diff --git a/gnu/usr.bin/rcs/ident/ident.c b/gnu/usr.bin/rcs/ident/ident.c
new file mode 100644
index 000000000000..a2cc018da3ff
--- /dev/null
+++ b/gnu/usr.bin/rcs/ident/ident.c
@@ -0,0 +1,214 @@
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+/*
+ * RCS identification operation
+ */
+
+/* $Log: ident.c,v $
+ * Revision 5.3 1991/09/10 22:15:46 eggert
+ * Open files with FOPEN_R, not FOPEN_R_WORK,
+ * because they might be executables, not working files.
+ *
+ * Revision 5.2 1991/08/19 03:13:55 eggert
+ * Report read errors immediately.
+ *
+ * Revision 5.1 1991/02/25 07:12:37 eggert
+ * Don't report empty keywords. Check for I/O errors.
+ *
+ * Revision 5.0 1990/08/22 08:12:37 eggert
+ * Don't limit output to known keywords.
+ * Remove arbitrary limits and lint. Ansify and Posixate.
+ *
+ * Revision 4.5 89/05/01 15:11:54 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.4 87/10/23 17:09:57 narten
+ * added exit(0) so exit return code would be non random
+ *
+ * Revision 4.3 87/10/18 10:23:55 narten
+ * Updating version numbers. Changes relative to 1.1 are actually relative
+ * to 4.1
+ *
+ * Revision 1.3 87/07/09 09:20:52 trinkle
+ * Added check to make sure there is at least one arg before comparing argv[1]
+ * with "-q". This is necessary on machines that don't allow dereferencing null
+ * pointers (i.e. Suns).
+ *
+ * Revision 1.2 87/03/27 14:21:47 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/05/10 16:31:02 wft
+ * Added option -q and input from reading stdin.
+ * Marker matching is now done with trymatch() (independent of keywords).
+ *
+ * Revision 3.4 83/02/18 17:37:49 wft
+ * removed printing of new line after last file.
+ *
+ * Revision 3.3 82/12/04 12:48:55 wft
+ * Added LOCKER.
+ *
+ * Revision 3.2 82/11/28 18:24:17 wft
+ * removed Suffix; added ungetc to avoid skipping over trailing KDELIM.
+ *
+ * Revision 3.1 82/10/13 15:58:51 wft
+ * fixed type of variables receiving from getc() (char-->int).
+*/
+
+#include "rcsbase.h"
+
+static int match P((FILE*));
+static void scanfile P((FILE*,char const*,int));
+
+mainProg(identId, "ident", "$Id: ident.c,v 5.3 1991/09/10 22:15:46 eggert Exp $")
+/* Ident searches the named files, or standard input if none are named,
+ * for all occurrences of the pattern $keyword:...$. Any keyword name is
+ * accepted, not only the ones RCS expands; -q silences the no-keyword warning.
+ */
+
+{
+ FILE *fp;
+ int quiet;
+ int status = EXIT_SUCCESS;
+
+ if ((quiet = argc > 1 && strcmp("-q",argv[1])==0)) { /* optional leading -q */
+ argc--; argv++; /* drop the flag so only file operands remain */
+ }
+
+ if (argc<2) /* no file operands: scan standard input */
+ scanfile(stdin, (char*)0, quiet);
+
+ while ( --argc > 0 ) {
+ if (!(fp = fopen(*++argv, FOPEN_R))) {
+ VOID fprintf(stderr, "%s error: can't open %s\n", cmdid, *argv);
+ status = EXIT_FAILURE; /* remember the failure but keep going with the remaining files */
+ } else {
+ scanfile(fp, *argv, quiet);
+ if (argc>1) VOID putchar('\n'); /* blank line between files, none after the last */
+ }
+ }
+ if (ferror(stdout) || fclose(stdout)!=0) {
+ VOID fprintf(stderr, "%s error: write error\n", cmdid);
+ status = EXIT_FAILURE;
+ }
+ exitmain(status);
+}
+
+#if lint
+ exiting void identExit() { _exit(EXIT_FAILURE); } /* compiled only when lint is defined; ends the process with failure status */
+#endif
+
+
+ static void
+scanfile(file, name, quiet)
+ register FILE *file;
+ char const *name;
+ int quiet;
+/* Function: scan the open stream FILE for keywords, then close it.
+ * On a read error, or if the close fails, report it and exit with failure.
+ */
+{
+ register int c;
+
+ if (name)
+ VOID printf("%s:\n", name);
+ else
+ name = "input"; /* label used in the messages below when no name was given */
+ c = 0; /* start value; presumably matches neither test below, so the first pass just reads a char */
+ for (;;) {
+ if (c < 0) {
+ if (feof(file))
+ break;
+ if (ferror(file))
+ goto read_error;
+ }
+ if (c == KDELIM) {
+ if ((c = match(file))) /* nonzero: no keyword here; rescan from the char match() stopped at */
+ continue;
+ quiet = true; /* a keyword was printed, so the warning below no longer applies */
+ }
+ c = getc(file);
+ }
+ if (!quiet)
+ VOID fprintf(stderr, "%s warning: no id keywords in %s\n", cmdid, name);
+ if (fclose(file) == 0) /* a failed close falls through to the error exit */
+ return;
+
+ read_error:
+ VOID fprintf(stderr, "%s error: %s: read error\n", cmdid, name);
+ exit(EXIT_FAILURE);
+}
+
+
+
+ static int
+match(fp) /* group substring between two KDELIM's; then do pattern match */
+ register FILE *fp;
+{ /* Yields 0 once a keyword has been printed; otherwise the nonzero char (negative at end of input) that ended the attempt. */
+ char line[BUFSIZ]; /* text after the opening KDELIM; that KDELIM is re-added when printing */
+ register int c;
+ register char * tp;
+
+ tp = line;
+ while ((c = getc(fp)) != VDELIM) { /* collect the keyword name up to VDELIM */
+ if (c < 0)
+ return c;
+ switch (ctab[c]) {
+ case LETTER: case Letter:
+ *tp++ = c;
+ if (tp < line+sizeof(line)-4) /* keep room for the VDELIM, space, KDELIM and NUL still to be stored */
+ break;
+ /* fall into */
+ default:
+ return c ? c : '\n'/* anything but 0 or KDELIM or EOF */;
+ }
+ }
+ if (tp == line) /* empty keyword name: not a match */
+ return c;
+ *tp++ = c;
+ if ((c = getc(fp)) != ' ') /* VDELIM must be followed by a space */
+ return c ? c : '\n';
+ *tp++ = c;
+ while( (c = getc(fp)) != KDELIM ) { /* collect the value up to the closing KDELIM */
+ if (c < 0 && feof(fp) | ferror(fp))
+ return c;
+ switch (ctab[c]) {
+ default:
+ *tp++ = c;
+ if (tp < line+sizeof(line)-2) /* keep room for the closing KDELIM and NUL */
+ break;
+ /* fall into */
+ case NEWLN: case UNKN:
+ return c ? c : '\n';
+ }
+ }
+ if (tp[-1] != ' ') /* the value must end with a space before the closing KDELIM */
+ return c;
+ *tp++ = c; /*append trailing KDELIM*/
+ *tp = '\0';
+ VOID fprintf(stdout, " %c%s\n", KDELIM, line);
+ return 0; /* matched and printed */
+}
diff --git a/gnu/usr.bin/rcs/lib/Makefile b/gnu/usr.bin/rcs/lib/Makefile
new file mode 100644
index 000000000000..b198e9ec6f98
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/Makefile
@@ -0,0 +1,5 @@
+LIB= rcs
+SRCS= maketime.c partime.c rcsedit.c rcsfcmp.c rcsfnms.c rcsgen.c rcskeep.c \
+ rcskeys.c rcslex.c rcsmap.c rcsrev.c rcssyn.c rcsutil.c merger.c
+
+.include <bsd.lib.mk>
diff --git a/gnu/usr.bin/rcs/lib/conf.h b/gnu/usr.bin/rcs/lib/conf.h
new file mode 100644
index 000000000000..d29e51159dde
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/conf.h
@@ -0,0 +1,495 @@
+/* RCS compile-time configuration */
+
+ /* $Id: conf.sh,v 5.14 1991/11/20 18:21:10 eggert Exp $ */
+
+/*
+ * This file is generated automatically.
+ * If you edit it by hand your changes may be lost.
+ * Instead, please try to fix conf.sh,
+ * and send your fixes to rcs-bugs@cs.purdue.edu.
+ */
+
+#define exitmain(n) return n /* how to exit from main() */
+/* #define _POSIX_SOURCE */ /* Define this if Posix + strict Standard C. */
+
+#include <errno.h>
+#include <stdio.h>
+#include <time.h>
+
+/* Comment out #include lines below that do not work. */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pwd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <utime.h>
+/* #include <vfork.h> */
+
+/* Define the following symbols to be 1 or 0. */
+#define has_sys_dir_h 1 /* Does #include <sys/dir.h> work? */
+#define has_sys_param_h 1 /* Does #include <sys/param.h> work? */
+#define has_readlink 1 /* Does readlink() work? */
+
+/* #undef NAME_MAX */ /* Uncomment this if NAME_MAX is broken. */
+
+#if !defined(NAME_MAX) && !defined(_POSIX_NAME_MAX)
+# if has_sys_dir_h
+# include <sys/dir.h>
+# endif
+# ifndef NAME_MAX
+# ifndef MAXNAMLEN
+# define MAXNAMLEN 14
+# endif
+# define NAME_MAX MAXNAMLEN
+# endif
+#endif
+#if !defined(PATH_MAX) && !defined(_POSIX_PATH_MAX)
+# if has_sys_param_h
+# include <sys/param.h>
+# define included_sys_param_h 1
+# endif
+# ifndef PATH_MAX
+# ifndef MAXPATHLEN
+# define MAXPATHLEN 1024
+# endif
+# define PATH_MAX (MAXPATHLEN-1)
+# endif
+#endif
+#if has_readlink && !defined(MAXSYMLINKS)
+# if has_sys_param_h && !included_sys_param_h
+# include <sys/param.h>
+# endif
+# ifndef MAXSYMLINKS
+# define MAXSYMLINKS 20 /* BSD; not standard yet */
+# endif
+#endif
+
+/* Comment out the keyword definitions below if the keywords work. */
+/* #define const */
+/* #define volatile */
+
+/* Comment out the typedefs below if the types are already declared. */
+/* Fix any uncommented typedefs that are wrong. */
+/* typedef int mode_t; */
+/* typedef int pid_t; */
+typedef int sig_atomic_t;
+/* typedef unsigned size_t; */
+/* typedef int ssize_t; */
+/* typedef long time_t; */
+/* typedef int uid_t; */
+
+/* Define the following symbols to be 1 or 0. */
+#define has_prototypes 1 /* Do function prototypes work? */
+#define has_stdarg 1 /* Does <stdarg.h> work? */
+#define has_varargs 0 /* Does <varargs.h> work? */
+#define va_start_args 2 /* How many args does va_start() take? */
+#if has_prototypes
+# define P(params) params /* keep the parenthesized parameter list in declarations */
+#else
+# define P(params) () /* replace the parameter list with an empty one */
+#endif
+#if has_stdarg
+# include <stdarg.h>
+#else
+# if has_varargs
+# include <varargs.h>
+# else
+ typedef char *va_list;
+# define va_dcl int va_alist;
+# define va_start(ap) ((ap) = (va_list)&va_alist)
+# define va_arg(ap,t) (((t*) ((ap)+=sizeof(t))) [-1])
+# define va_end(ap)
+# endif
+#endif
+#if va_start_args == 2
+# define vararg_start va_start
+#else
+# define vararg_start(ap,p) va_start(ap)
+#endif
+
+#define text_equals_binary_stdio 1 /* Does stdio treat text like binary? */
+#define text_work_stdio 0 /* Text i/o for working file, binary for RCS file? */
+#if text_equals_binary_stdio
+ /* Text and binary i/o behave the same, or binary i/o does not work. */
+# define FOPEN_R "r"
+# define FOPEN_W "w"
+# define FOPEN_WPLUS "w+"
+#else
+ /* Text and binary i/o behave differently. */
+ /* This is incompatible with Posix and Unix. */
+# define FOPEN_R "rb"
+# define FOPEN_W "wb"
+# define FOPEN_WPLUS "w+b"
+#endif
+#if text_work_stdio
+# define FOPEN_R_WORK "r"
+# define FOPEN_W_WORK "w"
+# define FOPEN_WPLUS_WORK "w+"
+#else
+# define FOPEN_R_WORK FOPEN_R
+# define FOPEN_W_WORK FOPEN_W
+# define FOPEN_WPLUS_WORK FOPEN_WPLUS
+#endif
+
+/* Define or comment out the following symbols as needed. */
+#define bad_fopen_wplus 0 /* Does fopen(f,FOPEN_WPLUS) fail to truncate f? */
+#define getlogin_is_secure 0 /* Is getlogin() secure? Usually it's not. */
+#define has_dirent 1 /* Do opendir(), readdir(), closedir() work? */
+#define has_fchmod 0 /* Does fchmod() work? */
+#define has_fputs 0 /* Does fputs() work? */
+#define has_ftruncate 1 /* Does ftruncate() work? */
+#define has_getuid 1 /* Does getuid() work? */
+#define has_getpwuid 1 /* Does getpwuid() work? */
+#define has_link 1 /* Does link() work? */
+#define has_memcmp 1 /* Does memcmp() work? */
+#define has_memcpy 1 /* Does memcpy() work? */
+#define has_memmove 1 /* Does memmove() work? */
+#define has_madvise 0 /* Does madvise() work? */
+#define has_mmap 0 /* Does mmap() work on regular files? */
+#define has_rename 1 /* Does rename() work? */
+#define bad_a_rename 0 /* Does rename(A,B) fail if A is unwritable? */
+#define bad_b_rename 0 /* Does rename(A,B) fail if B is unwritable? */
+#define VOID (void) /* 'VOID e;' discards the value of an expression 'e'. */
+#define has_seteuid 0 /* Does seteuid() work? See README. */
+#define has_setuid 1 /* Does setuid() exist? */
+#define has_signal 1 /* Does signal() work? */
+#define signal_args P((int)) /* arguments of signal handlers */
+#define signal_type void /* type returned by signal handlers */
+#define sig_zaps_handler 0 /* Must a signal handler reinvoke signal()? */
+#define has_sigaction 1 /* Does struct sigaction work? */
+/* #define has_sigblock ? */ /* Does sigblock() work? */
+/* #define sigmask(s) (1 << ((s)-1)) */ /* Yield mask for signal number. */
+#define has_sys_siglist 0 /* Does sys_siglist[] work? */
+typedef ssize_t fread_type; /* type returned by fread() and fwrite() */
+typedef size_t freadarg_type; /* type of their size arguments */
+typedef void *malloc_type; /* type returned by malloc() */
+#define has_getcwd 1 /* Does getcwd() work? */
+/* #define has_getwd ? */ /* Does getwd() work? */
+#define has_mktemp 1 /* Does mktemp() work? */
+#define has_NFS 1 /* Might NFS be used? */
+/* #define strchr index */ /* Use old-fashioned name for strchr()? */
+/* #define strrchr rindex */ /* Use old-fashioned name for strrchr()? */
+#define bad_unlink 0 /* Does unlink() fail on unwritable files? */
+#define has_vfork 0 /* Does vfork() work? */
+#define has_fork 1 /* Does fork() work? */
+#define has_spawn 0 /* Does spawn*() work? */
+#define has_wait 1 /* Does wait() work? */
+#define has_waitpid 0 /* Does waitpid() work? */
+#define RCS_SHELL "/bin/sh" /* shell to run RCS subprograms */
+#define has_vfprintf 1 /* Does vfprintf() work? */
+/* #define has__doprintf ? */ /* Does _doprintf() work? */
+/* #define has__doprnt ? */ /* Does _doprnt() work? */
+/* #undef EXIT_FAILURE */ /* Uncomment this if EXIT_FAILURE is broken. */
+#define large_memory 0 /* Can main memory hold entire RCS files? */
+/* #undef ULONG_MAX */ /* Uncomment this if ULONG_MAX is broken (e.g. < 0). */
+/* struct utimbuf { time_t actime, modtime; }; */ /* Uncomment this if needed. */
+#define CO "/usr/bin/co" /* name of 'co' program */
+#define COMPAT2 0 /* Are version 2 files supported? */
+#define DATEFORM "%.2d.%.2d.%.2d.%.2d.%.2d.%.2d" /* e.g. 01.01.01.01.01.01 */
+#define DIFF "/usr/bin/diff" /* name of 'diff' program */
+#define DIFF3 "/usr/bin/diff3" /* name of 'diff3' program */
+#define DIFF3_BIN 1 /* Is diff3 user-visible (not the /usr/lib auxiliary)? */
+#define DIFF_FLAGS , "-an" /* Make diff output suitable for RCS. */
+#define DIFF_L 1 /* Does diff -L work? */
+#define DIFF_SUCCESS 0 /* DIFF status if no differences are found */
+#define DIFF_FAILURE 1 /* DIFF status if differences are found */
+#define DIFF_TROUBLE 2 /* DIFF status if trouble */
+#define ED "/bin/ed" /* name of 'ed' program (used only if !DIFF3_BIN) */
+#define MERGE "/usr/bin/merge" /* name of 'merge' program */
+#define TMPDIR "/tmp" /* default directory for temporary files */
+#define SLASH '/' /* principal pathname separator */
+#define SLASHes '/' /* `case SLASHes:' labels all pathname separators */
+#define isSLASH(c) ((c) == SLASH) /* Is arg a pathname separator? */
+#define ROOTPATH(p) isSLASH((p)[0]) /* Is p an absolute pathname? */
+#define X_DEFAULT ",v/" /* default value for -x option */
+#define DIFF_ABSOLUTE 1 /* Is ROOTPATH(DIFF) true? */
+#define ALL_ABSOLUTE 1 /* Are all subprograms absolute pathnames? */
+#define SENDMAIL "/usr/bin/mail" /* how to send mail */
+#define TZ_must_be_set 0 /* Must TZ be set for gmtime() to work? */
+
+
+
+/* Adjust the following declarations as needed. */
+
+
+#if __GNUC__ && !__STRICT_ANSI__
+# define exiting volatile /* GCC extension: function cannot return */
+#else
+# define exiting
+#endif
+
+#if has_ftruncate
+ int ftruncate P((int,off_t));
+#endif
+
+/* <sys/mman.h> */
+#if has_madvise
+ int madvise P((caddr_t,size_t,int));
+#endif
+#if has_mmap
+ caddr_t mmap P((caddr_t,size_t,int,int,int,off_t));
+ int munmap P((caddr_t,size_t));
+#endif
+
+
+/* Posix (ISO/IEC 9945-1: 1990 / IEEE Std 1003.1-1990) */
+/* These definitions are for the benefit of non-Posix hosts, and */
+/* Posix hosts that have Standard C compilers but traditional include files. */
+/* Unfortunately, mixed-up hosts are all too common. */
+
+/* <fcntl.h> */
+#ifdef F_DUPFD
+ int fcntl P((int,int,...));
+#else
+ int dup2 P((int,int));
+#endif
+#ifndef O_BINARY /* some non-Posix hosts need O_BINARY */
+# define O_BINARY 0 /* no effect on Posix */
+#endif
+#ifdef O_CREAT
+# define open_can_creat 1
+#else
+# define open_can_creat 0
+# define O_RDONLY 0
+# define O_WRONLY 1
+# define O_RDWR 2
+# define O_CREAT 01000
+# define O_TRUNC 02000
+ int creat P((char const*,mode_t));
+#endif
+#ifndef O_EXCL
+# define O_EXCL 0
+#endif
+
+/* <pwd.h> */
+#if has_getpwuid
+ struct passwd *getpwuid P((uid_t));
+#endif
+
+/* <signal.h> */
+#if has_sigaction
+ int sigaction P((int,struct sigaction const*,struct sigaction*));
+ int sigaddset P((sigset_t*,int));
+ int sigemptyset P((sigset_t*));
+#else
+#if has_sigblock
+ /* BSD */
+ int sigblock P((int));
+ int sigmask P((int));
+ int sigsetmask P((int));
+#endif
+#endif
+
+/* <stdio.h> */
+FILE *fdopen P((int,char const*));
+int fileno P((FILE*));
+
+/* <sys/stat.h> */
+int chmod P((char const*,mode_t));
+int fstat P((int,struct stat*));
+int stat P((char const*,struct stat*));
+mode_t umask P((mode_t));
+#if has_fchmod
+ int fchmod P((int,mode_t));
+#endif
+#ifndef S_IRUSR
+# ifdef S_IREAD
+# define S_IRUSR S_IREAD
+# else
+# define S_IRUSR 0400
+# endif
+# ifdef S_IWRITE
+# define S_IWUSR S_IWRITE
+# else
+# define S_IWUSR (S_IRUSR/2)
+# endif
+#endif
+#ifndef S_IRGRP
+# if has_getuid
+# define S_IRGRP (S_IRUSR / 0010)
+# define S_IWGRP (S_IWUSR / 0010)
+# define S_IROTH (S_IRUSR / 0100)
+# define S_IWOTH (S_IWUSR / 0100)
+# else
+ /* single user OS -- not Posix or Unix */
+# define S_IRGRP 0
+# define S_IWGRP 0
+# define S_IROTH 0
+# define S_IWOTH 0
+# endif
+#endif
+#ifndef S_ISREG
+# define S_ISREG(n) (((n) & S_IFMT) == S_IFREG)
+#endif
+
+/* <sys/wait.h> */
+#if has_wait
+ pid_t wait P((int*));
+#endif
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+# undef WIFEXITED /* Avoid 4.3BSD incompatibility with Posix. */
+#endif
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (!((stat_val) & 255))
+#endif
+
+/* <unistd.h> */
+char *getlogin P((void));
+int close P((int));
+int isatty P((int));
+int link P((char const*,char const*));
+int open P((char const*,int,...));
+int unlink P((char const*));
+int _filbuf P((FILE*)); /* keeps lint quiet in traditional C */
+int _flsbuf P((int,FILE*)); /* keeps lint quiet in traditional C */
+long pathconf P((char const*,int));
+ssize_t write P((int,void const*,size_t));
+#ifndef STDIN_FILENO
+# define STDIN_FILENO 0
+# define STDOUT_FILENO 1
+# define STDERR_FILENO 2
+#endif
+#if has_fork
+# if !has_vfork
+# undef vfork
+# define vfork fork
+# endif
+ pid_t vfork P((void)); /* vfork is nonstandard but faster */
+#endif
+#if has_getcwd || !has_getwd
+ char *getcwd P((char*,size_t));
+#else
+ char *getwd P((char*));
+#endif
+#if has_getuid
+ uid_t getuid P((void));
+#endif
+#if has_readlink
+/* ssize_t readlink P((char const*,char*,size_t)); *//* BSD; not standard yet */
+#endif
+#if has_setuid
+# if !has_seteuid
+# undef seteuid
+# define seteuid setuid
+# endif
+ int seteuid P((uid_t));
+ uid_t geteuid P((void));
+#endif
+#if has_spawn
+ int spawnv P((int,char const*,char*const*));
+# if ALL_ABSOLUTE
+# define spawn_RCS spawnv
+# else
+# define spawn_RCS spawnvp
+ int spawnvp P((int,char const*,char*const*));
+# endif
+#else
+ int execv P((char const*,char*const*));
+# if ALL_ABSOLUTE
+# define exec_RCS execv
+# else
+# define exec_RCS execvp
+ int execvp P((char const*,char*const*));
+# endif
+#endif
+
+/* utime.h */
+int utime P((char const*,struct utimbuf const*));
+
+
+/* Standard C library */
+/* These definitions are for the benefit of hosts that have */
+/* traditional C include files, possibly with Standard C compilers. */
+/* Unfortunately, mixed-up hosts are all too common. */
+
+/* <errno.h> */
+extern int errno;
+
+/* <limits.h> */
+#ifndef ULONG_MAX
+ /* This does not work in #ifs, but it's good enough for us. */
+# define ULONG_MAX ((unsigned long)-1)
+#endif
+
+/* <signal.h> */
+#if has_signal
+ signal_type (*signal P((int,signal_type(*)signal_args)))signal_args;
+#endif
+
+/* <stdio.h> */
+FILE *fopen P((char const*,char const*));
+fread_type fread P((void*,freadarg_type,freadarg_type,FILE*));
+fread_type fwrite P((void const*,freadarg_type,freadarg_type,FILE*));
+int fclose P((FILE*));
+int feof P((FILE*));
+int ferror P((FILE*));
+int fflush P((FILE*));
+int fprintf P((FILE*,char const*,...));
+int fputs P((char const*,FILE*));
+int fseek P((FILE*,long,int));
+int printf P((char const*,...));
+int rename P((char const*,char const*));
+int sprintf P((char*,char const*,...));
+/* long ftell P((FILE*)); */
+void clearerr P((FILE*));
+void perror P((char const*));
+#ifndef L_tmpnam
+# define L_tmpnam 32 /* power of 2 > sizeof("/usr/tmp/xxxxxxxxxxxxxxx") */
+#endif
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+#if has_mktemp
+ char *mktemp P((char*)); /* traditional */
+#else
+ char *tmpnam P((char*));
+#endif
+#if has_vfprintf
+ int vfprintf P((FILE*,char const*,va_list));
+#else
+#if has__doprintf
+ void _doprintf P((FILE*,char const*,va_list)); /* Minix */
+#else
+ void _doprnt P((char const*,va_list,FILE*)); /* BSD */
+#endif
+#endif
+
+/* <stdlib.h> */
+char *getenv P((char const*));
+exiting void _exit P((int));
+exiting void exit P((int));
+malloc_type malloc P((size_t));
+malloc_type realloc P((malloc_type,size_t));
+void free P((malloc_type));
+#ifndef EXIT_FAILURE
+# define EXIT_FAILURE 1
+#endif
+#ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+#endif
+#if !has_fork && !has_spawn
+ int system P((char const*));
+#endif
+
+/* <string.h> */
+char *strcpy P((char*,char const*));
+char *strchr P((char const*,int));
+char *strrchr P((char const*,int));
+int memcmp P((void const*,void const*,size_t));
+int strcmp P((char const*,char const*));
+size_t strlen P((char const*));
+void *memcpy P((void*,void const*,size_t));
+#if has_memmove
+ void *memmove P((void*,void const*,size_t));
+#endif
+
+/* <time.h> */
+time_t time P((time_t*));
diff --git a/gnu/usr.bin/rcs/lib/maketime.c b/gnu/usr.bin/rcs/lib/maketime.c
new file mode 100644
index 000000000000..c95c9f0b1e71
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/maketime.c
@@ -0,0 +1,344 @@
+#
+/*
+ * MAKETIME derive time_t value from TM structure.
+ *
+ * Usage:
+ * int zone; Minutes west of GMT, or
+ * 48*60 for localtime
+ * time_t t;
+ * struct tm *tp; Pointer to TM structure from <time.h>
+ * t = maketime(tp,zone);
+ *
+ * Returns:
+ * -1 if failure; parameter out of range or nonsensical.
+ * else time-value.
+ * Notes:
+ * This code is quasi-public; it may be used freely in like software.
+ * It is not to be sold, nor used in licensed software without
+ * permission of the author.
+ * For everyone's benefit, please report bugs and improvements!
+ * Copyright 1981 by Ken Harrenstien, SRI International.
+ * (ARPANET: KLH @ SRI)
+ */
+/* $Log: maketime.c,v $
+ * Revision 5.3 1991/08/19 03:13:55 eggert
+ * Add setfiledate, str2time, TZ_must_be_set.
+ *
+ * Revision 5.2 1990/11/01 05:03:30 eggert
+ * Remove lint.
+ *
+ * Revision 5.1 1990/10/04 06:30:13 eggert
+ * Calculate the GMT offset of 'xxx LT' as of xxx, not as of now.
+ * Don't assume time_t is 32 bits. Fix bugs near epoch and near end of time.
+ *
+ * Revision 5.0 1990/08/22 08:12:38 eggert
+ * Switch to GMT and fix the bugs exposed thereby.
+ * Permit dates past 1999/12/31. Ansify and Posixate.
+ *
+ * Revision 1.8 88/11/08 13:54:53 narten
+ * allow negative timezones (-24h <= x <= 24h)
+ *
+ * Revision 1.7 88/08/28 14:47:52 eggert
+ * Allow cc -R. Remove unportable "#endif XXX"s.
+ *
+ * Revision 1.6 87/12/18 17:05:58 narten
+ * include rcsparam.h
+ *
+ * Revision 1.5 87/12/18 11:35:51 narten
+ * maketime.c: fixed USG code - you have to call "tzset" in order to have
+ * "timezone" set. ("localtime" calls it, but it's probably better not to
+ * count on "localtime" having been called.)
+ *
+ * Revision 1.4 87/10/18 10:26:57 narten
+ * Updating version numbers. Changes relative to 1.0 are actually
+ * relative to 1.2
+ *
+ * Revision 1.3 87/09/24 13:58:45 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:21:48 jenkins
+ * Port to suns
+ *
+ * Revision 1.2 83/12/05 10:12:56 wft
+ * added cond. compilation for USG Unix; long timezone;
+ *
+ * Revision 1.1 82/05/06 11:38:00 wft
+ * Initial revision
+ *
+ */
+
+
+#include "rcsbase.h"
+
+libId(maketId, "$Id: maketime.c,v 5.3 1991/08/19 03:13:55 eggert Exp $")
+
+static struct tm const *time2tm P((time_t));
+
+#define given(v) (0 <= (v)) /* Negative values are unspecified. */
+
+static int const daytb[] = {
+ /* # days in year thus far, indexed by month (0-12!!) */
+ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365
+};
+
+ static time_t
+maketime(atm,zone) /* Convert *ATM, taken in ZONE (minutes west of GMT, or 48*60 for local time), to a time_t; yield -1 on failure. */
+ struct tm const *atm;
+ int zone;
+{
+ register struct tm const *tp;
+ register int i;
+ int year, yday, mon, day, hour, min, sec, leap, localzone;
+ int attempts;
+ time_t t, tres;
+
+ attempts = 2; /* a second pass recomputes the local zone as of the first pass's result */
+ localzone = zone==48*60; /* 48*60 is the caller's marker for local time */
+ tres = -1; /* -1: the current time has not been fetched yet */
+ year = mon = day = 0; /* Keep lint happy. */
+
+ do {
+
+ if (localzone || !given(atm->tm_year)) {
+ if (tres == -1)
+ if ((tres = time((time_t*)0)) == -1)
+ return -1;
+ tp = time2tm(tres);
+ /* Get breakdowns of default time, adjusting to zone. */
+ year = tp->tm_year; /* Use to set up defaults */
+ yday = tp->tm_yday;
+ mon = tp->tm_mon;
+ day = tp->tm_mday;
+ hour = tp->tm_hour;
+ min = tp->tm_min;
+ if (localzone) {
+ tp = localtime(&tres);
+ zone =
+ min - tp->tm_min + 60*(
+ hour - tp->tm_hour + 24*(
+ /* If years differ, it's by one day. */
+ year - tp->tm_year
+ ? year - tp->tm_year
+ : yday - tp->tm_yday));
+ }
+ /* Adjust the default day, month and year according to zone. */
+ if ((min -= zone) < 0) {
+ if (hour-(59-min)/60 < 0 && --day <= 0) {
+ if (--mon < 0) {
+ --year;
+ mon = 11;
+ }
+ day = daytb[mon+1] - daytb[mon] + (mon==1&&!(year&3));
+ }
+ } else
+ if (
+ 24 <= hour+min/60 &&
+ daytb[mon+1] - daytb[mon] + (mon==1&&!(year&3)) < ++day
+ ) {
+ if (11 < ++mon) {
+ ++year;
+ mon = 0;
+ }
+ day = 1;
+ }
+ }
+ if (zone < -24*60 || 24*60 < zone)
+ return -1;
+
+
+#ifdef DEBUG
+printf("first YMD: %d %d %d\n",year,mon,day);
+#endif
+ tp = atm;
+
+ /* First must find date, using specified year, month, day.
+ * If one of these is unspecified, it defaults either to the
+ * current date (if no more global spec was given) or to the
+ * zero-value for that spec (i.e. a more global spec was seen).
+ * Reject times that do not fit in time_t,
+ * without assuming that time_t is 32 bits or is signed.
+ */
+ if (given(tp->tm_year))
+ {
+ year = tp->tm_year;
+ mon = 0; /* Since year was given, default */
+ day = 1; /* for remaining specs is zero */
+ }
+ if (year < 69) /* 1969/12/31 OK in some timezones. */
+ return -1; /* ERR: year out of range */
+ leap = !(year&3) && (year%100 || !((year+300)%400)); /* year counts from 1900, and 1900 % 400 == 300 */
+ year -= 70; /* UNIX time starts at 1970 */
+
+ /*
+ * Find day of year.
+ */
+ {
+ if (given(tp->tm_mon))
+ { mon = tp->tm_mon; /* Month was specified */
+ day = 1; /* so set remaining default */
+ }
+ if (11 < (unsigned)mon)
+ return -1; /* ERR: bad month */
+ if (given(tp->tm_mday)) day = tp->tm_mday;
+ if(day < 1
+ || (((daytb[mon+1]-daytb[mon]) < day)
+ && (day!=29 || mon!=1 || !leap) ))
+ return -1; /* ERR: bad day */
+ yday = daytb[mon] /* Add # of days in months so far */
+ + ((leap /* Leap year, and past Feb? If */
+ && mon>1)? 1:0) /* so, add leap day for this year */
+ + day-1; /* And finally add # days this mon */
+
+ }
+ if (leap+365 <= (unsigned)yday)
+ return -1; /* ERR: bad YDAY */
+
+ if (year < 0) {
+ if (yday != 364)
+ return -1; /* ERR: too early */
+ t = -1;
+ } else {
+ tres = year*365; /* Get # days of years so far */
+ if (tres/365 != year)
+ return -1; /* ERR: overflow */
+ t = tres
+ + ((year+1)>>2) /* plus # of leap days since 1970 */
+ + yday; /* and finally add # days this year */
+ if (t+4 < tres)
+ return -1; /* ERR: overflow */
+ }
+ tres = t;
+
+ if (given(i = tp->tm_wday)) /* Check WDAY if present */
+ if (i != (tres+4)%7) /* 1970/01/01 was Thu = 4 */
+ return -1; /* ERR: bad WDAY */
+
+#ifdef DEBUG
+printf("YMD: %d %d %d, T=%ld\n",year,mon,day,tres);
+#endif
+ /*
+ * Now determine time. If not given, default to zeros
+ * (since time is always the least global spec)
+ */
+ tres *= 86400L; /* Get # seconds (24*60*60) */
+ if (tres/86400L != t)
+ return -1; /* ERR: overflow */
+ hour = min = sec = 0;
+ if (given(tp->tm_hour)) hour = tp->tm_hour;
+ if (given(tp->tm_min )) min = tp->tm_min;
+ if (given(tp->tm_sec )) sec = tp->tm_sec;
+ if (60 <= (unsigned)min || 60 < (unsigned)sec) /* note: sec == 60 is let through */
+ return -1; /* ERR: MS out of range */
+ if (24 <= (unsigned)hour)
+ if(hour != 24 || (min+sec) !=0) /* Allow 24:00 */
+ return -1; /* ERR: H out of range */
+
+ t = tres;
+ tres += sec + 60L*(zone + min + 60*hour);
+
+#ifdef DEBUG
+printf("HMS: %d %d %d T=%ld\n",hour,min,sec,tres);
+#endif
+
+ if (!localzone) /* check for overflow */
+ return (year<0 ? (tres<0||86400L<=tres) : tres<t) ? -1 : tres;
+
+ /* Check results; LT may have had a different GMT offset back then. */
+ tp = localtime(&tres);
+ if (given(atm->tm_sec) && atm->tm_sec != tp->tm_sec)
+ return -1; /* If seconds don't match, we're in trouble. */
+ if (!(
+ given(atm->tm_min) && atm->tm_min != tp->tm_min ||
+ given(atm->tm_hour) && atm->tm_hour != tp->tm_hour ||
+ given(atm->tm_mday) && atm->tm_mday != tp->tm_mday ||
+ given(atm->tm_mon) && atm->tm_mon != tp->tm_mon ||
+ given(atm->tm_year) && atm->tm_year != tp->tm_year
+ ))
+ return tres; /* Everything matches. */
+
+ } while (--attempts); /* mismatch: loop again, now deriving the zone from tres instead of the current time */
+
+ return -1;
+}
+
+/*
+* Convert Unix time to struct tm format.
+* Use Coordinated Universal Time (UTC) if version 5 or newer;
+* use local time otherwise.
+*/
+ static struct tm const *
+time2tm(unixtime)
+ time_t unixtime;
+{
+ struct tm const *tm;
+# if TZ_must_be_set
+ static char const *TZ; /* cached, so getenv() is consulted only until it first succeeds */
+ if (!TZ && !(TZ = getenv("TZ")))
+ faterror("TZ is not set");
+# endif
+ if (!(tm = (RCSversion<VERSION(5) ? localtime : gmtime)(&unixtime))) /* pick the converter by RCSversion, then check for a null result */
+ faterror("UTC is not available; perhaps TZ is not set?");
+ return tm; /* the pointer handed back by localtime() or gmtime() */
+}
+
+/*
+* Convert Unix time to RCS format.
+* For compatibility with older versions of RCS,
+* dates before AD 2000 are stored without the leading "19".
+*/
+ void
+time2date(unixtime,date) /* Format UNIXTIME into DATE using DATEFORM. */
+ time_t unixtime;
+ char date[datesize];
+{
+ register struct tm const *tm = time2tm(unixtime);
+ VOID sprintf(date, DATEFORM,
+ tm->tm_year + (tm->tm_year<100 ? 0 : 1900), /* tm_year below 100 is written as is; otherwise as the full year */
+ tm->tm_mon+1, tm->tm_mday, /* tm_mon is zero-based */
+ tm->tm_hour, tm->tm_min, tm->tm_sec
+ );
+}
+
+
+
+ static time_t
+str2time(source)
+ char const *source;
+/* Parse a free-format date in SOURCE, yielding a Unix format time; call faterror() if it cannot be parsed or converted. */
+{
+ int zone;
+ time_t unixtime;
+ struct tm parseddate;
+
+ if (!partime(source, &parseddate, &zone)) /* partime() fills in the broken-down date and the zone */
+ faterror("can't parse date/time: %s", source);
+ if ((unixtime = maketime(&parseddate, zone)) == -1) /* -1 is maketime()'s failure value */
+ faterror("bad date/time: %s", source);
+ return unixtime;
+}
+
+ void
+str2date(source, target)
+ char const *source;
+ char target[datesize];
+/* Parse a free-format date in SOURCE, convert it
+ * into RCS internal format, and store the result into TARGET.
+ */
+{
+ time2date(str2time(source), target); /* parse to a time_t first, then format it into TARGET */
+}
+
+ int
+setfiledate(file, date)
+ char const *file, date[datesize];
+/* Set the access and modification time of FILE to DATE; yield 0 untouched if DATE is null, else utime()'s result. */
+{
+ static struct utimbuf times; /* static so unused fields are zero */
+ char datebuf[datesize];
+
+ if (!date)
+ return 0;
+ times.actime = times.modtime = str2time(date2str(date, datebuf)); /* date2str() presumably renders DATE in a form str2time() can parse -- confirm */
+ return utime(file, &times);
+}
diff --git a/gnu/usr.bin/rcs/lib/merger.c b/gnu/usr.bin/rcs/lib/merger.c
new file mode 100644
index 000000000000..7162ffa58eb7
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/merger.c
@@ -0,0 +1,139 @@
+/* merger - three-way file merge internals */
+
+/* Copyright 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+#include "rcsbase.h"
+
+libId(mergerId, "$Id: merger.c,v 1.3 1991/08/20 23:05:00 eggert Exp $")
+
+ static char const *
+normalize_arg(s, b)
+ char const *s;
+ char **b;
+/*
+ * Yield S, or a newly allocated copy prefixed with "." and SLASH when S starts
+ * with '-' or '+'. Set *B to that allocation, or to 0 if nothing was allocated.
+ */
+{
+ char *copy;
+ if (*s != '-' && *s != '+') {
+ *b = 0;
+ return s;
+ }
+ /* 3 extra bytes: '.', SLASH, and the terminating NUL */
+ copy = testalloc(strlen(s) + 3);
+ VOID sprintf(copy, ".%c%s", SLASH, s);
+ *b = copy;
+ return copy;
+}
+
+ int
+merge(tostdout, label, argv)
+ int tostdout;
+ char const *const label[2];
+ char const *const argv[3];
+/*
+ * Do `merge [-p] -L l0 -L l1 a0 a1 a2',
+ * where TOSTDOUT specifies whether -p is present,
+ * LABEL gives l0 and l1, and ARGV gives a0, a1, and a2.
+ * Yield DIFF_SUCCESS or DIFF_FAILURE; any other subprocess status goes to exiterr().
+ */
+{
+ register int i;
+ FILE *f;
+ RILE *rt;
+ char const *a[3], *t; /* a[]: ARGV made safe by normalize_arg; t: temp file name */
+ char *b[3]; /* storage allocated by normalize_arg for a[i], or 0 */
+ int s; /* status of the DIFF3 run; also the value yielded */
+#if !DIFF3_BIN
+ char const *d[2]; /* temp files holding the two DIFF outputs */
+#endif
+
+ for (i=3; 0<=--i; )
+ a[i] = normalize_arg(argv[i], &b[i]);
+
+#if DIFF3_BIN
+ t = 0; /* NOTE(review): a null 2nd argument to run() presumably leaves stdout alone -- confirm */
+ if (!tostdout)
+ t = maketemp(0);
+ s = run(
+ (char*)0, t,
+ DIFF3, "-am", "-L", label[0], "-L", label[1],
+ a[0], a[1], a[2], (char*)0
+ );
+ switch (s) {
+ case DIFF_SUCCESS:
+ break;
+ case DIFF_FAILURE:
+ if (!quietflag)
+ warn("overlaps during merge");
+ break;
+ default:
+ exiterr();
+ }
+ if (t) { /* not -p: copy the temp file t over the first file */
+ if (!(f = fopen(argv[0], FOPEN_W)))
+ efaterror(argv[0]);
+ if (!(rt = Iopen(t, FOPEN_R, (struct stat*)0)))
+ efaterror(t);
+ fastcopy(rt, f);
+ Ifclose(rt);
+ Ofclose(f);
+ }
+#else
+ for (i=0; i<2; i++) /* run DIFF on a[0] vs a[2] and a[1] vs a[2], into d[0] and d[1] */
+ switch (run(
+ (char*)0, d[i]=maketemp(i),
+ DIFF, a[i], a[2], (char*)0
+ )) {
+ case DIFF_FAILURE: case DIFF_SUCCESS: break;
+ default: exiterr();
+ }
+ t = maketemp(2); /* receives DIFF3's output; it is later given to ED via run()'s 1st argument */
+ s = run(
+ (char*)0, t,
+ DIFF3, "-E", d[0], d[1], a[0], a[1], a[2],
+ label[0], label[1], (char*)0
+ );
+ if (s != DIFF_SUCCESS) { /* every nonzero status is folded into DIFF_FAILURE here */
+ s = DIFF_FAILURE;
+ if (!quietflag)
+ warn("overlaps or other problems during merge");
+ }
+ if (!(f = fopen(t, "a")))
+ efaterror(t);
+ aputs(tostdout ? "1,$p\n" : "w\n", f); /* append the final command: print all lines for -p, else write */
+ Ofclose(f);
+ if (run(t, (char*)0, ED, "-", a[0], (char*)0))
+ exiterr();
+#endif
+
+ tempunlink();
+ for (i=3; 0<=--i; ) /* release whatever normalize_arg allocated */
+ if (b[i])
+ tfree(b[i]);
+ return s;
+}
diff --git a/gnu/usr.bin/rcs/lib/partime.c b/gnu/usr.bin/rcs/lib/partime.c
new file mode 100644
index 000000000000..4751fc56d15a
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/partime.c
@@ -0,0 +1,639 @@
+/*
+ * PARTIME parse date/time string into a TM structure
+ *
+ * Returns:
+ * 0 if parsing failed
+ * else time values in specified TM structure and zone (unspecified values
+ * set to TMNULL)
+ * Notes:
+ * This code is quasi-public; it may be used freely in like software.
+ * It is not to be sold, nor used in licensed software without
+ * permission of the author.
+ * For everyone's benefit, please report bugs and improvements!
+ * Copyright 1980 by Ken Harrenstien, SRI International.
+ * (ARPANET: KLH @ SRI)
+ */
+
+/* Hacknotes:
+ * If parsing changed so that no backup needed, could perhaps modify
+ * to use a FILE input stream. Need terminator, though.
+ * Perhaps should return 0 on success, else a non-zero error val?
+ */
+
+/* $Log: partime.c,v $
+ * Revision 5.6 1991/08/19 03:13:55 eggert
+ * Update timezones.
+ *
+ * Revision 5.5 1991/04/21 11:58:18 eggert
+ * Don't put , just before } in initializer.
+ *
+ * Revision 5.4 1990/10/04 06:30:15 eggert
+ * Remove date vs time heuristics that fail between 2000 and 2400.
+ * Check for overflow when lexing an integer.
+ * Parse 'Jan 10 LT' as 'Jan 10, LT', not 'Jan, 10 LT'.
+ *
+ * Revision 5.3 1990/09/24 18:56:31 eggert
+ * Update timezones.
+ *
+ * Revision 5.2 1990/09/04 08:02:16 eggert
+ * Don't parse two-digit years, because it won't work after 1999/12/31.
+ * Don't permit 'Aug Aug'.
+ *
+ * Revision 5.1 1990/08/29 07:13:49 eggert
+ * Be able to parse our own date format. Don't assume year<10000.
+ *
+ * Revision 5.0 1990/08/22 08:12:40 eggert
+ * Switch to GMT and fix the bugs exposed thereby. Update timezones.
+ * Ansify and Posixate. Fix peekahead and int-size bugs.
+ *
+ * Revision 1.4 89/05/01 14:48:46 narten
+ * fixed #ifdef DEBUG construct
+ *
+ * Revision 1.3 88/08/28 14:53:40 eggert
+ * Remove unportable "#endif XXX"s.
+ *
+ * Revision 1.2 87/03/27 14:21:53 jenkins
+ * Port to suns
+ *
+ * Revision 1.1 82/05/06 11:38:26 wft
+ * Initial revision
+ *
+ */
+
+#include "rcsbase.h"
+
+libId(partId, "$Id: partime.c,v 5.6 1991/08/19 03:13:55 eggert Exp $")
+
+#define given(v) (0 <= (v)) /* true if V holds a value rather than TMNULL */
+#define TMNULL (-1) /* Items not given are given this value */
+#define TZ_OFFSET (24*60) /* bias added to stored zone offsets: TMNULL < zone_offset - TZ_OFFSET */
+
+struct tmwent { /* one keyword recognized in date/time strings */
+ char const *went; /* keyword text; a null pointer ends the table */
+ short wval; /* value of the word; meaning depends on wtype */
+ char wflgs; /* TWTIME and TWDST bits */
+ char wtype; /* one of the TM_* codes */
+};
+ /* wflgs */
+#define TWTIME 02 /* Word is a time value (absence implies date) */
+#define TWDST 04 /* Word is a DST-type timezone */
+ /* wtype */
+#define TM_MON 1 /* month name */
+#define TM_WDAY 2 /* weekday name */
+#define TM_ZON 3 /* time zone name */
+#define TM_LT 4 /* local time */
+#define TM_DST 5 /* daylight savings time */
+#define TM_12 6 /* AM, PM, NOON, or MIDNIGHT */
+ /* wval (for wtype==TM_12) */
+#define T12_AM 1
+#define T12_PM 2
+#define T12_NOON 12 /* pt12hack stores this value directly as the hour */
+#define T12_MIDNIGHT 0 /* pt12hack stores this value directly as the hour */
+
+static struct tmwent const tmwords [] = { /* keyword table searched by ptmatchstr; each entry is {went, wval, wflgs, wtype} */
+ {"january", 0, 0, TM_MON},
+ {"february", 1, 0, TM_MON},
+ {"march", 2, 0, TM_MON},
+ {"april", 3, 0, TM_MON},
+ {"may", 4, 0, TM_MON},
+ {"june", 5, 0, TM_MON},
+ {"july", 6, 0, TM_MON},
+ {"august", 7, 0, TM_MON},
+ {"september", 8, 0, TM_MON},
+ {"october", 9, 0, TM_MON},
+ {"november", 10, 0, TM_MON},
+ {"december", 11, 0, TM_MON},
+
+ {"sunday", 0, 0, TM_WDAY},
+ {"monday", 1, 0, TM_WDAY},
+ {"tuesday", 2, 0, TM_WDAY},
+ {"wednesday", 3, 0, TM_WDAY},
+ {"thursday", 4, 0, TM_WDAY},
+ {"friday", 5, 0, TM_WDAY},
+ {"saturday", 6, 0, TM_WDAY},
+
+ {"gmt", 0*60, TWTIME, TM_ZON}, /* Greenwich; zone values here are minutes, positive for zones west of it */
+ {"utc", 0*60, TWTIME, TM_ZON},
+ {"ut", 0*60, TWTIME, TM_ZON},
+ {"cut", 0*60, TWTIME, TM_ZON},
+
+ {"nzst", -12*60, TWTIME, TM_ZON}, /* New Zealand */
+ {"jst", -9*60, TWTIME, TM_ZON}, /* Japan */
+ {"kst", -9*60, TWTIME, TM_ZON}, /* Korea */
+ {"ist", -5*60-30, TWTIME, TM_ZON},/* India */
+ {"eet", -2*60, TWTIME, TM_ZON}, /* Eastern Europe */
+ {"cet", -1*60, TWTIME, TM_ZON}, /* Central Europe */
+ {"met", -1*60, TWTIME, TM_ZON}, /* Middle Europe */
+ {"wet", 0*60, TWTIME, TM_ZON}, /* Western Europe */
+ {"nst", 3*60+30, TWTIME, TM_ZON},/* Newfoundland */
+ {"ast", 4*60, TWTIME, TM_ZON}, /* Atlantic */
+ {"est", 5*60, TWTIME, TM_ZON}, /* Eastern */
+ {"cst", 6*60, TWTIME, TM_ZON}, /* Central */
+ {"mst", 7*60, TWTIME, TM_ZON}, /* Mountain */
+ {"pst", 8*60, TWTIME, TM_ZON}, /* Pacific */
+ {"akst", 9*60, TWTIME, TM_ZON}, /* Alaska */
+ {"hast", 10*60, TWTIME, TM_ZON}, /* Hawaii-Aleutian */
+ {"hst", 10*60, TWTIME, TM_ZON}, /* Hawaii */
+ {"sst", 11*60, TWTIME, TM_ZON}, /* Samoa */
+
+ {"nzdt", -12*60, TWTIME+TWDST, TM_ZON}, /* New Zealand */
+ {"kdt", -9*60, TWTIME+TWDST, TM_ZON}, /* Korea */
+ {"bst", 0*60, TWTIME+TWDST, TM_ZON}, /* Britain */
+ {"ndt", 3*60+30, TWTIME+TWDST, TM_ZON}, /* Newfoundland */
+ {"adt", 4*60, TWTIME+TWDST, TM_ZON}, /* Atlantic */
+ {"edt", 5*60, TWTIME+TWDST, TM_ZON}, /* Eastern */
+ {"cdt", 6*60, TWTIME+TWDST, TM_ZON}, /* Central */
+ {"mdt", 7*60, TWTIME+TWDST, TM_ZON}, /* Mountain */
+ {"pdt", 8*60, TWTIME+TWDST, TM_ZON}, /* Pacific */
+ {"akdt", 9*60, TWTIME+TWDST, TM_ZON}, /* Alaska */
+ {"hadt", 10*60, TWTIME+TWDST, TM_ZON}, /* Hawaii-Aleutian */
+
+#if 0
+ /*
+ * The following names are duplicates or are not well attested.
+ * A standard is needed. They are compiled out by the #if 0.
+ */
+ {"east", -10*60, TWTIME, TM_ZON}, /* Eastern Australia */
+ {"cast", -9*60-30, TWTIME, TM_ZON},/* Central Australia */
+ {"cst", -8*60, TWTIME, TM_ZON}, /* China */
+ {"hkt", -8*60, TWTIME, TM_ZON}, /* Hong Kong */
+ {"sst", -8*60, TWTIME, TM_ZON}, /* Singapore */
+ {"wast", -8*60, TWTIME, TM_ZON}, /* Western Australia */
+ {"?", -6*60-30, TWTIME, TM_ZON},/* Burma */
+ {"?", -4*60-30, TWTIME, TM_ZON},/* Afghanistan */
+ {"it", -3*60-30, TWTIME, TM_ZON},/* Iran */
+ {"ist", -2*60, TWTIME, TM_ZON}, /* Israel */
+ {"mez", -1*60, TWTIME, TM_ZON}, /* Mittel-Europaeische Zeit */
+ {"ast", 1*60, TWTIME, TM_ZON}, /* Azores */
+ {"fst", 2*60, TWTIME, TM_ZON}, /* Fernando de Noronha */
+ {"bst", 3*60, TWTIME, TM_ZON}, /* Brazil */
+ {"wst", 4*60, TWTIME, TM_ZON}, /* Western Brazil */
+ {"ast", 5*60, TWTIME, TM_ZON}, /* Acre Brazil */
+ {"?", 9*60+30, TWTIME, TM_ZON},/* Marquesas */
+ {"?", 12*60, TWTIME, TM_ZON}, /* Kwajalein */
+
+ {"eadt", -10*60, TWTIME+TWDST, TM_ZON}, /* Eastern Australia */
+ {"cadt", -9*60-30, TWTIME+TWDST, TM_ZON}, /* Central Australia */
+ {"cdt", -8*60, TWTIME+TWDST, TM_ZON}, /* China */
+ {"wadt", -8*60, TWTIME+TWDST, TM_ZON}, /* Western Australia */
+ {"idt", -2*60, TWTIME+TWDST, TM_ZON}, /* Israel */
+ {"eest", -2*60, TWTIME+TWDST, TM_ZON}, /* Eastern Europe */
+ {"cest", -1*60, TWTIME+TWDST, TM_ZON}, /* Central Europe */
+ {"mest", -1*60, TWTIME+TWDST, TM_ZON}, /* Middle Europe */
+ {"mesz", -1*60, TWTIME+TWDST, TM_ZON}, /* Mittel-Europaeische Sommerzeit */
+ {"west", 0*60, TWTIME+TWDST, TM_ZON}, /* Western Europe */
+ {"adt", 1*60, TWTIME+TWDST, TM_ZON}, /* Azores */
+ {"fdt", 2*60, TWTIME+TWDST, TM_ZON}, /* Fernando de Noronha */
+ {"edt", 3*60, TWTIME+TWDST, TM_ZON}, /* Eastern Brazil */
+ {"wdt", 4*60, TWTIME+TWDST, TM_ZON}, /* Western Brazil */
+ {"adt", 5*60, TWTIME+TWDST, TM_ZON}, /* Acre Brazil */
+#endif
+
+ {"lt", 0, TWTIME, TM_LT}, /* local time; partime replaces this wval with its own magic number */
+ {"dst", 1*60, TWTIME, TM_DST}, /* daylight savings time */
+ {"ddst", 2*60, TWTIME, TM_DST}, /* double dst */
+
+ {"am", T12_AM, TWTIME, TM_12},
+ {"pm", T12_PM, TWTIME, TM_12},
+ {"noon", T12_NOON, TWTIME, TM_12},
+ {"midnight", T12_MIDNIGHT, TWTIME, TM_12},
+
+ {0, 0, 0, 0} /* Zero entry to terminate searches */
+};
+
+struct token { /* scanner state shared by pttoken and ptitoken */
+ char const *tcp;/* pointer to the token's first char within the input string */
+ int tcnt; /* # chars in the token */
+ char tbrk; /* "break" char that ended the token; 0 if none */
+ char tbrkl; /* last break char, i.e. the one seen before the token */
+ char tflg; /* 0 = alpha, 1 = numeric */
+ union { /* Resulting value; */
+ int tnum;/* either a # (when no token is returned: 0 = EOF, 1 = error), or */
+ struct tmwent const *ttmw;/* a ptr to a tmwent. */
+ } tval;
+};
+
+static struct tmwent const*ptmatchstr P((char const*,int,struct tmwent const*));
+static int pt12hack P((struct tm *,int));
+static int ptitoken P((struct token *));
+static int ptstash P((int *,int));
+static int pttoken P((struct token *));
+
+ static int
+goodzone(t, offset, am)
+ register struct token const *t;
+ int offset;
+ int *am;
+/* If T is a numeric hhmm zone offset whose digits start at T->tcp[OFFSET],
+ * store it in *AM as minutes (negated after a '+' sign) and yield 1; else yield 0.
+ */
+{
+ register int hhmm, mins;
+ if (!t->tflg || t->tcnt != 4+offset)
+ return 0;
+ hhmm = t->tval.tnum;
+ if (2400 < hhmm || !isdigit(t->tcp[offset]))
+ return 0;
+ mins = hhmm % 100;
+ if (60 <= mins)
+ return 0;
+ mins += hhmm/100 * 60;
+ *am = t->tcp[offset-1]=='+' ? -mins : mins;
+ return 1;
+}
+
+ int
+partime(astr, atm, zone)
+char const *astr;
+register struct tm *atm;
+int *zone;
+{ /* Parse ASTR into *ATM (fields not given stay TMNULL, except that tm_min defaults to 0) and *ZONE; yield 0 on failure, 1 on success. */
+ register int i;
+ struct token btoken, atoken; /* btoken: current token; atoken: scratch copy for peeking ahead */
+ int zone_offset; /* minutes west of GMT, plus TZ_OFFSET */
+ register char const *cp;
+ register char ch;
+ int ord, midnoon;
+ int *atmfield, dst, m;
+ int got1 = 0; /* becomes 1 once a token has been consumed */
+
+ atm->tm_sec = TMNULL;
+ atm->tm_min = TMNULL;
+ atm->tm_hour = TMNULL;
+ atm->tm_mday = TMNULL;
+ atm->tm_mon = TMNULL;
+ atm->tm_year = TMNULL;
+ atm->tm_wday = TMNULL;
+ atm->tm_yday = TMNULL;
+ midnoon = TMNULL; /* and our own temp stuff */
+ zone_offset = TMNULL;
+ dst = TMNULL;
+ btoken.tcnt = btoken.tbrk = 0;
+ btoken.tcp = astr;
+
+ for (;; got1=1) {
+ if (!ptitoken(&btoken)) /* Get a token */
+ { if(btoken.tval.tnum) return(0); /* Read error? */
+ if (given(midnoon)) /* EOF, wrap up */
+ if (!pt12hack(atm, midnoon))
+ return 0;
+ if (!given(atm->tm_min))
+ atm->tm_min = 0;
+ *zone =
+ (given(zone_offset) ? zone_offset-TZ_OFFSET : 0)
+ - (given(dst) ? dst : 0);
+ return got1; /* 0 if the string held no tokens at all */
+ }
+ if(btoken.tflg == 0) /* Alpha? */
+ { i = btoken.tval.ttmw->wval;
+ switch (btoken.tval.ttmw->wtype) {
+ default:
+ return 0;
+ case TM_MON:
+ atmfield = &atm->tm_mon;
+ break;
+ case TM_WDAY:
+ atmfield = &atm->tm_wday;
+ break;
+ case TM_DST:
+ atmfield = &dst;
+ break;
+ case TM_LT:
+ if (ptstash(&dst, 0))
+ return 0;
+ i = 48*60; /* local time magic number -- see maketime() */
+ /* fall into */
+ case TM_ZON:
+ i += TZ_OFFSET;
+ if (btoken.tval.ttmw->wflgs & TWDST)
+ if (ptstash(&dst, 60))
+ return 0;
+ /* Peek ahead for offset immediately afterwards. */
+ if (
+ (btoken.tbrk=='-' || btoken.tbrk=='+') &&
+ (atoken=btoken, ++atoken.tcnt, ptitoken(&atoken)) &&
+ goodzone(&atoken, 0, &m)
+ ) {
+ i += m;
+ btoken = atoken;
+ }
+ atmfield = &zone_offset;
+ break;
+ case TM_12:
+ atmfield = &midnoon;
+ }
+ if (ptstash(atmfield, i))
+ return(0); /* ERR: val already set */
+ continue;
+ }
+
+ /* Token is number. Lots of hairy heuristics. */
+ if (!isdigit(*btoken.tcp)) { /* starts with a sign: must be a zone offset */
+ if (!goodzone(&btoken, 1, &m))
+ return 0;
+ zone_offset = TZ_OFFSET + m;
+ continue;
+ }
+
+ i = btoken.tval.tnum; /* Value now known to be valid; get it. */
+ if (btoken.tcnt == 3) /* 3 digits = HMM */
+ {
+hhmm4: if (ptstash(&atm->tm_min, i%100))
+ return(0); /* ERR: min conflict */
+ i /= 100;
+hh2: if (ptstash(&atm->tm_hour, i))
+ return(0); /* ERR: hour conflict */
+ continue;
+ }
+
+ if (4 < btoken.tcnt)
+ goto year4; /* far in the future */
+ if(btoken.tcnt == 4) /* 4 digits = YEAR or HHMM */
+ { if (given(atm->tm_year)) goto hhmm4; /* Already got yr? */
+ if (given(atm->tm_hour)) goto year4; /* Already got hr? */
+ if(btoken.tbrk == ':') /* HHMM:SS ? */
+ if ( ptstash(&atm->tm_hour, i/100)
+ || ptstash(&atm->tm_min, i%100))
+ return(0); /* ERR: hr/min clash */
+ else goto coltm2; /* Go handle SS */
+ if(btoken.tbrk != ',' && btoken.tbrk != '/'
+ && (atoken=btoken, ptitoken(&atoken)) /* Peek */
+ && ( atoken.tflg
+ ? !isdigit(*atoken.tcp)
+ : atoken.tval.ttmw->wflgs & TWTIME)) /* HHMM-ZON */
+ goto hhmm4;
+ goto year4; /* Give up, assume year. */
+ }
+
+ /* From this point on, assume tcnt == 1 or 2 */
+ /* 2 digits = MM, DD, or HH (MM and SS caught at coltime) */
+ if(btoken.tbrk == ':') /* HH:MM[:SS] */
+ goto coltime; /* must be part of time. */
+ if (31 < i)
+ return 0;
+
+ /* Check for numerical-format date */
+ for (cp = "/-."; ch = *cp++;)
+ { ord = (ch == '.' ? 0 : 1); /* n/m = D/M or M/D */
+ if(btoken.tbrk == ch) /* "NN-" */
+ { if(btoken.tbrkl != ch)
+ {
+ atoken = btoken;
+ atoken.tcnt++;
+ if (ptitoken(&atoken)
+ && atoken.tflg == 0
+ && atoken.tval.ttmw->wtype == TM_MON)
+ goto dd2;
+ if(ord)goto mm2; else goto dd2; /* "NN-" */
+ } /* "-NN-" */
+ if (!given(atm->tm_mday)
+ && given(atm->tm_year)) /* If "YYYY-NN-" */
+ goto mm2; /* then always MM */
+ if(ord)goto dd2; else goto mm2;
+ }
+ if(btoken.tbrkl == ch /* "-NN" */
+ && given(ord ? atm->tm_mon : atm->tm_mday))
+ if (!given(ord ? atm->tm_mday : atm->tm_mon)) /* MM/DD */
+ if(ord)goto dd2; else goto mm2;
+ }
+
+ /* Now reduced to choice between HH and DD */
+ if (given(atm->tm_hour)) goto dd2; /* Have hour? Assume day. */
+ if (given(atm->tm_mday)) goto hh2; /* Have day? Assume hour. */
+ if (given(atm->tm_mon)) goto dd2; /* Have month? Assume day. */
+ if(i > 24) goto dd2; /* Impossible HH means DD */
+ atoken = btoken;
+ if (!ptitoken(&atoken)) /* Read ahead! */
+ if(atoken.tval.tnum) return(0); /* ERR: bad token */
+ else goto dd2; /* EOF, assume day. */
+ if ( atoken.tflg
+ ? !isdigit(*atoken.tcp)
+ : atoken.tval.ttmw->wflgs & TWTIME)
+ /* If next token is a time spec, assume hour */
+ goto hh2; /* e.g. "3 PM", "11-EDT" */
+
+dd2: if (ptstash(&atm->tm_mday, i)) /* Store day (1 based) */
+ return(0);
+ continue;
+
+mm2: if (ptstash(&atm->tm_mon, i-1)) /* Store month (make zero based) */
+ return(0);
+ continue;
+
+year4: if ((i-=1900) < 0 || ptstash(&atm->tm_year, i)) /* Store year-1900 */
+ return(0); /* ERR: year conflict */
+ continue;
+
+ /* Hack HH:MM[[:]SS] */
+coltime:
+ if (ptstash(&atm->tm_hour, i)) return 0;
+ if (!ptitoken(&btoken)) /* EOF or bad token after "HH:" */
+ { if (btoken.tval.tnum) return 0; continue; } /* at EOF, let the loop top wrap up so *zone gets stored */
+ if(!btoken.tflg) return(0); /* ERR: HH:<alpha> */
+ if(btoken.tcnt == 4) /* MMSS */
+ if (ptstash(&atm->tm_min, btoken.tval.tnum/100)
+ || ptstash(&atm->tm_sec, btoken.tval.tnum%100))
+ return(0);
+ else continue;
+ if(btoken.tcnt != 2
+ || ptstash(&atm->tm_min, btoken.tval.tnum))
+ return(0); /* ERR: MM bad */
+ if (btoken.tbrk != ':') continue; /* Seconds follow? */
+coltm2: if (!ptitoken(&btoken)) /* EOF or bad token where SS was expected */
+ { if (btoken.tval.tnum) return 0; continue; } /* at EOF, let the loop top wrap up so *zone gets stored */
+ if(!btoken.tflg || btoken.tcnt != 2 /* Verify SS */
+ || ptstash(&atm->tm_sec, btoken.tval.tnum))
+ return(0); /* ERR: SS bad */
+ }
+}
+
+/* Store date/time value, return 0 if successful.
+ * Fail if entry is already set.
+ */
+ static int
+ptstash(adr,val)
+int *adr;
+int val;
+{ /* a slot that already holds a value is left untouched */
+ if (given(*adr))
+ return 1;
+ *adr = val;
+ return 0;
+}
+
+/* This subroutine is invoked for AM, PM, NOON and MIDNIGHT when wrapping up
+ * just prior to returning from partime.
+ */
+ static int
+pt12hack(tm, aval)
+register struct tm *tm;
+register int aval;
+{ register int h = tm->tm_hour; /* hour as parsed so far; TMNULL if none was given */
+ switch (aval) { /* yields 1 if TM agrees with the keyword AVAL, 0 on conflict */
+ case T12_AM:
+ case T12_PM:
+ if (!given(h) || h > 12) /* AM/PM needs an explicit hour no greater than 12 */
+ return 0;
+ if (h == 12) /* 12 AM is hour 0; 12 PM becomes 0+12 below */
+ tm->tm_hour = 0;
+ if (aval == T12_PM)
+ tm->tm_hour += 12;
+ break;
+ default: /* NOON or MIDNIGHT: AVAL is itself the hour (12 or 0) */
+ if (0 < tm->tm_min || 0 < tm->tm_sec)
+ return 0;
+ if (!given(h) || h==12)
+ tm->tm_hour = aval;
+ else if (!(aval==T12_MIDNIGHT && (h==0 || h==24)))
+ return 0; /* any other explicit hour contradicts the keyword */
+ }
+ return 1;
+}
+
+/* Get a token and identify it to some degree.
+ * Returns 0 on failure; token.tval.tnum will then be 0 for normal EOF,
+ * nonzero if an error of some sort was hit.
+ */
+
+ static int
+ptitoken(tkp)
+register struct token *tkp;
+{
+ register char const *cp;
+ register int i, j, k;
+
+ if (!pttoken(tkp)) /* no token left; pttoken has set tval.tnum to 0 */
+#ifdef DEBUG
+ {
+ VOID printf("EOF\n");
+ return(0);
+ }
+#else
+ return(0);
+#endif
+ cp = tkp->tcp;
+
+#ifdef DEBUG
+ VOID printf("Token: \"%.*s\" ", tkp->tcnt, cp);
+#endif
+
+ if (tkp->tflg) { /* numeric token: accumulate its decimal value in tval.tnum */
+ i = tkp->tcnt;
+ if (*cp == '+' || *cp == '-') { /* a leading sign is skipped, not applied to the value */
+ cp++;
+ i--;
+ }
+ while (0 <= --i) {
+ j = tkp->tval.tnum*10;
+ k = j + (*cp++ - '0');
+ if (j/10 != tkp->tval.tnum || k < j) {
+ /* arithmetic overflow: report an error (nonzero tnum), not EOF */
+ tkp->tval.tnum = 1;
+ return 0;
+ }
+ tkp->tval.tnum = k;
+ }
+ } else if (!(tkp->tval.ttmw = ptmatchstr(cp, tkp->tcnt, tmwords)))
+ { /* alphabetic token that matches no entry of tmwords */
+#ifdef DEBUG
+ VOID printf("Not found!\n");
+#endif
+ tkp->tval.tnum = 1; /* nonzero marks an error rather than EOF */
+ return 0;
+ }
+
+#ifdef DEBUG
+ if(tkp->tflg)
+ VOID printf("Val: %d.\n",tkp->tval.tnum);
+ else VOID printf("Found: \"%s\", val: %d, type %d\n",
+ tkp->tval.ttmw->went,tkp->tval.ttmw->wval,tkp->tval.ttmw->wtype);
+#endif
+
+ return(1);
+}
+
+/* Read token from input string into token structure */
+ static int
+pttoken(tkp)
+register struct token *tkp;
+{ /* yields the length of the token found, or 0 if the string is exhausted */
+ register char const *cp;
+ register int c;
+ char const *astr; /* where this scan started */
+
+ tkp->tcp = astr = cp = tkp->tcp + tkp->tcnt; /* resume just past the previous token */
+ tkp->tbrkl = tkp->tbrk; /* Set "last break" */
+ tkp->tcnt = tkp->tbrk = tkp->tflg = 0;
+ tkp->tval.tnum = 0; /* ptitoken's callers read 0 here as plain EOF */
+
+ while(c = *cp++)
+ { switch(c)
+ { case ' ': case '\t': /* Flush all whitespace */
+ case '\r': case '\n':
+ case '\v': case '\f':
+ if (!tkp->tcnt) { /* If no token yet */
+ tkp->tcp = cp; /* ignore the brk */
+ continue; /* and go on. */
+ }
+ /* fall into: whitespace after a token ends it like any break char */
+ case '(': case ')': /* Perhaps any non-alphanum */
+ case '-': case ',': /* shd qualify as break? */
+ case '+':
+ case '/': case ':': case '.': /* Break chars */
+ if(tkp->tcnt == 0) /* If no token yet */
+ { tkp->tcp = cp; /* ignore the brk */
+ tkp->tbrkl = c; /* but remember it as the break before the token */
+ continue; /* and go on. */
+ }
+ tkp->tbrk = c;
+ return(tkp->tcnt);
+ }
+ if (!tkp->tcnt++) { /* If first char of token, */
+ if (isdigit(c)) {
+ tkp->tflg = 1;
+ if (astr<cp-2 && (cp[-2]=='-'||cp[-2]=='+')) {
+ /* timezone is break+sign+digit: pull the sign into the token */
+ tkp->tcp--;
+ tkp->tcnt++;
+ }
+ }
+ } else if ((isdigit(c)!=0) != tkp->tflg) { /* else check type */
+ tkp->tbrk = c;
+ return --tkp->tcnt; /* Wrong type, back up */
+ }
+ }
+ return(tkp->tcnt); /* When hit EOF: token length, or 0 if none */
+}
+
+
+ static struct tmwent const *
+ptmatchstr(astr,cnt,astruc)
+ char const *astr;
+ int cnt;
+ struct tmwent const *astruc;
+{ /* Look up the CNT chars at ASTR in table ASTRUC: yield an exact match, else a unique prefix match, else 0. */
+ register char const *cp, *mp;
+ register int c;
+ struct tmwent const *lastptr; /* first entry that ASTR is a proper prefix of, if any */
+ int i;
+
+ lastptr = 0;
+ for(;mp = astruc->went; astruc += 1) /* a null went ends the table */
+ { cp = astr;
+ for(i = cnt; i > 0; i--)
+ {
+ switch (*cp++ - (c = *mp++))
+ { case 0: continue; /* Exact match */
+ case 'A'-'a': /* input char differs from the table char by the 'A'-'a' case offset */
+ if (ctab[c] == Letter) /* NOTE(review): Letter is presumably ctab's lower-case letter class -- confirm */
+ continue;
+ }
+ break; /* mismatch: i stays nonzero */
+ }
+ if(i==0) /* all CNT chars matched the start of this word */
+ if (!*mp) return astruc; /* Exact match */
+ else if(lastptr) return(0); /* Ambiguous */
+ else lastptr = astruc; /* 1st prefix match; keep looking */
+ }
+ return lastptr;
+}
diff --git a/gnu/usr.bin/rcs/lib/rcsbase.h b/gnu/usr.bin/rcs/lib/rcsbase.h
new file mode 100644
index 000000000000..c0904bbfb8b9
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcsbase.h
@@ -0,0 +1,677 @@
+
+/*
+ * RCS common definitions and data structures
+ */
+#define RCSBASE "$Id: rcsbase.h,v 5.11 1991/10/07 17:32:46 eggert Exp $"
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+/*****************************************************************************
+ * INSTRUCTIONS:
+ * =============
+ * See the Makefile for how to define C preprocessor symbols.
+ * If you need to change the comment leaders, update the table comtable[]
+ * in rcsfnms.c. (This can wait until you know what a comment leader is.)
+ *****************************************************************************
+ */
+
+
+/* $Log: rcsbase.h,v $
+ * Revision 5.11 1991/10/07 17:32:46 eggert
+ * Support piece tables even if !has_mmap.
+ *
+ * Revision 5.10 1991/09/24 00:28:39 eggert
+ * Remove unexported functions.
+ *
+ * Revision 5.9 1991/08/19 03:13:55 eggert
+ * Add piece tables and other tuneups, and NFS workarounds.
+ *
+ * Revision 5.8 1991/04/21 11:58:20 eggert
+ * Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.7 1991/02/28 19:18:50 eggert
+ * Try setuid() if seteuid() doesn't work.
+ *
+ * Revision 5.6 1991/02/26 17:48:37 eggert
+ * Support new link behavior. Move ANSI C / Posix declarations into conf.sh.
+ *
+ * Revision 5.5 1990/12/04 05:18:43 eggert
+ * Use -I for prompts and -q for diagnostics.
+ *
+ * Revision 5.4 1990/11/01 05:03:35 eggert
+ * Don't assume that builtins are functions; they may be macros.
+ * Permit arbitrary data in logs.
+ *
+ * Revision 5.3 1990/09/26 23:36:58 eggert
+ * Port wait() to non-Posix ANSI C hosts.
+ *
+ * Revision 5.2 1990/09/04 08:02:20 eggert
+ * Don't redefine NAME_MAX, PATH_MAX.
+ * Improve incomplete line handling. Standardize yes-or-no procedure.
+ *
+ * Revision 5.1 1990/08/29 07:13:53 eggert
+ * Add -kkvl. Fix type typos exposed by porting. Clean old log messages too.
+ *
+ * Revision 5.0 1990/08/22 08:12:44 eggert
+ * Adjust ANSI C / Posix support. Add -k, -V, setuid. Don't call access().
+ * Remove compile-time limits; use malloc instead.
+ * Ansify and Posixate. Add support for ISO 8859.
+ * Remove snoop and v2 support.
+ *
+ * Revision 4.9 89/05/01 15:17:14 narten
+ * botched previous USG fix
+ *
+ * Revision 4.8 89/05/01 14:53:05 narten
+ * changed #include <strings.h> -> string.h for USG systems.
+ *
+ * Revision 4.7 88/11/08 15:58:45 narten
+ * removed defs for functions loaded from libraries
+ *
+ * Revision 4.6 88/08/09 19:12:36 eggert
+ * Shrink stdio code size; remove lint; permit -Dhshsize=nn.
+ *
+ * Revision 4.5 87/12/18 17:06:41 narten
+ * made removed BSD ifdef, now uses V4_2BSD
+ *
+ * Revision 4.4 87/10/18 10:29:49 narten
+ * Updating version numbers
+ * Changes relative to 1.1 are actually relative to 4.2
+ *
+ * Revision 1.3 87/09/24 14:02:25 narten
+ * changes for lint
+ *
+ * Revision 1.2 87/03/27 14:22:02 jenkins
+ * Port to suns
+ *
+ * Revision 4.2 83/12/20 16:04:20 wft
+ * merged 3.6.1.1 and 4.1 (SMALLOG, logsize).
+ * moved setting of STRICT_LOCKING to Makefile.
+ * changed DOLLAR to UNKN (conflict with KDELIM).
+ *
+ * Revision 4.1 83/05/04 09:12:41 wft
+ * Added markers Id and RCSfile.
+ * Added Dbranch for default branches.
+ *
+ * Revision 3.6.1.1 83/12/02 21:56:22 wft
+ * Increased logsize, added macro SMALLOG.
+ *
+ * Revision 3.6 83/01/15 16:43:28 wft
+ * 4.2 prerelease
+ *
+ * Revision 3.6 83/01/15 16:43:28 wft
+ * Replaced dbm.h with BYTESIZ, fixed definition of rindex().
+ * Added variants of NCPFN and NCPPN for bsd 4.2, selected by defining V4_2BSD.
+ * Added macro DELNUMFORM to have uniform format for printing delta text nodes.
+ * Added macro DELETE to mark deleted deltas.
+ *
+ * Revision 3.5 82/12/10 12:16:56 wft
+ * Added two forms of DATEFORM, one using %02d, the other %.2d.
+ *
+ * Revision 3.4 82/12/04 20:01:25 wft
+ * added LOCKER, Locker, and USG (redefinition of rindex).
+ *
+ * Revision 3.3 82/12/03 12:22:04 wft
+ * Added dbm.h, stdio.h, RCSBASE, RCSSEP, RCSSUF, WORKMODE, TMPFILE3,
+ * PRINTDATE, PRINTTIME, map, and ctab; removed Suffix. Redefined keyvallength
+ * using NCPPN. Changed putc() to abort on write error.
+ *
+ * Revision 3.2 82/10/18 15:03:52 wft
+ * added macro STRICT_LOCKING, removed RCSUMASK.
+ * renamed JOINFILE[1,2] to JOINFIL[1,2].
+ *
+ * Revision 3.1 82/10/11 19:41:17 wft
+ * removed NBPW, NBPC, NCPW.
+ * added typdef int void to aid compiling
+ */
+
+
+#include "conf.h"
+
+
+#define EXIT_TROUBLE DIFF_TROUBLE
+
+#ifdef PATH_MAX
+# define SIZEABLE_PATH PATH_MAX /* size of a large path; not a hard limit */
+#else
+# define SIZEABLE_PATH _POSIX_PATH_MAX
+#endif
+
+/* for traditional C hosts with unusual size arguments */
+#define Fread(p,s,n,f) fread(p, (freadarg_type)(s), (freadarg_type)(n), f)
+#define Fwrite(p,s,n,f) fwrite(p, (freadarg_type)(s), (freadarg_type)(n), f)
+
+
+/*
+ * Parameters
+ */
+
+/* backwards compatibility with old versions of RCS */
+#define VERSION_min 3 /* old output RCS format supported */
+#define VERSION_max 5 /* newest output RCS format supported */
+#ifndef VERSION_DEFAULT /* default RCS output format */
+# define VERSION_DEFAULT VERSION_max
+#endif
+#define VERSION(n) ((n) - VERSION_DEFAULT) /* internally, 0 is the default */
+
+#ifndef STRICT_LOCKING
+#define STRICT_LOCKING 1
+#endif
+ /* 0 sets the default locking to non-strict; */
+ /* used in experimental environments. */
+ /* 1 sets the default locking to strict; */
+ /* used in production environments. */
+
+#define yearlength 16 /* (good through AD 9,999,999,999,999,999) */
+#define datesize (yearlength+16) /* size of output of DATEFORM */
+#define joinlength 20 /* number of joined revisions permitted */
+#define RCSTMPPREFIX '_' /* prefix for temp files in working dir */
+#define KDELIM '$' /* delimiter for keywords */
+#define VDELIM ':' /* separates keywords from values */
+#define DEFAULTSTATE "Exp" /* default state of revisions */
+
+
+
+#define true 1
+#define false 0
+#define nil 0
+
+
+/*
+ * RILE - readonly file
+ * declarecache; - declares local cache for RILE variable(s)
+ * setupcache - sets up the local RILE cache, but does not initialize it
+ * cache, uncache - caches and uncaches the local RILE;
+ * (uncache,cache) is needed around functions that advance the RILE pointer
+ * Igeteof(f,c,s) - get a char c from f, executing statement s at EOF
+ * cachegeteof(c,s) - Igeteof applied to the local RILE
+ * Iget(f,c) - like Igeteof, except EOF is an error
+ * cacheget(c) - Iget applied to the local RILE
+ * Ifileno, Irewind, Iseek, Itell - analogs to stdio routines
+ */
+
+#if large_memory
+ typedef unsigned char const *Iptr_type;
+ typedef struct {
+ Iptr_type ptr, lim; /* ptr: next byte to read; lim: value of ptr that means EOF when has_mmap */
+ unsigned char *base; /* for lint, not Iptr_type even if has_mmap */
+# if has_mmap
+# define Ifileno(f) ((f)->fd)
+ int fd; /* descriptor reported by Ifileno */
+# else
+# define Ifileno(f) fileno((f)->stream)
+ FILE *stream; /* stdio stream whose descriptor Ifileno reports */
+ unsigned char *readlim; /* when ptr reaches this, Igetmore is asked for more data */
+# endif
+ } RILE;
+# if has_mmap
+# define declarecache register Iptr_type ptr, lim
+# define setupcache(f) (lim = (f)->lim)
+# define Igeteof(f,c,s) if ((f)->ptr==(f)->lim) s else (c)= *(f)->ptr++
+# define cachegeteof(c,s) if (ptr==lim) s else (c)= *ptr++
+# else
+# define declarecache register Iptr_type ptr; register RILE *rRILE
+# define setupcache(f) (rRILE = (f))
+# define Igeteof(f,c,s) if ((f)->ptr==(f)->readlim && !Igetmore(f)) s else (c)= *(f)->ptr++
+# define cachegeteof(c,s) if (ptr==rRILE->readlim && !Igetmore(rRILE)) s else (c)= *ptr++
+# endif
+# define uncache(f) ((f)->ptr = ptr)
+# define cache(f) (ptr = (f)->ptr)
+# define Iget(f,c) Igeteof(f,c,Ieof();)
+# define cacheget(c) cachegeteof(c,Ieof();)
+# define Itell(f) ((f)->ptr)
+# define Iseek(f,p) ((f)->ptr = (p))
+# define Irewind(f) Iseek(f, (f)->base)
+# define cachetell() ptr
+#else /* !large_memory: a RILE is a plain stdio FILE and the "cache" is just the FILE pointer */
+# define RILE FILE
+# define declarecache register FILE *ptr
+# define setupcache(f) (ptr = (f))
+# define uncache(f)
+# define cache(f)
+# define Igeteof(f,c,s) if(((c)=getc(f))<0){testIerror(f);if(feof(f))s}else
+# define cachegeteof(c,s) Igeteof(ptr,c,s)
+# define Iget(f,c) if (((c)=getc(f))<0) testIeof(f); else
+# define cacheget(c) Iget(ptr,c)
+# define Ifileno(f) fileno(f)
+#endif
+
+/* Print a char, but abort on write error. The macro ends in `else', so it must be followed by `;' or a statement. */
+#define aputc(c,o) if (putc(c,o)<0) testOerror(o); else
+
+/* Get a character C from the cached RCS file; if O is nonzero, also copy C to O. GETCeof passes statement S to cachegeteof. */
+#define GETCeof(o,c,s) { cachegeteof(c,s); if (o) aputc(c,o); }
+#define GETC(o,c) { cacheget(c); if (o) aputc(c,o); }
+
+
+#define WORKMODE(RCSmode, writable) ((RCSmode)&~(S_IWUSR|S_IWGRP|S_IWOTH) | ((writable)?S_IWUSR:0))
+/* computes mode of working file: same as RCSmode, but write permission */
+/* determined by writable */
+
+
+/* character classes and token codes */
+enum tokens {
+/* classes */ DELIM, DIGIT, IDCHAR, NEWLN, LETTER, Letter,
+ PERIOD, SBEGIN, SPACE, UNKN,
+/* tokens */ COLON, ID, NUM, SEMI, STRING
+};
+
+#define SDELIM '@' /* the actual character is needed for string handling*/
+/* SDELIM must be consistent with ctab[], so that ctab[SDELIM]==SBEGIN.
+ * there should be no overlap among SDELIM, KDELIM, and VDELIM
+ */
+
+#define isdigit(c) ((unsigned)((c)-'0') <= 9) /* faster than ctab[c]==DIGIT */
+
+
+
+
+
+/***************************************
+ * Data structures for the symbol table
+ ***************************************/
+
+/* Buffer of arbitrary data: a pointer together with a size.
+ * struct cbuf has the same two members, but its data is const.
+ */
+struct buf {
+ char *string;
+ size_t size;
+};
+struct cbuf {
+ char const *string;
+ size_t size;
+};
+
+/* Hash table entry: the record kept for one revision.
+ * Entries are linked two ways: `next' follows the branch,
+ * `nexthsh' chains entries that share a hash value.
+ */
+struct hshentry {
+ char const * num; /* pointer to revision number (ASCIZ) */
+ char const * date; /* pointer to date of checkin */
+ char const * author; /* login of person checking in */
+ char const * lockedby; /* who locks the revision */
+ char const * state; /* state of revision (Exp by default) */
+ struct cbuf log; /* log message requested at checkin */
+ struct branchhead * branches; /* list of first revisions on branches*/
+ struct cbuf ig; /* ignored phrases of revision */
+ struct hshentry * next; /* next revision on same branch */
+ struct hshentry * nexthsh; /* next revision with same hash value */
+ unsigned long insertlns;/* lines inserted (computed by rlog) */
+ unsigned long deletelns;/* lines deleted (computed by rlog) */
+ char selector; /* true if selected, false if deleted */
+};
+
+/* list of hash entries: `first' is this element's entry,
+ * `rest' points to the remainder of the list
+ */
+struct hshentries {
+ struct hshentries *rest;
+ struct hshentry *first;
+};
+
+/* list element for branch lists (see the `branches' member of struct hshentry) */
+struct branchhead {
+ struct hshentry * hsh;
+ struct branchhead * nextbranch;
+};
+
+/* accesslist element: one login name per element */
+struct access {
+ char const * login;
+ struct access * nextaccess;
+};
+
+/* list element for locks: pairs a login name with the revision entry `delta' */
+struct lock {
+ char const * login;
+ struct hshentry * delta;
+ struct lock * nextlock;
+};
+
+/* list element for symbolic names: pairs the name `symbol' with the string `num' */
+struct assoc {
+ char const * symbol;
+ char const * num;
+ struct assoc * nextassoc;
+};
+
+
+#define mainArgs (argc,argv) int argc; char **argv; /* object-like macro (note the space before the parenthesis): an old-style parameter list followed by the parameter declarations */
+
+#if lint /* under lint: no identification strings, and the entry point is called NAME rather than main */
+# define libId(name,rcsid)
+# define mainProg(name,cmd,rcsid) int name mainArgs
+#else /* otherwise: libId defines an identification string; mainProg defines copyright, rcsbaseId and cmdid, then begins main */
+# define libId(name,rcsid) char const name[] = rcsid;
+# define mainProg(name,cmd,rcsid) char const copyright[] = "Copyright 1982,1988,1989 by Walter F. Tichy\nPurdue CS\nCopyright 1990,1991 by Paul Eggert", rcsbaseId[] = RCSBASE, cmdid[] = cmd; libId(name,rcsid) int main mainArgs
+#endif
+
+/*
+ * Markers for keyword expansion (used in co and ident)
+ * Every byte must have class LETTER or Letter.
+ * keylength below must be at least the length of the longest string here.
+ */
+#define AUTHOR "Author"
+#define DATE "Date"
+#define HEADER "Header"
+#define IDH "Id"
+#define LOCKER "Locker"
+#define LOG "Log"
+#define RCSFILE "RCSfile"
+#define REVISION "Revision"
+#define SOURCE "Source"
+#define STATE "State"
+#define keylength 8 /* max length of any of the above keywords ("Revision" has 8 characters) */
+
+enum markers { Nomatch, Author, Date, Header, Id,
+ Locker, Log, RCSfile, Revision, Source, State };
+ /* This must be in the same order as rcskeys.c's Keyword[] array.
+ * Nomatch, the first value, is what expandline() tests for when
+ * trymatch() did not recognize a keyword.
+ */
+
+#define DELNUMFORM "\n\n%s\n%s\n"
+/* used by putdtext and scanlogtext */
+
+#define EMPTYLOG "*** empty log message ***" /* used by ci and rlog */
+
+/* main program */
+extern char const cmdid[];
+exiting void exiterr P((void));
+
+/* maketime */
+int setfiledate P((char const*,char const[datesize]));
+void str2date P((char const*,char[datesize]));
+void time2date P((time_t,char[datesize]));
+
+/* merge */
+int merge P((int,char const*const[2],char const*const[3]));
+
+/* partime */
+int partime P((char const*,struct tm*,int*));
+
+/* rcsedit */
+#define ciklogsize 23 /* sizeof("checked in with -k by ") */
+extern FILE *fcopy;
+extern char const *resultfile;
+extern char const ciklog[ciklogsize];
+extern int locker_expansion;
+extern struct buf dirtfname[];
+#define newRCSfilename (dirtfname[0].string)
+RILE *rcswriteopen P((struct buf*,struct stat*,int));
+char const *makedirtemp P((char const*,int));
+char const *getcaller P((void));
+int addlock P((struct hshentry*));
+int addsymbol P((char const*,char const*,int));
+int checkaccesslist P((void));
+int chnamemod P((FILE**,char const*,char const*,mode_t));
+int donerewrite P((int));
+int dorewrite P((int,int));
+int expandline P((RILE*,FILE*,struct hshentry const*,int,FILE*));
+int findlock P((int,struct hshentry**));
+void aflush P((FILE*));
+void copystring P((void));
+void dirtempunlink P((void));
+void enterstring P((void));
+void finishedit P((struct hshentry const*,FILE*,int));
+void keepdirtemp P((char const*));
+void openfcopy P((FILE*));
+void snapshotedit P((FILE*));
+void xpandstring P((struct hshentry const*));
+#if has_NFS || bad_unlink /* a real un_link() function is needed in these configurations */
+ int un_link P((char const*));
+#else /* otherwise un_link is simply unlink */
+# define un_link(s) unlink(s)
+#endif
+#if large_memory /* the large_memory editor takes no argument; the macro discards DELTA */
+ void edit_string P((void));
+# define editstring(delta) edit_string()
+#else
+ void editstring P((struct hshentry const*));
+#endif
+
+/* rcsfcmp */
+int rcsfcmp P((RILE*,struct stat const*,char const*,struct hshentry const*));
+
+/* rcsfnms */
+#define bufautobegin(b) ((void) ((b)->string = 0, (b)->size = 0))
+extern FILE *workstdout;
+extern char *workfilename;
+extern char const *RCSfilename;
+extern char const *suffixes;
+extern struct stat RCSstat;
+RILE *rcsreadopen P((struct buf*,struct stat*,int));
+char *bufenlarge P((struct buf*,char const**));
+char const *basename P((char const*));
+char const *getfullRCSname P((void));
+char const *maketemp P((int));
+char const *rcssuffix P((char const*));
+int pairfilenames P((int,char**,RILE*(*)P((struct buf*,struct stat*,int)),int,int));
+size_t dirlen P((char const*));
+struct cbuf bufremember P((struct buf*,size_t));
+void bufalloc P((struct buf*,size_t));
+void bufautoend P((struct buf*));
+void bufrealloc P((struct buf*,size_t));
+void bufscat P((struct buf*,char const*));
+void bufscpy P((struct buf*,char const*));
+void tempunlink P((void));
+
+/* rcsgen */
+extern int interactiveflag;
+extern struct buf curlogbuf;
+char const *buildrevision P((struct hshentries const*,struct hshentry*,FILE*,int));
+int getcstdin P((void));
+int ttystdin P((void));
+int yesorno P((int,char const*,...));
+struct cbuf cleanlogmsg P((char*,size_t));
+struct cbuf getsstdin P((char const*,char const*,char const*,struct buf*));
+void putdesc P((int,char*));
+
+/* rcskeep */
+extern int prevkeys;
+extern struct buf prevauthor, prevdate, prevrev, prevstate;
+int getoldkeys P((RILE*));
+
+/* rcskeys */
+extern char const *const Keyword[];
+enum markers trymatch P((char const*));
+
+/* rcslex */
+extern FILE *foutptr;
+extern FILE *frewrite;
+extern RILE *finptr;
+extern char const *NextString;
+extern enum tokens nexttok;
+extern int hshenter;
+extern int nerror;
+extern int nextc;
+extern int quietflag;
+extern unsigned long rcsline;
+char const *getid P((void));
+exiting void efaterror P((char const*));
+exiting void enfaterror P((int,char const*));
+exiting void faterror P((char const*,...));
+exiting void fatserror P((char const*,...));
+exiting void Ieof P((void));
+exiting void Ierror P((void));
+exiting void Oerror P((void));
+char *checkid P((char*,int));
+int eoflex P((void));
+int getkeyopt P((char const*));
+int getlex P((enum tokens));
+struct cbuf getphrases P((char const*));
+struct cbuf savestring P((struct buf*));
+struct hshentry *getnum P((void));
+void Ifclose P((RILE*));
+void Izclose P((RILE**));
+void Lexinit P((void));
+void Ofclose P((FILE*));
+void Ozclose P((FILE**));
+void afputc P((int,FILE*));
+void aprintf P((FILE*,char const*,...));
+void aputs P((char const*,FILE*));
+void checksid P((char*));
+void diagnose P((char const*,...));
+void eerror P((char const*));
+void eflush P((void));
+void enerror P((int,char const*));
+void error P((char const*,...));
+void fvfprintf P((FILE*,char const*,va_list));
+void getkey P((char const*));
+void getkeystring P((char const*));
+void nextlex P((void));
+void oflush P((void));
+void printstring P((void));
+void readstring P((void));
+void redefined P((int));
+void testIerror P((FILE*));
+void testOerror P((FILE*));
+void warn P((char const*,...));
+void warnignore P((void));
+#if has_madvise && has_mmap && large_memory
+ void advise_access P((RILE*,int));
+# define if_advise_access(p,f,advice) if (p) advise_access(f,advice) /* call advise_access only when P is nonzero */
+#else /* in this configuration both forms expand to nothing */
+# define advise_access(f,advice)
+# define if_advise_access(p,f,advice)
+#endif
+#if has_mmap && large_memory
+ RILE *I_open P((char const*,struct stat*));
+# define Iopen(f,m,s) I_open(f,s) /* the second argument M is dropped */
+#else
+ RILE *Iopen P((char const*,char const*,struct stat*));
+#endif
+#if !large_memory /* these two are declared only without large_memory */
+ void testIeof P((FILE*));
+ void Irewind P((RILE*));
+#endif
+
+/* rcsmap */
+extern const enum tokens ctab[];
+
+/* rcsrev */
+char *partialno P((struct buf*,char const*,unsigned));
+char const *tiprev P((void));
+int cmpnum P((char const*,char const*));
+int cmpnumfld P((char const*,char const*,unsigned));
+int compartial P((char const*,char const*,unsigned));
+int expandsym P((char const*,struct buf*));
+int fexpandsym P((char const*,struct buf*,RILE*));
+struct hshentry *genrevs P((char const*,char const*,char const*,char const*,struct hshentries**));
+unsigned countnumflds P((char const*));
+void getbranchno P((char const*,struct buf*));
+
+/* rcssyn */
+/* These expand modes must agree with expand_names[] in rcssyn.c. */
+#define KEYVAL_EXPAND 0 /* -kkv `$Keyword: value $' */
+#define KEYVALLOCK_EXPAND 1 /* -kkvl `$Keyword: value locker $' */
+#define KEY_EXPAND 2 /* -kk `$Keyword$' */
+#define VAL_EXPAND 3 /* -kv `value' */
+#define OLD_EXPAND 4 /* -ko use old string, omitting expansion */
+struct diffcmd { /* one edit-script command plus state carried from earlier commands; passed to initdiffcmd() and getdiffcmd() */
+ unsigned long
+ line1, /* number of first line */
+ nlines, /* number of lines affected */
+ adprev, /* previous 'a' line1+1 or 'd' line1 */
+ dafter; /* sum of previous 'd' line1 and previous 'd' nlines */
+};
+extern char const * Dbranch;
+extern struct access * AccessList;
+extern struct assoc * Symbols;
+extern struct cbuf Comment;
+extern struct lock * Locks;
+extern struct hshentry * Head;
+extern int Expand;
+extern int StrictLocks;
+extern unsigned TotalDeltas;
+extern char const *const expand_names[];
+extern char const Kdesc[];
+extern char const Klog[];
+extern char const Ktext[];
+int getdiffcmd P((RILE*,int,FILE*,struct diffcmd*));
+int putdftext P((char const*,struct cbuf,RILE*,FILE*,int));
+int putdtext P((char const*,struct cbuf,char const*,FILE*,int));
+int str2expmode P((char const*));
+void getadmin P((void));
+void getdesc P((int));
+void gettree P((void));
+void ignorephrase P((void));
+void initdiffcmd P((struct diffcmd*));
+void putadmin P((FILE*));
+void putstring P((FILE*,int,struct cbuf,int));
+void puttree P((struct hshentry const*,FILE*));
+
+/* rcsutil */
+extern int RCSversion;
+char *cgetenv P((char const*));
+char *fstr_save P((char const*));
+char *str_save P((char const*));
+char const *date2str P((char const[datesize],char[datesize]));
+char const *getusername P((int));
+int getRCSINIT P((int,char**,char***));
+int run P((char const*,char const*,...));
+int runv P((char const**));
+malloc_type fremember P((malloc_type));
+malloc_type ftestalloc P((size_t));
+malloc_type testalloc P((size_t));
+malloc_type testrealloc P((malloc_type,size_t));
+#define ftalloc(T) ftnalloc(T,1) /* one object of type T, via ftestalloc */
+#define talloc(T) tnalloc(T,1) /* one object of type T, via testalloc */
+#if lint /* lint variants: the allocator result goes to lintalloc and the expression's value is (T*)0 or P */
+ extern malloc_type lintalloc;
+# define ftnalloc(T,n) (lintalloc = ftestalloc(sizeof(T)*(n)), (T*)0)
+# define tnalloc(T,n) (lintalloc = testalloc(sizeof(T)*(n)), (T*)0)
+# define trealloc(T,p,n) (lintalloc = testrealloc((malloc_type)0, sizeof(T)*(n)), p)
+# define tfree(p)
+#else /* real variants: N objects of type T, result cast to T* */
+# define ftnalloc(T,n) ((T*) ftestalloc(sizeof(T)*(n)))
+# define tnalloc(T,n) ((T*) testalloc(sizeof(T)*(n)))
+# define trealloc(T,p,n) ((T*) testrealloc((malloc_type)(p), sizeof(T)*(n)))
+# define tfree(p) free((malloc_type)(p))
+#endif
+void awrite P((char const*,size_t,FILE*));
+void fastcopy P((RILE*,FILE*));
+void ffree P((void));
+void ffree1 P((char const*));
+void setRCSversion P((char const*));
+#if has_signal
+ void catchints P((void));
+ void ignoreints P((void));
+ void restoreints P((void));
+#else /* without has_signal these calls expand to nothing */
+# define catchints()
+# define ignoreints()
+# define restoreints()
+#endif
+#if has_getuid
+ uid_t ruid P((void));
+# define myself(u) ((u) == ruid()) /* true if U equals the value returned by ruid() */
+#else
+# define myself(u) true /* without has_getuid every U counts as the caller */
+#endif
+#if has_setuid
+ uid_t euid P((void));
+ void nosetid P((void));
+ void seteid P((void));
+ void setrid P((void));
+#else /* without has_setuid these calls expand to nothing (euid is not declared at all) */
+# define nosetid()
+# define seteid()
+# define setrid()
+#endif
diff --git a/gnu/usr.bin/rcs/lib/rcsedit.c b/gnu/usr.bin/rcs/lib/rcsedit.c
new file mode 100644
index 000000000000..fab4f62214bb
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcsedit.c
@@ -0,0 +1,1656 @@
+/*
+ * RCS stream editor
+ */
+/**********************************************************************************
+ * edits the input file according to a
+ * script from stdin, generated by diff -n
+ * performs keyword expansion
+ **********************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+/* $Log: rcsedit.c,v $
+ * Revision 5.11 1991/11/03 01:11:44 eggert
+ * Move the warning about link breaking to where they're actually being broken.
+ *
+ * Revision 5.10 1991/10/07 17:32:46 eggert
+ * Support piece tables even if !has_mmap. Fix rare NFS bugs.
+ *
+ * Revision 5.9 1991/09/17 19:07:40 eggert
+ * SGI readlink() yields ENXIO, not EINVAL, for nonlinks.
+ *
+ * Revision 5.8 1991/08/19 03:13:55 eggert
+ * Add piece tables, NFS bug workarounds. Catch odd filenames. Tune.
+ *
+ * Revision 5.7 1991/04/21 11:58:21 eggert
+ * Fix errno bugs. Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.6 1991/02/25 07:12:40 eggert
+ * Fix setuid bug. Support new link behavior. Work around broken "w+" fopen.
+ *
+ * Revision 5.5 1990/12/30 05:07:35 eggert
+ * Fix report of busy RCS files when !defined(O_CREAT) | !defined(O_EXCL).
+ *
+ * Revision 5.4 1990/11/01 05:03:40 eggert
+ * Permit arbitrary data in comment leaders.
+ *
+ * Revision 5.3 1990/09/11 02:41:13 eggert
+ * Tune expandline().
+ *
+ * Revision 5.2 1990/09/04 08:02:21 eggert
+ * Count RCS lines better. Improve incomplete line handling.
+ *
+ * Revision 5.1 1990/08/29 07:13:56 eggert
+ * Add -kkvl.
+ * Fix bug when getting revisions to files ending in incomplete lines.
+ * Fix bug in comment leader expansion.
+ *
+ * Revision 5.0 1990/08/22 08:12:47 eggert
+ * Don't require final newline.
+ * Don't append "checked in with -k by " to logs,
+ * so that checking in a program with -k doesn't change it.
+ * Don't generate trailing white space for empty comment leader.
+ * Remove compile-time limits; use malloc instead. Add -k, -V.
+ * Permit dates past 1999/12/31. Make lock and temp files faster and safer.
+ * Ansify and Posixate. Check diff's output.
+ *
+ * Revision 4.8 89/05/01 15:12:35 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.7 88/11/08 13:54:14 narten
+ * misplaced semicolon caused infinite loop
+ *
+ * Revision 4.6 88/08/09 19:12:45 eggert
+ * Shrink stdio code size; allow cc -R.
+ *
+ * Revision 4.5 87/12/18 11:38:46 narten
+ * Changes from the 43. version. Don't know the significance of the
+ * first change involving "rewind". Also, additional "lint" cleanup.
+ * (Guy Harris)
+ *
+ * Revision 4.4 87/10/18 10:32:21 narten
+ * Updating version numbers. Changes relative to version 1.1 actually
+ * relative to 4.1
+ *
+ * Revision 1.4 87/09/24 13:59:29 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.3 87/09/15 16:39:39 shepler
+ * added an initializatin of the variables editline and linecorr
+ * this will be done each time a file is processed.
+ * (there was an obscure bug where if co was used to retrieve multiple files
+ * it would dump)
+ * fix attributed to Roy Morris @FileNet Corp ...!felix!roy
+ *
+ * Revision 1.2 87/03/27 14:22:17 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/05/12 13:10:30 wft
+ * Added new markers Id and RCSfile; added locker to Header and Id.
+ * Overhauled expandline completely() (problem with $01234567890123456789@).
+ * Moved trymatch() and marker table to rcskeys.c.
+ *
+ * Revision 3.7 83/05/12 13:04:39 wft
+ * Added retry to expandline to resume after failed match which ended in $.
+ * Fixed truncation problem for $19chars followed by@@.
+ * Log no longer expands full path of RCS file.
+ *
+ * Revision 3.6 83/05/11 16:06:30 wft
+ * added retry to expandline to resume after failed match which ended in $.
+ * Fixed truncation problem for $19chars followed by@@.
+ *
+ * Revision 3.5 82/12/04 13:20:56 wft
+ * Added expansion of keyword Locker.
+ *
+ * Revision 3.4 82/12/03 12:26:54 wft
+ * Added line number correction in case editing does not start at the
+ * beginning of the file.
+ * Changed keyword expansion to always print a space before closing KDELIM;
+ * Expansion for Header shortened.
+ *
+ * Revision 3.3 82/11/14 14:49:30 wft
+ * removed Suffix from keyword expansion. Replaced fclose with ffclose.
+ * keyreplace() gets log message from delta, not from curlogmsg.
+ * fixed expression overflow in while(c=putc(GETC....
+ * checked nil printing.
+ *
+ * Revision 3.2 82/10/18 21:13:39 wft
+ * I added checks for write errors during the co process, and renamed
+ * expandstring() to xpandstring().
+ *
+ * Revision 3.1 82/10/13 15:52:55 wft
+ * changed type of result of getc() from char to int.
+ * made keyword expansion loop in expandline() portable to machines
+ * without sign-extension.
+ */
+
+
+#include "rcsbase.h"
+
+libId(editId, "$Id: rcsedit.c,v 5.11 1991/11/03 01:11:44 eggert Exp $")
+
+static void keyreplace P((enum markers,struct hshentry const*,FILE*));
+
+
+FILE *fcopy; /* result file stream (a stdio FILE *, not a numeric descriptor) */
+char const *resultfile; /* result file name */
+int locker_expansion; /* should the locker name be appended to Id val? */
+#if !large_memory
+ static RILE *fedit; /* edit file stream; exists only without large_memory */
+ static char const *editfile; /* edit pathname */
+#endif
+static unsigned long editline; /* edit line counter; #lines before cursor */
+static long linecorr; /* #adds - #deletes in each edit run. */
+ /* Used to correct editline in case the file is not rewound after */
+ /* applying one delta. */
+
+#define DIRTEMPNAMES 2 /* number of entries in dirtfname[] and dirtfmaker[] */
+enum maker {notmade, real, effective}; /* NOTE(review): presumably records under which user ID a file was made -- confirm where dirtfmaker is set */
+struct buf dirtfname[DIRTEMPNAMES]; /* unlink these when done */
+static enum maker volatile dirtfmaker[DIRTEMPNAMES]; /* if these are set */
+
+
+#if has_NFS || bad_unlink
+	int
+un_link(s)
+	char const *s;
+/*
+ * Unlink S; yield 0 on success, -1 (with errno set) on failure.
+ * With bad_unlink, a failed first attempt is retried after chmod(S, S_IWUSR);
+ * if that chmod fails, errno is restored to the first unlink()'s value.
+ * With has_NFS, an ENOENT failure from unlink() is counted as success,
+ * because NFS generates bogus ones.
+ */
+{
+# if bad_unlink
+	int first_errno;
+
+	if (unlink(s) == 0)
+		return 0;
+	first_errno = errno;
+# if has_NFS
+	if (first_errno == ENOENT)
+		return 0;
+# endif
+	if (chmod(s, S_IWUSR) != 0) {
+		errno = first_errno;
+		return -1;
+	}
+# endif
+# if has_NFS
+	if (unlink(s) == 0)
+		return 0;
+	return errno == ENOENT ? 0 : -1;
+# else
+	return unlink(s);
+# endif
+}
+#endif
+
+#if !has_rename
+# if !has_NFS
+# define do_link(s,t) link(s,t)
+# else
+	static int
+do_link(s, t)
+	char const *s, *t;
+/*
+ * Make T a link to S; yield 0 on success, -1 with errno set on failure.
+ * An EEXIST failure is treated as success when S and T can both be stat'ed
+ * and have the same device and inode numbers, since NFS can report a bogus
+ * EEXIST for a link that was in fact made.
+ */
+{
+	struct stat src, dst;
+	int same_file;
+
+	if (link(s,t) == 0)
+		return 0;
+	if (errno != EEXIST)
+		return -1;
+
+	same_file =
+		stat(s, &src) == 0  &&  stat(t, &dst) == 0
+		&&  src.st_dev == dst.st_dev
+		&&  src.st_ino == dst.st_ino;
+	if (same_file)
+		return 0;
+
+	/* The stat calls may have changed errno; report the original failure. */
+	errno = EEXIST;
+	return -1;
+}
+# endif
+#endif
+
+
+ static exiting void
+editEndsPrematurely()
+{ /* Report through fatserror() that the edit script stopped before supplying all the lines it announced. */
+ fatserror("edit script ends prematurely");
+}
+
+ static exiting void
+editLineNumberOverflow()
+{ /* Report through fatserror() that the edit script names a line beyond the end of the text being edited. */
+ fatserror("edit script refers to line past end of file");
+}
+
+
+#if large_memory
+
+#if has_memmove
+# define movelines(s1, s2, n) VOID memmove(s1, s2, (n)*sizeof(Iptr_type))
+#else
+	static void
+movelines(s1, s2, n)
+	register Iptr_type *s1;
+	register Iptr_type const *s2;
+	register unsigned long n;
+/*
+ * Copy N line pointers from S2 to S1; the two regions may overlap.
+ * When the destination lies below the source the copy runs upward,
+ * otherwise it runs downward from the far end, so no element is
+ * overwritten before it has been read.
+ * N == 0 copies nothing, matching the memmove-based variant above
+ * (the loops test N before each element instead of after the first).
+ */
+{
+	if (s1 < s2)
+		while (n) {
+			*s1++ = *s2++;
+			--n;
+		}
+	else {
+		s1 += n;
+		s2 += n;
+		while (n) {
+			*--s1 = *--s2;
+			--n;
+		}
+	}
+}
+#endif
+
+/*
+ * `line' contains pointers to the lines in the currently `edited' file.
+ * It is a 0-origin array that represents linelim-gapsize lines.
+ * line[0..gap-1] and line[gap+gapsize..linelim-1] contain pointers to lines.
+ * line[gap..gap+gapsize-1] contains garbage.
+ *
+ * That is, the array is a gap buffer of line pointers: `gap' is the index of
+ * the first unused slot, `gapsize' the number of unused slots, and `linelim'
+ * the number of slots allocated (insertline() sets it when it allocates).
+ *
+ * Any @s in lines are duplicated.
+ * Lines are terminated by \n, or (for a last partial line only) by single @.
+ */
+static Iptr_type *line;
+static unsigned long gap, gapsize, linelim;
+
+
+	static void
+insertline(n, l)
+	unsigned long n;
+	Iptr_type l;
+/*
+ * Make L the line at 0-origin position N, shifting later lines up by one.
+ * N may equal the current number of lines (append); anything larger is
+ * reported with editLineNumberOverflow().
+ * When no free slot is left, the table is first allocated with 1024 slots,
+ * or doubled if it already exists, with the new free space placed at the end.
+ * Afterwards the gap begins just past the inserted line.
+ */
+{
+	if (linelim-gapsize < n)
+		editLineNumberOverflow();
+
+	if (gapsize == 0) {
+		if (linelim == 0) {
+			linelim = gapsize = 1024;
+			line = tnalloc(Iptr_type, linelim);
+		} else {
+			gap = gapsize = linelim;
+			linelim <<= 1;
+			line = trealloc(Iptr_type, line, linelim);
+		}
+	}
+
+	/* Slide the gap so that it starts at slot N. */
+	if (gap < n)
+		movelines(line+gap, line+gap+gapsize, n-gap);
+	else if (n < gap)
+		movelines(line+n+gapsize, line+n, gap-n);
+
+	line[n] = l;
+	gap = n + 1;
+	gapsize--;
+}
+
+	static void
+deletelines(n, nlines)
+	unsigned long n, nlines;
+/*
+ * Remove the NLINES lines at 0-origin positions N .. N+NLINES-1.
+ * A range that wraps around or extends past the last line is reported
+ * with editLineNumberOverflow().
+ * The gap is moved next to the doomed range and then widened to swallow it,
+ * so afterwards the gap starts at N.
+ */
+{
+	unsigned long end = n + nlines;	/* one past the last line to delete */
+
+	if (end < n  ||  linelim-gapsize < end)
+		editLineNumberOverflow();
+
+	if (gap < n)
+		movelines(line+gap, line+gap+gapsize, n-gap);
+	else if (end < gap)
+		movelines(line+end+gapsize, line+end, gap-end);
+
+	gap = n;
+	gapsize += nlines;
+}
+
+	static void
+snapshotline(f, l)
+	register FILE *f;
+	register Iptr_type l;
+/*
+ * Write to F the line that starts at L.
+ * A doubled SDELIM is written as one SDELIM; a single SDELIM ends the line
+ * without being written.  Otherwise output stops after the newline.
+ * Write errors are handled by aputc.
+ */
+{
+	register int c;
+
+	for (;;) {
+		c = *l++;
+		if (c == SDELIM) {
+			if (*l != SDELIM)
+				return;
+			l++;	/* skip the second SDELIM of the pair */
+		}
+		aputc(c, f);
+		if (c == '\n')
+			return;
+	}
+}
+
+	void
+snapshotedit(f)
+	FILE *f;
+/*
+ * Write the text as currently edited to F: every line recorded below the
+ * gap, then every line recorded above it, each through snapshotline().
+ */
+{
+	register unsigned long i;
+
+	for (i = 0;  i < gap;  i++)
+		snapshotline(f, line[i]);
+	for (i = gap + gapsize;  i < linelim;  i++)
+		snapshotline(f, line[i]);
+}
+
+	static void
+finisheditline(fin, fout, l, delta)
+	RILE *fin;
+	FILE *fout;
+	Iptr_type l;
+	struct hshentry const *delta;
+/*
+ * Position FIN at L and let expandline() write that one line to FOUT,
+ * with DELTA as the keyword expansion data and SDELIM stuffing assumed.
+ * A negative yield from expandline() (no data copied) is a fatal internal error.
+ */
+{
+	int status;
+
+	Iseek(fin, l);
+	status = expandline(fin, fout, delta, true, (FILE*)0);
+	if (status < 0)
+		faterror("finisheditline internal error");
+}
+
+	void
+finishedit(delta, outfile, done)
+	struct hshentry const *delta;
+	FILE *outfile;
+	int done;
+/*
+ * Unless DONE is set (meaning this is the last pass), do nothing.
+ * Otherwise make OUTFILE the result stream with openfcopy() and write the
+ * state of the edits to it: verbatim through snapshotedit() if DELTA is
+ * null, or line by line with keyword expansion if DELTA is set.
+ * In the latter case finptr's position is saved first and restored afterwards.
+ */
+{
+	register RILE *fin;
+	Iptr_type here;
+	register unsigned long i;
+
+	if (!done)
+		return;
+
+	openfcopy(outfile);
+	outfile = fcopy;
+	if (!delta) {
+		snapshotedit(outfile);
+		return;
+	}
+
+	fin = finptr;
+	here = Itell(fin);
+	for (i = 0;  i < gap;  i++)
+		finisheditline(fin, outfile, line[i], delta);
+	for (i = gap + gapsize;  i < linelim;  i++)
+		finisheditline(fin, outfile, line[i], delta);
+	Iseek(fin, here);
+}
+
+/* Open a temporary FILENAME for output, truncating any previous contents.
+ * With large_memory this is a macro that opens the file with FOPEN_W_WORK;
+ * otherwise it is the function below, which opens it with FOPEN_WPLUS_WORK.
+ * Either way the value is fopen()'s result, so callers must test it for null.
+ */
+# define fopen_update_truncate(filename) fopen(filename, FOPEN_W_WORK)
+#else /* !large_memory */
+ static FILE *
+fopen_update_truncate(filename)
+ char const *filename;
+{
+# if bad_fopen_wplus /* remove the file before opening; failure to remove it is fatal */
+ if (un_link(filename) != 0)
+ efaterror(filename);
+# endif
+ return fopen(filename, FOPEN_WPLUS_WORK);
+}
+#endif
+
+
+	void
+openfcopy(f)
+	FILE *f;
+/*
+ * Establish the result stream fcopy.
+ * A nonnull F is used as is.  Otherwise resultfile is opened with
+ * fopen_update_truncate(), after giving it a name from maketemp(2) if it
+ * has none yet; failure to open is reported with efaterror(resultfile).
+ */
+{
+	fcopy = f;
+	if (fcopy)
+		return;
+
+	if (!resultfile)
+		resultfile = maketemp(2);
+	fcopy = fopen_update_truncate(resultfile);
+	if (!fcopy)
+		efaterror(resultfile);
+}
+
+
+#if !large_memory
+
+	static void
+swapeditfiles(outfile)
+	FILE *outfile;
+/*
+ * Turn the result of the pass just finished into the input of the next one:
+ * reset editline and linecorr, seek fcopy back to its start and make it
+ * fedit, exchange the names editfile and resultfile, and finally let
+ * openfcopy(OUTFILE) set up the new fcopy (OUTFILE itself if nonnull,
+ * otherwise resultfile freshly opened).
+ * A failing seek is reported with Oerror().
+ */
+{
+	char const *previous_editfile;
+
+	editline = 0;
+	linecorr = 0;
+
+	if (fseek(fcopy, 0L, SEEK_SET) != 0)
+		Oerror();
+	fedit = fcopy;
+
+	previous_editfile = editfile;
+	editfile = resultfile;
+	resultfile = previous_editfile;
+
+	openfcopy(outfile);
+}
+
+ void
+snapshotedit(f)
+ FILE *f;
+/* Copy the current state of the edits to F.
+ * The current pass is first finished without keyword expansion and with
+ * DONE false, so finishedit() copies the rest of the edit file and swaps
+ * the files; fedit then holds the complete text, which is copied to F
+ * and rewound with Irewind().
+ */
+{
+ finishedit((struct hshentry *)nil, (FILE*)0, false);
+ fastcopy(fedit, f);
+ Irewind(fedit);
+}
+
+	void
+finishedit(delta, outfile, done)
+	struct hshentry const *delta;
+	FILE *outfile;
+	int done;
+/*
+ * Finish the current pass.  If an edit file is open, copy what is left of
+ * it to fcopy -- verbatim with fastcopy() when DELTA is nil, otherwise line
+ * by line through expandline() so that keywords are expanded -- and close it.
+ * Unless DONE is set (last pass), then swap the edit and result files with
+ * swapeditfiles(OUTFILE) in preparation for the next pass.
+ */
+{
+	register RILE *in = fedit;
+
+	if (in) {
+		register FILE *dest = fcopy;
+
+		if (delta == nil)
+			fastcopy(in, dest);
+		else
+			for (;;)
+				if (expandline(in, dest, delta, false, (FILE*)0) <= 1)
+					break;	/* no complete line was copied */
+		Ifclose(in);
+	}
+	if (!done)
+		swapeditfiles(outfile);
+}
+#endif
+
+
+
+#if large_memory
+# define copylines(upto,delta) (editline = (upto))
+#else
+	static void
+copylines(upto,delta)
+	register unsigned long upto;
+	struct hshentry const *delta;
+/*
+ * Advance the edit cursor to line UPTO by copying lines editline+1..UPTO
+ * from fedit to fcopy, updating editline as it goes.
+ * If UPTO lies before the cursor, the pass is first finished with
+ * finishedit(nil, 0, false), which swaps the files and restarts at line 0.
+ * With a nonnull DELTA each line goes through expandline() for keyword
+ * expansion; otherwise characters are copied directly.
+ * Running out of input before UPTO is reached is reported with
+ * editLineNumberOverflow().
+ */
+{
+	register int c;
+	declarecache;
+	register FILE *fc;
+	register RILE *fe;
+
+	if (upto < editline) {
+		/* swap files */
+		/* assumes edit only during last pass, from the beginning */
+		finishedit((struct hshentry *)nil, (FILE*)0, false);
+	}
+	fe = fedit;
+	fc = fcopy;
+	if (upto <= editline)
+		return;		/* already there; nothing to copy */
+
+	if (delta) {
+		do {
+			if (expandline(fe,fc,delta,false,(FILE*)0) <= 1)
+				editLineNumberOverflow();
+		} while (++editline < upto);
+		return;
+	}
+
+	setupcache(fe); cache(fe);
+	do {
+		do {
+			cachegeteof(c, editLineNumberOverflow(););
+			aputc(c, fc);
+		} while (c != '\n');
+	} while (++editline < upto);
+	uncache(fe);
+}
+#endif
+
+
+
+	void
+xpandstring(delta)
+	struct hshentry const *delta;
+/*
+ * Copy the SDELIM-terminated string at finptr to fcopy, one line at a time
+ * through expandline(): doubled SDELIMs become single, and keywords are
+ * expanded with data from DELTA.  If foutptr is nonnull the string is also
+ * copied unchanged to foutptr.  Copying stops as soon as expandline()
+ * reports anything other than a complete line.
+ */
+{
+	for (;;)
+		if (expandline(finptr,fcopy,delta,true,foutptr) <= 1)
+			return;
+}
+
+
+	void
+copystring()
+/*
+ * Copy a string terminated with a single SDELIM from finptr to fcopy,
+ * replacing each doubled SDELIM with a single one.
+ * If foutptr is nonnull, the string is also copied unchanged to foutptr
+ * (GETC does that as each character is read).
+ * editline is incremented by the number of lines copied, counting a final
+ * line that lacks a newline; rcsline is incremented once per newline.
+ * On return nextc holds the character that followed the closing SDELIM.
+ * Assumption: next character read is first string character.
+ */
+{
+	register int c;		/* declared int explicitly; implicit int is not valid since C99 */
+	declarecache;
+	register FILE *frew, *fcop;
+	register int amidline;	/* true while the current line has no newline yet */
+	register RILE *fin;
+
+	fin = finptr;
+	setupcache(fin); cache(fin);
+	frew = foutptr;
+	fcop = fcopy;
+	amidline = false;
+	for (;;) {
+		GETC(frew,c);
+		switch (c) {
+		    case '\n':
+			++editline;
+			++rcsline;
+			amidline = false;
+			break;
+		    case SDELIM:
+			GETC(frew,c);
+			if (c != SDELIM) {
+				/* end of string */
+				nextc = c;
+				editline += amidline;
+				uncache(fin);
+				return;
+			}
+			/* fall into */
+		    default:
+			amidline = true;
+			break;
+		}
+		aputc(c,fcop);
+	}
+}
+
+
+ void
+enterstring()
+/* Like copystring, except the string is put into the edit data structure.
+ * Without large_memory: any previous edit file is forgotten, the line
+ * counters are zeroed, a new result file named by maketemp(1) is opened,
+ * and copystring() copies the string into it.
+ * With large_memory: nothing is copied to a work file; the line table is
+ * emptied and the position at which each line starts (from cachetell())
+ * is recorded in it with insertline().  On return editline is the number
+ * of lines entered, including a final line without a newline, and nextc
+ * holds the character that followed the closing SDELIM.
+ */
+{
+#if !large_memory
+ editfile = 0;
+ fedit = 0;
+ editline = linecorr = 0;
+ resultfile = maketemp(1);
+ if (!(fcopy = fopen_update_truncate(resultfile)))
+ efaterror(resultfile);
+ copystring();
+#else
+ register int c;
+ declarecache;
+ register FILE *frew;
+ register unsigned long e, oe;
+ register int amidline, oamidline;
+ register Iptr_type optr;
+ register RILE *fin;
+
+ e = 0;
+ gap = 0;
+ gapsize = linelim; /* with gap == 0 this turns the whole table into gap, i.e. it holds no lines */
+ fin = finptr;
+ setupcache(fin); cache(fin);
+ advise_access(fin, MADV_NORMAL);
+ frew = foutptr;
+ amidline = false;
+ for (;;) {
+ optr = cachetell(); /* taken before the read, so it can serve as the line's start below */
+ GETC(frew,c);
+ oamidline = amidline; /* state before this character, tested after the switch */
+ oe = e;
+ switch (c) {
+ case '\n':
+ ++e;
+ ++rcsline;
+ amidline = false;
+ break;
+ case SDELIM:
+ GETC(frew,c);
+ if (c != SDELIM) {
+ /* end of string */
+ nextc = c;
+ editline = e + amidline; /* a trailing partial line counts as a line */
+ linecorr = 0;
+ uncache(fin);
+ return;
+ }
+ /* fall into */
+ default:
+ amidline = true;
+ break;
+ }
+ if (!oamidline) /* the character just read was the first of a line */
+ insertline(oe, optr);
+ }
+#endif
+}
+
+
+
+
+ void
+#if large_memory
+edit_string()
+#else
+ editstring(delta)
+ struct hshentry const *delta;
+#endif
+/*
+ * Read an edit script from finptr and applies it to the edit file.
+#if !large_memory
+ * The result is written to fcopy.
+ * If delta!=nil, keyword expansion is performed simultaneously.
+ * If running out of lines in fedit, fedit and fcopy are swapped.
+ * editfile is the name of the file that goes with fedit.
+#endif
+ * If foutptr is set, the edit script is also copied verbatim to foutptr.
+ * Assumes that all these files are open.
+ * resultfile is the name of the file that goes with fcopy.
+ * Assumes the next input character from finptr is the first character of
+ * the edit script. Resets nextc on exit.
+ *
+ * The #if/#endif lines inside this comment are ordinary comment text; they
+ * only mark the sentences that apply when large_memory is not set.
+ *
+ * As used below, getdiffcmd() yields a negative value when the script is
+ * exhausted, 0 for a command that deletes lines, and a positive value for
+ * a command that adds lines; the command's numbers arrive in `dc'.
+ * linecorr accumulates lines added minus lines deleted during this run.
+ */
+{
+ int ed; /* editor command */
+ register int c;
+ declarecache;
+ register FILE *frew;
+# if !large_memory
+ register FILE *f;
+ unsigned long line_lim = ULONG_MAX; /* commands at or beyond this line are rejected; lowered once a delete reaches end of file */
+ register RILE *fe;
+# endif
+ register unsigned long i;
+ register RILE *fin;
+# if large_memory
+ register unsigned long j;
+# endif
+ struct diffcmd dc;
+
+ editline += linecorr; linecorr=0; /*correct line number*/
+ frew = foutptr;
+ fin = finptr;
+ setupcache(fin);
+ initdiffcmd(&dc);
+ while (0 <= (ed = getdiffcmd(fin,true,frew,&dc)))
+#if !large_memory
+ if (line_lim <= dc.line1)
+ editLineNumberOverflow();
+ else
+#endif
+ if (!ed) { /* delete command: drop dc.nlines lines beginning with line dc.line1 */
+ copylines(dc.line1-1, delta);
+ /* skip over unwanted lines */
+ i = dc.nlines;
+ linecorr -= i;
+ editline += i;
+# if large_memory
+ deletelines(editline+linecorr, i);
+# else
+ fe = fedit;
+ do {
+ /*skip next line*/
+ do {
+ Igeteof(fe, c, { if (i!=1) editLineNumberOverflow(); line_lim = dc.dafter; break; } );
+ } while (c != '\n');
+ } while (--i);
+# endif
+ } else { /* add command: insert dc.nlines lines from the script after line dc.line1 */
+ copylines(dc.line1, delta); /*copy only; no delete*/
+ i = dc.nlines;
+# if large_memory
+ j = editline+linecorr; /* 0-origin slot in the line table for the first added line */
+# endif
+ linecorr += i;
+#if !large_memory
+ f = fcopy;
+ if (delta)
+ do {
+ switch (expandline(fin,f,delta,true,frew)) {
+ case 0: case 1:
+ if (i==1) /* the string ended on the last announced line: done */
+ return;
+ /* fall into */
+ case -1:
+ editEndsPrematurely();
+ }
+ } while (--i);
+ else
+#endif
+ {
+ cache(fin);
+ do {
+# if large_memory
+ insertline(j++, cachetell());
+# endif
+ for (;;) {
+ GETC(frew, c);
+# if !large_memory
+ aputc(c, f);
+# endif
+ if (c == '\n')
+ break;
+ if (c==SDELIM) {
+ GETC(frew, c);
+ if (c!=SDELIM) { /* single SDELIM: end of the edit script string */
+ if (--i) /* lines were still owed */
+ editEndsPrematurely();
+ nextc = c;
+ uncache(fin);
+ return;
+ }
+ }
+ }
+ ++rcsline;
+ } while (--i);
+ uncache(fin);
+ }
+ }
+}
+
+
+
+/* The rest is for keyword expansion */
+
+
+
+	int
+expandline(infile, outfile, delta, delimstuffed, frewfile)
+	RILE *infile;
+	FILE *outfile, *frewfile;
+	struct hshentry const *delta;
+	int delimstuffed;
+/*
+ * Read a line from INFILE and write it to OUTFILE.
+ * If DELIMSTUFFED is true, double SDELIM is replaced with single SDELIM,
+ * and a single SDELIM ends the input; the character after it is left in nextc.
+ * Keyword expansion is performed with data from delta.
+ * If FREWFILE is set, copy the line unchanged to FREWFILE.
+ * DELIMSTUFFED must be true if FREWFILE is set.
+ * Yields -1 if no data is copied, 0 if an incomplete line is copied,
+ * 2 if a complete line is copied; adds 1 to yield if expansion occurred.
+ *
+ * The text collected while looking for a keyword is kept in the static
+ * buffer `keyval', which persists across calls and grows as needed.
+ */
+{
+	register int c;		/* declared int explicitly; implicit int is not valid since C99 */
+	declarecache;
+	register FILE *out, *frew;
+	register char * tp;	/* next free byte in keyval */
+	register int e, ds, r;	/* e: 1 once a keyword was expanded; r: base yield */
+	char const *tlim;
+	static struct buf keyval;
+	enum markers matchresult;
+
+	setupcache(infile); cache(infile);
+	out = outfile;
+	frew = frewfile;
+	ds = delimstuffed;
+	/* room for KDELIM, keylength letters, the terminating char, and NUL */
+	bufalloc(&keyval, keylength+3);
+	e = 0;
+	r = -1;
+
+	for (;;) {
+	    if (ds) {
+		GETC(frew, c);
+	    } else
+		cachegeteof(c, goto uncache_exit;);
+	    for (;;) {
+		switch (c) {
+		    case SDELIM:
+			if (ds) {
+			    GETC(frew, c);
+			    if (c != SDELIM) {
+				/* end of string */
+				nextc=c;
+				goto uncache_exit;
+			    }
+			}
+			/* fall into */
+		    default:
+			aputc(c,out);
+			r = 0;
+			break;
+
+		    case '\n':
+			rcsline += ds;
+			aputc(c,out);
+			r = 2;
+			goto uncache_exit;
+
+		    case KDELIM:
+			r = 0;
+			/* check for keyword */
+			/* first, copy a long enough string into keyval */
+			tp = keyval.string;
+			*tp++ = KDELIM;
+			for (;;) {
+			    if (ds) {
+				GETC(frew, c);
+			    } else
+				cachegeteof(c, goto keystring_eof;);
+			    if (tp < keyval.string+keylength+1)
+				switch (ctab[c]) {
+				    case LETTER: case Letter:
+					*tp++ = c;
+					continue;
+				    default:
+					break;
+				}
+			    break;
+			}
+			*tp++ = c; *tp = '\0';
+			matchresult = trymatch(keyval.string+1);
+			if (matchresult==Nomatch) {
+				/* write what was collected, minus c, and reprocess c */
+				tp[-1] = 0;
+				aputs(keyval.string, out);
+				continue;   /* last c handled properly */
+			}
+
+			/* Now we have a keyword terminated with a K/VDELIM */
+			if (c==VDELIM) {
+			    /* try to find closing KDELIM, and replace value */
+			    tlim = keyval.string + keyval.size;
+			    for (;;) {
+				if (ds) {
+				    GETC(frew, c);
+				} else
+				    cachegeteof(c, goto keystring_eof;);
+				if (c=='\n' || c==KDELIM)
+				    break;
+				*tp++ =c;
+				if (tlim <= tp)
+				    tp = bufenlarge(&keyval, &tlim);
+				if (c==SDELIM && ds) { /*skip next SDELIM */
+				    GETC(frew, c);
+				    if (c != SDELIM) {
+					/* end of string before closing KDELIM or newline */
+					nextc = c;
+					goto keystring_eof;
+				    }
+				}
+			    }
+			    if (c!=KDELIM) {
+				/* couldn't find closing KDELIM -- give up */
+				*tp = 0;
+				aputs(keyval.string, out);
+				continue;   /* last c handled properly */
+			    }
+			}
+			/* now put out the new keyword value */
+			keyreplace(matchresult,delta,out);
+			e = 1;
+			break;
+		}
+		break;
+	    }
+	}
+
+    keystring_eof:
+	/* input ended inside a would-be keyword: write the collected text as is */
+	*tp = 0;
+	aputs(keyval.string, out);
+    uncache_exit:
+	uncache(infile);
+	return r + e;
+}
+
+
+char const ciklog[ciklogsize] = "checked in with -k by "; /* keyreplace() inserts no log text for a log message that starts with this */
+
+ static void
+keyreplace(marker,delta,out)
+ enum markers marker;
+ register struct hshentry const *delta;
+ register FILE *out;
+/* function: outputs the keyword value(s) corresponding to marker.
+ * Attributes are derived from delta.
+ * If Expand is KEY_EXPAND, only the keyword between two KDELIMs is written.
+ * If Expand is KEYVAL_EXPAND or KEYVALLOCK_EXPAND, the value is written
+ * between "KDELIM keyword VDELIM" and a closing KDELIM.
+ * For any other Expand setting only the value is written.
+ * For the Log marker, the log message of delta is then appended, each
+ * line preceded by the comment leader (Comment), unless the message
+ * starts with ciklog.
+ */
+{
+ register char const *sp, *cp, *date;
+ register char c;
+ register size_t cs, cw, ls;
+ char const *sp1;
+ char datebuf[datesize];
+ int RCSv;
+
+ sp = Keyword[(int)marker];
+
+ if (Expand == KEY_EXPAND) {
+ aprintf(out, "%c%s%c", KDELIM, sp, KDELIM);
+ return;
+ }
+
+ date= delta->date;
+ RCSv = RCSversion;
+
+ if (Expand == KEYVAL_EXPAND || Expand == KEYVALLOCK_EXPAND)
+ aprintf(out, "%c%s%c%c", KDELIM, sp, VDELIM,
+ marker==Log && RCSv<VERSION(5) ? '\t' : ' '
+ );
+
+ switch (marker) {
+ case Author:
+ aputs(delta->author, out);
+ break;
+ case Date:
+ aputs(date2str(date,datebuf), out);
+ break;
+ case Id:
+ case Header:
+ aprintf(out, "%s %s %s %s %s",
+ marker==Id || RCSv<VERSION(4)
+ ? basename(RCSfilename)
+ : getfullRCSname(),
+ delta->num,
+ date2str(date, datebuf),
+ delta->author,
+ RCSv==VERSION(3) && delta->lockedby ? "Locked"
+ : delta->state
+ );
+ if (delta->lockedby!=nil)
+ if (VERSION(5) <= RCSv) {
+ if (locker_expansion || Expand==KEYVALLOCK_EXPAND)
+ aprintf(out, " %s", delta->lockedby);
+ } else if (RCSv == VERSION(4))
+ aprintf(out, " Locker: %s", delta->lockedby);
+ break;
+ case Locker:
+ if (delta->lockedby)
+ if (
+ locker_expansion
+ || Expand == KEYVALLOCK_EXPAND
+ || RCSv <= VERSION(4)
+ )
+ aputs(delta->lockedby, out);
+ break;
+ case Log:
+ case RCSfile:
+ aputs(basename(RCSfilename), out);
+ break;
+ case Revision:
+ aputs(delta->num, out);
+ break;
+ case Source:
+ aputs(getfullRCSname(), out);
+ break;
+ case State:
+ aputs(delta->state, out);
+ break;
+ default:
+ break;
+ }
+ if (Expand == KEYVAL_EXPAND || Expand == KEYVALLOCK_EXPAND) {
+ afputc(' ', out);
+ afputc(KDELIM, out);
+ }
+ if (marker == Log) {
+ sp = delta->log.string;
+ ls = delta->log.size;
+ if (sizeof(ciklog)-1<=ls && !memcmp(sp,ciklog,sizeof(ciklog)-1))
+ return; /* the log starts with ciklog: insert nothing */
+ afputc('\n', out);
+ cp = Comment.string;
+ cw = cs = Comment.size; /* cs: whole leader; cw: part written before every line */
+ awrite(cp, cs, out);
+ /* oddity: 2 spaces between date and time, not 1 as usual */
+ sp1 = strchr(date2str(date,datebuf), ' ');
+ aprintf(out, "Revision %s %.*s %s %s",
+ delta->num, (int)(sp1-datebuf), datebuf, sp1, delta->author
+ );
+ /* Do not include state: it may change and is not updated. */
+ /* Comment is the comment leader. */
+ /* From version 5 on, cw excludes trailing blanks and tabs of the leader. */
+ if (VERSION(5) <= RCSv)
+ for (; cw && (cp[cw-1]==' ' || cp[cw-1]=='\t'); --cw)
+ ;
+ for (;;) {
+ afputc('\n', out);
+ awrite(cp, cw, out);
+ if (!ls)
+ break;
+ --ls;
+ c = *sp++;
+ if (c != '\n') {
+ awrite(cp+cw, cs-cw, out); /* nonempty line: write the rest of the leader too */
+ do {
+ afputc(c,out);
+ if (!ls)
+ break;
+ --ls;
+ c = *sp++;
+ } while (c != '\n');
+ }
+ }
+ }
+}
+
+#if has_readlink
+ static int
+resolve_symlink(L)
+    struct buf *L;
+/*
+ * If L is a symbolic link, resolve it to the name that it points to.
+ * If unsuccessful, set errno and yield -1.
+ * If it points to an existing file, yield 1.
+ * Otherwise, set errno=ENOENT and yield 0.
+ * At most MAXSYMLINKS links are followed; one more yields -1 with
+ * errno=ELOOP.
+ */
+{
+    char *b, a[SIZEABLE_PATH];
+    int e;
+    size_t s;
+    ssize_t r;
+    struct buf bigbuf;
+    unsigned linkcount = MAXSYMLINKS + 1;
+
+    b = a;
+    s = sizeof(a);
+    bufautobegin(&bigbuf);
+    while ((r = readlink(L->string,b,s)) != -1)
+        /* r is nonnegative here, so the conversion to size_t is exact. */
+        if ((size_t)r == s) {
+            /* The buffer was filled, so the link text may be cut off: retry with a bigger one. */
+            bufalloc(&bigbuf, s<<1);
+            b = bigbuf.string;
+            s = bigbuf.size;
+        } else if (!--linkcount) {
+            /* Too many links: release bigbuf before giving up. */
+            bufautoend(&bigbuf);
+            errno = ELOOP;
+            return -1;
+        } else {
+            /* Splice symbolic link into L. */
+            b[r] = '\0';
+            L->string[ROOTPATH(b) ? (size_t)0 : dirlen(L->string)] = '\0';
+            bufscat(L, b);
+        }
+    /* readlink() failed; keep its errno across the cleanup. */
+    e = errno;
+    bufautoend(&bigbuf);
+    errno = e;
+    switch (e) {
+        case ENXIO:
+        case EINVAL: return 1;
+        case ENOENT: return 0;
+        default: return -1;
+    }
+}
+#endif
+
+ RILE *
+rcswriteopen(RCSbuf, status, mustread)
+ struct buf *RCSbuf;
+ struct stat *status;
+ int mustread;
+/*
+ * Create the lock file corresponding to RCSNAME.
+ * Then try to open RCSNAME for reading and yield its FILE* descriptor.
+ * Put its status into *STATUS too.
+ * MUSTREAD is true if the file must already exist, too.
+ * If all goes well, discard any previously acquired locks,
+ * and set frewrite to the FILE* descriptor of the lock file,
+ * which will eventually turn into the new RCS file.
+ *
+ * RCSNAME is RCSbuf->string.
+ * Yield 0 with errno set if the lock file cannot be created or the
+ * RCS file cannot be opened.  If the lock file is created but the RCS
+ * file does not exist, yield 0 with errno=ENOENT and frewrite set.
+ */
+{
+ register char *tp;
+ register char const *sp, *RCSname, *x;
+ RILE *f;
+ size_t l;
+ int e, exists, fdesc, previouslock, r;
+ struct buf *dirt;
+ struct stat statbuf;
+
+ previouslock = frewrite != 0; /* 1 if frewrite is already set */
+ exists =
+# if has_readlink
+ resolve_symlink(RCSbuf);
+# else
+ stat(RCSbuf->string, &statbuf) == 0 ? 1
+ : errno==ENOENT ? 0 : -1;
+# endif
+ if (exists < (mustread|previouslock))
+ /*
+ * There's an unusual problem with the RCS file;
+ * or the RCS file doesn't exist,
+ * and we must read or we already have a lock elsewhere.
+ */
+ return 0;
+
+ RCSname = RCSbuf->string;
+ sp = basename(RCSname);
+ l = sp - RCSname; /* offset of the base name within RCSname */
+ dirt = &dirtfname[previouslock]; /* index 1 if a previous lock exists, else 0 */
+ bufscpy(dirt, RCSname);
+ tp = dirt->string + l;
+ x = rcssuffix(RCSname);
+# if has_readlink
+ if (!x) {
+ error("symbolic link to non RCS filename `%s'", RCSname);
+ errno = EINVAL;
+ return 0;
+ }
+# endif
+ if (*sp == *x) {
+ error("RCS filename `%s' incompatible with suffix `%s'", sp, x);
+ errno = EINVAL;
+ return 0;
+ }
+ /* Create a lock file whose name is a function of the RCS filename. */
+ if (*x) {
+ /*
+ * The suffix is nonempty.
+ * The lock filename is the first char of the suffix,
+ * followed by the RCS filename with last char removed. E.g.:
+ * foo,v RCS filename with suffix ,v
+ * ,foo, lock filename
+ */
+ *tp++ = *x;
+ while (*sp)
+ *tp++ = *sp++;
+ *--tp = 0; /* overwrite the last copied char with the terminator */
+ } else {
+ /*
+ * The suffix is empty.
+ * The lock filename is the RCS filename
+ * with last char replaced by '_'.
+ */
+ while ((*tp++ = *sp++))
+ ;
+ tp -= 2; /* back up over the terminator to the last char of the name */
+ if (*tp == '_') {
+ error("RCS filename `%s' ends with `%c'", RCSname, *tp);
+ errno = EINVAL;
+ return 0;
+ }
+ *tp = '_';
+ }
+
+ sp = tp = dirt->string; /* both now point at the lock filename */
+
+ f = 0;
+
+ /*
+ * good news:
+ * open(f, O_CREAT|O_EXCL|O_TRUNC|O_WRONLY, READONLY) is atomic
+ * according to Posix 1003.1-1990.
+ * bad news:
+ * NFS ignores O_EXCL and doesn't comply with Posix 1003.1-1990.
+ * good news:
+ * (O_TRUNC,READONLY) normally guarantees atomicity even with NFS.
+ * bad news:
+ * If you're root, (O_TRUNC,READONLY) doesn't guarantee atomicity.
+ * good news:
+ * Root-over-the-wire NFS access is rare for security reasons.
+ * This bug has never been reported in practice with RCS.
+ * So we don't worry about this bug.
+ *
+ * An even rarer NFS bug can occur when clients retry requests.
+ * Suppose client A renames the lock file ",f," to "f,v"
+ * at about the same time that client B creates ",f,",
+ * and suppose A's first rename request is delayed, so A reissues it.
+ * The sequence of events might be:
+ * A sends rename(",f,", "f,v")
+ * B sends create(",f,")
+ * A sends retry of rename(",f,", "f,v")
+ * server receives, does, and acknowledges A's first rename()
+ * A receives acknowledgment, and its RCS program exits
+ * server receives, does, and acknowledges B's create()
+ * server receives, does, and acknowledges A's retry of rename()
+ * This not only wrongly deletes B's lock, it removes the RCS file!
+ * Most NFS implementations have idempotency caches that usually prevent
+ * this scenario, but such caches are finite and can be overrun.
+ * This problem afflicts programs that use the traditional
+ * Unix method of using link() and unlink() to get and release locks,
+ * as well as RCS's method of using open() and rename().
+ * There is no easy workaround for either link-unlink or open-rename.
+ * Any new method based on lockf() seemingly would be incompatible with
+ * the old methods; besides, lockf() is notoriously buggy under NFS.
+ * Since this problem afflicts scads of Unix programs, but is so rare
+ * that nobody seems to be worried about it, we won't worry either.
+ */
+# define READONLY (S_IRUSR|S_IRGRP|S_IROTH)
+# if !open_can_creat
+# define create(f) creat(f, READONLY)
+# else
+# define create(f) open(f, O_BINARY|O_CREAT|O_EXCL|O_TRUNC|O_WRONLY, READONLY)
+# endif
+
+ catchints();
+ ignoreints();
+
+ /*
+ * Create a lock file for an RCS file. This should be atomic, i.e.
+ * if two processes try it simultaneously, at most one should succeed.
+ */
+ seteid();
+ fdesc = create(sp);
+ e = errno; /* saved before setrid() can change errno */
+ setrid();
+
+ if (fdesc < 0) {
+ if (e == EACCES && stat(tp,&statbuf) == 0)
+ /* The RCS file is busy. */
+ e = EEXIST;
+ } else {
+ dirtfmaker[0] = effective; /* dirtempunlink() calls seteid() before unlinking such a file */
+ e = ENOENT; /* the errno reported if the RCS file does not exist */
+ if (exists) {
+ f = Iopen(RCSname, FOPEN_R, status);
+ e = errno;
+ if (f && previouslock) {
+ /* Discard the previous lock in favor of this one. */
+ Ozclose(&frewrite);
+ seteid();
+ if ((r = un_link(newRCSfilename)) != 0)
+ e = errno;
+ setrid();
+ if (r != 0)
+ enfaterror(e, newRCSfilename);
+ bufscpy(&dirtfname[0], tp);
+ }
+ }
+ if (!(frewrite = fdopen(fdesc, FOPEN_W))) {
+ efaterror(newRCSfilename);
+ }
+ }
+
+ restoreints();
+
+ errno = e;
+ return f;
+}
+
+ void
+keepdirtemp(name)
+    char const *name;
+/*
+ * Mark NAME as not made, so that dirtempunlink() leaves it alone.
+ * NAME must be the very pointer stored in one of the dirtfname slots;
+ * the names are compared by address, not by contents.
+ * It is a fatal error if no slot holds NAME.
+ */
+{
+    register int slot = DIRTEMPNAMES;
+
+    /* Scan the slots from the last one down to slot 0. */
+    while (slot-- > 0) {
+        if (name != dirtfname[slot].string)
+            continue;
+        dirtfmaker[slot] = notmade;
+        return;
+    }
+    faterror("keepdirtemp");
+}
+
+ char const *
+makedirtemp(name, n)
+ register char const *name;
+ int n;
+/*
+ * Have maketemp() do all the work if name is null.
+ * Otherwise, create a unique filename in name's dir using n and name
+ * and store it into the dirtfname[n].
+ * Because of storage in dirtfname, dirtempunlink() can unlink the file later.
+ * Return a pointer to the filename created.
+ * The name is the directory part of NAME followed by '_' and the digit N;
+ * with has_mktemp the rest is filled in by mktemp(), otherwise the rest
+ * is NAME's file name part without its first two chars.
+ * NOTE(review): only a name is produced here; no file is created by
+ * this function.
+ */
+{
+ register char *tp, *np;
+ register size_t dl;
+ register struct buf *bn;
+
+ if (!name)
+ return maketemp(n);
+ dl = dirlen(name);
+ bn = &dirtfname[n];
+ bufalloc(bn,
+# if has_mktemp
+ dl + 9 /* dl chars + '_' + digit + "XXXXXX" + NUL */
+# else
+ strlen(name) + 3 /* '_' + digit + NUL; at most two chars of NAME are dropped */
+# endif
+ );
+ bufscpy(bn, name);
+ np = tp = bn->string;
+ tp += dl; /* overwrite everything after the first dl chars */
+ *tp++ = '_';
+ *tp++ = '0'+n;
+ catchints();
+# if has_mktemp
+ VOID strcpy(tp, "XXXXXX");
+ if (!mktemp(np) || !*np)
+ faterror("can't make temporary file name `%.*s%c_%cXXXXXX'",
+ (int)dl, name, SLASH, '0'+n
+ );
+# else
+ /*
+ * Posix 1003.1-1990 has no reliable way
+ * to create a unique file in a named directory.
+ * We fudge here. If the working file name is abcde,
+ * the temp filename is _Ncde where N is a digit.
+ */
+ name += dl;
+ if (*name) name++;
+ if (*name) name++;
+ VOID strcpy(tp, name);
+# endif
+ dirtfmaker[n] = real; /* dirtempunlink() unlinks such a file without calling seteid() */
+ return np;
+}
+
+ void
+dirtempunlink()
+/*
+ * Unlink every file still recorded in dirtfname and mark its slot notmade.
+ * A file whose maker is `effective' is unlinked between seteid() and setrid().
+ * May be invoked by signal handler.
+ */
+{
+    register int slot = DIRTEMPNAMES;
+    enum maker how;
+
+    while (0 <= --slot) {
+        how = dirtfmaker[slot];
+        if (how == notmade)
+            continue;
+        if (how == effective)
+            seteid();
+        VOID un_link(dirtfname[slot].string);
+        if (how == effective)
+            setrid();
+        dirtfmaker[slot] = notmade;
+    }
+}
+
+
+ int
+#if has_prototypes
+chnamemod(FILE **fromp, char const *from, char const *to, mode_t mode)
+ /* The `#if has_prototypes' is needed because mode_t might promote to int. */
+#else
+ chnamemod(fromp,from,to,mode) FILE **fromp; char const *from,*to; mode_t mode;
+#endif
+/*
+ * Rename a file (with optional stream pointer *FROMP) from FROM to TO.
+ * FROM already exists.
+ * Change its mode to MODE, before renaming if possible.
+ * If FROMP, close and clear *FROMP before renaming it.
+ * Unlink TO if it already exists.
+ * Return -1 on error (setting errno), 0 otherwise.
+ *
+ * If bad_a_rename, FROM is given MODE plus owner write permission
+ * for the rename, and TO is changed to MODE afterwards if that differs.
+ * If has_NFS, a failed rename() with errno==ENOENT is not treated as
+ * an error.
+ * If !has_rename, the rename is done with do_link() and un_link().
+ */
+{
+# if bad_a_rename
+ /*
+ * This host is brain damaged. A race condition is possible
+ * while the lock file is temporarily writable.
+ * There doesn't seem to be a workaround.
+ */
+ mode_t mode_while_renaming = mode|S_IWUSR;
+# else
+# define mode_while_renaming mode
+# endif
+ if (fromp) {
+# if has_fchmod
+ if (fchmod(fileno(*fromp), mode_while_renaming) != 0)
+ return -1; /* NOTE(review): *FROMP is left open on this path */
+# endif
+ Ozclose(fromp);
+ }
+# if has_fchmod
+ else
+# endif
+ if (chmod(from, mode_while_renaming) != 0) /* the only mode change when fchmod() is unavailable or FROMP is null */
+ return -1;
+
+# if !has_rename || bad_b_rename
+ VOID un_link(to);
+ /*
+ * We need not check the result;
+ * link() or rename() will catch it.
+ * No harm is done if TO does not exist.
+ * However, there's a short window of inconsistency
+ * during which TO does not exist.
+ */
+# endif
+
+ return
+# if !has_rename
+ do_link(from,to) != 0 ? -1 : un_link(from)
+# else
+ rename(from, to) != 0
+# if has_NFS
+ && errno != ENOENT
+# endif
+ ? -1
+# if bad_a_rename
+ : mode != mode_while_renaming ? chmod(to, mode)
+# endif
+ : 0
+# endif
+ ;
+
+# undef mode_while_renaming
+}
+
+
+
+ int
+findlock(delete, target)
+    int delete;
+    struct hshentry **target;
+/*
+ * Look through Locks for locks whose login is the caller's.
+ * If exactly one is found, store its delta into *TARGET, remove the
+ * lock from the list if DELETE, and yield 1.
+ * Yield 0 if the caller holds no lock.
+ * If the caller holds two or more, print an error message and yield 2;
+ * *TARGET and the list are then left untouched.
+ */
+{
+    register struct lock *cur, **link, **mine;
+
+    mine = 0;
+    link = &Locks;
+    while ((cur = *link) != 0) {
+        if (strcmp(getcaller(), cur->login) == 0) {
+            if (mine) {
+                error("multiple revisions locked by %s; please specify one", getcaller());
+                return 2;
+            }
+            /* Remember the pointer that leads to this lock, so it can be spliced out. */
+            mine = link;
+        }
+        link = &cur->nextlock;
+    }
+    if (!mine)
+        return 0;
+    cur = *mine;
+    *target = cur->delta;
+    if (delete) {
+        cur->delta->lockedby = nil;
+        *mine = cur->nextlock;
+    }
+    return 1;
+}
+
+ int
+addlock(delta)
+    struct hshentry * delta;
+/*
+ * Lock DELTA for the caller.
+ * Yield 1 if a new lock was added at the head of Locks.
+ * Yield 0 if the caller already holds a lock on DELTA's revision.
+ * Print an error message and yield -1 if somebody else holds it.
+ */
+{
+    register struct lock *lk;
+
+    for (lk = Locks; lk; lk = lk->nextlock) {
+        if (cmpnum(delta->num, lk->delta->num) != 0)
+            continue;
+        /* The first lock on this revision decides the outcome. */
+        if (strcmp(getcaller(), lk->login) == 0)
+            return 0;
+        error("revision %s already locked by %s",
+            delta->num, lk->login
+        );
+        return -1;
+    }
+    lk = ftalloc(struct lock);
+    delta->lockedby = lk->login = getcaller();
+    lk->delta = delta;
+    lk->nextlock = Locks;
+    Locks = lk;
+    return 1;
+}
+
+
+ int
+addsymbol(num, name, rebind)
+    char const *num, *name;
+    int rebind;
+/*
+ * Bind the symbolic NAME to revision NUM.
+ * If NAME is not yet in Symbols, add it at the head and return true.
+ * If NAME is already bound, rebind it to NUM and return true when
+ * REBIND is set or the old binding equals NUM;
+ * otherwise, print an error message and return false.
+ */
+{
+    register struct assoc *a;
+
+    for (a = Symbols; a; a = a->nextassoc) {
+        if (strcmp(name, a->symbol) != 0)
+            continue;
+        if (!rebind && strcmp(a->num,num) != 0) {
+            error("symbolic name %s already bound to %s",
+                name, a->num
+            );
+            return false;
+        }
+        a->num = num;
+        return true;
+    }
+    a = ftalloc(struct assoc);
+    a->symbol = name;
+    a->num = num;
+    a->nextassoc = Symbols;
+    Symbols = a;
+    return true;
+}
+
+
+
+ char const *
+getcaller()
+/*
+ * Yield the caller's login name as reported by getusername().
+ * With has_setuid, getusername() is told whether euid() and ruid() differ;
+ * otherwise it is passed false.
+ */
+{
+    int ids_differ;
+
+#   if has_setuid
+        ids_differ = euid()!=ruid();
+#   else
+        ids_differ = false;
+#   endif
+    return getusername(ids_differ);
+}
+
+
+ int
+checkaccesslist()
+/*
+ * Return true if the access list is empty, myself(RCSstat.st_uid) holds,
+ * the caller's login name is "root", or the caller is on the access list.
+ * Otherwise, print an error message and return false.
+ */
+{
+    register struct access const *entry;
+
+    if (!AccessList)
+        return true;
+    if (myself(RCSstat.st_uid))
+        return true;
+    if (strcmp(getcaller(),"root")==0)
+        return true;
+
+    /* AccessList is nonempty here. */
+    for (entry = AccessList; entry; entry = entry->nextaccess)
+        if (strcmp(getcaller(), entry->login) == 0)
+            return true;
+
+    error("user %s not on the access list", getcaller());
+    return false;
+}
+
+
+ int
+dorewrite(lockflag, changed)
+    int lockflag, changed;
+/*
+ * Do nothing if LOCKFLAG is zero.
+ * Prepare to rewrite an RCS file if CHANGED is positive.
+ * Stop rewriting if CHANGED is zero, because there won't be any changes.
+ * Fail if CHANGED is negative.
+ * Return true on success.
+ *
+ * Stopping means closing frewrite and unlinking newRCSfilename;
+ * if the unlink fails, the error is reported against newRCSfilename,
+ * the file that could not be removed.
+ */
+{
+    int r, e;
+
+    if (!lockflag)
+        return true;
+    if (changed < 0)
+        return false;
+    if (changed) {
+        /* Write the admin part, the tree, and the start of the desc section. */
+        putadmin(frewrite);
+        puttree(Head, frewrite);
+        aprintf(frewrite, "\n\n%s%c", Kdesc, nextc);
+        foutptr = frewrite;
+        return true;
+    }
+
+    /* No changes: give up the lock file. */
+    Ozclose(&frewrite);
+    seteid();
+    ignoreints();
+    r = un_link(newRCSfilename);
+    e = errno;    /* saved before the calls below can change errno */
+    keepdirtemp(newRCSfilename);
+    restoreints();
+    setrid();
+    if (r != 0) {
+        enerror(e, newRCSfilename);
+        return false;
+    }
+    return true;
+}
+
+ int
+donerewrite(changed)
+    int changed;
+/*
+ * If CHANGED is nonzero and no errors have been counted in nerror,
+ * copy the rest of finptr (if any) to frewrite and rename
+ * newRCSfilename to RCSfilename with the write permission bits removed.
+ * Return true on success, or if there was nothing to do.
+ */
+{
+    int r, e;
+
+    if (!changed || nerror)
+        return true;
+
+    if (finptr) {
+        fastcopy(finptr, frewrite);
+        Izclose(&finptr);
+    }
+    if (1 < RCSstat.st_nlink)
+        warn("breaking hard link to %s", RCSfilename);
+    seteid();
+    ignoreints();
+    r = chnamemod(&frewrite, newRCSfilename, RCSfilename,
+        RCSstat.st_mode & ~(S_IWUSR|S_IWGRP|S_IWOTH)
+    );
+    e = errno;
+    keepdirtemp(newRCSfilename);
+    restoreints();
+    setrid();
+    if (r == 0)
+        return true;
+
+    /* The rename failed; newRCSfilename was marked by keepdirtemp() above. */
+    enerror(e, RCSfilename);
+    error("saved in %s", newRCSfilename);
+    dirtempunlink();
+    return false;
+}
+
+ void
+aflush(f)
+    FILE *f;
+/* Flush F; call Oerror() if the flush fails. */
+{
+    int failed = fflush(f) != 0;
+
+    if (failed)
+        Oerror();
+}
diff --git a/gnu/usr.bin/rcs/lib/rcsfcmp.c b/gnu/usr.bin/rcs/lib/rcsfcmp.c
new file mode 100644
index 000000000000..75a6bbce1439
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcsfcmp.c
@@ -0,0 +1,321 @@
+/*
+ * RCS file comparison
+ */
+/*****************************************************************************
+ * rcsfcmp()
+ * Testprogram: define FCMPTEST
+ *****************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+
+
+/* $Log: rcsfcmp.c,v $
+ * Revision 5.9 1991/10/07 17:32:46 eggert
+ * Count log lines correctly.
+ *
+ * Revision 5.8 1991/08/19 03:13:55 eggert
+ * Tune.
+ *
+ * Revision 5.7 1991/04/21 11:58:22 eggert
+ * Fix errno bug. Add MS-DOS support.
+ *
+ * Revision 5.6 1991/02/28 19:18:47 eggert
+ * Open work file at most once.
+ *
+ * Revision 5.5 1990/11/27 09:26:05 eggert
+ * Fix comment leader bug.
+ *
+ * Revision 5.4 1990/11/01 05:03:42 eggert
+ * Permit arbitrary data in logs and comment leaders.
+ *
+ * Revision 5.3 1990/09/11 02:41:15 eggert
+ * Don't ignore differences inside keyword strings if -ko is set.
+ *
+ * Revision 5.1 1990/08/29 07:13:58 eggert
+ * Clean old log messages too.
+ *
+ * Revision 5.0 1990/08/22 08:12:49 eggert
+ * Don't append "checked in with -k by " log to logs,
+ * so that checking in a program with -k doesn't change it.
+ * Ansify and Posixate. Remove lint.
+ *
+ * Revision 4.5 89/05/01 15:12:42 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.4 88/08/09 19:12:50 eggert
+ * Shrink stdio code size.
+ *
+ * Revision 4.3 87/12/18 11:40:02 narten
+ * lint cleanups (Guy Harris)
+ *
+ * Revision 4.2 87/10/18 10:33:06 narten
+ * updting version number. Changes relative to 1.1 actually relative to
+ * 4.1
+ *
+ * Revision 1.2 87/03/27 14:22:19 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/05/10 16:24:04 wft
+ * Marker matching now uses trymatch(). Marker pattern is now
+ * checked precisely.
+ *
+ * Revision 3.1 82/12/04 13:21:40 wft
+ * Initial revision.
+ *
+ */
+
+/*
+#define FCMPTEST
+*/
+/* Testprogram; prints out whether two files are identical,
+ * except for keywords
+ */
+
+#include "rcsbase.h"
+
+libId(fcmpId, "$Id: rcsfcmp.c,v 5.9 1991/10/07 17:32:46 eggert Exp $")
+
+ static int
+discardkeyval(c, f)
+    register int c;
+    register RILE *f;
+/*
+ * Starting with C, read from F until the current character is
+ * KDELIM or '\n', and yield that character.
+ * Yield EOF if F runs out first.
+ */
+{
+    while (c != KDELIM && c != '\n') {
+        Igeteof(f, c, return EOF;);
+    }
+    return c;
+}
+
+ int
+rcsfcmp(xfp, xstatp, ufname, delta)
+ register RILE *xfp;
+ struct stat const *xstatp;
+ char const *ufname;
+ struct hshentry const *delta;
+/* Compare the files xfp and ufname. Return zero
+ * if xfp has the same contents as ufname and neither has keywords,
+ * otherwise -1 if they are the same ignoring keyword values,
+ * and 1 if they differ even ignoring
+ * keyword values. For the LOG-keyword, rcsfcmp skips the log message
+ * given by the parameter delta in xfp. Thus, rcsfcmp returns nonpositive
+ * if xfp contains the same as ufname, with the keywords expanded.
+ * Implementation: character-by-character comparison until $ is found.
+ * If a $ is found, read in the marker keywords; if they are real keywords
+ * and identical, read in keyword value. If value is terminated properly,
+ * disregard it and optionally skip log message; otherwise, compare value.
+ *
+ * If Expand==OLD_EXPAND, keywords get no special treatment: the files
+ * are the same only if their sizes (xstatp->st_size and the size of
+ * ufname) and contents are equal.
+ * It is a fatal error if ufname cannot be opened.
+ * The stream opened for ufname is closed before returning; xfp is not.
+ */
+{
+ register int xc, uc;
+ char xkeyword[keylength+2];
+ int eqkeyvals;
+ register RILE *ufp;
+ register int xeof, ueof;
+ register char * tp;
+ register char const *sp;
+ int result;
+ enum markers match1;
+ struct stat ustat;
+
+ if (!(ufp = Iopen(ufname, FOPEN_R_WORK, &ustat))) {
+ efaterror(ufname);
+ }
+ xeof = ueof = false;
+ if (Expand==OLD_EXPAND) {
+ if (!(result = xstatp->st_size!=ustat.st_size)) {
+# if has_mmap && large_memory
+ result = !!memcmp(xfp->base,ufp->base,(size_t)xstatp->st_size);
+# else
+ for (;;) {
+ /* get the next characters */
+ Igeteof(xfp, xc, xeof=true;);
+ Igeteof(ufp, uc, ueof=true;);
+ if (xeof | ueof)
+ goto eof;
+ if (xc != uc)
+ goto return1;
+ }
+# endif
+ }
+ } else {
+ xc = 0;
+ uc = 0; /* Keep lint happy. */
+ result = 0; /* becomes -1 once a common keyword has been seen */
+
+ for (;;) {
+ if (xc != KDELIM) {
+ /* get the next characters */
+ Igeteof(xfp, xc, xeof=true;);
+ Igeteof(ufp, uc, ueof=true;);
+ if (xeof | ueof)
+ goto eof;
+ } else {
+ /* try to get both keywords */
+ tp = xkeyword;
+ for (;;) {
+ Igeteof(xfp, xc, xeof=true;);
+ Igeteof(ufp, uc, ueof=true;);
+ if (xeof | ueof)
+ goto eof;
+ if (xc != uc)
+ break;
+ switch (xc) {
+ default:
+ if (xkeyword+keylength <= tp)
+ break; /* too long to be a keyword */
+ *tp++ = xc;
+ continue;
+ case '\n': case KDELIM: case VDELIM:
+ break;
+ }
+ break;
+ }
+ if (
+ (xc==KDELIM || xc==VDELIM) && (uc==KDELIM || uc==VDELIM) &&
+ (*tp = xc, (match1 = trymatch(xkeyword)) != Nomatch)
+ ) {
+#ifdef FCMPTEST
+ VOID printf("found common keyword %s\n",xkeyword);
+#endif
+ result = -1;
+ for (;;) {
+ if (xc != uc) {
+ xc = discardkeyval(xc, xfp);
+ uc = discardkeyval(uc, ufp);
+ if ((xeof = xc==EOF) | (ueof = uc==EOF))
+ goto eof;
+ eqkeyvals = false;
+ break;
+ }
+ switch (xc) {
+ default:
+ Igeteof(xfp, xc, xeof=true;);
+ Igeteof(ufp, uc, ueof=true;);
+ if (xeof | ueof)
+ goto eof;
+ continue;
+
+ case '\n': case KDELIM:
+ eqkeyvals = true;
+ break;
+ }
+ break;
+ }
+ if (xc != uc)
+ goto return1; /* one value ends with KDELIM, the other with '\n' */
+ if (xc==KDELIM) {
+ /* Skip closing KDELIM. */
+ Igeteof(xfp, xc, xeof=true;);
+ Igeteof(ufp, uc, ueof=true;);
+ if (xeof | ueof)
+ goto eof;
+ /* if the keyword is LOG, also skip the log message in xfp*/
+ if (match1==Log) {
+ /* first, compute the number of line feeds in log msg */
+ unsigned lncnt;
+ size_t ls, ccnt;
+ sp = delta->log.string;
+ ls = delta->log.size;
+ if (ls<sizeof(ciklog)-1 || memcmp(sp,ciklog,sizeof(ciklog)-1)) {
+ /* This log message was inserted. */
+ lncnt = 3; /* presumably the newlines keyreplace() writes around the message -- TODO confirm */
+ while (ls--) if (*sp++=='\n') lncnt++;
+ for (;;) {
+ if (xc=='\n')
+ if(--lncnt==0) break;
+ Igeteof(xfp, xc, goto returnresult;);
+ }
+ /* skip last comment leader */
+ /* Can't just skip another line here, because there may be */
+ /* additional characters on the line (after the Log....$) */
+ for (ccnt=Comment.size; ccnt--; ) {
+ Igeteof(xfp, xc, goto returnresult;);
+ if(xc=='\n') break;
+ /*
+ * Read to the end of the comment leader or '\n',
+ * whatever comes first. Some editors strip
+ * trailing white space from a leader like " * ".
+ */
+ }
+ }
+ }
+ } else {
+ /* both end in the same character, but not a KDELIM */
+ /* must compare string values.*/
+#ifdef FCMPTEST
+ VOID printf("non-terminated keywords %s, potentially different values\n",xkeyword);
+#endif
+ if (!eqkeyvals)
+ goto return1;
+ }
+ }
+ }
+ if (xc != uc)
+ goto return1;
+ }
+ }
+
+ eof:
+ if (xeof==ueof) /* same only if both files ended together */
+ goto returnresult;
+ return1:
+ result = 1;
+ returnresult:
+ Ifclose(ufp);
+ return result;
+}
+
+
+
+#ifdef FCMPTEST
+
+char const cmdid[] = "rcsfcmp";
+
+int
+main(argc, argv)
+int argc; char *argv[];
+/* first argument: comment leader; 2nd: log message, 3rd: expanded file,
+ * 4th: unexpanded file
+ *
+ * Prints whether the two files are the same ignoring keyword values.
+ * rcsfcmp() yields a nonpositive value for "same" and 1 for "different".
+ * Yields 2 if the argument count is wrong, 0 otherwise.
+ */
+{   struct hshentry delta;
+    struct stat xstat;
+    RILE *xfp;
+
+    if (argc != 5) {
+        VOID fprintf(stderr,
+            "usage: %s leader logmsg expanded-file unexpanded-file\n", cmdid
+        );
+        return 2;
+    }
+    Comment.string = argv[1];
+    Comment.size = strlen(argv[1]);
+    delta.log.string = argv[2];
+    delta.log.size = strlen(argv[2]);
+    /* rcsfcmp() reads xstatp->st_size when Expand==OLD_EXPAND, so pass a real stat buffer. */
+    if (!(xfp = Iopen(argv[3], FOPEN_R_WORK, &xstat)))
+        efaterror(argv[3]);
+    if (rcsfcmp(xfp, &xstat, argv[4], &delta) <= 0)
+        VOID printf("files are the same\n");
+    else VOID printf("files are different\n");
+    return 0;
+}
+#endif
diff --git a/gnu/usr.bin/rcs/lib/rcsfnms.c b/gnu/usr.bin/rcs/lib/rcsfnms.c
new file mode 100644
index 000000000000..02562f08621f
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcsfnms.c
@@ -0,0 +1,1088 @@
+/*
+ * RCS file name handling
+ */
+/****************************************************************************
+ * creation and deletion of /tmp temporaries
+ * pairing of RCS file names and working file names.
+ * Testprogram: define PAIRTEST
+ ****************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+
+/* $Log: rcsfnms.c,v $
+ * Revision 5.8 1991/09/24 00:28:40 eggert
+ * Don't export bindex().
+ *
+ * Revision 5.7 1991/08/19 03:13:55 eggert
+ * Fix messages when rcswriteopen fails.
+ * Look in $TMP and $TEMP if $TMPDIR isn't set. Tune.
+ *
+ * Revision 5.6 1991/04/21 11:58:23 eggert
+ * Fix errno bugs. Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.5 1991/02/26 17:48:38 eggert
+ * Fix setuid bug. Support new link behavior.
+ * Define more portable getcwd().
+ *
+ * Revision 5.4 1990/11/01 05:03:43 eggert
+ * Permit arbitrary data in comment leaders.
+ *
+ * Revision 5.3 1990/09/14 22:56:16 hammer
+ * added more filename extensions and their comment leaders
+ *
+ * Revision 5.2 1990/09/04 08:02:23 eggert
+ * Fix typo when !RCSSEP.
+ *
+ * Revision 5.1 1990/08/29 07:13:59 eggert
+ * Work around buggy compilers with defective argument promotion.
+ *
+ * Revision 5.0 1990/08/22 08:12:50 eggert
+ * Ignore signals when manipulating the semaphore file.
+ * Modernize list of file name extensions.
+ * Permit paths of arbitrary length. Beware file names beginning with "-".
+ * Remove compile-time limits; use malloc instead.
+ * Permit dates past 1999/12/31. Make lock and temp files faster and safer.
+ * Ansify and Posixate.
+ * Don't use access(). Fix test for non-regular files. Tune.
+ *
+ * Revision 4.8 89/05/01 15:09:41 narten
+ * changed getwd to not stat empty directories.
+ *
+ * Revision 4.7 88/08/09 19:12:53 eggert
+ * Fix troff macro comment leader bug; add Prolog; allow cc -R; remove lint.
+ *
+ * Revision 4.6 87/12/18 11:40:23 narten
+ * additional file types added from 4.3 BSD version, and SPARC assembler
+ * comment character added. Also, more lint cleanups. (Guy Harris)
+ *
+ * Revision 4.5 87/10/18 10:34:16 narten
+ * Updating version numbers. Changes relative to 1.1 actually relative
+ * to version 4.3
+ *
+ * Revision 1.3 87/03/27 14:22:21 jenkins
+ * Port to suns
+ *
+ * Revision 1.2 85/06/26 07:34:28 svb
+ * Comment leader '% ' for '*.tex' files added.
+ *
+ * Revision 4.3 83/12/15 12:26:48 wft
+ * Added check for KDELIM in file names to pairfilenames().
+ *
+ * Revision 4.2 83/12/02 22:47:45 wft
+ * Added csh, red, and sl file name suffixes.
+ *
+ * Revision 4.1 83/05/11 16:23:39 wft
+ * Added initialization of Dbranch to InitAdmin(). Changed pairfilenames():
+ * 1. added copying of path from workfile to RCS file, if RCS file is omitted;
+ * 2. added getting the file status of RCS and working files;
+ * 3. added ignoring of directories.
+ *
+ * Revision 3.7 83/05/11 15:01:58 wft
+ * Added comtable[] which pairs file name suffixes with comment leaders;
+ * updated InitAdmin() accordingly.
+ *
+ * Revision 3.6 83/04/05 14:47:36 wft
+ * fixed Suffix in InitAdmin().
+ *
+ * Revision 3.5 83/01/17 18:01:04 wft
+ * Added getwd() and rename(); these can be removed by defining
+ * V4_2BSD, since they are not needed in 4.2 bsd.
+ * Changed sys/param.h to sys/types.h.
+ *
+ * Revision 3.4 82/12/08 21:55:20 wft
+ * removed unused variable.
+ *
+ * Revision 3.3 82/11/28 20:31:37 wft
+ * Changed mktempfile() to store the generated file names.
+ * Changed getfullRCSname() to store the file and pathname, and to
+ * delete leading "../" and "./".
+ *
+ * Revision 3.2 82/11/12 14:29:40 wft
+ * changed pairfilenames() to handle file.sfx,v; also deleted checkpathnosfx(),
+ * checksuffix(), checkfullpath(). Semaphore name generation updated.
+ * mktempfile() now checks for nil path; freefilename initialized properly.
+ * Added Suffix .h to InitAdmin. Added testprogram PAIRTEST.
+ * Moved rmsema, trysema, trydiraccess, getfullRCSname from rcsutil.c to here.
+ *
+ * Revision 3.1 82/10/18 14:51:28 wft
+ * InitAdmin() now initializes StrictLocks=STRICT_LOCKING (def. in rcsbase.h).
+ * renamed checkpath() to checkfullpath().
+ */
+
+
+#include "rcsbase.h"
+
+libId(fnmsId, "$Id: rcsfnms.c,v 5.8 1991/09/24 00:28:40 eggert Exp $")
+
+/* Name of the current RCS file; pairfilenames() points it at RCSbuf.string. */
+char const *RCSfilename;
+/* Name of the current working file; set by pairfilenames(). */
+char *workfilename;
+/* If nonzero, pairfilenames() warns that a paired working file is ignored (option -p). */
+FILE *workstdout;
+/* Status of the RCS file, filled in by the rcsopen routine passed to pairfilenames(). */
+struct stat RCSstat;
+/* List of RCS filename suffixes; each entry ends at a SLASHes character or at the final NUL. */
+char const *suffixes;
+
+/* Name of the subdirectory that is tried first when looking for RCS files. */
+static char const rcsdir[] = "RCS";
+#define rcsdirlen (sizeof(rcsdir)-1)
+
+/* RCSbuf: best RCS file name found so far; RCSb: candidate name currently being tried. */
+static struct buf RCSbuf, RCSb;
+/* errno value that goes with the name stored in RCSbuf. */
+static int RCSerrno;
+
+
+/* Temp file names to be unlinked when done, if they are not nil. */
+#define TEMPNAMES 5 /* must be at least DIRTEMPNAMES (see rcsedit.c) */
+/* volatile: tempunlink() reads and clears these slots and may run from a signal handler. */
+static char *volatile tfnames[TEMPNAMES];
+
+
+/* One entry of comtable: a working-file suffix and the comment leader guessed for it. */
+struct compair {
+ char const *suffix, *comlead;
+};
+
+static struct compair const comtable[] = {
+/* comtable pairs each filename suffix with a comment leader. The comment */
+/* leader is placed before each line generated by the $Log keyword. This */
+/* table is used to guess the proper comment leader from the working file's */
+/* suffix during initial ci (see InitAdmin()). Comment leaders are needed */
+/* for languages without multiline comments; for others they are optional. */
+/* InitAdmin() scans the table in order and stops at the first entry that */
+/* suffix_matches() accepts; the final nil suffix matches everything. */
+ "a", "-- ", /* Ada */
+ "ada", "-- ",
+ "asm", ";; ", /* assembler (MS-DOS) */
+ "bat", ":: ", /* batch (MS-DOS) */
+ "c", " * ", /* C */
+ "c++", "// ", /* C++ in all its infinite guises */
+ "cc", "// ",
+ "cpp", "// ",
+ "cxx", "// ",
+ "cl", ";;; ", /* Common Lisp */
+ "cmd", ":: ", /* command (OS/2) */
+ "cmf", "c ", /* CM Fortran */
+ "cs", " * ", /* C* */
+ "el", "; ", /* Emacs Lisp */
+ "f", "c ", /* Fortran */
+ "for", "c ",
+ "h", " * ", /* C-header */
+ "hpp", "// ", /* C++ header */
+ "hxx", "// ",
+ "l", " * ", /* lex NOTE: conflict between lex and franzlisp */
+ "lisp",";;; ", /* Lucid Lisp */
+ "lsp", ";; ", /* Microsoft Lisp */
+ "mac", ";; ", /* macro (DEC-10, MS-DOS, PDP-11, VMS, etc) */
+ "me", ".\\\" ",/* me-macros t/nroff*/
+ "ml", "; ", /* mocklisp */
+ "mm", ".\\\" ",/* mm-macros t/nroff*/
+ "ms", ".\\\" ",/* ms-macros t/nroff*/
+ "p", " * ", /* Pascal */
+ "pas", " * ",
+ "pl", "% ", /* Prolog */
+ "tex", "% ", /* TeX */
+ "y", " * ", /* yacc */
+ nil, "# " /* default for unknown suffix; must always be last */
+};
+
+#if has_mktemp
+    static char const *
+tmp()
+/* Yield the name of the directory for temporary files.
+ * The answer is computed on the first call and remembered afterwards.
+ */
+{
+    static char const *s;
+
+    if (s)
+        return s;
+    s = cgetenv("TMPDIR");      /* Unix tradition */
+    if (!s)
+        s = cgetenv("TMP");     /* DOS tradition */
+    if (!s)
+        s = cgetenv("TEMP");    /* another DOS tradition */
+    if (!s)
+        s = TMPDIR;             /* compiled-in default */
+    return s;
+}
+#endif
+
+ char const *
+maketemp(n)
+ int n;
+/* Yield the temporary file name for slot N, creating it on first use.
+ * N indexes tfnames[] (and tfnamebuf[] when !has_mktemp), so it must be
+ * at least 0 and less than TEMPNAMES.
+ * The name is stored into the Nth slot of tfnames.
+ * Because of storage in tfnames, tempunlink() can unlink the file later.
+ * Returns a pointer to the filename; a slot that is already filled is
+ * returned unchanged.  Failure to generate a name is reported via faterror().
+ */
+{
+ char *p;
+ char const *t = tfnames[n];
+
+ /* Reuse the name made by an earlier call for this slot. */
+ if (t)
+ return t;
+
+ catchints();
+ {
+# if has_mktemp
+ char const *tp = tmp();
+ /* 10 = SLASH + 'T' + slot digit + "XXXXXX" + terminating NUL */
+ p = testalloc(strlen(tp) + 10);
+ VOID sprintf(p, "%s%cT%cXXXXXX", tp, SLASH, '0'+n);
+ if (!mktemp(p) || !*p)
+ faterror("can't make temporary file name `%s%cT%cXXXXXX'",
+ tp, SLASH, '0'+n
+ );
+# else
+ /* No mktemp(): let tmpnam() fill a static per-slot buffer instead. */
+ static char tfnamebuf[TEMPNAMES][L_tmpnam];
+ p = tfnamebuf[n];
+ if (!tmpnam(p) || !*p)
+# ifdef P_tmpdir
+ faterror("can't make temporary file name `%s...'",P_tmpdir);
+# else
+ faterror("can't make temporary file name");
+# endif
+# endif
+ }
+
+ /* Record the name so that tempunlink() can remove the file. */
+ tfnames[n] = p;
+ return p;
+}
+
+    void
+tempunlink()
+/* Unlink every file recorded by maketemp() and forget its name.
+ * May be invoked by a signal handler.
+ */
+{
+    register int slot;
+    register char *name;
+
+    for (slot = TEMPNAMES - 1; 0 <= slot; slot--) {
+        name = tfnames[slot];
+        if (!name)
+            continue;
+        VOID unlink(name);
+        /*
+         * The name is deliberately not passed to tfree():
+         * freeing might dump core if we're handling a signal,
+         * and we're about to exit anyway.
+         */
+        tfnames[slot] = 0;
+    }
+}
+
+
+    static char const *
+bindex(sp,ch)
+    register char const *sp;
+    int ch;
+/* Find the last occurrence of the character CH in the string SP and
+ * yield a pointer to the character just beyond it.
+ * If CH does not occur in SP, yield SP itself.
+ */
+{
+    register char const wanted = ch;
+    register char const *after = sp;
+    register char const *scan;
+
+    for (scan = sp; *scan; scan++)
+        if (*scan == wanted)
+            after = scan + 1;   /* remember the position past this match */
+    return after;
+}
+
+
+
+    static int
+suffix_matches(suffix, pattern)
+    register char const *suffix, *pattern;
+/* Yield true if SUFFIX matches PATTERN.
+ * A nil PATTERN matches everything.  Otherwise the strings must agree
+ * character by character, except that where PATTERN has a character that
+ * ctab classifies as Letter, SUFFIX may hold the character whose code
+ * differs from it by 'A'-'a'.
+ */
+{
+    register int c, diff;
+
+    if (!pattern)
+        return true;
+    for (;; suffix++, pattern++) {
+        c = *pattern;
+        diff = *suffix - c;
+        if (diff == 0) {
+            /* Same character; both strings ending here means a match. */
+            if (!c)
+                return true;
+        } else if (diff != 'A'-'a' || ctab[c] != Letter) {
+            /* Neither identical nor an accepted case variant. */
+            return false;
+        }
+    }
+}
+
+
+    static void
+InitAdmin()
+/* Initialize the admin node for a new RCS file, guessing the comment
+ * leader from the suffix of workfilename.
+ */
+{
+    register char const *ext;
+    register struct compair const *entry;
+
+    Head = nil;
+    Dbranch = nil;
+    AccessList = nil;
+    Symbols = nil;
+    Locks = nil;
+    StrictLocks = STRICT_LOCKING;
+
+    /* The suffix is whatever follows the last '.' of the working file name. */
+    ext = bindex(workfilename, '.');
+    if (ext == workfilename)
+        ext = "";   /* no '.' at all: empty suffix, which gets the default */
+
+    /* The last table entry has a nil suffix, so this search always ends. */
+    entry = comtable;
+    while (!suffix_matches(ext, entry->suffix))
+        entry++;
+    Comment.string = entry->comlead;
+    Comment.size = strlen(entry->comlead);
+    Lexinit(); /* note: if !finptr, reads nothing; only initializes */
+}
+
+
+/* 'cpp' does not like this line. It seems to be the leading '_' in the */
+/* second occurrence of '_POSIX_NO_TRUNC'. It evaluates correctly with */
+/* just the first term so let's just do that for now. */
+/*#if defined(_POSIX_NO_TRUNC) && _POSIX_NO_TRUNC!=-1*/
+#if defined(_POSIX_NO_TRUNC)
+# define LONG_NAMES_MAY_BE_SILENTLY_TRUNCATED 0
+#else
+# define LONG_NAMES_MAY_BE_SILENTLY_TRUNCATED 1
+#endif
+
+#if LONG_NAMES_MAY_BE_SILENTLY_TRUNCATED
+#ifdef NAME_MAX
+# define filenametoolong(path) (NAME_MAX < strlen(basename(path)))
+#else
+    static int
+filenametoolong(path)
+    char *path;
+/* Yield true if the last file name in PATH is longer than the limit that
+ * pathconf() reports for the directory holding it.
+ * PATH is modified temporarily (its last separator is overwritten with NUL
+ * while the directory is queried) but is restored before returning.
+ */
+{
+    static unsigned long dot_namemax;
+
+    register size_t namelen;
+    register char *base;
+    register unsigned long namemax;
+
+    base = path + dirlen(path);
+    namelen = strlen(base);
+    if (namelen <= _POSIX_NAME_MAX) /* fast check for shorties */
+        return false;
+    if (base != path) {
+        /* Remember the real separator; it need not be SLASH. */
+        char const sep = *--base;
+        if (base == path) {
+            /*
+             * The name lives directly in the root directory.
+             * Truncating PATH here would query "", which fails,
+             * so ask about the root itself instead.
+             */
+            char root[2];
+            root[0] = sep;
+            root[1] = 0;
+            namemax = pathconf(root, _PC_NAME_MAX);
+        } else {
+            *base = 0;
+            namemax = pathconf(path, _PC_NAME_MAX);
+            *base = sep;    /* put back exactly what was there */
+        }
+    } else {
+        /* Cache the results for the working directory, for speed. */
+        if (!dot_namemax)
+            dot_namemax = pathconf(".", _PC_NAME_MAX);
+        namemax = dot_namemax;
+    }
+    /* If pathconf() yielded -1, namemax is now ULONG_MAX. */
+    return namemax<namelen;
+}
+#endif
+#endif
+
+    void
+bufalloc(b, size)
+    register struct buf *b;
+    size_t size;
+/* Make sure *B can hold at least SIZE bytes.
+ * Any old contents of *B may be discarded; the new contents are undefined.
+ */
+{
+    size_t cap;
+
+    if (size <= b->size)
+        return;     /* already big enough */
+
+    cap = b->size;
+    if (cap)
+        tfree(b->string);
+    else
+        cap = sizeof(malloc_type);  /* starting size for an empty buffer */
+    /* Grow by doubling until the request fits. */
+    for (; cap < size; cap <<= 1)
+        ;
+    b->size = cap;
+    b->string = tnalloc(char, b->size);
+}
+
+    void
+bufrealloc(b, size)
+    register struct buf *b;
+    size_t size;
+/* Like bufalloc(), but any old contents of *B are kept. */
+{
+    if (size <= b->size)
+        return;     /* nothing to do */
+
+    if (!b->size) {
+        /* Nothing to preserve yet; a plain allocation suffices. */
+        bufalloc(b, size);
+        return;
+    }
+
+    /* Double at least once, then until SIZE fits. */
+    do
+        b->size <<= 1;
+    while (b->size < size);
+    b->string = trealloc(char, b->string, b->size);
+}
+
+    void
+bufautoend(b)
+    struct buf *b;
+/* Release the storage of an auto buffer at block exit.
+ * A buffer whose size is zero owns nothing and is left alone.
+ */
+{
+    if (!b->size)
+        return;
+    tfree(b->string);
+}
+
+    struct cbuf
+bufremember(b, s)
+    struct buf *b;
+    size_t s;
+/*
+ * Give up the buffer B, of which S bytes are in use.
+ * Yield a cbuf with identical contents.
+ * The cbuf will be reclaimed when this input file is finished.
+ */
+{
+    struct cbuf cb;
+
+    cb.size = s;
+    if (s == 0) {
+        /* Nothing to keep: free the buffer and hand back an empty string. */
+        bufautoend(b); /* not really auto */
+        cb.string = "";
+    } else {
+        /* Shrink to the used size and register the result for later release. */
+        cb.string = fremember(trealloc(char, b->string, s));
+    }
+    return cb;
+}
+
+    char *
+bufenlarge(b, alim)
+    register struct buf *b;
+    char const **alim;
+/* Grow *B while keeping its contents.
+ * Store the new end of the buffer into *ALIM and yield the address that
+ * corresponds to the old end within the (possibly moved) buffer.
+ */
+{
+    size_t oldsize;
+
+    oldsize = b->size;
+    bufrealloc(b, oldsize + 1);     /* asks for more than we have, so it must grow */
+    *alim = b->string + b->size;
+    return b->string + oldsize;
+}
+
+    void
+bufscat(b, s)
+    struct buf *b;
+    char const *s;
+/* Append the string S to the string held in B, growing B as needed. */
+{
+    size_t used = 0;
+
+    /* A buffer with no storage yet counts as an empty string. */
+    if (b->string)
+        used = strlen(b->string);
+    bufrealloc(b, used+strlen(s)+1);
+    VOID strcpy(b->string + used, s);
+}
+
+    void
+bufscpy(b, s)
+    struct buf *b;
+    char const *s;
+/* Replace the contents of B with a copy of the string S. */
+{
+    size_t need = strlen(s) + 1;    /* room for S and its terminating NUL */
+
+    bufalloc(b, need);
+    VOID strcpy(b->string, s);
+}
+
+
+    char const *
+basename(p)
+    char const *p;
+/* Yield the address of the base filename within the pathname P,
+ * i.e. the text after the last separator, or P itself if there is none.
+ */
+{
+    register char const *scan = p;
+    register char const *base = p;
+    register int c;
+
+    while ((c = *scan++) != 0)
+        switch (c) {
+            case SLASHes:
+                base = scan;    /* base starts after the latest separator */
+                break;
+            default:
+                break;
+        }
+    return base;
+}
+
+    size_t
+dirlen(p)
+    char const *p;
+/* Yield the length of the directory part of P, counting its trailing
+ * separator; this is 0 when P has no directory part.
+ */
+{
+    char const *base = basename(p);
+
+    return base - p;
+}
+
+
+    static size_t
+suffixlen(x)
+    char const *x;
+/* Yield the length of the RCS filename suffix that starts at X.
+ * The suffix ends at the first separator character or at the NUL.
+ */
+{
+    register char const *end;
+
+    for (end = x; ; end++)
+        switch (*end) {
+            case 0: case SLASHes:
+                return end - x;
+
+            default:
+                break;      /* ordinary character: keep scanning */
+        }
+}
+
+ char const *
+rcssuffix(name)
+ char const *name;
+/* Yield the suffix of NAME if it is an RCS filename, 0 otherwise.
+ * Each entry of the global `suffixes' list is tried in turn.
+ * For a nonempty entry, NAME qualifies if it ends with that entry, and the
+ * result points at the matching tail of NAME.
+ * For an empty entry, NAME qualifies if the last component of its directory
+ * part is rcsdir; the result then points at NAME's terminating NUL.
+ */
+{
+ char const *x, *p, *nz;
+ size_t dl, nl, xl;
+
+ nl = strlen(name);
+ nz = name + nl; /* address of NAME's terminating NUL */
+ x = suffixes;
+ do {
+ if ((xl = suffixlen(x))) {
+ /* Nonempty suffix: compare it with the last XL bytes of NAME. */
+ if (xl <= nl && memcmp(p = nz-xl, x, xl) == 0)
+ return p;
+ } else {
+ /* Empty suffix: look for "RCS" + separator just before the base name. */
+ dl = dirlen(name);
+ if (
+ rcsdirlen < dl &&
+ /* DL becomes the offset at which "RCS" would have to start. */
+ !memcmp(p = name+(dl-=rcsdirlen+1), rcsdir, rcsdirlen) &&
+ /* "RCS" must begin NAME or directly follow a separator. */
+ (!dl || isSLASH(*--p))
+ )
+ return nz;
+ }
+ x += xl;
+ /* Step over the separator; stop once the list's NUL has been consumed. */
+ } while (*x++);
+ return 0;
+}
+
+ /*ARGSUSED*/ RILE *
+rcsreadopen(RCSname, status, mustread)
+ struct buf *RCSname;
+ struct stat *status;
+ int mustread;
+/* Open the file named by RCSNAME->string for reading via Iopen()
+ * and yield the resulting RILE*.
+ * STATUS is handed on to Iopen(), which is expected to fill it in on success.
+ * MUSTREAD is not used; it is present only so that the signature matches
+ * the RCSOPEN parameter of pairfilenames().
+ * Pass this routine to pairfilenames() for read-only access to the file. */
+{
+ return Iopen(RCSname->string, FOPEN_R, status);
+}
+
+ static int
+finopen(rcsopen, mustread)
+ RILE *(*rcsopen)P((struct buf*,struct stat*,int));
+ int mustread;
+/*
+ * Use RCSOPEN to open an RCS file; MUSTREAD is set if the file must be read.
+ * RCSb holds the name of the file to try.
+ * Set finptr to the result of the open (nil on failure).
+ * Set RCSbuf to the best RCS name found so far, and RCSerrno to its errno.
+ * Yield true if the open succeeded or if it failed for an unusual reason,
+ * i.e. with errno other than ENOENT; yield false if the file simply
+ * does not exist.
+ */
+{
+ int interesting, preferold;
+
+ /*
+ * We prefer an old name to that of a nonexisting new RCS file,
+ * unless we tried locking the old name and failed.
+ * This must be computed before RCSOPEN is called.
+ */
+ preferold = RCSbuf.string[0] && (mustread||frewrite);
+
+ finptr = (*rcsopen)(&RCSb, &RCSstat, mustread);
+ interesting = finptr || errno!=ENOENT;
+ if (interesting || !preferold) {
+ /* Use the new name. */
+ /* Save errno before bufscpy() gets a chance to disturb it. */
+ RCSerrno = errno;
+ bufscpy(&RCSbuf, RCSb.string);
+ }
+ return interesting;
+}
+
+    static int
+fin2open(d, dlen, base, baselen, x, xlen, rcsopen, mustread)
+    char const *d, *base, *x;
+    size_t dlen, baselen, xlen;
+    RILE *(*rcsopen)P((struct buf*,struct stat*,int));
+    int mustread;
+/*
+ * D is a directory name with length DLEN (including trailing slash).
+ * BASE is a filename with length BASELEN.
+ * X is an RCS filename suffix with length XLEN.
+ * Build candidate names in RCSb and try them with finopen():
+ * first dRCS/basex, then, if that fails and X is nonempty, dbasex.
+ * finopen() keeps RCSbuf and RCSerrno up to date with the best name so far.
+ * Yield true if a file was opened or if an unusual failure occurred.
+ */
+{
+    register char *out;
+
+    bufalloc(&RCSb, dlen + rcsdirlen + 1 + baselen + xlen + 1);
+
+    /* First candidate: dRCS/basex. */
+    out = RCSb.string;
+    VOID memcpy(out, d, dlen);
+    out += dlen;
+    VOID memcpy(out, rcsdir, rcsdirlen);
+    out += rcsdirlen;
+    *out++ = SLASH;
+    VOID memcpy(out, base, baselen);
+    out += baselen;
+    VOID memcpy(out, x, xlen);
+    out[xlen] = 0;
+
+    /* With an empty suffix there is no second candidate. */
+    if (!xlen)
+        return finopen(rcsopen, mustread);
+
+    if (finopen(rcsopen, mustread))
+        return true;
+
+    /* Second candidate: dbasex. */
+    /* Rebuild from the start, because finopen() may have changed RCSb. */
+    out = RCSb.string;
+    VOID memcpy(out, d, dlen);
+    out += dlen;
+    VOID memcpy(out, base, baselen);
+    out += baselen;
+    VOID memcpy(out, x, xlen);
+    out[xlen] = 0;
+    return finopen(rcsopen, mustread);
+}
+
+ int
+pairfilenames(argc, argv, rcsopen, mustread, quiet)
+ int argc;
+ char **argv;
+ RILE *(*rcsopen)P((struct buf*,struct stat*,int));
+ int mustread, quiet;
+/* Function: Pairs the filenames pointed to by argv; argc indicates
+ * how many there are.
+ * Places a pointer to the RCS filename into RCSfilename,
+ * and a pointer to the name of the working file into workfilename.
+ * If both the workfilename and the RCS filename are given, and workstdout
+ * is set, a warning is printed.
+ * When argv[0] and argv[1] form a pair, argv[1] is set to 0 so that a later
+ * call on it returns 0 immediately.
+ *
+ * If the RCS file exists, places its status into RCSstat.
+ *
+ * If the RCS file exists, it is RCSOPENed for reading, the file pointer
+ * is placed into finptr, and the admin-node is read in; returns 1.
+ * If the RCS file does not exist and MUSTREAD,
+ * print an error unless QUIET and return 0.
+ * Otherwise, initialize the admin node and return -1.
+ *
+ * 0 is returned on all errors, e.g. files that are not regular files.
+ */
+{
+ static struct buf tempbuf;
+
+ register char *p, *arg, *RCS1;
+ char const *purefname, *pureRCSname, *x;
+ int paired;
+ size_t arglen, dlen, baselen, xlen;
+
+ if (!(arg = *argv)) return 0; /* already paired filename */
+ if (*arg == '-') {
+ error("%s option is ignored after file names", arg);
+ return 0;
+ }
+
+ purefname = basename(arg);
+
+ /* Scan ARG for characters that are not allowed in file names here. */
+ /* On normal exit from the loop, P points just past ARG's terminating NUL. */
+ p = arg;
+ for (;;) {
+ switch (*p++) {
+ /* Beware characters that cause havoc with ci -k. */
+ case KDELIM:
+ error("RCS file name `%s' contains %c", arg, KDELIM);
+ return 0;
+ case ' ': case '\n': case '\t':
+ error("RCS file name `%s' contains white space", arg);
+ return 0;
+ default:
+ continue;
+ case 0:
+ break;
+ }
+ break;
+ }
+
+ paired = false;
+
+ /* first check suffix to see whether it is an RCS file or not */
+ if ((x = rcssuffix(arg)))
+ {
+ /* RCS file name given*/
+ RCS1 = arg;
+ pureRCSname = purefname;
+ baselen = x - purefname;
+ /* argv[1] pairs with ARG if it is not an RCS name itself and its */
+ /* last component equals the base name of ARG without the suffix. */
+ if (
+ 1 < argc &&
+ !rcssuffix(workfilename = p = argv[1]) &&
+ baselen <= (arglen = strlen(p)) &&
+ ((p+=arglen-baselen) == workfilename || isSLASH(p[-1])) &&
+ memcmp(purefname, p, baselen) == 0
+ ) {
+ argv[1] = 0;
+ paired = true;
+ } else {
+ /* No working file given: derive its name by cutting the suffix off. */
+ bufscpy(&tempbuf, purefname);
+ workfilename = p = tempbuf.string;
+ p[baselen] = 0;
+ }
+ } else {
+ /* working file given; now try to find RCS file */
+ workfilename = arg;
+ /* P is one past the NUL of ARG, hence the extra -1. */
+ baselen = p - purefname - 1;
+ /* derive RCS file name*/
+ if (
+ 1 < argc &&
+ (x = rcssuffix(RCS1 = argv[1])) &&
+ baselen <= x - RCS1 &&
+ ((pureRCSname=x-baselen)==RCS1 || isSLASH(pureRCSname[-1])) &&
+ memcmp(purefname, pureRCSname, baselen) == 0
+ ) {
+ argv[1] = 0;
+ paired = true;
+ } else
+ pureRCSname = RCS1 = 0;
+ }
+ /* now we have a (tentative) RCS filename in RCS1 and workfilename */
+ /* Second, try to find the right RCS file */
+ if (pureRCSname!=RCS1) {
+ /* a path for RCSfile is given; single RCS file to look for */
+ bufscpy(&RCSbuf, RCS1);
+ finptr = (*rcsopen)(&RCSbuf, &RCSstat, mustread);
+ RCSerrno = errno;
+ } else {
+ /* An empty RCSbuf tells finopen() that no name has been found yet. */
+ bufscpy(&RCSbuf, "");
+ if (RCS1)
+ /* RCS file name was given without path. */
+ VOID fin2open(arg, (size_t)0, pureRCSname, baselen,
+ x, strlen(x), rcsopen, mustread
+ );
+ else {
+ /* No RCS file name was given. */
+ /* Try each suffix in turn. */
+ dlen = purefname-arg;
+ x = suffixes;
+ while (! fin2open(arg, dlen, purefname, baselen,
+ x, xlen=suffixlen(x), rcsopen, mustread
+ )) {
+ x += xlen;
+ if (!*x++)
+ break;
+ }
+ }
+ }
+ RCSfilename = p = RCSbuf.string;
+ if (finptr) {
+ if (!S_ISREG(RCSstat.st_mode)) {
+ error("%s isn't a regular file -- ignored", p);
+ return 0;
+ }
+ Lexinit(); getadmin();
+ } else {
+ /* A missing file is acceptable only if we need not read it */
+ /* and frewrite is set. */
+ if (RCSerrno!=ENOENT || mustread || !frewrite) {
+ if (RCSerrno == EEXIST)
+ error("RCS file %s is in use", p);
+ else if (!quiet || RCSerrno!=ENOENT)
+ enerror(RCSerrno, p);
+ return 0;
+ }
+ InitAdmin();
+ };
+# if LONG_NAMES_MAY_BE_SILENTLY_TRUNCATED
+ if (filenametoolong(p)) {
+ error("RCS file name %s is too long", p);
+ return 0;
+ }
+# ifndef NAME_MAX
+ /*
+ * Check workfilename too, even though it cannot be longer,
+ * because it may reside on a different filesystem.
+ */
+ if (filenametoolong(workfilename)) {
+ error("working file name %s is too long", workfilename);
+ return 0;
+ }
+# endif
+# endif
+
+ if (paired && workstdout)
+ warn("Option -p is set; ignoring output file %s",workfilename);
+
+ prevkeys = false;
+ return finptr ? 1 : -1;
+}
+
+
+ char const *
+getfullRCSname()
+/* Function: returns a pointer to the full path name of the RCS file.
+ * If RCSfilename is already a root path, it is returned as is.
+ * Otherwise the result is built in a static buffer that is reused
+ * by the next call.
+ * Gets the working directory's name at most once.
+ * Removes leading "../" and "./".
+ */
+{
+ static char const *wdptr; /* cached working directory, nil until first needed */
+ static struct buf rcsbuf, wdbuf;
+ static size_t pathlength; /* length of the cached working directory name */
+
+ register char const *realname;
+ register size_t parentdirlength;
+ register unsigned dotdotcounter;
+ register char *d;
+ register char const *wd;
+
+ if (ROOTPATH(RCSfilename)) {
+ return(RCSfilename);
+ } else {
+ if (!(wd = wdptr)) {
+ /* Get working directory for the first time. */
+ /* Prefer $PWD; ask the system only if it is not set. */
+ if (!(d = cgetenv("PWD"))) {
+ bufalloc(&wdbuf, SIZEABLE_PATH + 1);
+# if !has_getcwd && has_getwd
+ d = getwd(wdbuf.string);
+# else
+ /* Retry with a doubled buffer for as long as getcwd() reports ERANGE. */
+ while (
+ !(d = getcwd(wdbuf.string, wdbuf.size))
+ && errno==ERANGE
+ )
+ bufalloc(&wdbuf, wdbuf.size<<1);
+# endif
+ if (!d)
+ efaterror("working directory");
+ }
+ /* Strip trailing separators from the directory name. */
+ parentdirlength = strlen(d);
+ while (parentdirlength && isSLASH(d[parentdirlength-1])) {
+ d[--parentdirlength] = 0;
+ /* Check needed because some getwd implementations */
+ /* generate "/" for the root. */
+ }
+ wdptr = wd = d;
+ pathlength = parentdirlength;
+ }
+ /*the following must be redone since RCSfilename may change*/
+ /* Find how many `../'s to remove from RCSfilename. */
+ dotdotcounter =0;
+ realname = RCSfilename;
+ while (realname[0]=='.') {
+ if (isSLASH(realname[1])) {
+ /* drop leading ./ */
+ realname += 2;
+ } else if (realname[1]=='.' && isSLASH(realname[2])) {
+ /* drop leading ../ and remember */
+ dotdotcounter++;
+ realname += 3;
+ } else
+ break;
+ }
+ /* Now remove dotdotcounter trailing directories from wd. */
+ parentdirlength = pathlength;
+ while (dotdotcounter && parentdirlength) {
+ /* move pointer backwards over trailing directory */
+ if (isSLASH(wd[--parentdirlength])) {
+ dotdotcounter--;
+ }
+ }
+ /* build full path name */
+ /* +2 leaves room for the joining SLASH and the terminating NUL. */
+ bufalloc(&rcsbuf, parentdirlength+strlen(realname)+2);
+ d = rcsbuf.string;
+ VOID memcpy(d, wd, parentdirlength);
+ d += parentdirlength;
+ *d++ = SLASH;
+ VOID strcpy(d, realname);
+ return rcsbuf.string;
+ }
+}
+
+#ifndef isSLASH
+    int
+isSLASH(c)
+    int c;
+/* Yield true if C is one of the pathname separator characters (SLASHes). */
+{
+    int answer = false;
+
+    switch (c) {
+        case SLASHes:
+            answer = true;
+            break;
+        default:
+            break;
+    }
+    return answer;
+}
+#endif
+
+
+#if !has_getcwd && !has_getwd
+
+ char *
+getcwd(path, size)
+ char *path;
+ size_t size;
+/* Substitute for getcwd(), compiled only when neither getcwd() nor getwd()
+ * is available.
+ * Runs the `pwd' program ("/bin/pwd", else "/usr/bin/pwd") in a child process
+ * and copies its output, minus the trailing newline, into PATH, which has
+ * room for SIZE bytes.
+ * Yields PATH on success; on failure yields 0, with errno set to EINVAL if
+ * SIZE is 0, to ERANGE if the output does not fit, to EACCES if the child
+ * failed or printed nothing usable, and otherwise to the errno of the
+ * operation that failed.
+ */
+{
+ static char const usrbinpwd[] = "/usr/bin/pwd";
+ /* Skipping the leading "/usr" yields "/bin/pwd". */
+# define binpwd (usrbinpwd+4)
+
+ register FILE *fp;
+ register int c;
+ register char *p, *lim;
+ int closeerrno, closeerror, e, fd[2], readerror, toolong, wstatus;
+ pid_t child;
+# if !has_waitpid
+ pid_t w;
+# endif
+
+ if (!size) {
+ errno = EINVAL;
+ return 0;
+ }
+ if (pipe(fd) != 0)
+ return 0;
+ if (!(child = vfork())) {
+ /* Child: make the write end of the pipe its standard output, then exec pwd. */
+ if (
+ close(fd[0]) == 0 &&
+ (fd[1] == STDOUT_FILENO ||
+# ifdef F_DUPFD
+ (VOID close(STDOUT_FILENO),
+ fcntl(fd[1], F_DUPFD, STDOUT_FILENO))
+# else
+ dup2(fd[1], STDOUT_FILENO)
+# endif
+ == STDOUT_FILENO &&
+ close(fd[1]) == 0
+ )
+ ) {
+ VOID close(STDERR_FILENO);
+ VOID execl(binpwd, binpwd, (char *)0);
+ /* Reached only if the first exec failed. */
+ VOID execl(usrbinpwd, usrbinpwd, (char *)0);
+ }
+ _exit(EXIT_FAILURE);
+ }
+ /* Parent: save errno now, in case vfork() failed. */
+ e = errno;
+ closeerror = close(fd[1]);
+ closeerrno = errno;
+ fp = 0;
+ readerror = toolong = wstatus = 0;
+ p = path;
+ if (0 <= child) {
+ fp = fdopen(fd[0], "r");
+ e = errno;
+ if (fp) {
+ lim = p + size;
+ /* Copy the child's output; C is stored only after the bounds check passes. */
+ for (p = path; ; *p++ = c) {
+ if ((c=getc(fp)) < 0) {
+ if (feof(fp))
+ break;
+ if (ferror(fp)) {
+ readerror = 1;
+ e = errno;
+ break;
+ }
+ }
+ if (p == lim) {
+ toolong = 1;
+ break;
+ }
+ }
+ }
+ /* Reap the child; any wait failure counts as a failed child. */
+# if has_waitpid
+ if (waitpid(child, &wstatus, 0) < 0)
+ wstatus = 1;
+# else
+ do {
+ if ((w = wait(&wstatus)) < 0) {
+ wstatus = 1;
+ break;
+ }
+ } while (w != child);
+# endif
+ }
+ if (!fp) {
+ /* Either vfork() or fdopen() failed; E holds the matching errno. */
+ VOID close(fd[0]);
+ errno = e;
+ return 0;
+ }
+ if (fclose(fp) != 0)
+ return 0;
+ if (readerror) {
+ errno = e;
+ return 0;
+ }
+ if (closeerror) {
+ errno = closeerrno;
+ return 0;
+ }
+ if (toolong) {
+ errno = ERANGE;
+ return 0;
+ }
+ /* The output must be nonempty and end in a newline, which is dropped below. */
+ if (wstatus || p == path || *--p != '\n') {
+ errno = EACCES;
+ return 0;
+ }
+ *p = '\0';
+ return path;
+}
+#endif
+
+
+#ifdef PAIRTEST
+/* test program for pairfilenames() and getfullRCSname() */
+
+char const cmdid[] = "pair";
+
+main(argc, argv)
+int argc; char *argv[];
+/* Test driver for pairfilenames() and getfullRCSname().
+ * Leading options: -p sets workstdout, -i sets initflag, -q sets quietflag.
+ * Each remaining argument is handed to pairfilenames() and the outcome
+ * is reported.
+ */
+{
+    int result;
+    int initflag;
+    quietflag = initflag = false;
+
+    /* Consume the leading options. */
+    for (--argc, ++argv; argc>=1 && (*argv)[0] == '-'; --argc, ++argv) {
+        int opt = (*argv)[1];
+
+        if (opt == 'p')
+            workstdout = stdout;
+        else if (opt == 'i')
+            initflag=true;
+        else if (opt == 'q')
+            quietflag=true;
+        else
+            error("unknown option: %s", *argv);
+    }
+
+    /* Try to pair every remaining argument (the body runs at least once). */
+    do {
+        RCSfilename=workfilename=nil;
+        result = pairfilenames(argc,argv,rcsreadopen,!initflag,quietflag);
+        if (result == 0)
+            continue;   /* already paired file, or an error */
+
+        diagnose("RCS file: %s; working file: %s\nFull RCS file name: %s\n",
+            RCSfilename,workfilename,getfullRCSname()
+        );
+        if (result == 1) {
+            if (initflag)
+                error("RCS file %s exists already",RCSfilename);
+            else
+                diagnose("RCS file %s exists\n",RCSfilename);
+            Ifclose(finptr);
+        } else if (result == -1) {
+            diagnose("RCS file doesn't exist\n");
+        }
+    } while (++argv, --argc>=1);
+
+}
+
+ exiting void
+exiterr()
+/* Exit routine of the PAIRTEST program: run the dirtempunlink() and
+ * tempunlink() cleanups, then terminate at once with status EXIT_FAILURE.
+ */
+{
+ dirtempunlink();
+ tempunlink();
+ _exit(EXIT_FAILURE);
+}
+#endif
diff --git a/gnu/usr.bin/rcs/lib/rcsgen.c b/gnu/usr.bin/rcs/lib/rcsgen.c
new file mode 100644
index 000000000000..9a6072ea1b27
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcsgen.c
@@ -0,0 +1,432 @@
+/*
+ * RCS revision generation
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+/* $Log: rcsgen.c,v $
+ * Revision 5.10 1991/10/07 17:32:46 eggert
+ * Fix log bugs, e.g. ci -t/dev/null when has_mmap.
+ *
+ * Revision 5.9 1991/09/10 22:15:46 eggert
+ * Fix test for redirected stdin.
+ *
+ * Revision 5.8 1991/08/19 03:13:55 eggert
+ * Add piece tables. Tune.
+ *
+ * Revision 5.7 1991/04/21 11:58:24 eggert
+ * Add MS-DOS support.
+ *
+ * Revision 5.6 1990/12/27 19:54:26 eggert
+ * Fix bug: rcs -t inserted \n, making RCS file grow.
+ *
+ * Revision 5.5 1990/12/04 05:18:45 eggert
+ * Use -I for prompts and -q for diagnostics.
+ *
+ * Revision 5.4 1990/11/01 05:03:47 eggert
+ * Add -I and new -t behavior. Permit arbitrary data in logs.
+ *
+ * Revision 5.3 1990/09/21 06:12:43 hammer
+ * made putdesc() treat stdin the same whether or not it was from a terminal
+ * by making it recognize that a single '.' was the end of the
+ * description always
+ *
+ * Revision 5.2 1990/09/04 08:02:25 eggert
+ * Fix `co -p1.1 -ko' bug. Standardize yes-or-no procedure.
+ *
+ * Revision 5.1 1990/08/29 07:14:01 eggert
+ * Clean old log messages too.
+ *
+ * Revision 5.0 1990/08/22 08:12:52 eggert
+ * Remove compile-time limits; use malloc instead.
+ * Ansify and Posixate.
+ *
+ * Revision 4.7 89/05/01 15:12:49 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.6 88/08/28 14:59:10 eggert
+ * Shrink stdio code size; allow cc -R; remove lint; isatty() -> ttystdin()
+ *
+ * Revision 4.5 87/12/18 11:43:25 narten
+ * additional lint cleanups, and a bug fix from the 4.3BSD version that
+ * keeps "ci" from sticking a '\377' into the description if you run it
+ * with a zero-length file as the description. (Guy Harris)
+ *
+ * Revision 4.4 87/10/18 10:35:10 narten
+ * Updating version numbers. Changes relative to 1.1 actually relative to
+ * 4.2
+ *
+ * Revision 1.3 87/09/24 13:59:51 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:27 jenkins
+ * Port to suns
+ *
+ * Revision 4.2 83/12/02 23:01:39 wft
+ * merged 4.1 and 3.3.1.1 (clearerr(stdin)).
+ *
+ * Revision 4.1 83/05/10 16:03:33 wft
+ * Changed putamin() to abort if trying to reread redirected stdin.
+ * Fixed getdesc() to output a prompt on initial newline.
+ *
+ * Revision 3.3.1.1 83/10/19 04:21:51 lepreau
+ * Added clearerr(stdin) for re-reading description from stdin.
+ *
+ * Revision 3.3 82/11/28 21:36:49 wft
+ * 4.2 prerelease
+ *
+ * Revision 3.3 82/11/28 21:36:49 wft
+ * Replaced ferror() followed by fclose() with ffclose().
+ * Putdesc() now suppresses the prompts if stdin
+ * is not a terminal. A pointer to the current log message is now
+ * inserted into the corresponding delta, rather than leaving it in a
+ * global variable.
+ *
+ * Revision 3.2 82/10/18 21:11:26 wft
+ * I added checks for write errors during editing, and improved
+ * the prompt on putdesc().
+ *
+ * Revision 3.1 82/10/13 15:55:09 wft
+ * corrected type of variables assigned to by getc (char --> int)
+ */
+
+
+
+
+#include "rcsbase.h"
+
+libId(genId, "$Id: rcsgen.c,v 5.10 1991/10/07 17:32:46 eggert Exp $")
+
+int interactiveflag; /* Should we act as if stdin is a tty? */
+struct buf curlogbuf; /* buffer for current log message */
+
+enum stringwork { enter, copy, edit, expand, edit_expand };
+static void scandeltatext P((struct hshentry*,enum stringwork,int));
+
+
+
+
+ char const *
+buildrevision(deltas, target, outfile, expandflag)
+ struct hshentries const *deltas;
+ struct hshentry *target;
+ FILE *outfile;
+ int expandflag;
+/* Function: Produces the text of revision target by applying, in order,
+ * the deltas on the list deltas.
+ * If outfile is set the text goes there and nil is returned; otherwise
+ * the text goes to a temporary file and resultfile (its name) is returned.
+ * Keyword expansion is performed only if expandflag is set.
+ *
+ * Algorithm: when target is the first delta on the list, its text is
+ * simply copied (or expanded).  Otherwise the first delta is entered
+ * without expansion, every delta except the last is edited into it, and
+ * the last one is edited in with keyword expansion done in the same pass
+ * if requested.
+ */
+{
+	if (deltas->first == target) {
+		/* a single revision: no editing required */
+		openfcopy(outfile);
+		scandeltatext(target, expandflag ? expand : copy, true);
+	} else {
+		/* start from the initial revision, unexpanded */
+		scandeltatext(deltas->first, enter, false);
+		/* apply every delta that still has a successor */
+		for (deltas = deltas->rest;  deltas->rest;  deltas = deltas->rest)
+			scandeltatext(deltas->first, edit, false);
+		if (expandflag || outfile) {
+			/* first, get to beginning of file */
+			finishedit((struct hshentry *)nil, outfile, false);
+		}
+		/* deltas now designates the last list element */
+		scandeltatext(deltas->first, expandflag ? edit_expand : edit, true);
+		finishedit(
+			expandflag ? deltas->first : (struct hshentry*)nil,
+			outfile, true
+		);
+	}
+	/* common tail of both cases */
+	if (outfile)
+		return 0;
+	Ozclose(&fcopy);
+	return resultfile;
+}
+
+
+
+ static void
+scandeltatext(delta, func, needlog)
+ struct hshentry * delta;
+ enum stringwork func;
+ int needlog;
+/* Function: Scans delta text nodes up to and including the one given
+ * by delta. For the one given by delta, the log message is saved into
+ * delta->log if needlog is set; func specifies how to handle the text.
+ * Assumes the initial lexeme must be read in first.
+ * Does not advance nexttok after it is finished.
+ * Calls fatserror() if eoflex() reports end of input before delta is
+ * found, or if getnum() yields no delta number.
+ */
+{
+ struct hshentry const *nextdelta;
+ struct cbuf cb;
+
+ for (;;) {
+ if (eoflex())
+ fatserror("can't find delta for revision %s", delta->num);
+ nextlex();
+ if (!(nextdelta=getnum())) {
+ fatserror("delta number corrupted");
+ }
+ getkeystring(Klog);
+ if (needlog && delta==nextdelta) {
+ cb = savestring(&curlogbuf); /* keep the log text in curlogbuf */
+ delta->log = cleanlogmsg(curlogbuf.string, cb.size);
+ } else {readstring(); /* log not wanted: read past it */
+ }
+ nextlex();
+ while (nexttok==ID && strcmp(NextString,Ktext)!=0)
+ ignorephrase(); /* phrases between log and text are ignored */
+ getkeystring(Ktext);
+
+ if (delta==nextdelta)
+ break; /* the text string of delta itself is left for func */
+ readstring(); /* skip over it */
+
+ }
+ switch (func) { /* enum stringwork selects the string handler */
+ case enter: enterstring(); break;
+ case copy: copystring(); break;
+ case expand: xpandstring(delta); break;
+ case edit: editstring((struct hshentry *)nil); break;
+ case edit_expand: editstring(delta); break;
+ }
+}
+
+ struct cbuf
+cleanlogmsg(m, s)
+ char *m;
+ size_t s; /* Function: cleans the S bytes at M in place.
+ * Spaces and tabs immediately before each newline are removed; a newline
+ * that has nothing but spaces and tabs between it and the start of M is
+ * removed too.  Finally all trailing spaces, tabs and newlines are removed.
+ * Returns a cbuf whose string is M and whose size is the cleaned length;
+ * no terminating NUL is stored by this function.
+ */
+{
+ register char *t = m; /* write position */
+ register char const *f = t; /* read position; never behind t */
+ struct cbuf r;
+ while (s) {
+ --s;
+ if ((*t++ = *f++) == '\n')
+ while (m < --t) /* back up over the newline just stored */
+ if (t[-1]!=' ' && t[-1]!='\t') {
+ *t++ = '\n'; /* put the newline right after the last non-blank */
+ break;
+ }
+ }
+ while (m < t && (t[-1]==' ' || t[-1]=='\t' || t[-1]=='\n'))
+ --t; /* trim trailing white space */
+ r.string = m;
+ r.size = t - m;
+ return r;
+}
+
+
+int ttystdin()
+/* Yield interactiveflag.  On the first call only, if interactiveflag is
+ * still zero, it is set from isatty(STDIN_FILENO); later calls just
+ * return the stored value.
+ */
+{
+	static int initialized;
+
+	if (initialized)
+		return interactiveflag;
+	if (!interactiveflag)
+		interactiveflag = isatty(STDIN_FILENO);
+	initialized = true;
+	return interactiveflag;
+}
+
+ int
+getcstdin()
+/* Read one character from stdin and return it (negative at end of file).
+ * A pending end-of-file indication is cleared first when ttystdin() is
+ * true, so that reading can resume.  When a read fails, testIerror() is
+ * applied to stdin, and on end of file with ttystdin() true a newline is
+ * written to stderr.
+ */
+{
+	register FILE *in = stdin;
+	register int ch;
+
+	if (feof(in) && ttystdin())
+		clearerr(in);
+	ch = getc(in);
+	if (0 <= ch)
+		return ch;
+	testIerror(in);
+	if (feof(in) && ttystdin())
+		afputc('\n',stderr);
+	return ch;
+}
+
+#if has_prototypes
+ int
+yesorno(int default_answer, char const *question, ...)
+#else
+ /*VARARGS2*/ int
+ yesorno(default_answer, question, va_alist)
+ int default_answer; char const *question; va_dcl
+#endif
+{ /* Function: asks a yes-or-no question.
+ * Unless quietflag is set or ttystdin() is false, the printf-style
+ * QUESTION (with its arguments) is written to stderr and one line of reply
+ * is read from stdin.  Yields true if the reply starts with 'y' or 'Y',
+ * false if it starts with 'n' or 'N', and DEFAULT_ANSWER in every other
+ * case, including when no question was asked.
+ */
+ va_list args;
+ register int c, r;
+ if (!quietflag && ttystdin()) {
+ oflush();
+ vararg_start(args, question);
+ fvfprintf(stderr, question, args);
+ va_end(args);
+ eflush();
+ r = c = getcstdin(); /* r keeps the first character of the reply */
+ while (c!='\n' && !feof(stdin))
+ c = getcstdin(); /* discard the rest of the line */
+ if (r=='y' || r=='Y')
+ return true;
+ if (r=='n' || r=='N')
+ return false;
+ }
+ return default_answer;
+}
+
+
+ void
+putdesc(textflag, textfile)
+ int textflag;
+ char *textfile;
+/* Function: puts the descriptive text into file frewrite.
+ * if finptr && !textflag, the text is copied from the old description.
+ * Otherwise, if the textfile!=nil, the text is read from that
+ * file, or from stdin, if textfile==nil.
+ * A textfile with a leading '-' is treated as a string, not a file name.
+ * If finptr, the old descriptive text is discarded.
+ * Always clears foutptr.
+ * The cleaned description is kept in a static cbuf; once it has been
+ * obtained from textfile, later calls reuse it instead of reading
+ * textfile again.
+ */
+{
+ static struct buf desc;
+ static struct cbuf desclean;
+
+ register FILE *txt;
+ register int c;
+ register FILE * frew;
+ register char *p;
+ register size_t s;
+ char const *plim;
+
+ frew = frewrite;
+ if (finptr && !textflag) {
+ /* copy old description */
+ aprintf(frew, "\n\n%s%c", Kdesc, nextc);
+ foutptr = frewrite; /* echo to frewrite while getdesc() runs */
+ getdesc(false);
+ foutptr = 0;
+ } else {
+ foutptr = 0;
+ /* get new description */
+ if (finptr) {
+ /*skip old description*/
+ getdesc(false);
+ }
+ aprintf(frew,"\n\n%s\n%c",Kdesc,SDELIM);
+ if (!textfile)
+ desclean = getsstdin(
+ "t-", "description",
+ "NOTE: This is NOT the log message!\n", &desc
+ );
+ else if (!desclean.string) { /* nothing cached from an earlier call */
+ if (*textfile == '-') {
+ p = textfile + 1; /* the text is the argument itself, minus the '-' */
+ s = strlen(p);
+ } else {
+ if (!(txt = fopen(textfile, "r")))
+ efaterror(textfile);
+ bufalloc(&desc, 1);
+ p = desc.string;
+ plim = p + desc.size;
+ for (;;) { /* read the whole file into desc, enlarging it as needed */
+ if ((c=getc(txt)) < 0) {
+ testIerror(txt);
+ if (feof(txt))
+ break;
+ }
+ if (plim <= p)
+ p = bufenlarge(&desc, &plim);
+ *p++ = c;
+ }
+ if (fclose(txt) != 0)
+ Ierror();
+ s = p - desc.string;
+ p = desc.string;
+ }
+ desclean = cleanlogmsg(p, s);
+ }
+ putstring(frew, false, desclean, true);
+ aputc('\n', frew);
+ }
+}
+
+ struct cbuf
+getsstdin(option, name, note, buf)
+ char const *option, *name, *note;
+ struct buf *buf; /* Function: reads text from stdin into BUF until end
+ * of file or until a line consisting of a single '.' is read, and returns
+ * it after cleanlogmsg().  NAME says what is being read; it appears in the
+ * prompt and in the error message.  NOTE is printed with the prompt.
+ * OPTION is used only in the error message, to name the alternative option.
+ * If ttystdin() is true, a prompt is written to stderr first and ">> " is
+ * written after each line.  Otherwise, if stdin is already at end of
+ * file, faterror() is called.
+ */
+{
+ register int c;
+ register char *p;
+ register size_t i;
+ register int tty = ttystdin();
+
+ if (tty)
+ aprintf(stderr,
+ "enter %s, terminated with single '.' or end of file:\n%s>> ",
+ name, note
+ );
+ else if (feof(stdin))
+ faterror("can't reread redirected stdin for %s; use -%s<%s>",
+ name, option, name
+ );
+
+ for (
+ i = 0, p = 0;
+ c = getcstdin(), !feof(stdin);
+ bufrealloc(buf, i+1), p = buf->string, p[i++] = c /* c is stored only after the body has examined it */
+ )
+ if (c == '\n')
+ if (i && p[i-1]=='.' && (i==1 || p[i-2]=='\n')) { /* the line just ended is exactly "." */
+ /* Remove trailing '.'. */
+ --i;
+ break;
+ } else if (tty)
+ aputs(">> ", stderr);
+ return cleanlogmsg(p, i); /* p is still 0 (and i is 0) if nothing was stored */
+}
diff --git a/gnu/usr.bin/rcs/lib/rcskeep.c b/gnu/usr.bin/rcs/lib/rcskeep.c
new file mode 100644
index 000000000000..1a0c78f25cf5
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcskeep.c
@@ -0,0 +1,422 @@
+/*
+ * RCS keyword extraction
+ */
+/*****************************************************************************
+ * main routine: getoldkeys()
+ * Testprogram: define KEEPTEST
+ *****************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+/* $Log: rcskeep.c,v $
+ * Revision 5.4 1991/08/19 03:13:55 eggert
+ * Tune.
+ *
+ * Revision 5.3 1991/04/21 11:58:25 eggert
+ * Shorten names to keep them distinct on shortname hosts.
+ *
+ * Revision 5.2 1990/10/04 06:30:20 eggert
+ * Parse time zone offsets; future RCS versions may output them.
+ *
+ * Revision 5.1 1990/09/20 02:38:56 eggert
+ * ci -k now checks dates more thoroughly.
+ *
+ * Revision 5.0 1990/08/22 08:12:53 eggert
+ * Retrieve old log message if there is one.
+ * Don't require final newline.
+ * Remove compile-time limits; use malloc instead. Tune.
+ * Permit dates past 1999/12/31. Ansify and Posixate.
+ *
+ * Revision 4.6 89/05/01 15:12:56 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.5 88/08/09 19:13:03 eggert
+ * Remove lint and speed up by making FILE *fp local, not global.
+ *
+ * Revision 4.4 87/12/18 11:44:21 narten
+ * more lint cleanups (Guy Harris)
+ *
+ * Revision 4.3 87/10/18 10:35:50 narten
+ * Updating version numbers. Changes relative to 1.1 actually relative
+ * to 4.1
+ *
+ * Revision 1.3 87/09/24 14:00:00 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:29 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/05/10 16:26:44 wft
+ * Added new markers Id and RCSfile; extraction added.
+ * Marker matching with trymatch().
+ *
+ * Revision 3.2 82/12/24 12:08:26 wft
+ * added missing #endif.
+ *
+ * Revision 3.1 82/12/04 13:22:41 wft
+ * Initial revision.
+ *
+ */
+
+/*
+#define KEEPTEST
+*/
+/* Testprogram; prints out the keyword values found. */
+
+#include "rcsbase.h"
+
+libId(keepId, "$Id: rcskeep.c,v 5.4 1991/08/19 03:13:55 eggert Exp $")
+
+static int checknum P((char const*,int));
+static int getval P((RILE*,struct buf*,int));
+static int get0val P((int,RILE*,struct buf*,int));
+static int keepdate P((RILE*));
+static int keepid P((int,RILE*,struct buf*));
+static int keeprev P((RILE*));
+
+int prevkeys;
+struct buf prevauthor, prevdate, prevrev, prevstate;
+
+ int
+getoldkeys(fp)
+ register RILE *fp;
+/* Function: Tries to read keyword values for author, date,
+ * revision number, and state out of the file fp.
+ * If FP is nil, the file named by workfilename is opened, used instead
+ * of FP, and closed again before returning, on failure as well as on
+ * success.
+ * The results are placed into
+ * prevauthor, prevdate, prevrev, prevstate.
+ * Aborts immediately if it finds an error and returns false.
+ * If it returns true, it doesn't mean that any of the
+ * values were found; instead, check to see whether the corresponding arrays
+ * contain the empty string.
+ * Once a call has succeeded prevkeys is set, and later calls return true
+ * at once without reading anything.
+ */
+{
+	register int c;
+	char keyword[keylength+1];
+	register char * tp;
+	int needs_closing;
+
+	if (prevkeys)
+		return true;
+
+	needs_closing = false;
+	if (!fp) {
+		if (!(fp = Iopen(workfilename, FOPEN_R_WORK, (struct stat*)0))) {
+			eerror(workfilename);
+			return false;
+		}
+		needs_closing = true;
+	}
+
+	/* initialize to empty */
+	bufscpy(&prevauthor, "");
+	bufscpy(&prevdate, "");
+	bufscpy(&prevrev, "");
+	bufscpy(&prevstate, "");
+
+	c = '\0'; /* anything but KDELIM */
+	for (;;) {
+	    if ( c==KDELIM) {
+		do {
+		    /* try to get keyword */
+		    tp = keyword;
+		    for (;;) {
+			Igeteof(fp, c, goto ok;);
+			switch (c) {
+			    default:
+				/* over-long names cannot be keywords: give up */
+				if (keyword+keylength <= tp)
+				    break;
+				*tp++ = c;
+				continue;
+
+			    case '\n': case KDELIM: case VDELIM:
+				break;
+			}
+			break;
+		    }
+		} while (c==KDELIM);
+		if (c!=VDELIM) continue;
+		*tp = c;	/* trymatch() wants the delimiter after the name */
+		Igeteof(fp, c, break;);
+		switch (c) {
+		    case ' ': case '\t': break;
+		    default: continue;
+		}
+
+		switch (trymatch(keyword)) {
+		    case Author:
+			if (!keepid(0, fp, &prevauthor))
+			    goto bad;
+			c = 0;
+			break;
+		    case Date:
+			if (!(c = keepdate(fp)))
+			    goto bad;
+			break;
+		    case Header:
+		    case Id:
+			if (!(
+			      getval(fp, (struct buf*)nil, false) &&
+			      keeprev(fp) &&
+			      (c = keepdate(fp)) &&
+			      keepid(c, fp, &prevauthor) &&
+			      keepid(0, fp, &prevstate)
+			))
+			    goto bad;
+			/* Skip either ``who'' (new form) or ``Locker: who'' (old). */
+			if (getval(fp, (struct buf*)nil, true) &&
+			    getval(fp, (struct buf*)nil, true))
+				c = 0;
+			else if (nerror)
+				goto bad;
+			else
+				c = KDELIM;
+			break;
+		    case Locker:
+		    case Log:
+		    case RCSfile:
+		    case Source:
+			if (!getval(fp, (struct buf*)nil, false))
+			    goto bad;
+			c = 0;
+			break;
+		    case Revision:
+			if (!keeprev(fp))
+			    goto bad;
+			c = 0;
+			break;
+		    case State:
+			if (!keepid(0, fp, &prevstate))
+			    goto bad;
+			c = 0;
+			break;
+		    default:
+			continue;
+		}
+		/* c==0 means that no lookahead character has been read yet */
+		if (!c)
+		    Igeteof(fp, c, c=0;);
+		if (c != KDELIM) {
+		    error("closing %c missing on keyword", KDELIM);
+		    goto bad;
+		}
+		if (*prevauthor.string && *prevdate.string && *prevrev.string && *prevstate.string) {
+		    break;
+		}
+	    }
+	    Igeteof(fp, c, break;);
+	}
+
+    ok:
+	if (needs_closing)
+		Ifclose(fp);
+	else
+		Irewind(fp);
+	prevkeys = true;
+	return true;
+
+    bad:
+	/* Do not leak the stream that was opened here from workfilename. */
+	if (needs_closing)
+		Ifclose(fp);
+	return false;
+}
+
+ static int
+badly_terminated()
+{ /* Report a badly terminated keyword value; always yields false. */
+ error("badly terminated keyword value");
+ return false; /* lets callers write `return badly_terminated();' */
+}
+
+ static int
+getval(fp, target, optional)
+ register RILE *fp;
+ struct buf *target;
+ int optional;
+/* Reads a keyword value from FP into TARGET.
+ * Returns true if one is found, false otherwise.
+ * Does not modify target if it is nil.
+ * Do not report an error if OPTIONAL is set and KDELIM is found instead.
+ * End of file before the first character is reported through
+ * badly_terminated().
+ */
+{
+ int c;
+ Igeteof(fp, c, return badly_terminated();); /* fetch the lookahead for get0val */
+ return get0val(c, fp, target, optional);
+}
+
+ static int
+get0val(c, fp, target, optional)
+ register int c;
+ register RILE *fp;
+ struct buf *target;
+ int optional;
+/* Reads a keyword value from C+FP into TARGET, perhaps OPTIONALly.
+ * Same as getval, except C is the lookahead character.
+ * The value ends at the first space or tab; it is NUL-terminated in
+ * TARGET when TARGET is not nil.
+ * Returns true if at least one character was read before the space or tab.
+ * Returns false, after reporting an error, if the value is empty, or if
+ * KDELIM, a newline, a NUL or end of file is met first; the one silent
+ * false is KDELIM at the very start when OPTIONAL is set.
+ */
+{
+	register char * tp;
+	char const *tlim;
+	register int got1;
+
+	if (target) {
+		bufalloc(target, 1);
+		tp = target->string;
+		tlim = tp + target->size;
+	} else
+		tlim = tp = 0;
+	got1 = false;
+	for (;;) {
+		switch (c) {
+		    default:
+			got1 = true;
+			if (tp) {
+				*tp++ = c;
+				/* enlarge after storing, so that there is
+				 * always room for the terminating NUL */
+				if (tlim <= tp)
+					tp = bufenlarge(target, &tlim);
+			}
+			break;
+
+		    case ' ':
+		    case '\t':
+			if (tp) {
+				*tp = 0;
+#				ifdef KEEPTEST
+				/* print the text, not the struct buf itself */
+				VOID printf("getval: %s\n", target->string);
+#				endif
+			}
+			if (!got1)
+				error("too much white space in keyword value");
+			return got1;
+
+		    case KDELIM:
+			if (!got1 && optional)
+				return false;
+			/* fall into */
+		    case '\n':
+		    case 0:
+			return badly_terminated();
+		}
+		Igeteof(fp, c, return badly_terminated(););
+	}
+}
+
+
+ static int
+keepdate(fp)
+ RILE *fp;
+/* Function: reads a date (day, time and optional time zone offset) from
+ * FP into prevdate; checks format.
+ * A day that starts with two digits and a '/' is taken to come from an
+ * old version of RCS and gets "19" put in front of it.
+ * Return 0 on error, lookahead character otherwise.
+ */
+{
+	struct buf prevday, prevtime, prevzone;
+	register char const *p;
+	register int c;
+
+	c = 0;
+	bufautobegin(&prevday);
+	if (getval(fp,&prevday,false)) {
+		bufautobegin(&prevtime);
+		if (getval(fp,&prevtime,false)) {
+			bufautobegin(&prevzone);
+			bufscpy(&prevzone, "");
+			Igeteof(fp, c, c=0;);
+			/* a sign introduces a time zone offset */
+			if (c=='-' || c=='+') {
+				if (!get0val(c,fp,&prevzone,false))
+					c = 0;
+				else
+					Igeteof(fp, c, c=0;);
+			}
+			if (c) {
+				p = prevday.string;
+				/* 5 = "19" + two separating spaces + NUL */
+				bufalloc(&prevdate, strlen(p) + strlen(prevtime.string) + strlen(prevzone.string) + 5);
+				VOID sprintf(prevdate.string, "%s%s %s %s",
+					/* Parse dates put out by old versions of RCS. */
+					/* The bytes come from the file; keep */
+					/* isdigit's argument nonnegative. */
+					isdigit((unsigned char)p[0]) && isdigit((unsigned char)p[1]) && p[2]=='/' ? "19" : "",
+					p, prevtime.string, prevzone.string
+				);
+			}
+			bufautoend(&prevzone);
+		}
+		bufautoend(&prevtime);
+	}
+	bufautoend(&prevday);
+	return c;
+}
+
+ static int
+keepid(c, fp, b)
+ int c;
+ RILE *fp;
+ struct buf *b;
+/* Get previous identifier from C+FP into B.
+ * C is the lookahead character, or 0 if none has been read yet.
+ * Returns false on end of file or if get0val() fails, true otherwise.
+ */
+{
+ if (!c)
+ Igeteof(fp, c, return false;); /* no lookahead yet: read one */
+ if (!get0val(c, fp, b, false))
+ return false;
+ checksid(b->string); /* result not used here; presumably reports a bad identifier itself */
+ return true;
+}
+
+ static int
+keeprev(fp)
+ RILE *fp;
+/* Read the previous revision number from FP into prevrev.
+ * Yields true only if a value was read and checknum() accepts it.
+ */
+{
+	if (!getval(fp,&prevrev,false))
+		return false;
+	return checknum(prevrev.string,-1);
+}
+
+
+ static int
+checknum(sp,fields)
+ register char const *sp;
+ int fields;
+/* Check that SP consists of digits and periods only.
+ * If FIELDS is negative, yield true iff the number of periods is odd;
+ * otherwise yield true iff the number of periods equals FIELDS.
+ * Yield false as soon as any other character is found.
+ */
+{
+	register int dotcount;
+
+	dotcount=0;
+	while(*sp) {
+		if (*sp=='.') dotcount++;
+		/* SP comes from file contents and char may be signed: */
+		/* keep isdigit's argument nonnegative. */
+		else if (!isdigit((unsigned char)*sp)) return false;
+		sp++;
+	}
+	return fields<0 ? dotcount&1 : dotcount==fields;
+}
+
+
+
+#ifdef KEEPTEST
+
+char const cmdid[] ="keeptest";
+
+ int
+main(argc, argv)
+int argc; char *argv[];
+/* Test driver: for every file named on the command line, run
+ * getoldkeys() on it and print the keyword values found.
+ */
+{
+	while (*(++argv)) {
+		workfilename = *argv;
+		/* getoldkeys() returns at once when prevkeys is set, so */
+		/* without this reset every file after the first would */
+		/* show the first file's values. */
+		prevkeys = false;
+		getoldkeys((RILE*)0);
+		VOID printf("%s: revision: %s, date: %s, author: %s, state: %s\n",
+			*argv, prevrev.string, prevdate.string, prevauthor.string, prevstate.string);
+	}
+	exitmain(EXIT_SUCCESS);
+}
+#endif
diff --git a/gnu/usr.bin/rcs/lib/rcskeys.c b/gnu/usr.bin/rcs/lib/rcskeys.c
new file mode 100644
index 000000000000..82850a7311eb
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcskeys.c
@@ -0,0 +1,102 @@
+/*
+ * RCS keyword table and match operation
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+/* $Log: rcskeys.c,v $
+ * Revision 5.2 1991/08/19 03:13:55 eggert
+ * Say `T const' instead of `const T'; it's less confusing for pointer types.
+ * (This change was made in other source files too.)
+ *
+ * Revision 5.1 1991/04/21 11:58:25 eggert
+ * Don't put , just before } in initializer.
+ *
+ * Revision 5.0 1990/08/22 08:12:54 eggert
+ * Add -k. Ansify and Posixate.
+ *
+ * Revision 4.3 89/05/01 15:13:02 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.2 87/10/18 10:36:33 narten
+ * Updating version numbers. Changes relative to 1.1 actually
+ * relative to 4.1
+ *
+ * Revision 1.2 87/09/24 14:00:10 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 4.1 83/05/04 10:06:53 wft
+ * Initial revision.
+ *
+ */
+
+
+#include "rcsbase.h"
+
+libId(keysId, "$Id: rcskeys.c,v 5.2 1991/08/19 03:13:55 eggert Exp $")
+
+
+char const *const Keyword[] = {
+ /* This must be in the same order as rcsbase.h's enum markers type.
+ * trymatch() returns an index into this table cast to enum markers.
+ * Entry 0 is nil; trymatch() never looks at it.
+ */
+ nil,
+ AUTHOR, DATE, HEADER, IDH,
+ LOCKER, LOG, RCSFILE, REVISION, SOURCE, STATE
+};
+
+
+
+ enum markers
+trymatch(string)
+ char const *string;
+/* function: Compares the start of string with the entries of Keyword[],
+ * going from the last entry down to index 1.
+ * As soon as one keyword has been matched in full the verdict is final:
+ * its marker if the next character of string is KDELIM or VDELIM,
+ * Nomatch otherwise.  Nomatch is also the result if no keyword matches.
+ */
+{
+	register int idx;
+	register char const *kw, *in;
+
+	idx = sizeof(Keyword)/sizeof(*Keyword);
+	while (--idx) {
+		/* try next keyword */
+		kw = Keyword[idx];
+		in = string;
+		for (;  *kw == *in;  ) {
+			++kw;
+			++in;
+			if (*kw)
+				continue;
+			/* whole keyword consumed: the delimiter decides */
+			if (*in == KDELIM  ||  *in == VDELIM)
+				return (enum markers)idx;
+			return Nomatch;
+		}
+	}
+	return(Nomatch);
+}
+
diff --git a/gnu/usr.bin/rcs/lib/rcslex.c b/gnu/usr.bin/rcs/lib/rcslex.c
new file mode 100644
index 000000000000..51e31f3445c1
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcslex.c
@@ -0,0 +1,1241 @@
+/*
+ * RCS file input
+ */
+/*********************************************************************************
+ * Lexical Analysis.
+ * hashtable, Lexinit, nextlex, getlex, getkey,
+ * getid, getnum, readstring, printstring, savestring,
+ * checkid, fatserror, error, faterror, warn, diagnose
+ * Testprogram: define LEXDB
+ *********************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+/* $Log: rcslex.c,v $
+ * Revision 5.11 1991/11/03 03:30:44 eggert
+ * Fix porting bug to ancient hosts lacking vfprintf.
+ *
+ * Revision 5.10 1991/10/07 17:32:46 eggert
+ * Support piece tables even if !has_mmap.
+ *
+ * Revision 5.9 1991/09/24 00:28:42 eggert
+ * Don't export errsay().
+ *
+ * Revision 5.8 1991/08/19 03:13:55 eggert
+ * Add eoflex(), mmap support. Tune.
+ *
+ * Revision 5.7 1991/04/21 11:58:26 eggert
+ * Add MS-DOS support.
+ *
+ * Revision 5.6 1991/02/25 07:12:42 eggert
+ * Work around fputs bug. strsave -> str_save (DG/UX name clash)
+ *
+ * Revision 5.5 1990/12/04 05:18:47 eggert
+ * Use -I for prompts and -q for diagnostics.
+ *
+ * Revision 5.4 1990/11/19 20:05:28 hammer
+ * no longer gives warning about unknown keywords if -q is specified
+ *
+ * Revision 5.3 1990/11/01 05:03:48 eggert
+ * When ignoring unknown phrases, copy them to the output RCS file.
+ *
+ * Revision 5.2 1990/09/04 08:02:27 eggert
+ * Count RCS lines better.
+ *
+ * Revision 5.1 1990/08/29 07:14:03 eggert
+ * Work around buggy compilers with defective argument promotion.
+ *
+ * Revision 5.0 1990/08/22 08:12:55 eggert
+ * Remove compile-time limits; use malloc instead.
+ * Report errno-related errors with perror().
+ * Ansify and Posixate. Add support for ISO 8859.
+ * Use better hash function.
+ *
+ * Revision 4.6 89/05/01 15:13:07 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.5 88/08/28 15:01:12 eggert
+ * Don't loop when writing error messages to a full filesystem.
+ * Flush stderr/stdout when mixing output.
+ * Yield exit status compatible with diff(1).
+ * Shrink stdio code size; allow cc -R; remove lint.
+ *
+ * Revision 4.4 87/12/18 11:44:47 narten
+ * fixed to use "varargs" in "fprintf"; this is required if it is to
+ * work on a SPARC machine such as a Sun-4
+ *
+ * Revision 4.3 87/10/18 10:37:18 narten
+ * Updating version numbers. Changes relative to 1.1 actually relative
+ * to version 4.1
+ *
+ * Revision 1.3 87/09/24 14:00:17 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:33 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/03/25 18:12:51 wft
+ * Only changed $Header to $Id.
+ *
+ * Revision 3.3 82/12/10 16:22:37 wft
+ * Improved error messages, changed exit status on error to 1.
+ *
+ * Revision 3.2 82/11/28 21:27:10 wft
+ * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
+ * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
+ * properly in case there is an IO-error (e.g., file system full).
+ *
+ * Revision 3.1 82/10/11 19:43:56 wft
+ * removed unused label out:;
+ * made sure all calls to getc() return into an integer, not a char.
+ */
+
+
+/*
+#define LEXDB
+*/
+/* version LEXDB is for testing the lexical analyzer. The testprogram
+ * reads a stream of lexemes, enters the revision numbers into the
+ * hashtable, and prints the recognized tokens. Keywords are recognized
+ * as identifiers.
+ */
+
+
+
+#include "rcsbase.h"
+
+libId(lexId, "$Id: rcslex.c,v 5.11 1991/11/03 03:30:44 eggert Exp $")
+
+static struct hshentry *nexthsh; /*pointer to next hash entry, set by lookup*/
+
+enum tokens nexttok; /*next token, set by nextlex */
+
+int hshenter; /*if true, next suitable lexeme will be entered */
+ /*into the symbol table. Handle with care. */
+int nextc; /*next input character, initialized by Lexinit */
+
+unsigned long rcsline; /*current line-number of input */
+int nerror; /*counter for errors */
+int quietflag; /*indicates quiet mode */
+RILE * finptr; /*input file descriptor */
+
+FILE * frewrite; /*file descriptor for echoing input */
+
+FILE * foutptr; /* copy of frewrite, but 0 to suppress echo */
+
+static struct buf tokbuf; /* token buffer */
+
+char const * NextString; /* next token */
+
+/*
+ * Our hash algorithm is h[0] = 0, h[i+1] = 4*h[i] + c,
+ * so hshsize should be odd.
+ * See B J McKenzie, R Harries & T Bell, Selecting a hashing algorithm,
+ * Software--practice & experience 20, 2 (Feb 1990), 209-224.
+ */
+#ifndef hshsize
+# define hshsize 511
+#endif
+
+static struct hshentry *hshtab[hshsize]; /*hashtable */
+
+static int ignored_phrases; /* have we ignored phrases in this RCS file? */
+
+ void
+warnignore()
+/* Warn that the RCS file contains unknown phrases, naming NextString as
+ * an example.  Nothing is done if quietflag is set or if the warning has
+ * already been given since ignored_phrases was last cleared.
+ */
+{
+	if (ignored_phrases || quietflag)
+		return;
+	ignored_phrases = true;
+	warn("Unknown phrases like `%s ...;' are in the RCS file.", NextString);
+}
+
+
+
+ static void
+lookup(str)
+ char const *str;
+/* Function: Finds the hashtable entry whose num equals the string str,
+ * creating one at the end of its chain if there is none yet.
+ * On return nexthsh points to that entry and NextString to its num.
+ */
+{
+	register unsigned slot; /* index into hashtable */
+	register char const *cp;
+	register struct hshentry *ent, **link;
+
+	/* hash code: h = 4*h + c over all characters, modulo hshsize */
+	slot = 0;
+	for (cp = str;  *cp;  cp++)
+		slot = (slot<<2) + *cp;
+	slot %= hshsize;
+
+	/* walk the chain until a match or the end of the chain */
+	link = &hshtab[slot];
+	while ((ent = *link)  &&  strcmp(str, ent->num) != 0)
+		link = &ent->nexthsh;
+
+	if (!ent) {
+		/* not found: append a new entry */
+		*link = ent = ftalloc(struct hshentry);
+		ent->num = fstr_save(str);
+		ent->nexthsh = nil;
+#		ifdef LEXDB
+		VOID printf("\nEntered: %s at %u ", str, slot);
+#		endif
+	}
+	nexthsh = ent;
+	NextString = ent->num;
+}
+
+
+
+
+
+
+ void
+Lexinit()
+/* Function: Initialization of lexical analyzer:
+ * empties the hashtable and zeroes nerror;
+ * if finptr != 0, also resets the lexer state, reads the first character
+ * into nextc and the first token into nexttok.
+ */
+{
+	register int slot;
+
+	for (slot = 0;  slot < hshsize;  slot++)
+		hshtab[slot] = nil;
+
+	nerror = 0;
+	if (!finptr)
+		return;
+
+	foutptr = 0;
+	hshenter = true;
+	ignored_phrases = false;
+	rcsline = 1;
+	bufrealloc(&tokbuf, 2);
+	Iget(finptr, nextc);
+	nextlex(); /*initial token*/
+}
+
+
+
+
+
+
+
+ void
+nextlex()
+
+/* Function: Reads the next token and sets nexttok to the next token code.
+ * Only if hshenter is set, a revision number is entered into the
+ * hashtable and a pointer to it is placed into nexthsh.
+ * This is useful for avoiding that dates are placed into the hashtable.
+ * For ID's and NUM's, NextString is set to the character string.
+ * Assumption: nextc contains the next character.
+ * On return nextc holds the character after the token, except for
+ * STRING, where only the opening SBEGIN has been consumed.
+ * A character whose class is none of those handled below is fatal.
+ */
+{ register c;
+ declarecache;
+ register FILE *frew;
+ register char * sp;
+ char const *limit;
+ register enum tokens d;
+ register RILE *fin;
+
+ fin=finptr; frew=foutptr;
+ setupcache(fin); cache(fin);
+ c = nextc;
+
+ for (;;) { switch ((d = ctab[c])) { /* dispatch on the class of c */
+
+ default:
+ fatserror("unknown character `%c'", c);
+ /*NOTREACHED*/
+
+ case NEWLN:
+ ++rcsline;
+# ifdef LEXDB
+ afputc('\n',stdout);
+# endif
+ /* Note: falls into next case */
+
+ case SPACE:
+ GETC(frew, c); /* presumably fetches the next character into c, echoing to frew if set */
+ continue;
+
+ case DIGIT:
+ sp = tokbuf.string;
+ limit = sp + tokbuf.size;
+ *sp++ = c;
+ for (;;) {
+ GETC(frew, c);
+ if ((d=ctab[c])!=DIGIT && d!=PERIOD)
+ break;
+ *sp++ = c; /* 1.2. and 1.2 are different */
+ if (limit <= sp)
+ sp = bufenlarge(&tokbuf, &limit); /* keeps room for the final NUL */
+ }
+ *sp = 0;
+ if (hshenter)
+ lookup(tokbuf.string); /* sets nexthsh and NextString */
+ else
+ NextString = fstr_save(tokbuf.string);
+ d = NUM;
+ break;
+
+
+ case LETTER:
+ case Letter:
+ sp = tokbuf.string;
+ limit = sp + tokbuf.size;
+ *sp++ = c;
+ for (;;) {
+ GETC(frew, c);
+ if ((d=ctab[c])!=LETTER && d!=Letter && d!=DIGIT && d!=IDCHAR)
+ break;
+ *sp++ = c;
+ if (limit <= sp)
+ sp = bufenlarge(&tokbuf, &limit);
+ }
+ *sp = 0;
+ NextString = fstr_save(tokbuf.string);
+ d = ID; /* may be ID or keyword */
+ break;
+
+ case SBEGIN: /* long string */
+ d = STRING;
+ /* note: only the initial SBEGIN has been read*/
+ /* read the string, and reset nextc afterwards*/
+ break;
+
+ case COLON:
+ case SEMI:
+ GETC(frew, c); /* d already holds the token code */
+ break;
+ } break; } /* every case that breaks out of the switch also ends the loop */
+ nextc = c;
+ nexttok = d;
+ uncache(fin);
+}
+
+ int
+eoflex()
+/*
+ * Yield true if we look ahead to the end of the input, false otherwise.
+ * Starting with nextc, newlines and spaces are skipped (each newline
+ * increments rcsline). The first character of any other class is stored
+ * back into nextc and false is yielded.
+ * Each character fetched while skipping is copied to foutptr if it is set.
+ * nextc becomes undefined at end of file.
+ */
+{
+ register int c;
+ declarecache;
+ register FILE *fout;
+ register RILE *fin;
+
+ c = nextc;
+ fin = finptr;
+ fout = foutptr;
+ setupcache(fin); cache(fin);
+
+ for (;;) {
+ switch (ctab[c]) {
+ default:
+ nextc = c;
+ uncache(fin);
+ return false;
+
+ case NEWLN:
+ ++rcsline;
+ /* fall into */
+ case SPACE:
+ cachegeteof(c, {uncache(fin);return true;}); /* presumably fetches into c and
+ * runs the braced block at end of input -- confirm against the macro */
+ break;
+ }
+ if (fout)
+ aputc(c, fout);
+ }
+}
+
+
+int getlex(token)
+enum tokens token;
+/* Function: If nexttok equals token, consume it by calling nextlex
+ * and yield true; otherwise leave the input alone and yield false.
+ * Doesn't work for strings and keywords; loses the character string for ids.
+ */
+{
+    if (nexttok != token)
+        return(false);
+    nextlex();
+    return(true);
+}
+
+ int
+getkeyopt(key)
+ char const *key;
+/* Function: If the current token is an identifier spelled exactly like key,
+ * release its saved string, advance the input by calling nextlex,
+ * and yield true; otherwise yield false without consuming anything.
+ */
+{
+    if (nexttok != ID)
+        return(false);
+    if (strcmp(key, NextString) != 0)
+        return(false);
+
+    /* keyword matched */
+    ffree1(NextString);
+    nextlex();
+    return(true);
+}
+
+ void
+getkey(key)
+ char const *key;
+/* Require the current input token to be the keyword key and step past it
+ * (via getkeyopt, which calls nextlex); otherwise report a fatal syntax error.
+ */
+{
+    if (getkeyopt(key))
+        return;
+    fatserror("missing '%s' keyword", key);
+}
+
+ void
+getkeystring(key)
+ char const *key;
+/* Require the current input token to be the keyword key and step past it;
+ * then require the token that follows to be the start of a string.
+ * Either failure is a fatal syntax error.
+ */
+{
+    getkey(key);
+    if (nexttok == STRING)
+        return;
+    fatserror("missing string after '%s' keyword", key);
+}
+
+
+ char const *
+getid()
+/* Function: If nexttok is an identifier, remember its string, advance the
+ * input by calling nextlex, and yield the remembered string;
+ * otherwise yield nil and consume nothing.
+ * Treats keywords as identifiers.
+ */
+{
+    register char const *ident;
+
+    if (nexttok != ID)
+        return nil;
+    ident = NextString;
+    nextlex();
+    return ident;
+}
+
+
+struct hshentry * getnum()
+/* Function: If nexttok is a number, remember the hashtable entry in nexthsh,
+ * advance the input by calling nextlex, and yield the remembered entry;
+ * otherwise yield nil and consume nothing.
+ * Doesn't work if hshenter is false.
+ */
+{
+    register struct hshentry *entry;
+
+    if (nexttok != NUM)
+        return nil;
+    entry = nexthsh;
+    nextlex();
+    return entry;
+}
+
+ struct cbuf
+getphrases(key)
+ char const *key;
+/* Get a series of phrases that do not start with KEY, yield resulting buffer.
+ * Stop when the next phrase starts with a token that is not an identifier,
+ * or is KEY.
+ * If the current token is not an identifier, or is KEY itself, nothing is
+ * read and an empty result (string 0, size 0) is yielded.
+ * Otherwise warnignore() is called and the raw text of each phrase is
+ * accumulated, beginning with the identifier already held in NextString.
+ * On return nexttok (and nextc) describe the token that stopped the scan.
+ * Assume !foutptr.
+ */
+{
+ declarecache;
+ register int c;
+ register char *p;
+ char const *limit;
+ register char const *ki, *kn;
+ struct cbuf r;
+ struct buf b;
+ register RILE *fin;
+
+ if (nexttok!=ID || strcmp(NextString,key) == 0) {
+ r.string = 0;
+ r.size = 0;
+ return r;
+ } else {
+ warnignore();
+ fin = finptr;
+ setupcache(fin); cache(fin);
+ bufautobegin(&b);
+ bufscpy(&b, NextString);
+ ffree1(NextString);
+ p = b.string + strlen(b.string);
+ limit = b.string + b.size;
+ c = nextc;
+ for (;;) { /* one iteration per phrase */
+ for (;;) { /* one iteration per character (or whole string) of the phrase */
+ if (limit <= p)
+ p = bufenlarge(&b, &limit);
+ *p++ = c;
+ switch (ctab[c]) {
+ default:
+ fatserror("unknown character `%c'", c);
+ /*NOTREACHED*/
+ case NEWLN:
+ ++rcsline;
+ /* fall into */
+ case COLON: case DIGIT: case LETTER: case Letter:
+ case PERIOD: case SPACE:
+ cacheget(c);
+ continue;
+ case SBEGIN: /* long string */
+ for (;;) {
+ for (;;) { /* copy up to and including the next SDELIM */
+ if (limit <= p)
+ p = bufenlarge(&b, &limit);
+ cacheget(c);
+ *p++ = c;
+ switch (c) {
+ case '\n':
+ ++rcsline;
+ /* fall into */
+ default:
+ continue;
+
+ case SDELIM:
+ break;
+ }
+ break;
+ }
+ cacheget(c);
+ if (c != SDELIM)
+ break; /* a single SDELIM ended the string; c is the character after it */
+ if (limit <= p)
+ p = bufenlarge(&b, &limit);
+ *p++ = c; /* doubled SDELIM: keep both and stay inside the string */
+ }
+ continue;
+ case SEMI: /* end of this phrase */
+ cacheget(c);
+ if (ctab[c] == NEWLN) { /* a newline right after the SEMI is kept */
+ ++rcsline;
+ if (limit <= p)
+ p = bufenlarge(&b, &limit);
+ *p++ = c;
+ cacheget(c);
+ }
+ for (;;) { /* further white space is skipped without being stored */
+ switch (ctab[c]) {
+ case NEWLN:
+ ++rcsline;
+ /* fall into */
+ case SPACE:
+ cacheget(c);
+ continue;
+
+ default: break;
+ }
+ break;
+ }
+ break;
+ }
+ break;
+ }
+ switch (ctab[c]) { /* c is the first character after the phrase just read */
+ case LETTER:
+ case Letter:
+ for (kn = key; c && *kn==c; kn++) /* consume input while it matches KEY */
+ cacheget(c);
+ if (!*kn) /* all of KEY matched; is it the whole identifier? */
+ switch (ctab[c]) {
+ case DIGIT: case LETTER: case Letter:
+ break;
+ default:
+ nextc = c;
+ NextString = fstr_save(key);
+ nexttok = ID;
+ uncache(fin);
+ goto returnit;
+ }
+ for (ki=key; ki<kn; ) { /* not KEY: store the prefix that was consumed */
+ if (limit <= p)
+ p = bufenlarge(&b, &limit);
+ *p++ = *ki++;
+ }
+ break;
+
+ default: /* next phrase does not start with an identifier */
+ nextc = c;
+ uncache(fin);
+ nextlex();
+ goto returnit;
+ }
+ }
+ returnit:
+ return bufremember(&b, (size_t)(p - b.string));
+ }
+}
+
+
+ void
+readstring()
+/* Skip the body of a string: read characters until a single (undoubled)
+ * SDELIM, counting newlines in rcsline. A doubled SDELIM stays inside the
+ * string. The character after the closing SDELIM is left in nextc.
+ * If foutptr is set, copy every character read to foutptr.
+ * Does not advance nextlex at the end.
+ */
+{
+    register c;
+    declarecache;
+    register FILE *frew;
+    register RILE *fin;
+
+    fin = finptr;
+    frew = foutptr;
+    setupcache(fin); cache(fin);
+    for (;;) {
+        GETC(frew, c);
+        if (c == '\n') {
+            ++rcsline;
+        } else if (c == SDELIM) {
+            GETC(frew, c);
+            if (c != SDELIM) {
+                /* single SDELIM: end of string */
+                nextc = c;
+                uncache(fin);
+                return;
+            }
+        }
+    }
+}
+
+
+ void
+printstring()
+/* Function: Copy the body of a string to stdout until a single (undoubled)
+ * SDELIM. A doubled SDELIM is written as one SDELIM. Newlines are counted
+ * in rcsline. The character after the closing SDELIM is left in nextc.
+ * Does not advance nextlex at the end.
+ */
+{
+    register c;
+    declarecache;
+    register FILE *fout;
+    register RILE *fin;
+
+    fin = finptr;
+    fout = stdout;
+    setupcache(fin); cache(fin);
+    for (;;) {
+        cacheget(c);
+        if (c == '\n') {
+            ++rcsline;
+        } else if (c == SDELIM) {
+            cacheget(c);
+            if (c != SDELIM) {
+                nextc = c;
+                uncache(fin);
+                return;
+            }
+        }
+        aputc(c, fout);
+    }
+}
+
+
+
+ struct cbuf
+savestring(target)
+ struct buf *target;
+/* Copy the body of a string, terminated by a single SDELIM, from finptr
+ * into the buffer *TARGET, growing it as needed.
+ * Double SDELIM is replaced with SDELIM.
+ * If foutptr is set, the string is also copied unchanged to foutptr.
+ * Newlines are counted in rcsline; the character after the closing SDELIM
+ * is left in nextc. Does not advance nextlex at the end.
+ * Yield a copy of *TARGET, except with exact length.
+ */
+{
+    register c;
+    declarecache;
+    register FILE *frew;
+    register char *tp;
+    register RILE *fin;
+    char const *limit;
+    struct cbuf r;
+
+    fin = finptr;
+    frew = foutptr;
+    setupcache(fin); cache(fin);
+    tp = target->string;
+    limit = tp + target->size;
+    for (;;) {
+        GETC(frew, c);
+        if (c == '\n') {
+            ++rcsline;
+        } else if (c == SDELIM) {
+            GETC(frew, c);
+            if (c != SDELIM)
+                break; /* single SDELIM: end of string */
+        }
+        if (tp == limit)
+            tp = bufenlarge(target, &limit);
+        *tp++ = c;
+    }
+    nextc = c;
+    r.string = target->string;
+    r.size = tp - r.string;
+    uncache(fin);
+    return r;
+}
+
+
+ char *
+checkid(id, delimiter)
+ register char *id;
+ int delimiter;
+/* Function: check whether the string starting at id is an
+ * identifier and return a pointer to the delimiter after the identifier.
+ * White space, delimiter and 0 are legal delimiters.
+ * If !delimiter, the only legal delimiter is 0.
+ * Aborts the program (via faterror) if id is not a legal identifier;
+ * before doing so the offending word is terminated in place with a 0
+ * so that it can be shown in the message.
+ * Useful for checking commands.
+ */
+{
+    register enum tokens d;
+    register char *temp;
+    register char c,tc;
+    register char delim = delimiter;
+
+    temp = id;
+    if ((d = ctab[(unsigned char)(c = *id)])==LETTER || d==Letter) {
+        while ((d = ctab[(unsigned char)(c = *++id)])==LETTER
+            || d==Letter || d==DIGIT || d==IDCHAR
+        )
+            ;
+        if (c && (!delim || (c!=delim && c!=' ' && c!='\t' && c!='\n'))) {
+            /* append \0 to end of id before error message */
+            tc = c;
+            while ((c = *++id)!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim)
+                ;
+            *id = '\0';
+            faterror("invalid character %c in identifier `%s'",tc,temp);
+        }
+    } else {
+        /* append \0 to end of id before error message.
+         * If id is empty (c is already the terminating 0) there is nothing
+         * to scan: stepping past the terminator and then storing a 0 there
+         * would read and write outside the string.
+         */
+        if (c) {
+            while ((c = *++id)!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim)
+                ;
+            *id = '\0';
+        }
+        faterror("identifier `%s' doesn't start with letter", temp);
+    }
+    return id;
+}
+
+ void
+checksid(id)
+ char *id;
+/* Check whether the whole string ID is an identifier:
+ * calls checkid with delimiter 0, so only the terminating 0 may follow it.
+ * The pointer yielded by checkid is discarded.
+ */
+{
+ VOID checkid(id, 0);
+}
+
+
+ static RILE *
+#if has_mmap && large_memory
+fd2_RILE(fd, filename, status)
+#else
+fd2RILE(fd, filename, mode, status)
+ char const *mode;
+#endif
+ int fd;
+ char const *filename;
+ register struct stat *status;
+/* Wrap the open descriptor FD, which refers to FILENAME, in a RILE.
+ * *STATUS receives the result of fstat; if STATUS is 0 a local scratch
+ * struct is used instead. A failing fstat is fatal.
+ * If FD is not a regular file: report it, close FD, set errno to EINVAL
+ * and yield 0.
+ * Without large_memory the RILE is simply the stream from fdopen(FD, MODE).
+ * With large_memory a free slot (base == 0) of a static table of RILES
+ * entries is claimed; its buffer is either a read-only mapping of the
+ * whole file (has_mmap) or freshly allocated memory of the file's size.
+ * Running out of slots is fatal.
+ */
+{
+    struct stat st;
+
+    if (!status)
+        status = &st;
+    if (fstat(fd, status) != 0)
+        efaterror(filename);
+    if (!S_ISREG(status->st_mode)) {
+        error("`%s' is not a regular file", filename);
+        VOID close(fd);
+        errno = EINVAL;
+        return 0;
+    } else {
+
+# if ! (has_mmap && large_memory)
+        FILE *stream;
+        if (!(stream = fdopen(fd, mode)))
+            efaterror(filename);
+# endif
+
+# if !large_memory
+        return stream;
+# else
+# define RILES 3
+        {
+            static RILE rilebuf[RILES];
+
+            register RILE *f;
+            size_t s = status->st_size;
+
+            /* st_size did not survive the conversion to size_t */
+            if (s != status->st_size)
+                faterror("`%s' is enormous", filename);
+            /* Find a free slot. The bounds check must come before
+             * f->base is examined, or the entry one past the end of
+             * rilebuf is read when every slot is in use.
+             */
+            for (f = rilebuf; ; f++) {
+                if (f == rilebuf+RILES)
+                    faterror("too many RILEs");
+                if (!f->base)
+                    break;
+            }
+            if (!s) {
+                /* empty file: any non-null base marks the slot as used */
+                static unsigned char dummy;
+                f->base = &dummy;
+            } else {
+# if has_mmap
+                if (
+                    (f->base = (unsigned char *)mmap(
+                        (caddr_t)0, s, PROT_READ, MAP_SHARED,
+                        fd, (off_t)0
+                    )) == (unsigned char *)-1
+                )
+                    efaterror("mmap");
+# else
+                f->base = tnalloc(unsigned char, s);
+# endif
+            }
+            f->ptr = f->base;
+            f->lim = f->base + s;
+# if has_mmap
+            f->fd = fd;
+# else
+            f->readlim = f->base;
+            f->stream = stream;
+# endif
+            if_advise_access(s, f, MADV_SEQUENTIAL);
+            return f;
+        }
+# endif
+    }
+}
+
+#if !has_mmap && large_memory
+ int
+Igetmore(f)
+ register RILE *f;
+/* Read the next piece of F's stream, at most BUFSIZ bytes and never past
+ * f->lim, to f->readlim and advance readlim by the amount read; yield 1.
+ * If nothing could be read, check the stream for an error, pull f->lim
+ * back to f->readlim, and yield 0.
+ */
+{
+    register fread_type got;
+    register size_t want;
+
+    want = f->lim - f->readlim;
+    if (want > BUFSIZ)
+        want = BUFSIZ;
+    got = Fread(f->readlim, sizeof(*f->readlim), want, f->stream);
+    if (got) {
+        f->readlim += got;
+        return 1;
+    }
+    testIerror(f->stream);
+    f->lim = f->readlim; /* The file might have shrunk! */
+    return 0;
+}
+#endif
+
+#if has_madvise && has_mmap && large_memory
+ void
+advise_access(f, advice)
+ register RILE *f;
+ int advice;
+/* Pass ADVICE to madvise for the whole region from f->base to f->lim.
+ * A failing madvise is fatal.
+ */
+{
+    size_t len = (size_t)(f->lim - f->base);
+
+    if (madvise((caddr_t)f->base, len, advice) == 0)
+        return;
+    efaterror("madvise");
+}
+#endif
+
+ RILE *
+#if has_mmap && large_memory
+I_open(filename, status)
+#else
+Iopen(filename, mode, status)
+ char const *mode;
+#endif
+ char const *filename;
+ struct stat *status;
+/* Open FILENAME read-only (binary) and hand the descriptor to the
+ * fd-to-RILE converter, which also sets *STATUS.
+ * Yield 0 if the file cannot be opened.
+ */
+{
+    int fd = open(filename, O_RDONLY|O_BINARY);
+
+    if (fd < 0)
+        return 0;
+# if has_mmap && large_memory
+    return fd2_RILE(fd, filename, status);
+# else
+    return fd2RILE(fd, filename, mode, status);
+# endif
+}
+
+
+#if !large_memory
+# define Iclose(f) fclose(f)
+#else
+ static int
+ Iclose(f)
+ register RILE *f;
+ /* Release F's buffer, mark its slot free (base = 0) and close the
+  * underlying descriptor or stream. Yield 0 on success, nonzero on failure.
+  * With has_mmap, a failing munmap yields -1 at once, leaving F untouched.
+  */
+ {
+# if has_mmap
+     size_t maplen = f->lim - f->base;
+
+     if (maplen != 0)
+         if (munmap((caddr_t)f->base, maplen) != 0)
+             return -1;
+     f->base = 0;
+     return close(f->fd);
+# else
+     tfree(f->base);
+     f->base = 0;
+     return fclose(f->stream);
+# endif
+ }
+#endif
+
+
+static int Oerrloop; /* set once Oerror has been entered, so that an output
+ * error raised while reporting an output error is not reported again */
+
+ exiting void
+Oerror()
+{ /* Report a fatal "output error". If this is already the second entry
+ * (Oerrloop set), skip the report and leave through exiterr() directly. */
+ if (Oerrloop)
+ exiterr();
+ Oerrloop = true;
+ efaterror("output error");
+}
+
+exiting void Ieof() { fatserror("unexpected end of file"); } /* fatal syntax error: input ended too soon */
+exiting void Ierror() { efaterror("input error"); } /* fatal error carrying the current errno */
+void testIerror(f) FILE *f; { if (ferror(f)) Ierror(); } /* die if F's error indicator is set */
+void testOerror(o) FILE *o; { if (ferror(o)) Oerror(); } /* likewise for an output stream */
+
+void Ifclose(f) RILE *f; { if (f && Iclose(f)!=0) Ierror(); } /* close F if non-null; failure is fatal */
+void Ofclose(f) FILE *f; { if (f && fclose(f)!=0) Oerror(); } /* close F if non-null; failure is fatal */
+void Izclose(p) RILE **p; { Ifclose(*p); *p = 0; } /* close *P, then clear the caller's pointer */
+void Ozclose(p) FILE **p; { Ofclose(*p); *p = 0; } /* close *P, then clear the caller's pointer */
+
+#if !large_memory
+ void
+testIeof(f)
+ FILE *f;
+{ /* Die if F has its error indicator set, or else its end-of-file indicator. */
+ testIerror(f);
+ if (feof(f))
+ Ieof();
+}
+void Irewind(f) FILE *f; { if (fseek(f,0L,SEEK_SET) != 0) Ierror(); } /* seek F back to offset 0; failure is fatal */
+#endif
+
+void eflush()
+/* Flush stderr. A failure is a fatal output error, unless an output
+ * error is already being handled (Oerrloop set).
+ */
+{
+    if (fflush(stderr) == 0)
+        return;
+    if (!Oerrloop)
+        Oerror();
+}
+
+void oflush()
+/* Flush workstdout if it is set, else stdout. A failure is a fatal output
+ * error, unless an output error is already being handled (Oerrloop set).
+ */
+{
+    FILE *out = workstdout ? workstdout : stdout;
+
+    if (fflush(out) == 0)
+        return;
+    if (!Oerrloop)
+        Oerror();
+}
+
+ static exiting void
+fatcleanup(already_newline)
+ int already_newline;
+/* Print "<cmdid> aborted" on stderr and leave through exiterr().
+ * The format begins with a newline; ALREADY_NEWLINE is added to the
+ * format pointer, so a value of 1 steps over that leading newline.
+ */
+{
+    char const *fmt = "\n%s aborted\n";
+
+    fmt += already_newline;
+    VOID fprintf(stderr, fmt, cmdid);
+    exiterr();
+}
+
+static void errsay() { oflush(); aprintf(stderr,"%s error: ",cmdid); nerror++; } /* prefix of a non-fatal error; counts it in nerror */
+static void fatsay() { oflush(); VOID fprintf(stderr,"%s error: ",cmdid); } /* prefix of a fatal error; fprintf result ignored, nerror untouched */
+
+void eerror(s) char const *s; { enerror(errno,s); } /* non-fatal error S, explained by the current errno */
+
+ void
+enerror(e,s)
+ int e;
+ char const *s;
+{ /* Non-fatal error S, explained by error number E.
+ * errno is set to E only after errsay() has run, just before perror reads it. */
+ errsay();
+ errno = e;
+ perror(s);
+ eflush();
+}
+
+exiting void efaterror(s) char const *s; { enfaterror(errno,s); } /* fatal error S, explained by the current errno */
+
+ exiting void
+enfaterror(e,s)
+ int e;
+ char const *s;
+{ /* Fatal error S, explained by error number E.
+ * errno is set to E only after fatsay() has run, just before perror reads it. */
+ fatsay();
+ errno = e;
+ perror(s);
+ fatcleanup(true); /* true: the output of perror is taken to end with a newline */
+}
+
+#if has_prototypes
+ void
+error(char const *format,...)
+#else
+ /*VARARGS1*/ void error(format, va_alist) char const *format; va_dcl
+#endif
+/* non-fatal error:
+ * after the errsay() prefix (which also counts the error), print the
+ * printf-style message FORMAT with its arguments on stderr,
+ * append a newline, and flush stderr.
+ */
+{
+ va_list args;
+ errsay();
+ vararg_start(args, format);
+ fvfprintf(stderr, format, args);
+ va_end(args);
+ afputc('\n',stderr);
+ eflush();
+}
+
+#if has_prototypes
+ exiting void
+fatserror(char const *format,...)
+#else
+ /*VARARGS1*/ exiting void
+ fatserror(format, va_alist) char const *format; va_dcl
+#endif
+/* fatal syntax error:
+ * flush ordinary output, print "cmdid: RCSfilename:rcsline: " followed by
+ * the printf-style message FORMAT on stderr, then abort via fatcleanup.
+ */
+{
+ va_list args;
+ oflush();
+ VOID fprintf(stderr, "%s: %s:%lu: ", cmdid, RCSfilename, rcsline);
+ vararg_start(args, format);
+ fvfprintf(stderr, format, args);
+ va_end(args);
+ fatcleanup(false); /* false: the message has not been ended with a newline yet */
+}
+
+#if has_prototypes
+ exiting void
+faterror(char const *format,...)
+#else
+ /*VARARGS1*/ exiting void faterror(format, va_alist)
+ char const *format; va_dcl
+#endif
+/* fatal error, terminates program after cleanup:
+ * after the fatsay() prefix, print the printf-style message FORMAT on
+ * stderr, then abort via fatcleanup.
+ */
+{
+ va_list args;
+ fatsay();
+ vararg_start(args, format);
+ fvfprintf(stderr, format, args);
+ va_end(args);
+ fatcleanup(false); /* false: the message has not been ended with a newline yet */
+}
+
+#if has_prototypes
+ void
+warn(char const *format,...)
+#else
+ /*VARARGS1*/ void warn(format, va_alist) char const *format; va_dcl
+#endif
+/* prints a warning message:
+ * flush ordinary output, print "<cmdid> warning: " and the printf-style
+ * message FORMAT on stderr, append a newline, and flush stderr.
+ * nerror is not changed.
+ */
+{
+ va_list args;
+ oflush();
+ aprintf(stderr,"%s warning: ",cmdid);
+ vararg_start(args, format);
+ fvfprintf(stderr, format, args);
+ va_end(args);
+ afputc('\n',stderr);
+ eflush();
+}
+
+ void
+redefined(c)
+ int c;
+{ /* Warn that the option whose letter is C has been given more than once. */
+ warn("redefinition of -%c option", c);
+}
+
+#if has_prototypes
+ void
+diagnose(char const *format,...)
+#else
+ /*VARARGS1*/ void diagnose(format, va_alist) char const *format; va_dcl
+#endif
+/* prints a diagnostic message on stderr; does nothing if quietflag is set */
+/* Unlike the other routines, it does not append a newline. */
+/* This lets some callers suppress the newline, and is faster */
+/* in implementations that flush stderr just at the end of each printf. */
+{
+ va_list args;
+ if (!quietflag) {
+ oflush();
+ vararg_start(args, format);
+ fvfprintf(stderr, format, args);
+ va_end(args);
+ eflush();
+ }
+}
+
+
+
+ void
+afputc(c, f)
+/* Function: afputc(c,f) acts like aputc(c,f), but is smaller and slower:
+ * it is a real function whose whole body is one use of aputc, so callers
+ * pay for a call instead of repeating whatever aputc expands to.
+ */
+ int c;
+ register FILE *f;
+{
+ aputc(c,f);
+}
+
+
+ void
+aputs(s, iop)
+ char const *s;
+ FILE *iop;
+/* Function: Put string s on file iop, abort on error.
+ * With has_fputs, a negative result from fputs leads to Oerror();
+ * otherwise the string is handed, with its length, to awrite.
+ */
+{
+#if has_fputs
+ if (fputs(s, iop) < 0)
+ Oerror();
+#else
+ awrite(s, strlen(s), iop);
+#endif
+}
+
+
+
+ void
+#if has_prototypes
+fvfprintf(FILE *stream, char const *format, va_list args)
+#else
+ fvfprintf(stream,format,args) FILE *stream; char *format; va_list args;
+#endif
+/* like vfprintf, except abort program on error
+ * Exactly one `if' survives preprocessing and the Oerror() call after the
+ * last #endif is its body: with has_vfprintf the test is vfprintf's
+ * negative result; otherwise one of the fallbacks does the printing and
+ * the test is ferror(stream).
+ */
+{
+#if has_vfprintf
+ if (vfprintf(stream, format, args) < 0)
+#else
+# if has__doprintf
+ _doprintf(stream, format, args);
+# else
+# if has__doprnt
+ _doprnt(format, args, stream);
+# else
+ int *a = (int *)args; /* last resort: treat ARGS as an array of ints */
+ VOID fprintf(stream, format,
+ a[0], a[1], a[2], a[3], a[4],
+ a[5], a[6], a[7], a[8], a[9]
+ );
+# endif
+# endif
+ if (ferror(stream))
+#endif
+ Oerror();
+}
+
+#if has_prototypes
+ void
+aprintf(FILE *iop, char const *fmt, ...)
+#else
+ /*VARARGS2*/ void
+aprintf(iop, fmt, va_alist)
+FILE *iop;
+char const *fmt;
+va_dcl
+#endif
+/* Function: formatted output. Same as fprintf in stdio,
+ * but aborts program on error
+ * (the work, including the error check, is done by fvfprintf).
+ */
+{
+ va_list ap;
+ vararg_start(ap, fmt);
+ fvfprintf(iop, fmt, ap);
+ va_end(ap);
+}
+
+
+
+#ifdef LEXDB
+/* test program reading a stream of lexemes and printing the tokens.
+ * Usage: the single argument names the input file. Each token is printed
+ * followed by " | "; string bodies are skipped with readstring().
+ */
+
+
+
+ int
+main(argc,argv)
+int argc; char * argv[];
+{
+ cmdid="lextest";
+ if (argc<2) {
+ aputs("No input file\n",stderr);
+ exitmain(EXIT_FAILURE);
+ }
+ if (!(finptr=Iopen(argv[1], FOPEN_R, (struct stat*)0))) {
+ faterror("can't open input file %s",argv[1]);
+ }
+ Lexinit();
+ while (!eoflex()) {
+ switch (nexttok) {
+
+ case ID:
+ VOID printf("ID: %s",NextString);
+ break;
+
+ case NUM:
+ if (hshenter)
+ VOID printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab); /* NOTE(review):
+ * nexthsh is an entry pointer while hshtab holds entry pointers, so this
+ * difference looks ill-typed and is not a bucket index -- confirm */
+ else
+ VOID printf("NUM, unentered: %s",NextString);
+ hshenter = !hshenter; /*alternate between dates and numbers*/
+ break;
+
+ case COLON:
+ VOID printf("COLON"); break;
+
+ case SEMI:
+ VOID printf("SEMI"); break;
+
+ case STRING:
+ readstring();
+ VOID printf("STRING"); break;
+
+ case UNKN:
+ VOID printf("UNKN"); break;
+
+ default:
+ VOID printf("DEFAULT"); break;
+ }
+ VOID printf(" | ");
+ nextlex();
+ }
+ exitmain(EXIT_SUCCESS);
+}
+
+exiting void exiterr() { _exit(EXIT_FAILURE); } /* the test program's own exiterr: exit at once with failure status */
+
+
+#endif
diff --git a/gnu/usr.bin/rcs/lib/rcsmap.c b/gnu/usr.bin/rcs/lib/rcsmap.c
new file mode 100644
index 000000000000..0e7b23c85f4a
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcsmap.c
@@ -0,0 +1,68 @@
+/* RCS map of character types */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+#include "rcsbase.h"
+
+libId(mapId, "$Id: rcsmap.c,v 5.2 1991/08/19 03:13:55 eggert Exp $")
+
+/* map of character types:
+ * ctab[c] is the lexical class of the character with code c (0..255).
+ * Each row below covers eight consecutive codes.
+ */
+/* ISO 8859/1 (Latin-1) */
+enum tokens const ctab[] = {
+ UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
+ SPACE, SPACE, NEWLN, SPACE, SPACE, SPACE, UNKN, UNKN, /* 0x08-0x0F: BS HT LF VT FF CR SO SI */
+ UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
+ UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
+ SPACE, IDCHAR, IDCHAR, IDCHAR, DELIM, IDCHAR, IDCHAR, IDCHAR, /* 0x20-0x27: space ! " # $ % & ' */
+ IDCHAR, IDCHAR, IDCHAR, IDCHAR, DELIM, IDCHAR, PERIOD, IDCHAR, /* 0x28-0x2F: ( ) * + , - . / */
+ DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT,
+ DIGIT, DIGIT, COLON, SEMI, IDCHAR, IDCHAR, IDCHAR, IDCHAR, /* 0x38-0x3F: 8 9 : ; < = > ? */
+ SBEGIN, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, /* 0x40-0x47: @ A-G */
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+ LETTER, LETTER, LETTER, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, /* 0x58-0x5F: X Y Z [ \ ] ^ _ */
+ IDCHAR, Letter, Letter, Letter, Letter, Letter, Letter, Letter, /* 0x60-0x67: ` a-g */
+ Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
+ Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
+ Letter, Letter, Letter, IDCHAR, IDCHAR, IDCHAR, IDCHAR, UNKN, /* 0x78-0x7F: x y z { | } ~ DEL */
+ UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
+ UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
+ UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
+ UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
+ IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, /* 0xA0-0xBF: all IDCHAR */
+ IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR,
+ IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR,
+ IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR, IDCHAR,
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, IDCHAR, /* 0xD0-0xD7: 0xD7 is not a letter */
+ LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, Letter, /* 0xD8-0xDF: 0xDF is in class Letter */
+ Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
+ Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter,
+ Letter, Letter, Letter, Letter, Letter, Letter, Letter, IDCHAR, /* 0xF0-0xF7: 0xF7 is not a letter */
+ Letter, Letter, Letter, Letter, Letter, Letter, Letter, Letter
+};
diff --git a/gnu/usr.bin/rcs/lib/rcsrev.c b/gnu/usr.bin/rcs/lib/rcsrev.c
new file mode 100644
index 000000000000..ce11f549698d
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcsrev.c
@@ -0,0 +1,790 @@
+/*
+ * RCS revision number handling
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+
+/* $Log: rcsrev.c,v $
+ * Revision 5.3 1991/08/19 03:13:55 eggert
+ * Add `-r$', `-rB.'. Remove botches like `<now>' from messages. Tune.
+ *
+ * Revision 5.2 1991/04/21 11:58:28 eggert
+ * Add tiprev().
+ *
+ * Revision 5.1 1991/02/25 07:12:43 eggert
+ * Avoid overflow when comparing revision numbers.
+ *
+ * Revision 5.0 1990/08/22 08:13:43 eggert
+ * Remove compile-time limits; use malloc instead.
+ * Ansify and Posixate. Tune.
+ * Remove possibility of an internal error. Remove lint.
+ *
+ * Revision 4.5 89/05/01 15:13:22 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.4 87/12/18 11:45:22 narten
+ * more lint cleanups. Also, the NOTREACHED comment is no longer necessary,
+ * since there's now a return value there with a value. (Guy Harris)
+ *
+ * Revision 4.3 87/10/18 10:38:42 narten
+ * Updating version numbers. Changes relative to version 1.1 actually
+ * relative to 4.1
+ *
+ * Revision 1.3 87/09/24 14:00:37 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:37 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/03/25 21:10:45 wft
+ * Only changed $Header to $Id.
+ *
+ * Revision 3.4 82/12/04 13:24:08 wft
+ * Replaced getdelta() with gettree().
+ *
+ * Revision 3.3 82/11/28 21:33:15 wft
+ * fixed compartial() and compnum() for nil-parameters; fixed nils
+ * in error messages. Testprogram output shortenend.
+ *
+ * Revision 3.2 82/10/18 21:19:47 wft
+ * renamed compnum->cmpnum, compnumfld->cmpnumfld,
+ * numericrevno->numricrevno.
+ *
+ * Revision 3.1 82/10/11 19:46:09 wft
+ * changed expandsym() to check for source==nil; returns zero length string
+ * in that case.
+ */
+
+
+
+/*
+#define REVTEST
+*/
+/* version REVTEST is for testing the routines that generate a sequence
+ * of delta numbers needed to regenerate a given delta.
+ */
+
+#include "rcsbase.h"
+
+libId(revId, "$Id: rcsrev.c,v 5.3 1991/08/19 03:13:55 eggert Exp $")
+
+static char const *branchtip P((char const*));
+static struct hshentry *genbranch P((struct hshentry const*,char const*,unsigned,char const*,char const*,char const*,struct hshentries**));
+
+
+
+ unsigned
+countnumflds(s)
+ char const *s;
+/* Given a pointer s to a dotted number (date or revision number),
+ * yield the number of fields in s: 0 for nil or the empty string,
+ * otherwise one more than the number of '.' characters in s.
+ */
+{
+    register char const *p;
+    register unsigned nflds;
+
+    if (s == nil || *s == '\0')
+        return(0);
+    nflds = 1;
+    for (p = s; *p; p++)
+        if (*p == '.')
+            nflds++;
+    return(nflds);
+}
+
+ void
+getbranchno(revno,branchno)
+ char const *revno;
+ struct buf *branchno;
+/* Given a non-nil revision number revno, getbranchno copies the number of
+ * the branch on which revno is into branchno. If revno itself is a branch
+ * number (odd number of fields), it is copied unchanged.
+ * An empty revno has no fields and is likewise copied unchanged.
+ */
+{
+    register unsigned numflds;
+    register char *tp;
+
+    bufscpy(branchno, revno);
+    numflds = countnumflds(revno);
+    /* Nothing to cut for a branch number. The zero test matters: with no
+     * fields the countdown below would wrap around and scan for '.'
+     * far beyond the end of the buffer.
+     */
+    if (numflds == 0 || (numflds & 1))
+        return;
+
+    /* Even count: step past numflds-1 dots, then cut at the last of them. */
+    tp = branchno->string;
+    while (--numflds)
+        while (*tp++ != '.')
+            ;
+    *(tp-1) = '\0';
+}
+
+
+
+int cmpnum(num1, num2)
+ char const *num1, *num2;
+/* compares the two dotted numbers num1 and num2 lexicographically
+ * by field. Individual fields are compared numerically.
+ * returns <0, 0, >0 if num1<num2, num1==num2, and num1>num2, resp.
+ * omitted fields are assumed to be higher than the existing ones.
+ * A nil argument is treated like the empty string.
+*/
+{
+    register char const *s1, *s2;
+    register size_t d1, d2;
+    register int r;
+
+    s1 = num1==nil ? "" : num1;
+    s2 = num2==nil ? "" : num2;
+
+    for (;;) {
+        /* Give precedence to shorter one. */
+        if (!*s1)
+            return (unsigned char)*s2;
+        if (!*s2)
+            return -1;
+
+        /* Strip leading zeros, then find number of digits.
+         * isdigit is only defined for unsigned char values (and EOF),
+         * so convert before classifying.
+         */
+        while (*s1=='0') ++s1;
+        for (d1=0; isdigit((unsigned char)s1[d1]); d1++)
+            ;
+        while (*s2=='0') ++s2;
+        for (d2=0; isdigit((unsigned char)s2[d2]); d2++)
+            ;
+
+        /* Do not convert to integer; it might overflow!
+         * With leading zeros gone, more digits means a larger number,
+         * and equal-length digit strings compare like numbers.
+         */
+        if (d1 != d2)
+            return d1<d2 ? -1 : 1;
+        if ((r = memcmp(s1, s2, d1)))
+            return r;
+        s1 += d1;
+        s2 += d1;
+
+        /* skip '.' */
+        if (*s1) s1++;
+        if (*s2) s2++;
+    }
+}
+
+
+
+int cmpnumfld(num1, num2, fld)
+ char const *num1, *num2;
+ unsigned fld;
+/* Compare the two dotted numbers at field fld.
+ * Yield <0, 0, >0 if that field of num1 is numerically less than,
+ * equal to, or greater than that field of num2.
+ * num1 and num2 must have at least fld fields.
+ * fld must be positive.
+*/
+{
+    register char const *s1, *s2;
+    register size_t d1, d2;
+
+    s1 = num1;
+    s2 = num2;
+    /* skip fld-1 fields */
+    while (--fld) {
+        while (*s1++ != '.')
+            ;
+        while (*s2++ != '.')
+            ;
+    }
+    /* Now s1 and s2 point to the beginning of the respective fields.
+     * Strip leading zeros and count digits; isdigit is only defined for
+     * unsigned char values (and EOF), so convert before classifying.
+     */
+    while (*s1=='0') ++s1;
+    for (d1=0; isdigit((unsigned char)s1[d1]); d1++)
+        ;
+    while (*s2=='0') ++s2;
+    for (d2=0; isdigit((unsigned char)s2[d2]); d2++)
+        ;
+
+    /* more digits means larger; equal lengths compare digit by digit */
+    return d1<d2 ? -1 : d1==d2 ? memcmp(s1,s2,d1) : 1;
+}
+
+
+ static void
+cantfindbranch(revno, date, author, state)
+ char const *revno, date[datesize], *author, *state;
+{ /* Report that no revision on branch REVNO satisfies the given criteria.
+ * Each of DATE, AUTHOR and STATE may be 0, in which case its clause is
+ * left out of the message. */
+ char datebuf[datesize];
+
+ error("No revision on branch %s has%s%s%s%s%s%s.",
+ revno,
+ date ? " a date before " : "",
+ date ? date2str(date,datebuf) : "",
+ author ? " and author "+(date?0:4) : "", /* +4 steps over " and" when this is the first clause */
+ author ? author : "",
+ state ? " and state "+(date||author?0:4) : "", /* likewise */
+ state ? state : ""
+ );
+}
+
+ static void
+absent(revno, field)
+ char const *revno;
+ unsigned field;
+{ /* Report that the first FIELD fields of REVNO name something absent:
+ * it is called a "revision" when FIELD is odd and a "branch" when even. */
+ struct buf t;
+ bufautobegin(&t);
+ error("%s %s absent", field&1?"revision":"branch",
+ partialno(&t,revno,field)
+ );
+ bufautoend(&t);
+}
+
+
+ int
+compartial(num1, num2, length)
+ char const *num1, *num2;
+ unsigned length;
+
+/* compare the first "length" fields of two dot numbers;
+   the omitted field is considered to be larger than any number */
+/* restriction: at least one number has length or more fields */
+/* Yield <0, 0, >0 like cmpnum. A nil or exhausted num1 yields 1;
+   failing that, a nil or exhausted num2 yields -1. */
+
+{
+    register char const *s1, *s2;
+    register size_t d1, d2;
+    register int r;
+
+    s1 = num1; s2 = num2;
+    if (!s1) return 1;
+    if (!s2) return -1;
+
+    for (;;) {
+        if (!*s1) return 1;
+        if (!*s2) return -1;
+
+        /* Strip leading zeros and count digits; isdigit is only defined
+         * for unsigned char values (and EOF), so convert first.
+         */
+        while (*s1=='0') ++s1;
+        for (d1=0; isdigit((unsigned char)s1[d1]); d1++)
+            ;
+        while (*s2=='0') ++s2;
+        for (d2=0; isdigit((unsigned char)s2[d2]); d2++)
+            ;
+
+        /* more digits means larger; equal lengths compare digit by digit */
+        if (d1 != d2)
+            return d1<d2 ? -1 : 1;
+        if ((r = memcmp(s1, s2, d1)))
+            return r;
+        s1 += d1;
+        s2 += d1;
+
+        if (*s1 == '.') s1++;
+        if (*s2 == '.') s2++;
+
+        if ( --length == 0 ) return 0;
+    }
+}
+
+
+char * partialno(rev1,rev2,length)
+ struct buf *rev1;
+ char const *rev2;
+ register unsigned length;
+/* Function: Copies length fields of revision number rev2 into rev1.
+ * Return rev1's string.
+ * If rev2 has no more than length fields, all of it is copied.
+ * If length is 0, the result is the empty string.
+ */
+{
+    register char *r1;
+
+    bufscpy(rev1, rev2);
+    r1 = rev1->string;
+    if (!length) {
+        /* no fields wanted; never step back before the buffer */
+        *r1 = '\0';
+        return rev1->string;
+    }
+    /* Cut at the '.' that follows the length-th field, if there is one.
+     * The scan stops at the terminator, so a number with fewer fields
+     * than requested is left whole instead of being overrun.
+     */
+    for (; *r1; r1++)
+        if (*r1 == '.' && --length == 0) {
+            *r1 = '\0';
+            break;
+        }
+    return rev1->string;
+}
+
+
+
+
+ static void
+store1(store, next)
+ struct hshentries ***store;
+ struct hshentry *next;
+/*
+ * Append to a list of deltas: allocate a node whose `first' is NEXT,
+ * hook it in at the list's end pointer **STORE, and advance *STORE
+ * to the new node's `rest' link. That link itself is left unset here.
+ */
+{
+    register struct hshentries *node = ftalloc(struct hshentries);
+
+    node->first = next;
+    **store = node;
+    *store = &node->rest;
+}
+
+struct hshentry * genrevs(revno,date,author,state,store)
+ char const *revno, *date, *author, *state;
+ struct hshentries **store;
+/* Function: finds the deltas needed for reconstructing the
+ * revision given by revno, date, author, and state, and stores pointers
+ * to these deltas into a list whose starting address is given by store.
+ * The search starts at Head and follows the `next' links.
+ * With no field or one field in revno, the first delta on that branch
+ * that satisfies date, author and state (each ignored when nil) is chosen.
+ * With two fields that revision itself must satisfy them.
+ * With more than two fields the search continues in genbranch.
+ * The last delta (target delta) is returned.
+ * If the proper delta could not be found, an error is reported and
+ * nil is returned.
+ */
+{
+ unsigned length;
+ register struct hshentry * next;
+ int result;
+ char const *branchnum;
+ struct buf t;
+ char datebuf[datesize];
+
+ bufautobegin(&t);
+
+ if (!(next = Head)) {
+ error("RCS file empty");
+ goto norev;
+ }
+
+ length = countnumflds(revno);
+
+ if (length >= 1) {
+ /* at least one field; find branch exactly */
+ while ((result=cmpnumfld(revno,next->num,1)) < 0) {
+ store1(&store, next);
+ next = next->next;
+ if (!next) {
+ error("branch number %s too low", partialno(&t,revno,1));
+ goto norev;
+ }
+ }
+
+ if (result>0) {
+ absent(revno, 1);
+ goto norev;
+ }
+ }
+ if (length<=1){
+ /* pick latest one on given branch */
+ branchnum = next->num; /* works even for empty revno*/
+ while ((next!=nil) &&
+ (cmpnumfld(branchnum,next->num,1)==0) &&
+ !(
+ (date==nil?1:(cmpnum(date,next->date)>=0)) &&
+ (author==nil?1:(strcmp(author,next->author)==0)) &&
+ (state ==nil?1:(strcmp(state, next->state) ==0))
+ )
+ )
+ {
+ store1(&store, next);
+ next=next->next;
+ }
+ if ((next==nil) ||
+ (cmpnumfld(branchnum,next->num,1)!=0))/*overshot*/ {
+ cantfindbranch(
+ length ? revno : partialno(&t,branchnum,1),
+ date, author, state
+ );
+ goto norev;
+ } else {
+ store1(&store, next);
+ }
+ *store = nil; /* terminate the list */
+ return next;
+ }
+
+ /* length >=2 */
+ /* find revision; may go low if length==2*/
+ while ((result=cmpnumfld(revno,next->num,2)) < 0 &&
+ (cmpnumfld(revno,next->num,1)==0) ) {
+ store1(&store, next);
+ next = next->next;
+ if (!next)
+ break;
+ }
+
+ if ((next==nil) || (cmpnumfld(revno,next->num,1)!=0)) {
+ error("revision number %s too low", partialno(&t,revno,2));
+ goto norev;
+ }
+ if ((length>2) && (result!=0)) { /* a branch point must match exactly */
+ absent(revno, 2);
+ goto norev;
+ }
+
+ /* print last one */
+ store1(&store, next);
+
+ if (length>2)
+ return genbranch(next,revno,length,date,author,state,store);
+ else { /* length == 2*/
+ if ((date!=nil) && (cmpnum(date,next->date)<0)){
+ error("Revision %s has date %s.",
+ next->num,
+ date2str(next->date, datebuf)
+ );
+ return nil;
+ }
+ if ((author!=nil)&&(strcmp(author,next->author)!=0)) {
+ error("Revision %s has author %s.",next->num,next->author);
+ return nil;
+ }
+ if ((state!=nil)&&(strcmp(state,next->state)!=0)) {
+ error("Revision %s has state %s.",next->num,
+ next->state==nil?"<empty>":next->state);
+ return nil;
+ }
+ *store=nil; /* terminate the list */
+ return next;
+ }
+
+ norev: /* every path that may have filled t (through partialno) ends up here */
+ bufautoend(&t);
+ return nil;
+}
+
+
+
+
+ static struct hshentry *
+genbranch(bpoint, revno, length, date, author, state, store)
+ struct hshentry const *bpoint;
+ char const *revno;
+ unsigned length;
+ char const *date, *author, *state;
+ struct hshentries **store;
+/* Function: given a branchpoint, a revision number, date, author, and state,
+ * genbranch finds the deltas necessary to reconstruct the given revision
+ * from the branch point on.
+ * Pointers to the found deltas are stored in a list beginning with store.
+ * revno must be on a side branch.
+ * length is the number of fields in revno.
+ * date, author and state may each be nil, meaning "do not restrict".
+ * A delta that has no state (its state pointer is nil) never satisfies a
+ * non-nil state restriction; it is never handed to strcmp().
+ * return nil on error
+ */
+{
+ unsigned field;
+ register struct hshentry * next, * trail;
+ register struct branchhead const *bhead;
+ int result;
+ struct buf t;
+ char datebuf[datesize];
+
+ /* field indexes the branch-number field of revno handled by the
+ * current pass; each pass of the outer loop descends one branch level.
+ */
+ field = 3;
+ bhead = bpoint->branches;
+
+ do {
+ if (!bhead) {
+ bufautobegin(&t);
+ error("no side branches present for %s", partialno(&t,revno,field-1));
+ bufautoend(&t);
+ return nil;
+ }
+
+ /*find branch head*/
+ /*branches are arranged in increasing order*/
+ while (0 < (result=cmpnumfld(revno,bhead->hsh->num,field))) {
+ bhead = bhead->nextbranch;
+ if (!bhead) {
+ bufautobegin(&t);
+ error("branch number %s too high",partialno(&t,revno,field));
+ bufautoend(&t);
+ return nil;
+ }
+ }
+
+ if (result<0) {
+ absent(revno, field);
+ return nil;
+ }
+
+ next = bhead->hsh;
+ if (length==field) {
+ /* pick latest one on that branch */
+ /* trail remembers the last delta that satisfies all restrictions */
+ trail=nil;
+ do { if ((date==nil?1:(cmpnum(date,next->date)>=0)) &&
+ (author==nil?1:(strcmp(author,next->author)==0)) &&
+ (state ==nil?1:(next->state!=nil && strcmp(state, next->state)==0))
+ ) trail = next;
+ next=next->next;
+ } while (next!=nil);
+
+ if (trail==nil) {
+ cantfindbranch(revno, date, author, state);
+ return nil;
+ } else { /* print up to last one suitable */
+ next = bhead->hsh;
+ while (next!=trail) {
+ store1(&store, next);
+ next=next->next;
+ }
+ store1(&store, next);
+ }
+ *store = nil;
+ return next;
+ }
+
+ /* length > field */
+ /* find revision */
+ /* check low */
+ if (cmpnumfld(revno,next->num,field+1)<0) {
+ bufautobegin(&t);
+ error("revision number %s too low", partialno(&t,revno,field+1));
+ bufautoend(&t);
+ return(nil);
+ }
+ /* store deltas along the branch until the next one would exceed revno */
+ do {
+ store1(&store, next);
+ trail = next;
+ next = next->next;
+ } while ((next!=nil) &&
+ (cmpnumfld(revno,next->num,field+1) >=0));
+
+ if ((length>field+1) && /*need exact hit */
+ (cmpnumfld(revno,trail->num,field+1) !=0)){
+ absent(revno, field+1);
+ return(nil);
+ }
+ if (length == field+1) {
+ /* trail is the target delta; it must satisfy the restrictions */
+ if ((date!=nil) && (cmpnum(date,trail->date)<0)){
+ error("Revision %s has date %s.",
+ trail->num,
+ date2str(trail->date, datebuf)
+ );
+ return nil;
+ }
+ if ((author!=nil)&&(strcmp(author,trail->author)!=0)) {
+ error("Revision %s has author %s.",trail->num,trail->author);
+ return nil;
+ }
+ /* a delta without a state cannot match; do not strcmp() a nil */
+ if ((state!=nil)&&(trail->state==nil || strcmp(state,trail->state)!=0)) {
+ error("Revision %s has state %s.",trail->num,
+ trail->state==nil?"<empty>":trail->state);
+ return nil;
+ }
+ }
+ bhead = trail->branches;
+
+ } while ((field+=2) <= length);
+ * store = nil;
+ return trail;
+}
+
+
+ static char const *
+lookupsym(id)
+    char const *id;
+/* Search the list of symbolic names headed by Symbols for an entry whose
+ * symbol equals ID.  Yield that entry's revision number, or nil if no
+ * entry matches.
+ */
+{
+    register struct assoc const *entry;
+
+    for (entry = Symbols;  entry != nil;  entry = entry->nextassoc)
+        if (strcmp(id, entry->symbol) == 0)
+            return entry->num;
+    return nil;
+}
+
+int expandsym(source, target)
+    char const *source;
+    struct buf *target;
+/* Expand the revision number SOURCE into TARGET by calling fexpandsym()
+ * with a null file pointer.  As coded in fexpandsym(): symbolic fields are
+ * replaced by their numeric values, leading zeros of numeric fields are
+ * removed, a branch number followed by a final `.' becomes the latest
+ * revision on that branch, and a final `.' after a revision number is
+ * reported as improper.
+ * Yield false on error, true otherwise.
+ */
+{
+    RILE *nofile = (RILE*)0;
+
+    return fexpandsym(source, target, nofile);
+}
+
+ int
+fexpandsym(source, target, fp)
+ char const *source;
+ struct buf *target;
+ RILE *fp;
+/* Same as expandsym, except if FP is nonzero, it is used to expand KDELIM.
+ * A nil or empty SOURCE yields an empty TARGET.  A SOURCE that is just the
+ * character KDELIM yields prevrev.string once getoldkeys(FP) has succeeded.
+ * Otherwise SOURCE is copied field by field: numeric fields lose their
+ * leading zeros and symbolic fields are replaced by lookupsym()'s result.
+ * A final `.' after an odd number of fields is expanded with branchtip();
+ * a final `.' after an even number of fields is reported as improper.
+ * Yields true on success, false after an error has been reported.
+ */
+{
+ register char const *sp, *bp;
+ register char *tp;
+ char const *tlim;
+ register enum tokens d;
+ unsigned dots; /* number of `.' separators copied so far */
+
+ sp = source;
+ bufalloc(target, 1); /* NOTE(review): presumably guarantees room for the '\0' stored below -- confirm */
+ tp = target->string;
+ if (!sp || !*sp) { /*accept nil pointer as a legal value*/
+ *tp='\0';
+ return true;
+ }
+ if (sp[0] == KDELIM && !sp[1]) { /* SOURCE is the single character KDELIM */
+ if (!getoldkeys(fp))
+ return false;
+ if (!*prevrev.string) {
+ error("working file lacks revision number");
+ return false;
+ }
+ bufscpy(target, prevrev.string);
+ return true;
+ }
+ tlim = tp + target->size; /* write limit; bufenlarge() updates it */
+ dots = 0;
+
+ for (;;) { /* one pass per field of SOURCE */
+ switch (ctab[(unsigned char)*sp]) {
+ case DIGIT:
+ while (*sp=='0' && isdigit(sp[1]))
+ /* skip leading zeroes */
+ sp++;
+ do {
+ if (tlim <= tp)
+ tp = bufenlarge(target, &tlim);
+ } while (isdigit(*tp++ = *sp++));
+ --sp; /* the loop also copied the first non-digit; step back to it */
+ tp[-1] = '\0'; /* and overwrite its copy with the terminator */
+ break;
+
+ case LETTER:
+ case Letter:
+ {
+ register char *p = tp;
+ register size_t s = tp - target->string; /* offset where the symbol starts */
+ do {
+ if (tlim <= p)
+ p = bufenlarge(target, &tlim);
+ *p++ = *sp++;
+ } while ((d=ctab[(unsigned char)*sp])==LETTER ||
+ d==Letter || d==DIGIT ||
+ (d==IDCHAR));
+ if (tlim <= p)
+ p = bufenlarge(target, &tlim);
+ *p = 0;
+ tp = target->string + s; /* recompute from the offset; target->string is re-read after any bufenlarge() */
+ }
+ bp = lookupsym(tp);
+ if (bp==nil) {
+ error("Symbolic number %s is undefined.", tp);
+ return false;
+ }
+ do { /* overwrite the symbol with its number, including the '\0' */
+ if (tlim <= tp)
+ tp = bufenlarge(target, &tlim);
+ } while ((*tp++ = *bp++));
+ break;
+
+ default:
+ goto improper;
+ }
+ switch (*sp++) { /* character that follows the field */
+ case '\0': return true;
+ case '.': break;
+ default: goto improper;
+ }
+ if (!*sp) { /* the `.' just consumed is the last character of SOURCE */
+ if (dots & 1) /* odd count of earlier dots: an even number of fields precedes */
+ goto improper;
+ if (!(bp = branchtip(target->string)))
+ return false;
+ bufscpy(target, bp);
+ return true;
+ }
+ ++dots;
+ tp[-1] = '.'; /* turn the field's terminator into the separator */
+ }
+
+ improper:
+ error("improper revision number: %s", source);
+ return false;
+}
+
+ static char const *
+branchtip(branch)
+    char const *branch;
+/* Yield the number of the delta that genrevs() selects for BRANCH when no
+ * date, author or state restriction is given, or a null pointer if
+ * genrevs() yields none.
+ */
+{
+    struct hshentries *deltas;
+    struct hshentry *tip;
+
+    tip = genrevs(branch, (char*)0, (char*)0, (char*)0, &deltas);
+    if (!tip)
+        return (char const*)0;
+    return tip->num;
+}
+
+ char const *
+tiprev()
+/* Yield branchtip(Dbranch) if a default branch is set; otherwise the
+ * number of Head if there is a head; otherwise a null pointer.
+ */
+{
+    if (Dbranch)
+        return branchtip(Dbranch);
+    if (Head)
+        return Head->num;
+    return (char const*)0;
+}
+
+
+
+#ifdef REVTEST
+
+char const cmdid[] = "revtest";
+
+ static int
+getinputline(buf, size)
+    char *buf;
+    int size;
+/* Read one line from stdin into BUF, which has room for SIZE bytes
+ * (SIZE must be positive).  The newline is not stored; whatever part of an
+ * overlong line does not fit is read and discarded.  BUF is always
+ * null-terminated on return.
+ * Yield false if nothing could be read (end of file or read error),
+ * true otherwise.
+ */
+{
+    register char *p;
+    register int c;
+
+    if (!fgets(buf, size, stdin)) {
+        *buf = '\0';
+        return false;
+    }
+    for (p = buf;  *p && *p != '\n';  p++)
+        ;
+    if (*p)
+        *p = '\0';
+    else
+        /* No newline was stored: drop the rest of the line, if any.  */
+        while ((c = getc(stdin)) != EOF  &&  c != '\n')
+            ;
+    return true;
+}
+
+ int
+main(argc,argv)
+int argc; char * argv[];
+/* Test driver: read the RCS file named by argv[1], then repeatedly prompt
+ * for a revision number, date, author and state, and list the deltas that
+ * genrevs() selects.  The loop ends at end of input or at a line that
+ * starts with '.'.
+ */
+{
+    static struct buf numricrevno;
+    char symrevno[100]; /* used for input of revision numbers */
+    char author[20];
+    char state[20];
+    char date[20];
+    struct hshentries *gendeltas;
+    struct hshentry * target;
+
+    if (argc<2) {
+        aputs("No input file\n",stderr);
+        exitmain(EXIT_FAILURE);
+    }
+    if (!(finptr=Iopen(argv[1], FOPEN_R, (struct stat*)0))) {
+        faterror("can't open input file %s", argv[1]);
+    }
+    Lexinit();
+    getadmin();
+
+    gettree();
+
+    getdesc(false);
+
+    do {
+        /* all output goes to stderr, to have diagnostics and */
+        /* errors in sequence. */
+        aputs("\nEnter revision number or <return> or '.': ",stderr);
+        /* Bounded reads replace gets(), which could overrun the buffers. */
+        if (!getinputline(symrevno, (int)sizeof(symrevno))) break;
+        if (*symrevno == '.') break;
+        aprintf(stderr,"%s;\n",symrevno);
+        expandsym(symrevno,&numricrevno);
+        aprintf(stderr,"expanded number: %s; ",numricrevno.string);
+        aprintf(stderr,"Date: ");
+        /* At end of input the remaining answers are left empty, */
+        /* which means "no restriction" below. */
+        getinputline(date, (int)sizeof(date)); aprintf(stderr,"%s; ",date);
+        aprintf(stderr,"Author: ");
+        getinputline(author, (int)sizeof(author)); aprintf(stderr,"%s; ",author);
+        aprintf(stderr,"State: ");
+        getinputline(state, (int)sizeof(state)); aprintf(stderr, "%s;\n", state);
+        target = genrevs(numricrevno.string, *date?date:(char *)nil, *author?author:(char *)nil,
+            *state?state:(char*)nil, &gendeltas);
+        if (target!=nil) {
+            while (gendeltas) {
+                aprintf(stderr,"%s\n",gendeltas->first->num);
+                gendeltas = gendeltas->next;
+            }
+        }
+    } while (true);
+    aprintf(stderr,"done\n");
+    exitmain(EXIT_SUCCESS);
+}
+
+exiting void
+exiterr()
+/* Terminate the revtest program at once with status EXIT_FAILURE. */
+{
+    _exit(EXIT_FAILURE);
+}
+
+#endif
diff --git a/gnu/usr.bin/rcs/lib/rcssyn.c b/gnu/usr.bin/rcs/lib/rcssyn.c
new file mode 100644
index 000000000000..31086c292f0f
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcssyn.c
@@ -0,0 +1,857 @@
+/*
+ * RCS file input
+ */
+/*********************************************************************************
+ * Syntax Analysis.
+ * Keyword table
+ * Testprogram: define SYNTEST
+ * Compatibility with Release 2: define COMPAT2=1
+ *********************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+/* $Log: rcssyn.c,v $
+ * Revision 5.8 1991/08/19 03:13:55 eggert
+ * Tune.
+ *
+ * Revision 5.7 1991/04/21 11:58:29 eggert
+ * Disambiguate names on shortname hosts.
+ * Fix errno bug. Add MS-DOS support.
+ *
+ * Revision 5.6 1991/02/28 19:18:51 eggert
+ * Fix null termination bug in reporting keyword expansion.
+ *
+ * Revision 5.5 1991/02/25 07:12:44 eggert
+ * Check diff output more carefully; avoid overflow.
+ *
+ * Revision 5.4 1990/11/01 05:28:48 eggert
+ * When ignoring unknown phrases, copy them to the output RCS file.
+ * Permit arbitrary data in logs and comment leaders.
+ * Don't check for nontext on initial checkin.
+ *
+ * Revision 5.3 1990/09/20 07:58:32 eggert
+ * Remove the test for non-text bytes; it caused more pain than it cured.
+ *
+ * Revision 5.2 1990/09/04 08:02:30 eggert
+ * Parse RCS files with no revisions.
+ * Don't strip leading white space from diff commands. Count RCS lines better.
+ *
+ * Revision 5.1 1990/08/29 07:14:06 eggert
+ * Add -kkvl. Clean old log messages too.
+ *
+ * Revision 5.0 1990/08/22 08:13:44 eggert
+ * Try to parse future RCS formats without barfing.
+ * Add -k. Don't require final newline.
+ * Remove compile-time limits; use malloc instead.
+ * Don't output branch keyword if there's no default branch,
+ * because RCS version 3 doesn't understand it.
+ * Tune. Remove lint.
+ * Add support for ISO 8859. Ansify and Posixate.
+ * Check that a newly checked-in file is acceptable as input to 'diff'.
+ * Check diff's output.
+ *
+ * Revision 4.6 89/05/01 15:13:32 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.5 88/08/09 19:13:21 eggert
+ * Allow cc -R; remove lint.
+ *
+ * Revision 4.4 87/12/18 11:46:16 narten
+ * more lint cleanups (Guy Harris)
+ *
+ * Revision 4.3 87/10/18 10:39:36 narten
+ * Updating version numbers. Changes relative to 1.1 actually relative to
+ * 4.1
+ *
+ * Revision 1.3 87/09/24 14:00:49 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:40 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/03/28 11:38:49 wft
+ * Added parsing and printing of default branch.
+ *
+ * Revision 3.6 83/01/15 17:46:50 wft
+ * Changed readdelta() to initialize selector and log-pointer.
+ * Changed puttree to check for selector==DELETE; putdtext() uses DELNUMFORM.
+ *
+ * Revision 3.5 82/12/08 21:58:58 wft
+ * renamed Commentleader to Commleader.
+ *
+ * Revision 3.4 82/12/04 13:24:40 wft
+ * Added routine gettree(), which updates keeplock after reading the
+ * delta tree.
+ *
+ * Revision 3.3 82/11/28 21:30:11 wft
+ * Reading and printing of Suffix removed; version COMPAT2 skips the
+ * Suffix for files of release 2 format. Fixed problems with printing nil.
+ *
+ * Revision 3.2 82/10/18 21:18:25 wft
+ * renamed putdeltatext to putdtext.
+ *
+ * Revision 3.1 82/10/11 19:45:11 wft
+ * made sure getc() returns into an integer.
+ */
+
+
+
+/* version COMPAT2 reads files of the format of release 2 and 3, but
+ * generates files of release 3 format. Need not be defined if no
+ * old RCS files generated with release 2 exist.
+ */
+/* version SYNTEST reads an RCS file and then prints out its internal
+ * data structures.
+ */
+
+#include "rcsbase.h"
+
+libId(synId, "$Id: rcssyn.c,v 5.8 1991/08/19 03:13:55 eggert Exp $")
+
+/* forward */
+static char const *getkeyval P((char const*,enum tokens,int));
+static int strn2expmode P((char const*,size_t));
+
+/* keyword table */
+
+char const
+ Kdesc[] = "desc",
+ Klog[] = "log",
+ Ktext[] = "text";
+
+static char const
+ Kaccess[] = "access",
+ Kauthor[] = "author",
+ Kbranch[] = "branch",
+ K_branches[]= "branches",
+ Kcomment[] = "comment",
+ Kdate[] = "date",
+ Kexpand[] = "expand",
+ Khead[] = "head",
+ Klocks[] = "locks",
+ Knext[] = "next",
+ Kstate[] = "state",
+ Kstrict[] = "strict",
+#if COMPAT2
+ Ksuffix[] = "suffix", /* read and discarded by getadmin() */
+#endif
+ Ksymbols[] = "symbols";
+
+static struct buf Commleader; /* buffer getadmin() hands to savestring() for the comment leader */
+static struct cbuf Ignored; /* result of getphrases(Kdesc) in getadmin(); putadmin() writes it back out */
+struct cbuf Comment; /* comment leader; size is 0 when getadmin() found none */
+struct access * AccessList; /* list built by getadmin() from the access phrase */
+struct assoc * Symbols; /* symbol:number pairs built by getadmin() from the symbols phrase */
+struct lock * Locks; /* login:delta pairs built by getadmin() from the locks phrase */
+int Expand; /* expand mode, an index into expand_names; getadmin() defaults it to KEYVAL_EXPAND */
+int StrictLocks; /* result of getkeyopt(Kstrict) in getadmin() */
+struct hshentry * Head; /* delta read by getadmin() after the head keyword; may be nil */
+char const * Dbranch; /* number read after the branch keyword, or nil */
+unsigned TotalDeltas; /* zeroed by getadmin(), incremented by each successful getdelta() */
+
+
+ static void
+getsemi(key)
+    char const *key;
+/* Consume the semicolon that must end the phrase started by KEY.
+ * If getlex(SEMI) does not find one, call fatserror() with a message
+ * that names KEY.
+ */
+{
+    if (getlex(SEMI))
+        return;
+    fatserror("missing ';' after '%s'", key);
+}
+
+ static struct hshentry *
+getdnum()
+/* Get a number with getnum() and insist that it is a delta number, i.e.
+ * that it has an even count of fields; otherwise call fatserror().
+ * Yield whatever getnum() yielded (possibly a null pointer).
+ */
+{
+    register struct hshentry *d;
+
+    d = getnum();
+    if (d  &&  (countnumflds(d->num) & 1) != 0)
+        fatserror("%s isn't a delta number", d->num);
+    return d;
+}
+
+
+ void
+getadmin()
+/* Read an <admin> and initialize the appropriate global variables.
+ * The phrases are read in this order: head; optional branch; (only if
+ * COMPAT2) optional suffix, which is discarded; access; symbols; locks;
+ * optional strict; optional comment; optional expand.
+ * Sets TotalDeltas, Head, Dbranch, AccessList, Symbols, Locks, StrictLocks,
+ * Comment, Expand and Ignored.  Malformed input is reported with fatserror().
+ */
+{
+ register char const *id;
+ struct access * newaccess;
+ struct assoc * newassoc;
+ struct lock * newlock;
+ struct hshentry * delta;
+ struct access **LastAccess; /* where to link the next access entry */
+ struct assoc **LastSymbol; /* where to link the next symbol entry */
+ struct lock **LastLock; /* where to link the next lock entry */
+ struct buf b;
+ struct cbuf cb;
+
+ TotalDeltas=0;
+
+ getkey(Khead);
+ Head = getdnum();
+ getsemi(Khead);
+
+ Dbranch = nil;
+ if (getkeyopt(Kbranch)) {
+ if ((delta = getnum())) /* getnum, not getdnum: no even-field-count check here */
+ Dbranch = delta->num;
+ getsemi(Kbranch);
+ }
+
+
+#if COMPAT2
+ /* read suffix. Only in release 2 format */
+ if (getkeyopt(Ksuffix)) {
+ if (nexttok==STRING) {
+ readstring(); nextlex(); /* Throw away the suffix. */
+ } else if (nexttok==ID) {
+ nextlex();
+ }
+ getsemi(Ksuffix);
+ }
+#endif
+
+ getkey(Kaccess);
+ LastAccess = &AccessList;
+ while (id=getid()) { /* one list entry per login id, in file order */
+ newaccess = ftalloc(struct access);
+ newaccess->login = id;
+ *LastAccess = newaccess;
+ LastAccess = &newaccess->nextaccess;
+ }
+ *LastAccess = nil;
+ getsemi(Kaccess);
+
+ getkey(Ksymbols);
+ LastSymbol = &Symbols;
+ while (id = getid()) { /* each entry has the form id:number */
+ if (!getlex(COLON))
+ fatserror("missing ':' in symbolic name definition");
+ if (!(delta=getnum())) {
+ fatserror("missing number in symbolic name definition");
+ } else { /*add new pair to association list*/
+ newassoc = ftalloc(struct assoc);
+ newassoc->symbol=id;
+ newassoc->num = delta->num;
+ *LastSymbol = newassoc;
+ LastSymbol = &newassoc->nextassoc;
+ }
+ }
+ *LastSymbol = nil;
+ getsemi(Ksymbols);
+
+ getkey(Klocks);
+ LastLock = &Locks;
+ while (id = getid()) { /* each entry has the form login:deltanumber */
+ if (!getlex(COLON))
+ fatserror("missing ':' in lock");
+ if (!(delta=getdnum())) {
+ fatserror("missing number in lock");
+ } else { /*add new pair to lock list*/
+ newlock = ftalloc(struct lock);
+ newlock->login=id;
+ newlock->delta=delta;
+ *LastLock = newlock;
+ LastLock = &newlock->nextlock;
+ }
+ }
+ *LastLock = nil;
+ getsemi(Klocks);
+
+ if ((StrictLocks = getkeyopt(Kstrict)))
+ getsemi(Kstrict);
+
+ Comment.size = 0; /* stays 0 unless a comment string follows */
+ if (getkeyopt(Kcomment)) {
+ if (nexttok==STRING) {
+ Comment = savestring(&Commleader);
+ nextlex();
+ }
+ getsemi(Kcomment);
+ }
+
+ Expand = KEYVAL_EXPAND; /* default when there is no expand phrase */
+ if (getkeyopt(Kexpand)) {
+ if (nexttok==STRING) {
+ bufautobegin(&b);
+ cb = savestring(&b);
+ if ((Expand = strn2expmode(cb.string,cb.size)) < 0)
+ fatserror("unknown expand mode %.*s",
+ (int)cb.size, cb.string
+ );
+ bufautoend(&b);
+ nextlex();
+ }
+ getsemi(Kexpand);
+ }
+ Ignored = getphrases(Kdesc); /* NOTE(review): presumably collects unrecognized phrases up to the desc keyword -- confirm against getphrases() */
+}
+
+char const *const expand_names[] = {
+ /* These must agree with *_EXPAND in rcsbase.h: strn2expmode() yields an
+ * index into this table as the expand mode, and putadmin() indexes the
+ * table with Expand.  The null pointer ends the table.
+ */
+ "kv","kvl","k","v","o",
+ 0
+};
+
+ int
+str2expmode(s)
+    char const *s;
+/* Map the null-terminated string S to an expand mode by handing S and
+ * its length to strn2expmode(); the result is -1 if S is bad.
+ */
+{
+    size_t len = strlen(s);
+
+    return strn2expmode(s, len);
+}
+
+ static int
+strn2expmode(s, n)
+    char const *s;
+    size_t n;
+/* Yield the expand mode -- the index in expand_names -- of the entry that
+ * is exactly the N bytes starting at S, or -1 if there is no such entry.
+ * S need not be null-terminated.
+ */
+{
+    char const *const *p;
+
+    for (p = expand_names;  *p;  ++p)
+        /* Check the length first so that memcmp() never reads past the
+         * end of a table entry that is shorter than N bytes.
+         */
+        if (strlen(*p) == n  &&  memcmp(*p, s, n) == 0)
+            return p - expand_names;
+    return -1;
+}
+
+
+ void
+ignorephrase()
+/* Skip a phrase introduced by a later version of RCS: after warnignore(),
+ * discard tokens up to and including the next semicolon.  hshenter is
+ * false while the tokens are skipped and is set back to true before the
+ * semicolon is consumed.
+ */
+{
+    warnignore();
+    hshenter = false;
+    while (nexttok != SEMI) {
+        if (nexttok == ID  ||  nexttok == NUM)
+            ffree1(NextString);
+        else if (nexttok == STRING)
+            readstring();
+        nextlex();
+    }
+    hshenter = true;
+    nextlex();
+}
+
+
+ static int
+getdelta()
+/* Read one delta block: its number, date, author, optional state, branch
+ * list and next pointer, followed by getphrases(Kdesc).
+ * Yield false, reading nothing further, if the block does not start
+ * with a number; otherwise count the delta in TotalDeltas and yield true.
+ */
+{
+    register struct hshentry *d, *b;
+    struct branchhead **tail, *bh;
+
+    d = getdnum();
+    if (!d)
+        return false;
+
+    /* Dates are numbers too, but must not be entered into the hashtable. */
+    hshenter = false;
+    d->date = getkeyval(Kdate, NUM, false);
+    hshenter = true;
+
+    d->author = getkeyval(Kauthor, ID, false);
+    d->state = getkeyval(Kstate, ID, true);
+
+    /* Append each branch head at the tail so the file order is kept. */
+    getkey(K_branches);
+    for (tail = &d->branches;  (b = getdnum());  tail = &bh->nextbranch) {
+        bh = ftalloc(struct branchhead);
+        bh->hsh = b;
+        *tail = bh;
+    }
+    *tail = nil;
+    getsemi(K_branches);
+
+    getkey(Knext);
+    d->next = getdnum();
+    getsemi(Knext);
+
+    d->lockedby = nil;
+    d->log.string = 0;
+    d->selector = true;
+    d->ig = getphrases(Kdesc);
+    ++TotalDeltas;
+    return true;
+}
+
+
+ void
+gettree()
+/* Read delta blocks with getdelta() until it yields false, then walk the
+ * Locks list and record each lock's login in the lockedby field of the
+ * delta it refers to.
+ */
+{
+    struct lock const *lk;
+
+    while (getdelta())
+        continue;
+    for (lk = Locks;  lk;  lk = lk->nextlock)
+        lk->delta->lockedby = lk->login;
+}
+
+
+ void
+getdesc(prdesc)
+    int prdesc;
+/* Read the descriptive text that follows the desc keyword.
+ * If PRDESC is set the string is echoed with printstring(); otherwise it
+ * is skipped with readstring().  nexttok is not advanced afterwards.
+ */
+{
+    getkeystring(Kdesc);
+    if (!prdesc) {
+        readstring();   /* skip string */
+        return;
+    }
+    printstring();      /* echo string */
+}
+
+
+
+
+
+
+ static char const *
+getkeyval(keyword, token, optional)
+    char const *keyword;
+    enum tokens token;
+    int optional;
+/* Read a phrase of the form
+ *      <keyword> <token> ;
+ * where the caller passes the expected token kind in TOKEN.
+ * Yield NextString as it was when the token was seen.  If the token is
+ * missing, yield nil when OPTIONAL is set and call fatserror() otherwise.
+ */
+{
+    register char const *text;
+
+    getkey(keyword);
+    if (nexttok != token) {
+        text = nil;
+        if (!optional)
+            fatserror("missing %s", keyword);
+    } else {
+        text = NextString;
+        nextlex();
+    }
+    getsemi(keyword);
+    return text;
+}
+
+
+
+
+ void
+putadmin(fout)
+register FILE * fout;
+/* Write the <admin> node to FOUT from the globals Head, Dbranch,
+ * AccessList, Symbols, Locks, StrictLocks, Comment, Expand and Ignored.
+ * The branch phrase is written only if Dbranch is set and RCSversion is
+ * at least VERSION(4); the comment and expand phrases only if Comment is
+ * nonempty and Expand differs from KEYVAL_EXPAND, respectively.
+ */
+{
+    struct access const *ac;
+    struct assoc const *sy;
+    struct lock const *lk;
+
+    aprintf(fout, "%s\t%s;\n", Khead, Head?Head->num:"");
+    if (Dbranch && VERSION(4)<=RCSversion)
+        aprintf(fout, "%s\t%s;\n", Kbranch, Dbranch);
+
+    aputs(Kaccess, fout);
+    for (ac = AccessList;  ac;  ac = ac->nextaccess)
+        aprintf(fout, "\n\t%s", ac->login);
+
+    aprintf(fout, ";\n%s", Ksymbols);
+    for (sy = Symbols;  sy;  sy = sy->nextassoc)
+        aprintf(fout, "\n\t%s:%s", sy->symbol, sy->num);
+
+    aprintf(fout, ";\n%s", Klocks);
+    for (lk = Locks;  lk;  lk = lk->nextlock)
+        aprintf(fout, "\n\t%s:%s", lk->login, lk->delta->num);
+
+    /* The strict keyword shares a line with the end of the locks phrase. */
+    if (StrictLocks)
+        aprintf(fout, "; %s", Kstrict);
+    aprintf(fout, ";\n");
+
+    if (Comment.size) {
+        aprintf(fout, "%s\t", Kcomment);
+        putstring(fout, true, Comment, false);
+        aprintf(fout, ";\n");
+    }
+    if (Expand != KEYVAL_EXPAND)
+        aprintf(fout, "%s\t%c%s%c;\n",
+            Kexpand, SDELIM, expand_names[Expand], SDELIM
+        );
+    awrite(Ignored.string, Ignored.size, fout);
+    aputc('\n', fout);
+}
+
+
+
+
+ static void
+putdelta(node,fout)
+register struct hshentry const *node;
+register FILE * fout;
+/* Write the <delta> node NODE to FOUT: number, date, author, state (empty
+ * if nil), branch list, next pointer (empty if nil) and the saved node->ig
+ * text.  Do nothing if NODE is nil.
+ */
+{
+    struct branchhead const *bh;
+
+    if (!node)
+        return;
+
+    aprintf(fout, "\n%s\n%s\t%s;\t%s %s;\t%s %s;\nbranches",
+        node->num,
+        Kdate, node->date,
+        Kauthor, node->author,
+        Kstate, node->state?node->state:""
+    );
+    for (bh = node->branches;  bh;  bh = bh->nextbranch)
+        aprintf(fout, "\n\t%s", bh->hsh->num);
+
+    aprintf(fout, ";\n%s\t%s;\n", Knext, node->next?node->next->num:"");
+    awrite(node->ig.string, node->ig.size, fout);
+}
+
+
+
+
+ void
+puttree(root,fout)
+struct hshentry const *root;
+register FILE * fout;
+/* Write the delta tree rooted at ROOT to FOUT in preorder: ROOT itself
+ * (only if its selector is set), then the chain reached through next,
+ * then each side branch in list order.
+ */
+{
+    struct branchhead const *bh;
+
+    if (!root)
+        return;
+
+    if (root->selector)
+        putdelta(root,fout);
+
+    puttree(root->next,fout);
+
+    for (bh = root->branches;  bh;  bh = bh->nextbranch)
+        puttree(bh->hsh,fout);
+}
+
+
+ static exiting void
+unexpected_EOF()
+/* Report with faterror() that the diff output ended too early. */
+{
+    faterror(
+        "unexpected EOF in diff output"
+    );
+}
+
+int putdtext(num,log,srcfilename,fout,diffmt)
+    char const *num, *srcfilename;
+    struct cbuf log;
+    FILE *fout;
+    int diffmt;
+/* Write a deltatext node to FOUT: open SRCFILENAME, pass it together with
+ * NUM, LOG and DIFFMT to putdftext(), and close it again.
+ * NUM is the delta number, LOG the log message, and the file holds the
+ * text; if DIFFMT is true the text is also checked to be diff -n output.
+ * Yield false, after eerror(SRCFILENAME), if the file cannot be opened;
+ * otherwise yield putdftext()'s result.
+ */
+{
+    RILE *src;
+    int ok;
+
+    src = Iopen(srcfilename, "r", (struct stat*)0);
+    if (!src) {
+        eerror(srcfilename);
+        return false;
+    }
+    ok = putdftext(num,log,src,fout,diffmt);
+    Ifclose(src);
+    return ok;
+}
+
+ void
+putstring(out, delim, s, log)
+    register FILE *out;
+    struct cbuf s;
+    int delim, log;
+/*
+ * Write the string S to OUT with every SDELIM in it doubled, followed by
+ * one closing SDELIM.  An opening SDELIM is written first if DELIM is set.
+ * If LOG is set, S is a log string: a newline is appended before the
+ * closing SDELIM when S is nonempty.
+ */
+{
+    register size_t i;
+
+    if (delim)
+        aputc(SDELIM, out);
+    for (i = 0;  i < s.size;  i++) {
+        if (s.string[i] == SDELIM)
+            aputc(SDELIM, out);
+        aputc(s.string[i], out);
+    }
+    if (log && s.size)
+        aputc('\n', out);
+    aputc(SDELIM, out);
+}
+
+ int
+putdftext(num,log,finfile,foutfile,diffmt)
+ char const *num;
+ struct cbuf log;
+ RILE *finfile;
+ FILE *foutfile;
+ int diffmt;
+/* like putdtext(), except the source file is already open.
+ * Writes the delta number and log, then the text between SDELIMs with
+ * every SDELIM in the text doubled.
+ * If diffmt is false the input is copied byte for byte up to end of file.
+ * If diffmt is true the input is read as a sequence of commands with
+ * getdiffcmd(), which also copies each command line to the output; after
+ * each command for which getdiffcmd() yields nonzero, dc.nlines lines of
+ * text are copied.  End of file inside those lines is accepted only in
+ * the last of them.
+ * Always returns true; input errors end the program through
+ * unexpected_EOF() or getdiffcmd().
+ */
+{
+ declarecache;
+ register FILE *fout;
+ register int c;
+ register RILE *fin;
+ int ed; /* result of getdiffcmd(); negative at end of input */
+ struct diffcmd dc;
+
+ fout = foutfile;
+ aprintf(fout,DELNUMFORM,num,Klog);
+ /* put log */
+ putstring(fout, true, log, true);
+ /* put text */
+ aprintf(fout, "\n%s\n%c", Ktext, SDELIM);
+ fin = finfile;
+ setupcache(fin);
+ if (!diffmt) {
+ /* Copy the file */
+ cache(fin);
+ for (;;) {
+ cachegeteof(c, break;); /* the second argument is run at end of file */
+ if (c==SDELIM) aputc(SDELIM,fout); /*double up SDELIM*/
+ aputc(c,fout);
+ }
+ } else {
+ initdiffcmd(&dc);
+ while (0 <= (ed = getdiffcmd(fin,false,fout,&dc)))
+ if (ed) { /* nonzero: the command is followed by dc.nlines lines of text */
+ cache(fin);
+ while (dc.nlines--)
+ do { /* copy one line, through its newline */
+ cachegeteof(c, { if (!dc.nlines) goto OK_EOF; unexpected_EOF(); }); /* EOF is tolerated only when no further lines are due */
+ if (c == SDELIM)
+ aputc(SDELIM,fout);
+ aputc(c,fout);
+ } while (c != '\n');
+ uncache(fin);
+ }
+ }
+ OK_EOF:
+ aprintf(fout, "%c\n", SDELIM); /* closing delimiter of the text string */
+ return true;
+}
+
+ void
+initdiffcmd(dc)
+    register struct diffcmd *dc;
+/* Prepare *DC for a fresh series of getdiffcmd() calls by zeroing the
+ * two fields that getdiffcmd() keeps up to date, adprev and dafter.
+ */
+{
+    dc->dafter = 0;
+    dc->adprev = 0;
+}
+
+ static exiting void
+badDiffOutput(buf)
+    char const *buf;
+/* Report with faterror() that BUF is not an acceptable diff output line. */
+{
+    faterror(
+        "bad diff output line: %s",
+        buf
+    );
+}
+
+ static exiting void
+diffLineNumberTooLarge(buf)
+    char const *buf;
+/* Report with faterror() that a number in the diff line BUF is too large. */
+{
+    faterror(
+        "diff line number too large: %s",
+        buf
+    );
+}
+
+ int
+getdiffcmd(finfile, delimiter, foutfile, dc)
+ RILE *finfile;
+ FILE *foutfile;
+ int delimiter;
+ struct diffcmd *dc;
+/* Get an editing command output by 'diff -n' from fin.
+ * The input is delimited by SDELIM if delimiter is set, EOF otherwise.
+ * Copy a clean version of the command to fout (if nonnull).
+ * Yield 0 for 'd', 1 for 'a', and -1 for EOF.
+ * Store the command's line number and length into dc->line1 and dc->nlines.
+ * Keep dc->adprev and dc->dafter up to date.
+ * A command line is the command letter, optional spaces, the line number,
+ * optional spaces, and a nonzero line count, with nothing after it.
+ * Malformed, overlong, overflowing or backward-moving commands end the
+ * program through faterror().
+ */
+{
+ register int c;
+ declarecache;
+ register FILE *fout;
+ register char *p;
+ register RILE *fin;
+ unsigned long line1, nlines, t;
+ char buf[BUFSIZ]; /* the command line, without its newline */
+
+ fin = finfile;
+ fout = foutfile;
+ setupcache(fin); cache(fin);
+ cachegeteof(c, { if (delimiter) unexpected_EOF(); return -1; } ); /* plain EOF ends the input only when it is not SDELIM-delimited */
+ if (delimiter) {
+ if (c==SDELIM) {
+ cacheget(c);
+ if (c==SDELIM) { /* a doubled SDELIM cannot start a command */
+ buf[0] = c;
+ buf[1] = 0;
+ badDiffOutput(buf);
+ }
+ uncache(fin);
+ nextc = c; /* hand the character after the delimiter back through nextc */
+ if (fout)
+ aprintf(fout, "%c%c", SDELIM, c);
+ return -1;
+ }
+ }
+ p = buf;
+ do { /* collect the command line up to, but not including, the newline */
+ if (buf+BUFSIZ-2 <= p) {
+ faterror("diff output command line too long");
+ }
+ *p++ = c;
+ cachegeteof(c, unexpected_EOF();) ;
+ } while (c != '\n');
+ uncache(fin);
+ if (delimiter)
+ ++rcsline;
+ *p = '\0';
+ for (p = buf+1; (c = *p++) == ' '; ) /* skip spaces after the command letter */
+ ;
+ line1 = 0;
+ while (isdigit(c)) { /* accumulate line1, checking for overflow */
+ t = line1 * 10;
+ if (
+ ULONG_MAX/10 < line1 ||
+ (line1 = t + (c - '0')) < t
+ )
+ diffLineNumberTooLarge(buf);
+ c = *p++;
+ }
+ while (c == ' ')
+ c = *p++;
+ nlines = 0;
+ while (isdigit(c)) { /* accumulate nlines the same way */
+ t = nlines * 10;
+ if (
+ ULONG_MAX/10 < nlines ||
+ (nlines = t + (c - '0')) < t
+ )
+ diffLineNumberTooLarge(buf);
+ c = *p++;
+ }
+ if (c || !nlines) { /* trailing junk, or a missing or zero line count */
+ badDiffOutput(buf);
+ }
+ if (line1+nlines < line1) /* the sum wrapped around */
+ diffLineNumberTooLarge(buf);
+ switch (buf[0]) {
+ case 'a':
+ if (line1 < dc->adprev) {
+ faterror("backward insertion in diff output: %s", buf);
+ }
+ dc->adprev = line1 + 1;
+ break;
+ case 'd':
+ if (line1 < dc->adprev || line1 < dc->dafter) {
+ faterror("backward deletion in diff output: %s", buf);
+ }
+ dc->adprev = line1;
+ dc->dafter = line1 + nlines;
+ break;
+ default:
+ badDiffOutput(buf);
+ }
+ if (fout) {
+ aprintf(fout, "%s\n", buf);
+ }
+ dc->line1 = line1;
+ dc->nlines = nlines;
+ return buf[0] == 'a';
+}
+
+
+
+#ifdef SYNTEST
+
+char const cmdid[] = "syntest";
+
+ int
+main(argc,argv)
+int argc; char * argv[];
+/* Test driver: read the RCS file named by argv[1] and write its admin
+ * node, delta tree and descriptive text to stdout; anything but end of
+ * file after the descriptive text is reported with fatserror().
+ */
+{
+    if (argc<2) {
+        aputs("No input file\n",stderr);
+        exitmain(EXIT_FAILURE);
+    }
+
+    finptr = Iopen(argv[1], FOPEN_R, (struct stat*)0);
+    if (!finptr)
+        faterror("can't open input file %s", argv[1]);
+
+    Lexinit();
+
+    getadmin();
+    putadmin(stdout);
+
+    gettree();
+    puttree(Head,stdout);
+
+    getdesc(true);
+
+    nextlex();
+    if (!eoflex())
+        fatserror("expecting EOF");
+
+    exitmain(EXIT_SUCCESS);
+}
+
+
+exiting void
+exiterr()
+/* Terminate the syntest program at once with status EXIT_FAILURE. */
+{
+    _exit(EXIT_FAILURE);
+}
+
+
+#endif
+
diff --git a/gnu/usr.bin/rcs/lib/rcsutil.c b/gnu/usr.bin/rcs/lib/rcsutil.c
new file mode 100644
index 000000000000..c523ccf1df84
--- /dev/null
+++ b/gnu/usr.bin/rcs/lib/rcsutil.c
@@ -0,0 +1,994 @@
+/*
+ * RCS utilities
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+
+/* $Log: rcsutil.c,v $
+ * Revision 5.10 1991/10/07 17:32:46 eggert
+ * Support piece tables even if !has_mmap.
+ *
+ * Revision 5.9 1991/08/19 03:13:55 eggert
+ * Add spawn() support. Explicate assumptions about getting invoker's name.
+ * Standardize user-visible dates. Tune.
+ *
+ * Revision 5.8 1991/04/21 11:58:30 eggert
+ * Plug setuid security hole.
+ *
+ * Revision 5.6 1991/02/26 17:48:39 eggert
+ * Fix setuid bug. Use fread, fwrite more portably.
+ * Support waitpid. Don't assume -1 is acceptable to W* macros.
+ * strsave -> str_save (DG/UX name clash)
+ *
+ * Revision 5.5 1990/12/04 05:18:49 eggert
+ * Don't output a blank line after a signal diagnostic.
+ * Use -I for prompts and -q for diagnostics.
+ *
+ * Revision 5.4 1990/11/01 05:03:53 eggert
+ * Remove unneeded setid check. Add awrite(), fremember().
+ *
+ * Revision 5.3 1990/10/06 00:16:45 eggert
+ * Don't fread F if feof(F).
+ *
+ * Revision 5.2 1990/09/04 08:02:31 eggert
+ * Store fread()'s result in an fread_type object.
+ *
+ * Revision 5.1 1990/08/29 07:14:07 eggert
+ * Declare getpwuid() more carefully.
+ *
+ * Revision 5.0 1990/08/22 08:13:46 eggert
+ * Add setuid support. Permit multiple locks per user.
+ * Remove compile-time limits; use malloc instead.
+ * Switch to GMT. Permit dates past 1999/12/31.
+ * Add -V. Remove snooping. Ansify and Posixate.
+ * Tune. Some USG hosts define NSIG but not sys_siglist.
+ * Don't run /bin/sh if it's hopeless.
+ * Don't leave garbage behind if the output is an empty pipe.
+ * Clean up after SIGXCPU or SIGXFSZ. Print name of signal that caused cleanup.
+ *
+ * Revision 4.6 89/05/01 15:13:40 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.5 88/11/08 16:01:02 narten
+ * corrected use of varargs routines
+ *
+ * Revision 4.4 88/08/09 19:13:24 eggert
+ * Check for memory exhaustion.
+ * Permit signal handlers to yield either 'void' or 'int'; fix oldSIGINT botch.
+ * Use execv(), not system(); yield exit status like diff(1)'s.
+ *
+ * Revision 4.3 87/10/18 10:40:22 narten
+ * Updating version numbers. Changes relative to 1.1 actually
+ * relative to 4.1
+ *
+ * Revision 1.3 87/09/24 14:01:01 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:43 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/05/10 15:53:13 wft
+ * Added getcaller() and findlock().
+ * Changed catchints() to check SIGINT for SIG_IGN before setting up the signal
+ * (needed for background jobs in older shells). Added restoreints().
+ * Removed printing of full RCS path from logcommand().
+ *
+ * Revision 3.8 83/02/15 15:41:49 wft
+ * Added routine fastcopy() to copy remainder of a file in blocks.
+ *
+ * Revision 3.7 82/12/24 15:25:19 wft
+ * added catchints(), ignoreints() for catching and ignoring interrupts;
+ * fixed catchsig().
+ *
+ * Revision 3.6 82/12/08 21:52:05 wft
+ * Using DATEFORM to format dates.
+ *
+ * Revision 3.5 82/12/04 18:20:49 wft
+ * Replaced SNOOPDIR with SNOOPFILE; changed addlock() to update
+ * lockedby-field.
+ *
+ * Revision 3.4 82/12/03 17:17:43 wft
+ * Added check to addlock() ensuring only one lock per person.
+ * Addlock also returns a pointer to the lock created. Deleted fancydate().
+ *
+ * Revision 3.3 82/11/27 12:24:37 wft
+ * moved rmsema(), trysema(), trydiraccess(), getfullRCSname() to rcsfnms.c.
+ * Introduced macro SNOOP so that snoop can be placed in directory other than
+ * TARGETDIR. Changed %02d to %.2d for compatibility reasons.
+ *
+ * Revision 3.2 82/10/18 21:15:11 wft
+ * added function getfullRCSname().
+ *
+ * Revision 3.1 82/10/13 16:17:37 wft
+ * Cleanup message is now suppressed in quiet mode.
+ */
+
+
+
+
+#include "rcsbase.h"
+
+libId(utilId, "$Id: rcsutil.c,v 5.10 1991/10/07 17:32:46 eggert Exp $")
+
+#if !has_memcmp
+ int
+memcmp(s1, s2, n)
+	void const *s1, *s2;
+	size_t n;
+/*
+ * Replacement memcmp() for hosts without one.
+ * Compare the first N bytes of S1 and S2 as unsigned chars; yield the
+ * difference of the first pair of bytes that differ, or 0 if none do.
+ */
+{
+	register unsigned char const *left = (unsigned char const*)s1;
+	register unsigned char const *right = (unsigned char const*)s2;
+	register size_t remaining;
+
+	for (remaining = n;  remaining;  remaining--) {
+		register int diff = *left++ - *right++;
+		if (diff)
+			return diff;
+	}
+	return 0;
+}
+#endif
+
+#if !has_memcpy
+ void *
+memcpy(s1, s2, n)
+	void *s1;
+	void const *s2;
+	size_t n;
+/*
+ * Replacement memcpy() for hosts without one.
+ * Copy N bytes from S2 to S1, lowest address first; yield S1.
+ */
+{
+	register char *dst = (char*)s1;
+	register char const *src = (char const*)s2;
+
+	for (;  n;  n--)
+		*dst++ = *src++;
+	return s1;
+}
+#endif
+
+#if lint
+ /* Defined only when compiling under lint. */
+ malloc_type lintalloc;
+#endif
+
+/*
+ * list of blocks allocated with ftestalloc()
+ * These blocks can be freed by ffree when we're done with the current file.
+ * We could put the free block inside struct alloclist, rather than a pointer
+ * to the free block, but that would be less portable.
+ */
+struct alloclist {
+ malloc_type alloc; /* the remembered block itself */
+ struct alloclist *nextalloc; /* next (older) entry, or null at the end */
+};
+/* Head of the list; fremember() pushes new entries at the front. */
+static struct alloclist *alloced;
+
+
+ static malloc_type
+okalloc(p)
+	malloc_type p;
+/* Yield P if the allocation that produced it succeeded; otherwise die. */
+{
+	if (p)
+		return p;
+	faterror("out of memory");
+	return p;
+}
+
+ malloc_type
+testalloc(size)
+	size_t size;
+/* Allocate SIZE bytes with malloc(); a failed allocation is fatal. */
+{
+	malloc_type block = malloc(size);
+
+	return okalloc(block);
+}
+
+ malloc_type
+testrealloc(ptr, size)
+	malloc_type ptr;
+	size_t size;
+/* Resize the block PTR to SIZE bytes with realloc(); failure is fatal. */
+{
+	malloc_type block = realloc(ptr, size);
+
+	return okalloc(block);
+}
+
+ malloc_type
+fremember(ptr)
+	malloc_type ptr;
+/*
+ * Push PTR onto the front of the 'alloced' list so that ffree() or
+ * ffree1() can release it later.  Yield PTR.
+ */
+{
+	register struct alloclist *node = talloc(struct alloclist);
+
+	node->alloc = ptr;
+	node->nextalloc = alloced;
+	alloced = node;
+	return ptr;
+}
+
+ malloc_type
+ftestalloc(size)
+	size_t size;
+/* Allocate SIZE bytes and record the block in 'alloced' for later freeing. */
+{
+	malloc_type block = testalloc(size);
+
+	return fremember(block);
+}
+
+ void
+ffree()
+/* Release every block recorded in 'alloced', leaving the list empty. */
+{
+	register struct alloclist *node;
+
+	while ((node = alloced)) {
+		/* Unlink first, so the node can be freed safely. */
+		alloced = node->nextalloc;
+		tfree(node->alloc);
+		tfree(node);
+	}
+}
+
+ void
+ffree1(f)
+	register char const *f;
+/*
+ * Free the block F, which was allocated by ftestalloc, and remove its
+ * entry from the 'alloced' list.
+ * If F is not on the list, die with a diagnostic instead of running off
+ * the end of the list and dereferencing a null pointer.
+ */
+{
+	register struct alloclist *p, **a = &alloced;
+
+	/* A points at the link that leads to the candidate entry P. */
+	while ((p = *a) && p->alloc != f)
+		a = &p->nextalloc;
+	if (!p)
+		faterror("ffree1: block was not allocated by ftestalloc");
+	*a = p->nextalloc;
+	tfree(p->alloc);
+	tfree(p);
+}
+
+ char *
+str_save(s)
+	char const *s;
+/* Yield a copy of S in storage that is never freed automatically. */
+{
+	size_t size = strlen(s) + 1;
+	char *copy = tnalloc(char, size);
+
+	return strcpy(copy, s);
+}
+
+ char *
+fstr_save(s)
+	char const *s;
+/* Yield a copy of S in storage that is released when we finish this file. */
+{
+	size_t size = strlen(s) + 1;
+	char *copy = ftnalloc(char, size);
+
+	return strcpy(copy, s);
+}
+
+ char *
+cgetenv(name)
+	char const *name;
+/*
+ * Like getenv(), but yield a private copy of the value,
+ * because getenv() can overwrite old results.
+ * Yield a null pointer if NAME is not set.
+ */
+{
+	register char *value = getenv(name);
+
+	if (!value)
+		return value;
+	return str_save(value);
+}
+
+ char const *
+getusername(suspicious)
+	int suspicious;
+/*
+ * Get the caller's login name.  Trust only getpwuid if SUSPICIOUS
+ * (and, where getlogin_is_secure, getlogin as well).
+ * The name is looked up once, checked with checksid(), and cached for
+ * all later calls.
+ */
+{
+	static char *name;
+
+	if (!name) {
+		if (
+		    /* Prefer getenv() unless suspicious; it's much faster. */
+#		    if getlogin_is_secure
+			    (suspicious
+			    ||
+				!(name = cgetenv("LOGNAME"))
+			    &&	!(name = cgetenv("USER")))
+			&&	!(name = getlogin())
+#		    else
+			suspicious
+			||
+				!(name = cgetenv("LOGNAME"))
+			    &&	!(name = cgetenv("USER"))
+			    &&	!(name = getlogin())
+#		    endif
+		) {
+#if has_getuid && has_getpwuid
+			struct passwd const *pw = getpwuid(ruid());
+			if (!pw)
+			    faterror("no password entry for userid %lu",
+				     (unsigned long)ruid()
+			    );
+			/*
+			 * Keep a private copy: the passwd entry may live in
+			 * static storage that a later passwd lookup
+			 * overwrites, and NAME is cached for the whole run.
+			 */
+			name = str_save(pw->pw_name);
+#else
+#if has_setuid
+			faterror("setuid not supported");
+#else
+			faterror("Who are you?  Please set LOGNAME.");
+#endif
+#endif
+		}
+		checksid(name);
+	}
+	return name;
+}
+
+
+
+
+#if has_signal
+
+/*
+ * Signal handling
+ *
+ * Standard C places too many restrictions on signal handlers.
+ * We obey as many of them as we can.
+ * Posix places fewer restrictions, and we are Posix-compatible here.
+ */
+
+/*
+ * holdlevel is incremented by ignoreints() and decremented by restoreints();
+ * while it is nonzero, catchsig() merely records the signal in heldsignal.
+ */
+static sig_atomic_t volatile heldsignal, holdlevel;
+
+ static signal_type
+catchsig(s)
+ int s;
+/*
+ * Handler for signal S.
+ * If signals are being held, remember S and return.
+ * Otherwise hold further signals, switch to the real user with setrid(),
+ * report the signal on stderr unless quietflag is set, and call exiterr().
+ */
+{
+ char const *sname;
+ char buf[BUFSIZ];
+
+#if sig_zaps_handler
+ /* If a signal arrives before we reset the signal handler, we lose. */
+ VOID signal(s, SIG_IGN);
+#endif
+ if (holdlevel) {
+ heldsignal = s;
+ return;
+ }
+ ignoreints();
+ setrid();
+ if (!quietflag) {
+ /* Find a name for the signal; fall back on its number below. */
+ sname = nil;
+#if has_sys_siglist && defined(NSIG)
+ if ((unsigned)s < NSIG) {
+# ifndef sys_siglist
+ extern char const *sys_siglist[];
+# endif
+ sname = sys_siglist[s];
+ }
+#else
+ switch (s) {
+#ifdef SIGHUP
+ case SIGHUP: sname = "Hangup"; break;
+#endif
+#ifdef SIGINT
+ case SIGINT: sname = "Interrupt"; break;
+#endif
+#ifdef SIGPIPE
+ case SIGPIPE: sname = "Broken pipe"; break;
+#endif
+#ifdef SIGQUIT
+ case SIGQUIT: sname = "Quit"; break;
+#endif
+#ifdef SIGTERM
+ case SIGTERM: sname = "Terminated"; break;
+#endif
+#ifdef SIGXCPU
+ case SIGXCPU: sname = "Cputime limit exceeded"; break;
+#endif
+#ifdef SIGXFSZ
+ case SIGXFSZ: sname = "Filesize limit exceeded"; break;
+#endif
+ }
+#endif
+ if (sname)
+ VOID sprintf(buf, "\nRCS: %s. Cleaning up.\n", sname);
+ else
+ VOID sprintf(buf, "\nRCS: Signal %d. Cleaning up.\n", s);
+ /* The message goes out with write(), not through stdio. */
+ VOID write(STDERR_FILENO, buf, strlen(buf));
+ }
+ exiterr();
+}
+
+ void
+ignoreints()
+/* Start (or nest) a region in which caught signals are only recorded. */
+{
+	holdlevel++;
+}
+
+ void
+restoreints()
+/*
+ * Leave one ignoreints() region.  On leaving the outermost one,
+ * act on any signal that was recorded in the meantime.
+ */
+{
+	if (--holdlevel)
+		return;
+	if (heldsignal)
+		VOID catchsig(heldsignal);
+}
+
+
+/*
+ * The signals for which setup_catchsig() installs catchsig().
+ * Each entry is present only if the host defines that signal.
+ */
+static int const sig[] = {
+#ifdef SIGHUP
+ SIGHUP,
+#endif
+#ifdef SIGINT
+ SIGINT,
+#endif
+#ifdef SIGPIPE
+ SIGPIPE,
+#endif
+#ifdef SIGQUIT
+ SIGQUIT,
+#endif
+#ifdef SIGTERM
+ SIGTERM,
+#endif
+#ifdef SIGXCPU
+ SIGXCPU,
+#endif
+#ifdef SIGXFSZ
+ SIGXFSZ,
+#endif
+};
+/* Number of entries in sig[]. */
+#define SIGS (sizeof(sig)/sizeof(*sig))
+
+
+#if has_sigaction
+
+ static void
+ check_sig(r)
+ int r;
+ {
+ if (r != 0)
+ efaterror("signal");
+ }
+
+ static void
+ setup_catchsig()
+ /*
+  * sigaction() variant: install catchsig() for every signal in sig[] that
+  * is not currently ignored, blocking all of sig[] while the handler runs.
+  */
+ {
+	register int i;
+	sigset_t blocked;
+	struct sigaction act;
+
+	/* Build the set of signals to block inside the handler. */
+	check_sig(sigemptyset(&blocked));
+	i = SIGS;
+	while (0 <= --i)
+		check_sig(sigaddset(&blocked, sig[i]));
+
+	i = SIGS;
+	while (0 <= --i) {
+		check_sig(sigaction(sig[i], (struct sigaction*)nil, &act));
+		if (act.sa_handler == SIG_IGN)
+			continue;
+		act.sa_handler = catchsig;
+		act.sa_mask = blocked;
+		check_sig(sigaction(sig[i], &act, (struct sigaction*)nil));
+	}
+ }
+
+#else
+#if has_sigblock
+
+ /*
+  * sigblock() variant: with every signal in sig[] blocked, install
+  * catchsig() for each of them; a signal that was being ignored is set
+  * back to SIG_IGN.  The previous signal mask is restored afterwards.
+  */
+ static void
+ setup_catchsig()
+ {
+ register int i;
+ int mask;
+
+ mask = 0;
+ for (i=SIGS; 0<=--i; )
+ mask |= sigmask(sig[i]);
+ /* sigblock() yields the old mask, which is restored at the end. */
+ mask = sigblock(mask);
+ for (i=SIGS; 0<=--i; )
+ if (
+ signal(sig[i], catchsig) == SIG_IGN &&
+ signal(sig[i], SIG_IGN) != catchsig
+ )
+ faterror("signal catcher failure");
+ VOID sigsetmask(mask);
+ }
+
+#else
+
+ static void
+ setup_catchsig()
+ /*
+  * Plain signal() variant: set each signal in sig[] to SIG_IGN, and
+  * install catchsig() for those that were not already being ignored.
+  */
+ {
+	register int i;	/* explicit type; implicit int is not valid since C99 */
+
+	for (i=SIGS;  0<=--i;  )
+		if (
+		    signal(sig[i], SIG_IGN) != SIG_IGN  &&
+		    signal(sig[i], catchsig) != SIG_IGN
+		)
+			faterror("signal catcher failure");
+ }
+
+#endif
+#endif
+
+ void
+catchints()
+/* Install the signal catchers; only the first call does any work. */
+{
+	static int catching_ints;
+
+	if (catching_ints)
+		return;
+	catching_ints = true;
+	setup_catchsig();
+}
+
+#endif /* has_signal */
+
+
+ void
+fastcopy(inf,outf)
+ register RILE *inf;
+ FILE *outf;
+/* Function: copies the remainder of file inf to outf.
+ * The implementation depends on large_memory and has_mmap; all output
+ * goes through awrite().
+ */
+{
+#if large_memory
+# if has_mmap
+ /* Everything from ptr to lim is written in one call. */
+ awrite((char const*)inf->ptr, (size_t)(inf->lim - inf->ptr), outf);
+ inf->ptr = inf->lim;
+# else
+ /* Write what has been read so far (up to readlim), then fetch more. */
+ for (;;) {
+ awrite((char const*)inf->ptr, (size_t)(inf->readlim - inf->ptr), outf);
+ inf->ptr = inf->readlim;
+ if (inf->ptr == inf->lim)
+ break;
+ VOID Igetmore(inf);
+ }
+# endif
+#else
+ char buf[BUFSIZ*8];
+ register fread_type rcount;
+
+ /*now read the rest of the file in blocks*/
+ while (!feof(inf)) {
+ /* A zero count ends the copy; testIerror() checks for a read error. */
+ if (!(rcount = Fread(buf,sizeof(*buf),sizeof(buf),inf))) {
+ testIerror(inf);
+ return;
+ }
+ awrite(buf, (size_t)rcount, outf);
+ }
+#endif
+}
+
+#ifndef SSIZE_MAX
+ /* This does not work in #ifs, but it's good enough for us. */
+ /* Underestimating SSIZE_MAX may slow us down, but it won't break us. */
+# define SSIZE_MAX ((unsigned)-1 >> 1)
+#endif
+
+ void
+awrite(buf, chars, f)
+	char const *buf;
+	size_t chars;
+	FILE *f;
+/*
+ * Write CHARS bytes from BUF to F, never passing more than SSIZE_MAX
+ * bytes to a single Fwrite().  A short write is reported with Oerror().
+ */
+{
+	/* Posix 1003.1-1990 ssize_t hack */
+	for (;  SSIZE_MAX < chars;  buf += SSIZE_MAX, chars -= SSIZE_MAX)
+		if (Fwrite(buf, sizeof(*buf), SSIZE_MAX, f) != SSIZE_MAX)
+			Oerror();
+
+	/* The remainder now fits in one call. */
+	if (Fwrite(buf, sizeof(*buf), chars, f) != chars)
+		Oerror();
+}
+
+
+
+
+
+ static int
+movefd(old, new)
+	int old, new;
+/*
+ * Duplicate descriptor OLD onto NEW (with F_DUPFD if available, else
+ * dup2) and close OLD.  Yield the duplicate, or -1 if closing OLD fails.
+ * If OLD is negative or already equals NEW, yield OLD unchanged.
+ */
+{
+	int dupfd;
+
+	if (old < 0  ||  old == new)
+		return old;
+#	ifdef F_DUPFD
+		dupfd = fcntl(old, F_DUPFD, new);
+#	else
+		dupfd = dup2(old, new);
+#	endif
+	if (close(old) != 0)
+		return -1;
+	return dupfd;
+}
+
+ static int
+fdreopen(fd, file, flags)
+ int fd;
+ char const *file;
+ int flags;
+/*
+ * Close FD, open FILE with FLAGS (creation mode S_IRUSR|S_IWUSR), and move
+ * the resulting descriptor to FD with movefd().  Yield movefd()'s result;
+ * callers compare it against FD to detect failure.
+ */
+{
+ int newfd;
+ VOID close(fd);
+ newfd =
+#if !open_can_creat
+ /* Where open() cannot create files, use creat() when O_CREAT is asked for. */
+ flags&O_CREAT ? creat(file, S_IRUSR|S_IWUSR) :
+#endif
+ open(file, flags, S_IRUSR|S_IWUSR);
+ return movefd(newfd, fd);
+}
+
+#if !has_spawn
+ static void
+tryopen(fd,file,flags)
+	int fd, flags;
+	char const *file;
+/* If FILE is nonnil, reopen descriptor FD onto FILE with FLAGS, or die. */
+{
+	if (!file)
+		return;
+	if (fdreopen(fd,file,flags) != fd)
+		efaterror(file);
+}
+#else
+ static int
+tryopen(fd,file,flags)
+	int fd, flags;
+	char const *file;
+/*
+ * If FILE is nonnil, save a duplicate of FD and reopen FD onto FILE with
+ * FLAGS, or die.  Yield the saved duplicate, or -1 if FILE is nil.
+ */
+{
+	int newfd = -1;
+
+	if (file) {
+		newfd = dup(fd);
+		if (newfd < 0  ||  fdreopen(fd,file,flags) != fd)
+			efaterror(file);
+	}
+	return newfd;
+}
+ static void
+redirect(old, new)
+	int old, new;
+/*
+ * If OLD is a valid descriptor, close NEW and move OLD onto it, or die.
+ * Do nothing if OLD is negative.
+ */
+{
+	if (old < 0)
+		return;
+	if (close(new) != 0  ||  movefd(old,new) < 0)
+		efaterror("spawn I/O redirection");
+}
+#endif
+
+
+
+#if !has_fork && !has_spawn
+ static void
+bufargcat(b, c, s)
+ register struct buf *b;
+ int c;
+ register char const *s;
+/* Append to B a copy of C, plus a quoted copy of S. */
+/* S is wrapped in single quotes; each ' inside S is written as '\''. */
+{
+ register char *p;
+ register char const *t;
+ size_t bl, sl;
+
+ /* Size of the quoted body: 4 bytes per single quote, 1 per other char. */
+ for (t=s, sl=0; *t; )
+ sl += 3*(*t++=='\'') + 1;
+ bl = strlen(b->string);
+ /* The extra 4 bytes hold C, the two enclosing quotes, and the final NUL. */
+ bufrealloc(b, bl + sl + 4);
+ p = b->string + bl;
+ *p++ = c;
+ *p++ = '\'';
+ while (*s) {
+ if (*s == '\'') {
+ /* Close the quoting and emit \ ; the copy below reopens it with the ' itself. */
+ *p++ = '\'';
+ *p++ = '\\';
+ *p++ = '\'';
+ }
+ *p++ = *s++;
+ }
+ *p++ = '\'';
+ *p = 0;
+}
+#endif
+
+/*
+* Run a command specified by the strings in 'inoutargs'.
+* inoutargs[0], if nonnil, is the name of the input file.
+* inoutargs[1], if nonnil, is the name of the output file.
+* inoutargs[2..] form the command to be run.
+*/
+ int
+runv(inoutargs)
+ char const **inoutargs;
+/*
+ * Flush our own output, run the command, and wait for it.
+ * Yield the command's exit status; die if it did not exit normally.
+ * One of three implementations is compiled in: spawn, fork/exec, or system().
+ */
+{
+ register char const **p;
+ int wstatus;
+
+ oflush();
+ eflush();
+ {
+#if has_spawn
+ int in, out;
+ p = inoutargs;
+ /* Redirect our own stdin/stdout, keeping duplicates to restore below. */
+ in = tryopen(STDIN_FILENO, *p++, O_BINARY|O_RDONLY);
+ out = tryopen(STDOUT_FILENO, *p++, O_BINARY|O_CREAT|O_TRUNC|O_WRONLY);
+ wstatus = spawn_RCS(0, *p, (char*const*)p);
+ if (wstatus == -1 && errno == ENOEXEC) {
+ /* Retry through the shell; inoutargs[1] is reused as argv[0]. */
+ *--p = RCS_SHELL;
+ wstatus = spawnv(0, *p, (char*const*)p);
+ }
+ redirect(in, STDIN_FILENO);
+ redirect(out, STDOUT_FILENO);
+#else
+#if has_fork
+ pid_t pid;
+# if !has_waitpid
+ pid_t w;
+# endif
+ if (!(pid = vfork())) {
+ /* Child: redirect stdin/stdout, then exec the command. */
+ p = inoutargs;
+ tryopen(STDIN_FILENO, *p++, O_BINARY|O_RDONLY);
+ tryopen(STDOUT_FILENO, *p++, O_BINARY|O_CREAT|O_TRUNC|O_WRONLY);
+ VOID exec_RCS(*p, (char*const*)p);
+ if (errno == ENOEXEC) {
+ /* Retry through the shell; inoutargs[1] is reused as argv[0]. */
+ *--p = RCS_SHELL;
+ VOID execv(*p, (char*const*)p);
+ }
+ /* Reached only if exec failed: report with write() and _exit(). */
+ VOID write(STDERR_FILENO, *p, strlen(*p));
+ VOID write(STDERR_FILENO, ": not found\n", 12);
+ _exit(EXIT_TROUBLE);
+ }
+ if (pid < 0)
+ efaterror("fork");
+# if has_waitpid
+ if (waitpid(pid, &wstatus, 0) < 0)
+ efaterror("waitpid");
+# else
+ /* Without waitpid(), keep waiting until our own child is reported. */
+ do {
+ if ((w = wait(&wstatus)) < 0)
+ efaterror("wait");
+ } while (w != pid);
+# endif
+#else
+ static struct buf b;
+
+ /* Use system(). On many hosts system() discards signals. Yuck! */
+ /* The command name is copied as is; the arguments and file names are quoted. */
+ p = inoutargs+2;
+ bufscpy(&b, *p);
+ while (*++p)
+ bufargcat(&b, ' ', *p);
+ if (inoutargs[0])
+ bufargcat(&b, '<', inoutargs[0]);
+ if (inoutargs[1])
+ bufargcat(&b, '>', inoutargs[1]);
+ wstatus = system(b.string);
+#endif
+#endif
+ }
+ if (!WIFEXITED(wstatus))
+ faterror("%s failed", inoutargs[2]);
+ return WEXITSTATUS(wstatus);
+}
+
+#define CARGSMAX 20
+/*
+* Run a command.
+* The first two arguments are the input and output files (if nonnil);
+* the rest specify the command and its arguments.
+*/
+ int
+#if has_prototypes
+run(char const *infile, char const *outfile, ...)
+#else
+	/*VARARGS2*/
+run(infile, outfile, va_alist)
+	char const *infile;
+	char const *outfile;
+	va_dcl
+#endif
+/*
+ * Collect INFILE, OUTFILE and the nil-terminated variable arguments into
+ * an argument vector and hand it to runv(); yield runv()'s result.
+ * At most CARGSMAX slots are available, counting the two file names and
+ * the terminating nil; more is a fatal error.
+ */
+{
+	va_list ap;
+	char const *rgargs[CARGSMAX];
+	register int i;	/* explicit type; implicit int is not valid since C99 */
+
+	rgargs[0] = infile;
+	rgargs[1] = outfile;
+	vararg_start(ap, outfile);
+	/*
+	 * I is bumped as each argument is stored, so the check fires as soon
+	 * as the last slot receives a non-nil argument, before any overrun.
+	 */
+	for (i = 2;  (rgargs[i++] = va_arg(ap, char const*));  )
+		if (CARGSMAX <= i)
+			faterror("too many command arguments");
+	va_end(ap);
+	return runv(rgargs);
+}
+
+
+ char const *
+date2str(date, datebuf)
+ char const date[datesize];
+ char datebuf[datesize];
+/*
+* Format a user-readable form of the RCS format DATE into the buffer DATEBUF.
+* Yield DATEBUF.
+* DATE is read as dot-separated fields: a year, then two-character month,
+* day, hour and minute fields, then the seconds.
+* NOTE(review): the scan below assumes DATE contains a '.' - confirm callers.
+*/
+{
+ register char const *p = date;
+
+ /* Leave P just past the first '.', i.e. at the month field. */
+ while (*p++ != '.')
+ ;
+ /*
+ * The leading "19" of the format is used only when the year field has
+ * two characters and RCSversion is at least VERSION(5); otherwise the
+ * format pointer is advanced by 2 to skip it.
+ */
+ VOID sprintf(datebuf,
+ "19%.*s/%.2s/%.2s %.2s:%.2s:%s" +
+ (date[2]=='.' && VERSION(5)<=RCSversion ? 0 : 2),
+ (int)(p-date-1), date,
+ p, p+3, p+6, p+9, p+12
+ );
+ return datebuf;
+}
+
+
+int RCSversion;
+
+ void
+setRCSversion(str)
+	char const *str;
+/*
+ * Set RCSversion from the option string STR, whose version digits (if any)
+ * start at STR+2.  With no digits, use VERSION_DEFAULT.
+ * Complain via redefined('V') if called more than once.
+ * Die if the text is not a number or lies outside VERSION_min..VERSION_max.
+ */
+{
+	static int oldversion;
+
+	register char const *s = str + 2;
+	int v = VERSION_DEFAULT;
+
+	if (oldversion)
+		redefined('V');
+	oldversion = true;
+
+	if (*s) {
+		v = 0;
+		/* isdigit() is undefined for negative char values. */
+		while (isdigit((unsigned char)*s)) {
+			/*
+			 * Stop accumulating once V is already too large, so
+			 * that a long digit string cannot overflow int; the
+			 * range check below still rejects the value.
+			 */
+			if (v <= VERSION_max)
+				v = 10*v + (*s - '0');
+			s++;
+		}
+		if (*s)
+			faterror("%s isn't a number", str);
+		if (v < VERSION_min  ||  VERSION_max < v)
+			faterror("%s out of range %d..%d", str, VERSION_min, VERSION_max);
+	}
+
+	RCSversion = VERSION(v);
+}
+
+ int
+getRCSINIT(argc, argv, newargv)
+ int argc;
+ char **argv, ***newargv;
+/*
+ * Build the effective argument vector in *NEWARGV and yield its count.
+ * If RCSINIT is unset, *NEWARGV is ARGV itself and ARGC is unchanged.
+ * Otherwise the new vector holds the program name, then the words of
+ * RCSINIT, then the rest of ARGV including its terminating null.
+ * Words are separated by space, \b, \f, \n, \r, \t or \v; a backslash
+ * makes the next character part of the word.
+ */
+{
+ register char *p, *q, **pp;
+ unsigned n;
+
+ if (!(q = cgetenv("RCSINIT")))
+ *newargv = argv;
+ else {
+ n = argc + 2;
+ /*
+ * Count spaces in RCSINIT to allocate a new arg vector.
+ * This is an upper bound, but it's OK even if too large.
+ */
+ for (p = q; ; ) {
+ switch (*p++) {
+ default:
+ continue;
+
+ case ' ':
+ case '\b': case '\f': case '\n':
+ case '\r': case '\t': case '\v':
+ n++;
+ continue;
+
+ case '\0':
+ break;
+ }
+ break;
+ }
+ *newargv = pp = tnalloc(char*, n);
+ *pp++ = *argv++; /* copy program name */
+ /* Q reads the copy of RCSINIT; P rewrites the words in place in it. */
+ for (p = q; ; ) {
+ /* Skip separators before the next word. */
+ for (;;) {
+ switch (*q) {
+ case '\0':
+ goto copyrest;
+
+ case ' ':
+ case '\b': case '\f': case '\n':
+ case '\r': case '\t': case '\v':
+ q++;
+ continue;
+ }
+ break;
+ }
+ *pp++ = p;
+ ++argc;
+ /* Copy one word, resolving backslash escapes as we go. */
+ for (;;) {
+ switch ((*p++ = *q++)) {
+ case '\0':
+ goto copyrest;
+
+ case '\\':
+ if (!*q)
+ goto copyrest;
+ /* Overwrite the backslash with the escaped character. */
+ p[-1] = *q++;
+ continue;
+
+ default:
+ continue;
+
+ case ' ':
+ case '\b': case '\f': case '\n':
+ case '\r': case '\t': case '\v':
+ break;
+ }
+ break;
+ }
+ /* Replace the separator just copied with the word's terminator. */
+ p[-1] = '\0';
+ }
+ copyrest:
+ /* Append the remaining original arguments and their null terminator. */
+ while ((*pp++ = *argv++))
+ ;
+ }
+ return argc;
+}
+
+
+/*
+ * Function body that evaluates E on the first call only,
+ * and yields that same cached value on every call.
+ */
+#define cacheid(E) static uid_t i; static int s; if (!s){ s=1; i=(E); } return i
+
+#if has_getuid
+ /* Real user id, as reported by the first getuid() call. */
+ uid_t ruid() { cacheid(getuid()); }
+#endif
+#if has_setuid
+ /* Effective user id, as reported by the first geteuid() call. */
+ uid_t euid() { cacheid(geteuid()); }
+#endif
+
+
+#if has_setuid
+
+/*
+ * Setuid execution really works only with Posix 1003.1a Draft 5 seteuid(),
+ * because it lets us switch back and forth between arbitrary users.
+ * If seteuid() doesn't work, we fall back on setuid(),
+ * which works if saved setuid is supported,
+ * unless the real or effective user is root.
+ * This area is such a mess that we always check switches at runtime.
+ */
+
+ static void
+set_uid_to(u)
+ uid_t u;
+/* Become user u. */
+/* Nothing is done when the cached effective and real users are the same. */
+{
+ static int looping;
+
+ if (euid() == ruid())
+ return;
+#if (has_fork||has_spawn) && DIFF_ABSOLUTE
+ if (seteuid(u) != 0)
+ efaterror("setuid");
+#endif
+ if (geteuid() != u) {
+ /*
+ * LOOPING makes a second failing call return quietly
+ * (presumably one made while faterror() is cleaning up).
+ */
+ if (looping)
+ return;
+ looping = true;
+ /* For nonzero U the leading "root " is skipped: "setuid not supported". */
+ faterror("root setuid not supported" + (u?5:0));
+ }
+}
+
+/* Nonzero once nosetid() has been called; seteid() and setrid() then do nothing. */
+static int stick_with_euid;
+
+ void
+/* Ignore all calls to seteid() and setrid(). */
+nosetid()
+{
+ stick_with_euid = true;
+}
+
+ void
+seteid()
+/* Switch to the effective user, unless nosetid() has been called. */
+{
+	if (stick_with_euid)
+		return;
+	set_uid_to(euid());
+}
+
+ void
+setrid()
+/* Switch to the real user, unless nosetid() has been called. */
+{
+	if (stick_with_euid)
+		return;
+	set_uid_to(ruid());
+}
+#endif
diff --git a/gnu/usr.bin/rcs/merge/Makefile b/gnu/usr.bin/rcs/merge/Makefile
new file mode 100644
index 000000000000..d14afb288570
--- /dev/null
+++ b/gnu/usr.bin/rcs/merge/Makefile
@@ -0,0 +1,7 @@
+# Build the merge program from merge.c.
+PROG= merge
+
+SRCS= merge.c
+# Link against librcs from ../lib/obj; headers come from ../lib.
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/merge/merge.1 b/gnu/usr.bin/rcs/merge/merge.1
new file mode 100644
index 000000000000..8b1957fca099
--- /dev/null
+++ b/gnu/usr.bin/rcs/merge/merge.1
@@ -0,0 +1,102 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: merge.1,v 5.3 1991/02/28 19:18:45 eggert Exp $
+.TH MERGE 1 \*(Dt GNU
+.SH NAME
+merge \- three-way file merge
+.SH SYNOPSIS
+.B merge
+[
+.B \-L
+.I label1
+[
+.B \-L
+.I label3
+] ] [
+.B \-p
+] [
+.B \-q
+]
+.I "file1 file2 file3"
+.SH DESCRIPTION
+.B merge
+incorporates all changes that lead from
+.I file2
+to
+.I file3
+into
+.IR file1 .
+The result goes to standard output if
+.B \-p
+is present, into
+.I file1
+otherwise.
+.B merge
+is useful for combining separate changes to an original. Suppose
+.I file2
+is the original, and both
+.I file1
+and
+.I file3
+are modifications of
+.IR file2 .
+Then
+.B merge
+combines both changes.
+.PP
+An overlap occurs if both
+.I file1
+and
+.I file3
+have changes in a common segment of lines.
+On a few older hosts where
+.B diff3
+does not support the
+.B \-E
+option,
+.B merge
+does not detect overlaps, and merely supplies the changed lines from
+.IR file3 .
+On most hosts, if overlaps occur,
+.B merge
+outputs a message (unless the
+.B \-q
+option is given),
+and includes both alternatives
+in the result. The alternatives are delimited as follows:
+.LP
+.RS
+.nf
+.BI <<<<<<< " file1"
+.I "lines in file1"
+.B "======="
+.I "lines in file3"
+.BI >>>>>>> " file3"
+.RE
+.fi
+.LP
+If there are overlaps, the user should edit the result and delete one of the
+alternatives.
+If the
+.BI \-L "\ label1"
+and
+.BI \-L "\ label3"
+options are given, the labels are output in place of the names
+.I file1
+and
+.I file3
+in overlap reports.
+.SH DIAGNOSTICS
+Exit status is 0 for no overlaps, 1 for some overlaps, 2 for trouble.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH SEE ALSO
+diff3(1), diff(1), rcsmerge(1), co(1).
diff --git a/gnu/usr.bin/rcs/merge/merge.c b/gnu/usr.bin/rcs/merge/merge.c
new file mode 100644
index 000000000000..4067c184a255
--- /dev/null
+++ b/gnu/usr.bin/rcs/merge/merge.c
@@ -0,0 +1,97 @@
+/* merge - three-way file merge */
+
+/* Copyright 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+#include "rcsbase.h"
+
+
+static char const usage[] =
+ "\nmerge: usage: merge [-p] [-q] [-L label1 [-L label3]] file1 file2 file3\n";
+
+ static exiting void
+badoption(a)
+ char const *a;
+/*
+ * Die with an "unknown option" message followed by the usage text.
+ * A points two characters past the start of the option word (after the
+ * '-' and the option letter), hence the a-2 to print the whole word.
+ */
+{
+ faterror("unknown option: %s%s", a-2, usage);
+}
+
+
+/*
+ * merge main program: parse -p, -q and up to two -L label options, require
+ * exactly three file operands, and exit with the result of merge().
+ */
+mainProg(mergeId, "merge", "$Id: merge.c,v 1.2 1991/08/19 03:13:55 eggert Exp $")
+{
+ register char const *a;
+ char const *label[2], *arg[3];
+ int labels, tostdout;
+
+ labels = 0;
+ tostdout = false;
+
+ /* Consume leading option words; argc is reduced for each word consumed. */
+ while ((a = *++argv) && *a++ == '-') {
+ switch (*a++) {
+ case 'p': tostdout = true; break;
+ case 'q': quietflag = true; break;
+ case 'L':
+ if (1<labels)
+ faterror("too many -L options");
+ /* The label is the following argument word. */
+ if (!(label[labels++] = *++argv))
+ faterror("-L needs following argument");
+ --argc;
+ break;
+ default:
+ badoption(a);
+ }
+ /* Each option must be a single letter with nothing after it. */
+ if (*a)
+ badoption(a);
+ --argc;
+ }
+
+ /* What remains must be the program name plus exactly three files. */
+ if (argc != 4)
+ faterror("%s arguments%s",
+ argc<4 ? "not enough" : "too many", usage
+ );
+
+ /* This copy keeps us `const'-clean. */
+ arg[0] = argv[0];
+ arg[1] = argv[1];
+ arg[2] = argv[2];
+
+ /* Labels not given with -L default to the names of file1 and file3. */
+ switch (labels) {
+ case 0: label[0] = arg[0]; /* fall into */
+ case 1: label[1] = arg[2];
+ }
+
+ exitmain(merge(tostdout, label, arg));
+}
+
+
+#if lint
+# define exiterr mergeExit
+#endif
+/* Error exit for merge: call tempunlink(), then _exit with DIFF_TROUBLE. */
+ exiting void
+exiterr()
+{
+ tempunlink();
+ _exit(DIFF_TROUBLE);
+}
diff --git a/gnu/usr.bin/rcs/rcs/Makefile b/gnu/usr.bin/rcs/rcs/Makefile
new file mode 100644
index 000000000000..d62c8d1d94c5
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcs/Makefile
@@ -0,0 +1,10 @@
+# Build the rcs program from rcs.c.
+PROG= rcs
+
+SRCS= rcs.c
+# Link against librcs from ../lib/obj; headers come from ../lib.
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+# Manual pages listed for sections 1 and 5.
+MAN1= rcs.0 rcsintro.0
+MAN5= rcsfile.0
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/rcs/rcs.1 b/gnu/usr.bin/rcs/rcs/rcs.1
new file mode 100644
index 000000000000..9866a9c0054f
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcs/rcs.1
@@ -0,0 +1,397 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: rcs.1,v 5.6 1991/09/26 23:16:17 eggert Exp $
+.ds r \&\s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH RCS 1 \*(Dt GNU
+.SH NAME
+rcs \- change RCS file attributes
+.SH SYNOPSIS
+.B rcs
+.RI [ " options " ] " file " .\|.\|.
+.SH DESCRIPTION
+.B rcs
+creates new \*r files or changes attributes of existing ones.
+An \*r file contains multiple revisions of text,
+an access list, a change log,
+descriptive text,
+and some control attributes.
+For
+.B rcs
+to work, the caller's login name must be on the access list,
+except if the access list is empty, the caller is the owner of the file
+or the superuser, or
+the
+.B \-i
+option is present.
+.PP
+Pathnames matching an \*r suffix denote \*r files;
+all others denote working files.
+Names are paired as explained in
+.BR ci (1).
+Revision numbers use the syntax described in
+.BR ci (1).
+.SH OPTIONS
+.TP
+.B \-i
+Create and initialize a new \*r file, but do not deposit any revision.
+If the \*r file has no path prefix, try to place it
+first into the subdirectory
+.BR ./RCS ,
+and then into the current directory.
+If the \*r file
+already exists, print an error message.
+.TP
+.BI \-a "logins"
+Append the login names appearing in the comma-separated list
+.I logins
+to the access list of the \*r file.
+.TP
+.BI \-A "oldfile"
+Append the access list of
+.I oldfile
+to the access list of the \*r file.
+.TP
+.BR \-e [\f2logins\fP]
+Erase the login names appearing in the comma-separated list
+.I logins
+from the access list of the \*r file.
+If
+.I logins
+is omitted, erase the entire access list.
+.TP
+.BR \-b [\f2rev\fP]
+Set the default branch to
+.IR rev .
+If
+.I rev
+is omitted, the default
+branch is reset to the (dynamically) highest branch on the trunk.
+.TP
+.BI \-c string
+Set the comment leader to
+.IR string .
+The comment leader
+is printed before every log message line generated by the keyword
+.B $\&Log$
+during checkout (see
+.BR co (1)).
+This is useful for programming
+languages without multi-line comments.
+An initial
+.BR ci ,
+or an
+.B "rcs\ \-i"
+without
+.BR \-c ,
+guesses the comment leader from the suffix of the working file.
+.TP
+.BI \-k subst
+Set the default keyword substitution to
+.IR subst .
+The effect of keyword substitution is described in
+.BR co (1).
+Giving an explicit
+.B \-k
+option to
+.BR co ,
+.BR rcsdiff ,
+and
+.B rcsmerge
+overrides this default.
+Beware
+.BR "rcs\ \-kv" ,
+because
+.B \-kv
+is incompatible with
+.BR "co\ \-l" .
+Use
+.B "rcs\ \-kkv"
+to restore the normal default keyword substitution.
+.TP
+.BR \-l [\f2rev\fP]
+Lock the revision with number
+.IR rev .
+If a branch is given, lock the latest revision on that branch.
+If
+.I rev
+is omitted, lock the latest revision on the default branch.
+Locking prevents overlapping changes.
+A lock is removed with
+.B ci
+or
+.B "rcs\ \-u"
+(see below).
+.TP
+.BR \-u [\f2rev\fP]
+Unlock the revision with number
+.IR rev .
+If a branch is given, unlock the latest revision on that branch.
+If
+.I rev
+is omitted, remove the latest lock held by the caller.
+Normally, only the locker of a revision may unlock it.
+Somebody else unlocking a revision breaks the lock.
+This causes a mail message to be sent to the original locker.
+The message contains a commentary solicited from the breaker.
+The commentary is terminated by end-of-file or by a line containing
+.BR \&. "\ by"
+itself.
+.TP
+.B \-L
+Set locking to
+.IR strict .
+Strict locking means that the owner
+of an \*r file is not exempt from locking for checkin.
+This option should be used for files that are shared.
+.TP
+.B \-U
+Set locking to non-strict. Non-strict locking means that the owner of
+a file need not lock a revision for checkin.
+This option should
+.I not
+be used for files that are shared.
+Whether default locking is strict is determined by your system administrator,
+but it is normally strict.
+.TP
+\f3\-m\fP\f2rev\fP\f3:\fP\f2msg\fP
+Replace revision
+.IR rev 's
+log message with
+.IR msg .
+.TP
+\f3\-n\fP\f2name\fP[\f3:\fP[\f2rev\fP]]
+Associate the symbolic name
+.I name
+with the branch or
+revision
+.IR rev .
+Delete the symbolic name if both
+.B :
+and
+.I rev
+are omitted; otherwise, print an error message if
+.I name
+is already associated with
+another number.
+If
+.I rev
+is symbolic, it is expanded before association.
+A
+.I rev
+consisting of a branch number followed by a
+.B .\&
+stands for the current latest revision in the branch.
+A
+.B :
+with an empty
+.I rev
+stands for the current latest revision on the default branch,
+normally the trunk.
+For example,
+.BI "rcs\ \-n" name ":\ RCS/*"
+associates
+.I name
+with the current latest revision of all the named \*r files;
+this contrasts with
+.BI "rcs\ \-n" name ":$\ RCS/*"
+which associates
+.I name
+with the revision numbers extracted from keyword strings
+in the corresponding working files.
+.TP
+\f3\-N\fP\f2name\fP[\f3:\fP[\f2rev\fP]]
+Act like
+.BR \-n ,
+except override any previous assignment of
+.IR name .
+.TP
+.BI \-o range
+Delete (\*(lqoutdate\*(rq) the revisions given by
+.IR range .
+A range consisting of a single revision number means that revision.
+A range consisting of a branch number means the latest revision on that
+branch.
+A range of the form
+.IB rev1 : rev2
+means
+revisions
+.I rev1
+to
+.I rev2
+on the same branch,
+.BI : rev
+means from the beginning of the branch containing
+.I rev
+up to and including
+.IR rev ,
+and
+.IB rev :
+means
+from revision
+.I rev
+to the end of the branch containing
+.IR rev .
+None of the outdated revisions may have branches or locks.
+.TP
+.B \-q
+Run quietly; do not print diagnostics.
+.TP
+.B \-I
+Run interactively, even if the standard input is not a terminal.
+.TP
+.B \-s\f2state\fP\f1[\fP:\f2rev\fP\f1]\fP
+Set the state attribute of the revision
+.I rev
+to
+.IR state .
+If
+.I rev
+is a branch number, assume the latest revision on that branch.
+If
+.I rev
+is omitted, assume the latest revision on the default branch.
+Any identifier is acceptable for
+.IR state .
+A useful set of states
+is
+.B Exp
+(for experimental),
+.B Stab
+(for stable), and
+.B Rel
+(for
+released).
+By default,
+.BR ci (1)
+sets the state of a revision to
+.BR Exp .
+.TP
+.BR \-t [\f2file\fP]
+Write descriptive text from the contents of the named
+.I file
+into the \*r file, deleting the existing text.
+The
+.IR file
+pathname may not begin with
+.BR \- .
+If
+.I file
+is omitted, obtain the text from standard input,
+terminated by end-of-file or by a line containing
+.BR \&. "\ by"
+itself.
+Prompt for the text if interaction is possible; see
+.BR \-I .
+With
+.BR \-i ,
+descriptive text is obtained
+even if
+.B \-t
+is not given.
+.TP
+.BI \-t\- string
+Write descriptive text from the
+.I string
+into the \*r file, deleting the existing text.
+.TP
+.BI \-V n
+Emulate \*r version
+.IR n .
+See
+.BR co (1)
+for details.
+.TP
+.BI \-x "suffixes"
+Use
+.I suffixes
+to characterize \*r files.
+See
+.BR ci (1)
+for details.
+.SH COMPATIBILITY
+The
+.BI \-b rev
+option generates an \*r file that cannot be parsed by \*r version 3 or earlier.
+.PP
+The
+.BI \-k subst
+options (except
+.BR \-kkv )
+generate an \*r file that cannot be parsed by \*r version 4 or earlier.
+.PP
+Use
+.BI "rcs \-V" n
+to make an \*r file acceptable to \*r version
+.I n
+by discarding information that would confuse version
+.IR n .
+.PP
+\*r version 5.5 and earlier does not support the
+.B \-x
+option, and requires a
+.B ,v
+suffix on an \*r pathname.
+.SH FILES
+.B rcs
+accesses files much as
+.BR ci (1)
+does,
+except that it uses the effective user for all accesses,
+it does not write the working file or its directory,
+and it does not even read the working file unless a revision number of
+.B $
+is specified.
+.SH ENVIRONMENT
+.TP
+.B \s-1RCSINIT\s0
+options prepended to the argument list, separated by spaces.
+See
+.BR ci (1)
+for details.
+.SH DIAGNOSTICS
+The \*r pathname and the revisions outdated are written to
+the diagnostic output.
+The exit status is zero if and only if all operations were successful.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH "SEE ALSO"
+co(1), ci(1), ident(1), rcsdiff(1), rcsintro(1), rcsmerge(1), rlog(1),
+rcsfile(5)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
+.SH BUGS
+The separator for revision ranges in the
+.B \-o
+option used to be
+.B \-
+instead of
+.BR : ,
+but this leads to confusion when symbolic names contain
+.BR \- .
+For backwards compatibility
+.B "rcs \-o"
+still supports the old
+.B \-
+separator, but it warns about this obsolete use.
+.PP
+Symbolic names need not refer to existing revisions or branches.
+For example, the
+.B \-o
+option does not remove symbolic names for the outdated revisions; you must use
+.B \-n
+to remove the names.
+.br
diff --git a/gnu/usr.bin/rcs/rcs/rcs.c b/gnu/usr.bin/rcs/rcs/rcs.c
new file mode 100644
index 000000000000..70e7ffc5a8ee
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcs/rcs.c
@@ -0,0 +1,1554 @@
+/*
+ * RCS create/change operation
+ */
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+
+/* $Log: rcs.c,v $
+ * Revision 5.12 1991/11/20 17:58:08 eggert
+ * Don't read the delta tree from a nonexistent RCS file.
+ *
+ * Revision 5.11 1991/10/07 17:32:46 eggert
+ * Remove lint.
+ *
+ * Revision 5.10 1991/08/19 23:17:54 eggert
+ * Add -m, -r$, piece tables. Revision separator is `:', not `-'. Tune.
+ *
+ * Revision 5.9 1991/04/21 11:58:18 eggert
+ * Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.8 1991/02/25 07:12:38 eggert
+ * strsave -> str_save (DG/UX name clash)
+ * 0444 -> S_IRUSR|S_IRGRP|S_IROTH for portability
+ *
+ * Revision 5.7 1990/12/18 17:19:21 eggert
+ * Fix bug with multiple -n and -N options.
+ *
+ * Revision 5.6 1990/12/04 05:18:40 eggert
+ * Use -I for prompts and -q for diagnostics.
+ *
+ * Revision 5.5 1990/11/11 00:06:35 eggert
+ * Fix `rcs -e' core dump.
+ *
+ * Revision 5.4 1990/11/01 05:03:33 eggert
+ * Add -I and new -t behavior. Permit arbitrary data in logs.
+ *
+ * Revision 5.3 1990/10/04 06:30:16 eggert
+ * Accumulate exit status across files.
+ *
+ * Revision 5.2 1990/09/04 08:02:17 eggert
+ * Standardize yes-or-no procedure.
+ *
+ * Revision 5.1 1990/08/29 07:13:51 eggert
+ * Remove unused setuid support. Clean old log messages too.
+ *
+ * Revision 5.0 1990/08/22 08:12:42 eggert
+ * Don't lose names when applying -a option to multiple files.
+ * Remove compile-time limits; use malloc instead. Add setuid support.
+ * Permit dates past 1999/12/31. Make lock and temp files faster and safer.
+ * Ansify and Posixate. Add -V. Fix umask bug. Make linting easier. Tune.
+ * Yield proper exit status. Check diff's output.
+ *
+ * Revision 4.11 89/05/01 15:12:06 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.10 88/11/08 16:01:54 narten
+ * didn't install previous patch correctly
+ *
+ * Revision 4.9 88/11/08 13:56:01 narten
+ * removed include <sysexits.h> (not needed)
+ * minor fix for -A option
+ *
+ * Revision 4.8 88/08/09 19:12:27 eggert
+ * Don't access freed storage.
+ * Use execv(), not system(); yield proper exit status; remove lint.
+ *
+ * Revision 4.7 87/12/18 11:37:17 narten
+ * lint cleanups (Guy Harris)
+ *
+ * Revision 4.6 87/10/18 10:28:48 narten
+ * Updating version numbers. Changes relative to 1.1 are actually
+ * relative to 4.3
+ *
+ * Revision 1.4 87/09/24 13:58:52 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.3 87/03/27 14:21:55 jenkins
+ * Port to suns
+ *
+ * Revision 1.2 85/12/17 13:59:09 albitz
+ * Changed setstate to rcs_setstate because of conflict with random.o.
+ *
+ * Revision 4.3 83/12/15 12:27:33 wft
+ * rcs -u now breaks most recent lock if it can't find a lock by the caller.
+ *
+ * Revision 4.2 83/12/05 10:18:20 wft
+ * Added conditional compilation for sending mail.
+ * Alternatives: V4_2BSD, V6, USG, and other.
+ *
+ * Revision 4.1 83/05/10 16:43:02 wft
+ * Simplified breaklock(); added calls to findlock() and getcaller().
+ * Added option -b (default branch). Updated -s and -w for -b.
+ * Removed calls to stat(); now done by pairfilenames().
+ * Replaced most catchints() calls with restoreints().
+ * Removed check for exit status of delivermail().
+ * Directed all interactive output to stderr.
+ *
+ * Revision 3.9.1.1 83/12/02 22:08:51 wft
+ * Added conditional compilation for 4.2 sendmail and 4.1 delivermail.
+ *
+ * Revision 3.9 83/02/15 15:38:39 wft
+ * Added call to fastcopy() to copy remainder of RCS file.
+ *
+ * Revision 3.8 83/01/18 17:37:51 wft
+ * Changed sendmail(): now uses delivermail, and asks whether to break the lock.
+ *
+ * Revision 3.7 83/01/15 18:04:25 wft
+ * Removed putree(); replaced with puttree() in rcssyn.c.
+ * Combined putdellog() and scanlogtext(); deleted putdellog().
+ * Cleaned up diagnostics and error messages. Fixed problem with
+ * mutilated files in case of deletions in 2 files in a single command.
+ * Changed marking of selector from 'D' to DELETE.
+ *
+ * Revision 3.6 83/01/14 15:37:31 wft
+ * Added ignoring of interrupts while new RCS file is renamed;
+ * Avoids deletion of RCS files by interrupts.
+ *
+ * Revision 3.5 82/12/10 21:11:39 wft
+ * Removed unused variables, fixed checking of return code from diff,
+ * introduced variant COMPAT2 for skipping Suffix on -A files.
+ *
+ * Revision 3.4 82/12/04 13:18:20 wft
+ * Replaced getdelta() with gettree(), changed breaklock to update
+ * field lockedby, added some diagnostics.
+ *
+ * Revision 3.3 82/12/03 17:08:04 wft
+ * Replaced getlogin() with getpwuid(), flcose() with ffclose(),
+ * /usr/ucb/Mail with macro MAIL. Removed handling of Suffix (-x).
+ * fixed -u for missing revno. Disambiguated structure members.
+ *
+ * Revision 3.2 82/10/18 21:05:07 wft
+ * rcs -i now generates a file mode given by the umask minus write permission;
+ * otherwise, rcs keeps the mode, but removes write permission.
+ * I added a check for write error, fixed call to getlogin(), replaced
+ * curdir() with getfullRCSname(), cleaned up handling -U/L, and changed
+ * conflicting, long identifiers.
+ *
+ * Revision 3.1 82/10/13 16:11:07 wft
+ * fixed type of variables receiving from getc() (char -> int).
+ */
+
+
+#include "rcsbase.h"
+
+/* One revision named by a -l or -u option.  main() chains these into
+ * newlocklst (for -l) and rmvlocklst (for -u). */
+struct Lockrev {
+ char const *revno; /* revision text taken from the option argument */
+ struct Lockrev * nextrev; /* next list entry, or nil at the end */
+};
+
+/* One -n/-N request, queued on assoclst by getassoclst() and applied
+ * by doassoc(). */
+struct Symrev {
+ char const *revno; /* revision text after ':', or nil to delete the symbol */
+ char const *ssymbol; /* the symbolic name */
+ int override; /* false for -n, true for -N; handed to addsymbol() */
+ struct Symrev * nextsym; /* next list entry, or nil at the end */
+};
+
+/* One -mrev:msg request, queued on messagelst by getmessage(). */
+struct Message {
+ char const *revno; /* option text before the ':' */
+ struct cbuf message; /* log text after the ':', as returned by cleanlogmsg() */
+ struct Message *nextmessage; /* next list entry, or 0 at the end */
+};
+
+/* One -sstate:rev request, queued on statelst by getstates() and
+ * applied by main() through rcs_setstate(). */
+struct Status {
+ char const *revno; /* revision text after the ':' */
+ char const *status; /* state name before the ':' */
+ struct Status * nextstatus; /* next list entry, or nil at the end */
+};
+
+/* Kind of access-list change: `append' comes from -a and -A, `erase' from -e. */
+enum changeaccess {append, erase};
+/* One queued access-list change, built by getchaccess() and applied by
+ * doaccess(). */
+struct chaccess {
+ char const *login; /* login name; nil with `erase' clears the whole list */
+ enum changeaccess command; /* whether to add or remove `login' */
+ struct chaccess *nextchaccess; /* next queued change, or nil */
+};
+
+/* The revision range given with -o, as parsed by getdelrev(). */
+struct delrevpair {
+ char const *strt; /* first (or only) revision named in the option */
+ char const *end; /* second revision for code 3, otherwise nil */
+ int code; /* 0: -orev or -obranch, 1: -o:rev, 2: -orev:, 3: -orev1:rev2 */
+};
+
+/* Forward declarations for the static functions of this file.
+ * P(()) presumably comes from rcsbase.h and hides the parameter list
+ * from pre-ANSI compilers -- confirm there. */
+static int buildeltatext P((struct hshentries const*));
+static int removerevs P((void));
+static int sendmail P((char const*,char const*));
+static struct Lockrev *rmnewlocklst P((struct Lockrev const*));
+static void breaklock P((struct hshentry const*));
+static void buildtree P((void));
+static void cleanup P((void));
+static void doaccess P((void));
+static void doassoc P((void));
+static void dolocks P((void));
+static void domessages P((void));
+static void getaccessor P((char*,enum changeaccess));
+static void getassoclst P((int,char*));
+static void getchaccess P((char const*,enum changeaccess));
+static void getdelrev P((char*));
+static void getmessage P((char*));
+static void getstates P((char*));
+static void rcs_setstate P((char const*,char const*));
+static void scanlogtext P((struct hshentry*,int));
+static void setlock P((char const*));
+
+/* Scratch buffer that receives revision numbers expanded by expandsym(). */
+static struct buf numrev;
+/* State name given by -s without a revision; used when chgheadstate is set. */
+static char const *headstate;
+/* chgheadstate: -sstate was given without a revision.
+ * exitstatus:   value handed to exitmain(); cleanup() sets it to
+ *               EXIT_FAILURE when nerror is nonzero.
+ * lockhead:     -l was given without a revision.
+ * unlockcaller: -u was given without a revision. */
+static int chgheadstate, exitstatus, lockhead, unlockcaller;
+/* Revisions to lock (-lrev) and to unlock (-urev). */
+static struct Lockrev *newlocklst, *rmvlocklst;
+/* Queue of -m requests and its last element. */
+static struct Message *messagelst, *lastmessage;
+/* Queue of -sstate:rev requests and its last element. */
+static struct Status *statelst, *laststate;
+/* Queue of -n/-N requests and its last element. */
+static struct Symrev *assoclst, *lastassoc;
+/* Queue of access-list changes, and the link where the next one is stored. */
+static struct chaccess *chaccess, **nextchaccess;
+/* Range given with -o; delrev.strt is nil until -o has been parsed. */
+static struct delrevpair delrev;
+/* Set while deleting revisions (see removerevs): the delta before the
+ * deleted run, the delta after it, and the first deleted delta. */
+static struct hshentry *cuthead, *cuttail, *delstrt;
+/* Delta chain filled in by genrevs(). */
+static struct hshentries *gendeltas;
+
+/*
+ * Entry point of the `rcs' command.
+ *
+ * Phase 1 parses the command options (after getRCSINIT() has processed
+ * argc/argv) and queues the requested changes in the file-scope lists.
+ * Phase 2 loops over the remaining arguments; for each RCS file it
+ * updates the admin node, applies the queued access, symbol, lock,
+ * message, state and deletion requests, writes the result to frewrite
+ * and finally installs it with chnamemod().
+ *
+ * The exit status is exitstatus, which becomes EXIT_FAILURE as soon as
+ * any file has produced an error.
+ */
+mainProg(rcsId, "rcs", "$Id: rcs.c,v 5.12 1991/11/20 17:58:08 eggert Exp $")
+{
+ static char const cmdusage[] =
+ "\nrcs usage: rcs -{ae}logins -Afile -{blu}[rev] -cstring -{iLU} -{nNs}name[:rev] -orange -t[file] -Vn file ...";
+
+ char *a, **newargv, *textfile;
+ char const *branchsym, *commsyml;
+ int branchflag, expmode, initflag;
+ int e, r, strictlock, strict_selected, textflag;
+ mode_t defaultRCSmode; /* default mode for new RCS files */
+ mode_t RCSmode;
+ struct buf branchnum;
+ struct stat workstat;
+ struct Lockrev *curlock, * rmvlock, *lockpt;
+ struct Status * curstate;
+
+ nosetid();
+
+ nextchaccess = &chaccess;
+ branchsym = commsyml = textfile = nil;
+ branchflag = strictlock = false;
+ bufautobegin(&branchnum);
+ curlock = rmvlock = nil;
+ defaultRCSmode = 0;
+ expmode = -1; /* negative: no -k option seen yet */
+ suffixes = X_DEFAULT;
+ initflag= textflag = false;
+ strict_selected = 0;
+
+ /* preprocessing command options */
+ argc = getRCSINIT(argc, argv, &newargv);
+ argv = newargv;
+ /* `a' ends up pointing just past the option letter, i.e. at the
+ * option's value; *argv still points at the leading '-'. */
+ while (a = *++argv, 0<--argc && *a++=='-') {
+ switch (*a++) {
+
+ case 'i': /* initial version */
+ initflag = true;
+ break;
+
+ case 'b': /* change default branch */
+ if (branchflag) redefined('b');
+ branchflag= true;
+ branchsym = a;
+ break;
+
+ case 'c': /* change comment symbol */
+ if (commsyml) redefined('c');
+ commsyml = a;
+ break;
+
+ case 'a': /* add new accessor */
+ getaccessor(*argv+1, append);
+ break;
+
+ case 'A': /* append access list according to accessfile */
+ if (!*a) {
+ error("missing file name after -A");
+ break;
+ }
+ *argv = a;
+ if (0 < pairfilenames(1,argv,rcsreadopen,true,false)) {
+ /* Queue every login of the other file's access list. */
+ while (AccessList) {
+ getchaccess(str_save(AccessList->login),append);
+ AccessList = AccessList->nextaccess;
+ }
+ Izclose(&finptr);
+ }
+ break;
+
+ case 'e': /* remove accessors */
+ getaccessor(*argv+1, erase);
+ break;
+
+ case 'l': /* lock a revision if it is unlocked */
+ if (!*a) {
+ /* Lock head or default branch. */
+ lockhead = true;
+ break;
+ }
+ lockpt = talloc(struct Lockrev);
+ lockpt->revno = a;
+ lockpt->nextrev = nil;
+ if ( curlock )
+ curlock->nextrev = lockpt;
+ else
+ newlocklst = lockpt;
+ curlock = lockpt;
+ break;
+
+ case 'u': /* release lock of a locked revision */
+ if (!*a) {
+ unlockcaller=true;
+ break;
+ }
+ lockpt = talloc(struct Lockrev);
+ lockpt->revno = a;
+ lockpt->nextrev = nil;
+ if (rmvlock)
+ rmvlock->nextrev = lockpt;
+ else
+ rmvlocklst = lockpt;
+ rmvlock = lockpt;
+
+ /* A later -u cancels an earlier -l of the same revision;
+ * rmnewlocklst() returns the new tail of newlocklst. */
+ curlock = rmnewlocklst(lockpt);
+ break;
+
+ case 'L': /* set strict locking */
+ if (strict_selected++) { /* Already selected L or U? */
+ if (!strictlock) /* Already selected -U? */
+ warn("-L overrides -U.");
+ }
+ strictlock = true;
+ break;
+
+ case 'U': /* release strict locking */
+ if (strict_selected++) { /* Already selected L or U? */
+ if (strictlock) /* Already selected -L? */
+ warn("-L overrides -U.");
+ }
+ else
+ strictlock = false;
+ break;
+
+ case 'n': /* add new association: error, if name exists */
+ if (!*a) {
+ error("missing symbolic name after -n");
+ break;
+ }
+ getassoclst(false, (*argv)+1);
+ break;
+
+ case 'N': /* add or change association */
+ if (!*a) {
+ error("missing symbolic name after -N");
+ break;
+ }
+ getassoclst(true, (*argv)+1);
+ break;
+
+ case 'm': /* change log message */
+ getmessage(a);
+ break;
+
+ case 'o': /* delete revisions */
+ if (delrev.strt) redefined('o');
+ if (!*a) {
+ error("missing revision range after -o");
+ break;
+ }
+ getdelrev( (*argv)+1 );
+ break;
+
+ case 's': /* change state attribute of a revision */
+ if (!*a) {
+ error("state missing after -s");
+ break;
+ }
+ getstates( (*argv)+1);
+ break;
+
+ case 't': /* change descriptive text */
+ textflag=true;
+ if (*a) {
+ if (textfile) redefined('t');
+ textfile = a;
+ }
+ break;
+
+ case 'I':
+ interactiveflag = true;
+ break;
+
+ case 'q':
+ quietflag = true;
+ break;
+
+ case 'x':
+ suffixes = a;
+ break;
+
+ case 'V':
+ setRCSversion(*argv);
+ break;
+
+ case 'k': /* set keyword expand mode */
+ if (0 <= expmode) redefined('k');
+ if (0 <= (expmode = str2expmode(a)))
+ break;
+ /* fall into */
+ default:
+ faterror("unknown option: %s%s", *argv, cmdusage);
+ };
+ } /* end processing of options */
+
+ if (argc<1) faterror("no input file%s", cmdusage);
+ if (nerror) {
+ diagnose("%s aborted\n",cmdid);
+ exitmain(EXIT_FAILURE);
+ }
+ if (initflag) {
+ /* umask() can only be read by setting it, so set it and restore it. */
+ defaultRCSmode = umask((mode_t)0);
+ VOID umask(defaultRCSmode);
+ defaultRCSmode = (S_IRUSR|S_IRGRP|S_IROTH) & ~defaultRCSmode;
+ }
+
+ /* now handle all filenames */
+ do {
+ ffree();
+
+ if ( initflag ) {
+ switch (pairfilenames(argc, argv, rcswriteopen, false, false)) {
+ case -1: break; /* not exist; ok */
+ case 0: continue; /* error */
+ case 1: error("file %s exists already", RCSfilename);
+ continue;
+ }
+ }
+ else {
+ switch (pairfilenames(argc, argv, rcswriteopen, true, false)) {
+ case -1: continue; /* not exist */
+ case 0: continue; /* errors */
+ case 1: break; /* file exists; ok*/
+ }
+ }
+
+
+ /* now RCSfilename contains the name of the RCS file, and
+ * workfilename contains the name of the working file.
+ * if !initflag, finptr contains the file descriptor for the
+ * RCS file. The admin node is initialized.
+ */
+
+ diagnose("RCS file: %s\n", RCSfilename);
+
+ RCSmode = defaultRCSmode;
+ if (initflag) {
+ if (stat(workfilename, &workstat) == 0)
+ RCSmode = workstat.st_mode;
+ } else {
+ if (!checkaccesslist()) continue;
+ gettree(); /* Read the delta tree. */
+ RCSmode = RCSstat.st_mode;
+ }
+ /* The RCS file itself is never left writable. */
+ RCSmode &= ~(S_IWUSR|S_IWGRP|S_IWOTH);
+
+ /* update admin. node */
+ if (strict_selected) StrictLocks = strictlock;
+ if (commsyml) {
+ Comment.string = commsyml;
+ Comment.size = strlen(commsyml);
+ }
+ if (0 <= expmode) Expand = expmode;
+
+ /* update default branch */
+ if (branchflag && expandsym(branchsym, &branchnum)) {
+ if (countnumflds(branchnum.string)) {
+ Dbranch = branchnum.string;
+ } else
+ Dbranch = nil;
+ }
+
+ doaccess(); /* Update access list. */
+
+ doassoc(); /* Update association list. */
+
+ dolocks(); /* Update locks. */
+
+ domessages(); /* Update log messages. */
+
+ /* update state attribution */
+ if (chgheadstate) {
+ /* change state of default branch or head */
+ if (Dbranch==nil) {
+ if (Head==nil)
+ warn("can't change states in an empty tree");
+ else Head->state = headstate;
+ } else {
+ rcs_setstate(Dbranch,headstate); /* Can't set directly */
+ }
+ }
+ curstate = statelst;
+ while( curstate ) {
+ rcs_setstate(curstate->revno,curstate->status);
+ curstate = curstate->nextstatus;
+ }
+
+ cuthead = cuttail = nil;
+ if (delrev.strt && removerevs()) {
+ /* rebuild delta tree if some deltas are deleted */
+ if ( cuttail )
+ VOID genrevs(cuttail->num, (char *)nil,(char *)nil,
+ (char *)nil, &gendeltas);
+ buildtree();
+ }
+
+ if (nerror)
+ continue;
+
+ putadmin(frewrite);
+ if ( Head )
+ puttree(Head, frewrite);
+ putdesc(textflag,textfile);
+
+ if ( Head) {
+ if (!delrev.strt && !messagelst) {
+ /* No revision was deleted and no message was changed. */
+ fastcopy(finptr, frewrite);
+ } else {
+ if (!cuttail || buildeltatext(gendeltas)) {
+ advise_access(finptr, MADV_SEQUENTIAL);
+ scanlogtext((struct hshentry *)nil, false);
+ /* copy rest of delta text nodes that are not deleted */
+ }
+ }
+ }
+ Izclose(&finptr);
+ if ( ! nerror ) { /* move temporary file to RCS file if no error */
+ /* update mode */
+ ignoreints();
+ r = chnamemod(&frewrite, newRCSfilename, RCSfilename, RCSmode);
+ e = errno; /* save errno before later calls can change it */
+ keepdirtemp(newRCSfilename);
+ restoreints();
+ if (r != 0) {
+ enerror(e, RCSfilename);
+ error("saved in %s", newRCSfilename);
+ dirtempunlink();
+ /* `break' leaves the loop without evaluating its
+ * condition, so cleanup() does not run and cannot
+ * record this failure; record it here instead.
+ */
+ exitstatus = EXIT_FAILURE;
+ break;
+ }
+ diagnose("done\n");
+ } else {
+ diagnose("%s aborted; %s unchanged.\n",cmdid,RCSfilename);
+ }
+ } while (cleanup(),
+ ++argv, --argc >=1);
+
+ tempunlink();
+ exitmain(exitstatus);
+} /* end of main (rcs) */
+
+ static void
+cleanup()
+/* Function: per-file epilogue, run from the condition of main's file loop.
+ * Latches any error into exitstatus, closes the three streams that may
+ * still be open and calls dirtempunlink().
+ */
+{
+    if (nerror != 0) {
+        exitstatus = EXIT_FAILURE;
+    }
+    Izclose(&finptr);
+    Ozclose(&fcopy);
+    Ozclose(&frewrite);
+    dirtempunlink();
+}
+
+ exiting void
+exiterr()
+/* Function: leave the program with a failure status.  Calls
+ * dirtempunlink() and tempunlink() first, then terminates through
+ * _exit() rather than exit().
+ */
+{
+ dirtempunlink();
+ tempunlink();
+ _exit(EXIT_FAILURE);
+}
+
+
+ static void
+getassoclst(flag, sp)
+int flag;
+char * sp;
+/* Function: parse the argument of -n or -N ("name" or "name:rev") and
+ * append the request to assoclst.  sp points at the option letter; the
+ * string is cut up in place.  flag is stored as the override setting.
+ * A name without ':' queues a deletion (revno == nil).
+ */
+{
+    struct Symrev *entry;
+    char const *name;
+    int ch;
+
+    /* step over the option letter and any white space behind it */
+    do {
+        ch = *++sp;
+    } while (ch == ' ' || ch == '\t' || ch == '\n');
+    name = sp;
+
+    sp = checkid(sp, ':'); /* check for invalid symbolic name */
+    ch = *sp;
+    *sp = '\0'; /* terminate the name */
+    for (; ch == ' ' || ch == '\t' || ch == '\n'; ch = *++sp)
+        ;
+
+    if (!(ch == ':' || ch == '\0')) {
+        error("invalid string %s after option -n or -N",sp);
+        return;
+    }
+
+    entry = talloc(struct Symrev);
+    entry->ssymbol = name;
+    entry->override = flag;
+    entry->revno = nil; /* stays nil when the symbol is to be deleted */
+    if (ch != '\0') {
+        do {
+            ch = *++sp;
+        } while (ch == ' ' || ch == '\n' || ch == '\t');
+        entry->revno = sp;
+    }
+    entry->nextsym = nil;
+
+    /* append to the queue */
+    if (!lastassoc)
+        assoclst = entry;
+    else
+        lastassoc->nextsym = entry;
+    lastassoc = entry;
+}
+
+
+ static void
+getchaccess(login, command)
+ char const *login;
+ enum changeaccess command;
+/* Function: append one access-list change (login, command) to the
+ * chaccess queue.  The file-scope nextchaccess always addresses the
+ * link that the next entry has to be stored in.
+ */
+{
+    register struct chaccess *node;
+
+    node = talloc(struct chaccess);
+    node->command = command;
+    node->login = login;
+    node->nextchaccess = nil;
+
+    *nextchaccess = node;
+    nextchaccess = &node->nextchaccess;
+}
+
+
+
+ static void
+getaccessor(opt, command)
+ char *opt;
+ enum changeaccess command;
+/* Function: split the login list of option -a or -e and queue one
+ * change per login with getchaccess().  opt points at the option
+ * letter; logins are separated by commas or white space and the string
+ * is cut up in place.  A bare -e queues an erase with a nil login.
+ */
+{
+    register int ch;
+    register char *cur = opt;
+
+    /* skip the option letter and leading separators */
+    do {
+        ch = *++cur;
+    } while (ch == ' ' || ch == '\n' || ch == '\t' || ch == ',');
+
+    if (ch == '\0') {
+        if (command == erase && cur-opt == 1)
+            getchaccess((char const*)nil, command);
+        else
+            error("missing login name after option -a or -e");
+        return;
+    }
+
+    /* ch is known to be nonzero here, so the body runs at least once */
+    do {
+        getchaccess(cur, command);
+        cur = checkid(cur,',');
+        ch = *cur;
+        *cur = '\0'; /* terminate this login */
+        for (; ch == ' ' || ch == '\n' || ch == '\t' || ch == ','; ch = *++cur)
+            ;
+    } while (ch != '\0');
+}
+
+
+ static void
+getmessage(option)
+ char *option;
+/* Function: parse the argument of -m ("rev:msg") and append it to
+ * messagelst.  The ':' is overwritten in place so that option becomes
+ * the revision; the text after it goes through cleanlogmsg().
+ */
+{
+    struct Message *node;
+    struct cbuf text;
+    char *colon;
+
+    colon = strchr(option, ':');
+    if (!colon) {
+        error("-m option lacks revision number");
+        return;
+    }
+    *colon = 0;
+    ++colon; /* now at the first byte of the message */
+
+    text = cleanlogmsg(colon, strlen(colon));
+    if (text.size == 0) {
+        error("-m option lacks log message");
+        return;
+    }
+
+    node = talloc(struct Message);
+    node->revno = option;
+    node->message = text;
+    node->nextmessage = 0;
+
+    if (!lastmessage)
+        messagelst = node;
+    else
+        lastmessage->nextmessage = node;
+    lastmessage = node;
+}
+
+
+ static void
+getstates(sp)
+char *sp;
+/* Function: parse the argument of -s ("state" or "state:rev").
+ * Without a revision, remember the state for the head or default
+ * branch (chgheadstate, headstate); otherwise append the pair to
+ * statelst.  sp points at the option letter; the string is cut up in
+ * place.
+ */
+{
+    char const *state;
+    struct Status *node;
+    register int ch;
+
+    do {
+        ch = *++sp;
+    } while (ch == ' ' || ch == '\t' || ch == '\n');
+    state = sp;
+
+    sp = checkid(sp,':'); /* check for invalid state attribute */
+    ch = *sp;
+    *sp = '\0';
+    for (; ch == ' ' || ch == '\t' || ch == '\n'; ch = *++sp)
+        ;
+
+    switch (ch) {
+    case '\0': /* change state of def. branch or Head */
+        chgheadstate = true;
+        headstate = state;
+        return;
+    case ':':
+        break;
+    default:
+        error("missing ':' after state in option -s");
+        return;
+    }
+
+    do {
+        ch = *++sp;
+    } while (ch == ' ' || ch == '\t' || ch == '\n');
+
+    node = talloc(struct Status);
+    node->status = state;
+    node->revno = sp;
+    node->nextstatus = nil;
+
+    if (!laststate)
+        statelst = node;
+    else
+        laststate->nextstatus = node;
+    laststate = node;
+}
+
+
+
+ static void
+getdelrev(sp)
+char *sp;
+/* Function: get revision range or branch to be deleted, */
+/* and place in delrev */
+/* sp points at the option letter 'o'; the argument is cut up in place. */
+/* On return delrev.code tells which form was seen: */
+/* 0: -orev or -obranch   (strt set, end nil) */
+/* 1: -o:rev              (strt set, end nil) */
+/* 2: -orev:              (strt set, end nil) */
+/* 3: -orev1:rev2         (strt and end set) */
+{
+ int c;
+ struct delrevpair *pt;
+ int separator;
+
+ pt = &delrev;
+ /* skip the option letter and any white space after it */
+ while((c = (*++sp)) == ' ' || c == '\n' || c == '\t') ;
+
+ /* Support old ambiguous '-' syntax; this will go away. */
+ if (strchr(sp,':'))
+ separator = ':';
+ else {
+ if (strchr(sp,'-') && VERSION(5) <= RCSversion)
+ warn("`-' is obsolete in `-o%s'; use `:' instead", sp);
+ separator = '-';
+ }
+
+ if (c == separator) { /* -o:rev */
+ while( (c = (*++sp)) == ' ' || c == '\n' || c == '\t') ;
+ pt->strt = sp; pt->code = 1;
+ /* terminate the revision at the first white space or at the end */
+ while( c != ' ' && c != '\n' && c != '\t' && c != '\0') c =(*++sp);
+ *sp = '\0';
+ pt->end = nil;
+ return;
+ }
+ else {
+ pt->strt = sp;
+ /* find the end of the first revision; c keeps the character that
+ * is overwritten by the terminator */
+ while( c != ' ' && c != '\n' && c != '\t' && c != '\0'
+ && c != separator ) c = *++sp;
+ *sp = '\0';
+ while( c == ' ' || c == '\n' || c == '\t' ) c = *++sp;
+ if ( c == '\0' ) { /* -o rev or branch */
+ pt->end = nil; pt->code = 0;
+ return;
+ }
+ if (c != separator) {
+ faterror("invalid range %s %s after -o", pt->strt, sp);
+ }
+ while( (c = *++sp) == ' ' || c == '\n' || c == '\t') ;
+ if (!c) { /* -orev: */
+ pt->end = nil; pt->code = 2;
+ return;
+ }
+ }
+ /* -orev1:rev2 */
+ pt->end = sp; pt->code = 3;
+ while( c!= ' ' && c != '\n' && c != '\t' && c != '\0') c = *++sp;
+ *sp = '\0';
+}
+
+
+
+
+ static void
+scanlogtext(delta,edit)
+ struct hshentry *delta;
+ int edit;
+/* Function: Scans delta text nodes up to and including the one given
+ * by delta, or up to last one present, if delta==nil.
+ * For the one given by delta (if delta!=nil), the log message is saved into
+ * delta->log if delta==cuttail; the text is edited if EDIT is set, else copied.
+ * Assumes the initial lexeme must be read in first.
+ * Does not advance nexttok after it is finished, except if delta==nil.
+ */
+{
+ struct hshentry const *nextdelta;
+ struct cbuf cb;
+
+ for (;;) {
+ /* NOTE(review): foutptr looks like the stream the lexer echoes its
+ * input to; 0 presumably turns echoing off -- confirm in rcslex.c. */
+ foutptr = 0;
+ if (eoflex()) {
+ if(delta)
+ faterror("can't find delta for revision %s", delta->num);
+ return; /* no more delta text nodes */
+ }
+ nextlex();
+ if (!(nextdelta=getnum()))
+ faterror("delta number corrupted");
+ if (nextdelta->selector) {
+ /* this delta is kept: write its header to the new file */
+ foutptr = frewrite;
+ aprintf(frewrite,DELNUMFORM,nextdelta->num,Klog);
+ }
+ getkeystring(Klog);
+ if (nextdelta == cuttail) {
+ /* keep the log of cuttail unless delta already has one */
+ cb = savestring(&curlogbuf);
+ if (!delta->log.string)
+ delta->log = cleanlogmsg(curlogbuf.string, cb.size);
+ } else if (nextdelta->log.string && nextdelta->selector) {
+ /* a replacement log is present: read the old one with
+ * foutptr cleared, then write the new one */
+ foutptr = 0;
+ readstring();
+ foutptr = frewrite;
+ putstring(foutptr, false, nextdelta->log, true);
+ afputc(nextc, foutptr);
+ } else {readstring();
+ }
+ nextlex();
+ /* skip any phrases between the log and the text keyword */
+ while (nexttok==ID && strcmp(NextString,Ktext)!=0)
+ ignorephrase();
+ getkeystring(Ktext);
+
+ if (delta==nextdelta)
+ break;
+ readstring(); /* skip over it */
+
+ }
+ /* got the one we're looking for */
+ if (edit)
+ editstring((struct hshentry *)nil);
+ else
+ enterstring();
+}
+
+
+
+ static struct Lockrev *
+rmnewlocklst(which)
+ struct Lockrev const *which;
+/* Function: delete from newlocklst every entry whose revno equals
+ * which->revno.  Returns the last entry that remains, or nil if the
+ * list is empty afterwards.
+ */
+{
+    struct Lockrev **link, *node, *last;
+
+    last = nil;
+    link = &newlocklst;
+    while ((node = *link)) {
+        if (strcmp(node->revno, which->revno) == 0) {
+            /* unlink first, then release the node */
+            *link = node->nextrev;
+            tfree(node);
+        } else {
+            last = node;
+            link = &node->nextrev;
+        }
+    }
+    return last;
+}
+
+
+
+ static void
+doaccess()
+/* Function: apply the queued access-list changes (chaccess) to
+ * AccessList, in the order they were given.  An erase with a nil login
+ * empties the list; an append is ignored if the login is already there.
+ */
+{
+    register struct chaccess *req;
+    register struct access **link, *cur;
+
+    for (req = chaccess; req; req = req->nextchaccess) {
+        if (req->command == erase) {
+            if (!req->login) {
+                AccessList = nil;
+                continue;
+            }
+            /* unlink every entry carrying this login */
+            link = &AccessList;
+            while ((cur = *link)) {
+                if (strcmp(req->login, cur->login) == 0)
+                    *link = cur->nextaccess;
+                else
+                    link = &cur->nextaccess;
+            }
+        } else if (req->command == append) {
+            /* walk to the matching entry or to the end of the list */
+            link = &AccessList;
+            while ((cur = *link) && strcmp(req->login, cur->login) != 0)
+                link = &cur->nextaccess;
+            if (!cur) {
+                cur = ftalloc(struct access);
+                cur->login = req->login;
+                cur->nextaccess = nil;
+                *link = cur;
+            }
+        }
+    }
+}
+
+
+ static int
+sendmail(Delta, who)
+ char const *Delta, *who;
+/* Function: mail to who, informing him that his lock on delta was
+ * broken by caller. Ask first whether to go ahead. Return false on
+ * error or if user decides not to break the lock.
+ * Without SENDMAIL no mail is sent; the caller is only told to notify
+ * the lock owner.
+ */
+{
+#ifdef SENDMAIL
+ char const *messagefile;
+ int old1, old2, c;
+ FILE * mailmess;
+#endif
+
+
+ aprintf(stderr, "Revision %s is already locked by %s.\n", Delta, who);
+ if (!yesorno(false, "Do you want to break the lock? [ny](n): "))
+ return false;
+
+ /* go ahead with breaking */
+#ifdef SENDMAIL
+ messagefile = maketemp(0);
+ if (!(mailmess = fopen(messagefile, "w"))) {
+ efaterror(messagefile);
+ }
+
+ aprintf(mailmess, "Subject: Broken lock on %s\n\nYour lock on revision %s of file %s\nhas been broken by %s for the following reason:\n",
+ basename(RCSfilename), Delta, getfullRCSname(), getcaller()
+ );
+ aputs("State the reason for breaking the lock:\n(terminate with single '.' or end of file)\n>> ", stderr);
+
+ /* Output lags one character behind input (old1 is written when the
+ * next character arrives), so a terminating "." line is never copied
+ * into the message. */
+ old1 = '\n'; old2 = ' ';
+ for (; ;) {
+ c = getcstdin();
+ if (feof(stdin)) {
+ aprintf(mailmess, "%c\n", old1);
+ break;
+ }
+ else if ( c == '\n' && old1 == '.' && old2 == '\n')
+ break;
+ else {
+ afputc(old1, mailmess);
+ old2 = old1; old1 = c;
+ if (c=='\n') aputs(">> ", stderr);
+ }
+ }
+ Ozclose(&mailmess);
+
+ /* The warn() calls below form ONE statement joined by comma
+ * operators.  With SENDMAIL it is the body of this `if', so both
+ * warnings appear only when run() returns nonzero; without SENDMAIL
+ * both warnings are unconditional. */
+ if (run(messagefile, (char*)nil, SENDMAIL, who, (char*)nil))
+ warn("Mail may have failed."),
+#else
+ warn("Mail notification of broken locks is not available."),
+#endif
+ warn("Please tell `%s' why you broke the lock.", who);
+ return(true);
+}
+
+
+
+ static void
+breaklock(delta)
+ struct hshentry const *delta;
+/* Function: remove the lock on delta from Locks.
+ * If the lock belongs to someone other than the caller, sendmail() is
+ * consulted first; when it returns false the lock is left alone and an
+ * error is reported.  An error is also reported if delta is not locked.
+ */
+{
+    register struct lock **link, *lk;
+    char const *num;
+
+    num = delta->num;
+
+    /* find the first lock whose revision number equals num */
+    for (link = &Locks;  (lk = *link) != nil;  link = &lk->nextlock) {
+        if (strcmp(num, lk->delta->num) != 0)
+            continue;
+        if (strcmp(getcaller(),lk->login) != 0
+            && !sendmail(num, lk->login)) {
+            error("%s still locked by %s", num, lk->login);
+            return;
+        }
+        break; /* exact match */
+    }
+
+    if (lk == nil) {
+        error("no lock set on revision %s", num);
+        return;
+    }
+
+    /* found one: unlink it */
+    diagnose("%s unlocked\n",lk->delta->num);
+    *link = lk->nextlock;
+    lk->delta->lockedby = nil;
+}
+
+
+
+ static struct hshentry *
+searchcutpt(object, length, store)
+ char const *object;
+ unsigned length;
+ struct hshentries *store;
+/* Function: walk store until compartial() reports 0 for an entry's
+ * number against object over length fields, and return that entry.
+ * cuthead is left at the entry just before it, or nil if the very
+ * first entry already matches.  The entry is assumed to be in store.
+ */
+{
+    for (cuthead = nil;
+         compartial(store->first->num, object, length) != 0;
+         store = store->rest)
+        cuthead = store->first;
+    return store->first;
+}
+
+
+
+ static int
+branchpoint(strt, tail)
+struct hshentry *strt, *tail;
+/* Function: check the deltas from strt up to, but not including, tail.
+ * Report every one that is a branch point or is locked and return 1 if
+ * there was any.  Otherwise clear the selector of each delta in the
+ * run, i.e. mark it deleted, and return 0.
+ */
+{
+    struct hshentry *d;
+    struct lock const *lk;
+    int blocked;
+
+    blocked = false;
+    for (d = strt;  d != tail;  d = d->next) {
+        if (d->branches) { /* a branch point */
+            blocked = true;
+            error("can't remove branch point %s", d->num);
+        }
+        /* look for a lock on this delta */
+        for (lk = Locks;  lk;  lk = lk->nextlock)
+            if (lk->delta == d)
+                break;
+        if (lk) {
+            blocked = true;
+            error("can't remove locked revision %s",d->num);
+        }
+    }
+    if (blocked)
+        return blocked;
+
+    for (d = strt;  d != tail;  d = d->next) {
+        d->selector = false;
+        diagnose("deleting revision %s\n",d->num);
+    }
+    return blocked;
+}
+
+
+
+ static int
+removerevs()
+/* Function: get the revision range to be removed, and place the */
+/* first revision removed in delstrt, the revision before */
+/* delstrt in cuthead( nil, if delstrt is head), and the */
+/* revision after the last removed revision in cuttail(nil */
+/* if the last is a leaf */
+/* Returns 1 if a range was found and marked for deletion by */
+/* branchpoint(); returns 0 after an error, or if the range cannot */
+/* be resolved or contains a locked revision or a branch point. */
+
+{
+ struct hshentry *target, *target2, *temp;
+ unsigned length;
+ int flag;
+
+ flag = false;
+ if (!expandsym(delrev.strt, &numrev)) return 0;
+ target = genrevs(numrev.string, (char*)nil, (char*)nil, (char*)nil, &gendeltas);
+ if ( ! target ) return 0;
+ /* flag: the delta found differs from the number that was asked for */
+ if (cmpnum(target->num, numrev.string)) flag = true;
+ length = countnumflds(numrev.string);
+
+ if (delrev.code == 0) { /* -o rev or -o branch */
+ /* an odd number of fields is a branch number */
+ if (length & 1)
+ temp=searchcutpt(target->num,length+1,gendeltas);
+ else if (flag) {
+ error("Revision %s doesn't exist.", numrev.string);
+ return 0;
+ }
+ else
+ temp = searchcutpt(numrev.string, length, gendeltas);
+ cuttail = target->next;
+ if ( branchpoint(temp, cuttail) ) {
+ cuttail = nil;
+ return 0;
+ }
+ delstrt = temp; /* first revision to be removed */
+ return 1;
+ }
+
+ if (length & 1) { /* invalid branch after -o */
+ error("invalid branch range %s after -o", numrev.string);
+ return 0;
+ }
+
+ if (delrev.code == 1) { /* -o :rev (old syntax: -o -rev) */
+ if ( length > 2 ) {
+ temp = searchcutpt( target->num, length-1, gendeltas);
+ cuttail = target->next;
+ }
+ else {
+ temp = searchcutpt(target->num, length, gendeltas);
+ cuttail = target;
+ /* advance while the first number field still equals target's */
+ while( cuttail && ! cmpnumfld(target->num,cuttail->num,1) )
+ cuttail = cuttail->next;
+ }
+ if ( branchpoint(temp, cuttail) ){
+ cuttail = nil;
+ return 0;
+ }
+ delstrt = temp;
+ return 1;
+ }
+
+ if (delrev.code == 2) { /* -o rev: (old syntax: -o rev-) */
+ if ( length == 2 ) {
+ temp = searchcutpt(target->num, 1,gendeltas);
+ if ( flag)
+ cuttail = target;
+ else
+ cuttail = target->next;
+ }
+ else {
+ /* cuttail is not assigned on this path; it keeps the value it
+ * had on entry (main() sets it to nil before calling) */
+ if ( flag){
+ cuthead = target;
+ if ( !(temp = target->next) ) return 0;
+ }
+ else
+ temp = searchcutpt(target->num, length, gendeltas);
+ getbranchno(temp->num, &numrev); /* get branch number */
+ target = genrevs(numrev.string, (char*)nil, (char*)nil, (char*)nil, &gendeltas);
+ }
+ if ( branchpoint( temp, cuttail ) ) {
+ cuttail = nil;
+ return 0;
+ }
+ delstrt = temp;
+ return 1;
+ }
+
+ /* -o rev1:rev2 (old syntax: -o rev1-rev2) */
+ if (!expandsym(delrev.end, &numrev)) return 0;
+ /* both ends need the same number of fields; branch revisions must
+ * also agree in all fields but the last */
+ if (
+ length != countnumflds(numrev.string)
+ || length>2 && compartial(numrev.string, target->num, length-1)
+ ) {
+ error("invalid revision range %s-%s", target->num, numrev.string);
+ return 0;
+ }
+
+ target2 = genrevs(numrev.string,(char*)nil,(char*)nil,(char*)nil,&gendeltas);
+ if ( ! target2 ) return 0;
+
+ if ( length > 2) { /* delete revisions on branches */
+ /* put the two ends in order: target gets the lower number */
+ if ( cmpnum(target->num, target2->num) > 0) {
+ if (cmpnum(target2->num, numrev.string))
+ flag = true;
+ else
+ flag = false;
+ temp = target;
+ target = target2;
+ target2 = temp;
+ }
+ if ( flag ) {
+ if ( ! cmpnum(target->num, target2->num) ) {
+ error("Revisions %s-%s don't exist.", delrev.strt,delrev.end);
+ return 0;
+ }
+ cuthead = target;
+ temp = target->next;
+ }
+ else
+ temp = searchcutpt(target->num, length, gendeltas);
+ cuttail = target2->next;
+ }
+ else { /* delete revisions on trunk */
+ /* put the two ends in order: target gets the higher number */
+ if ( cmpnum( target->num, target2->num) < 0 ) {
+ temp = target;
+ target = target2;
+ target2 = temp;
+ }
+ else
+ if (cmpnum(target2->num, numrev.string))
+ flag = true;
+ else
+ flag = false;
+ if ( flag ) {
+ if ( ! cmpnum(target->num, target2->num) ) {
+ error("Revisions %s-%s don't exist.", delrev.strt, delrev.end);
+ return 0;
+ }
+ cuttail = target2;
+ }
+ else
+ cuttail = target2->next;
+ temp = searchcutpt(target->num, length, gendeltas);
+ }
+ if ( branchpoint(temp, cuttail) ) {
+ cuttail = nil;
+ return 0;
+ }
+ delstrt = temp;
+ return 1;
+}
+
+
+
+ static void
+doassoc()
+/* Function: carry out the requests queued in assoclst.
+ * A request with a nil revno removes the symbol from Symbols; any
+ * other request binds the symbol with addsymbol(), using the tip
+ * revision (tiprev) when the revision text is empty.
+ */
+{
+    char const *num;
+    struct Symrev const *req;
+    struct assoc *prev, *sym;
+
+    for (req = assoclst;  req;  req = req->nextsym) {
+        if (req->revno == nil) { /* delete symbol */
+            prev = sym = Symbols;
+            while (sym && strcmp(sym->symbol,req->ssymbol) != 0) {
+                prev = sym;
+                sym = sym->nextassoc;
+            }
+            if (!sym)
+                warn("can't delete nonexisting symbol %s",req->ssymbol);
+            else if (prev == sym)
+                Symbols = sym->nextassoc; /* it was the first entry */
+            else
+                prev->nextassoc = sym->nextassoc;
+            continue;
+        }
+
+        /* add or change an association */
+        if (req->revno[0]) {
+            num = 0;
+            if (expandsym(req->revno, &numrev))
+                num = fstr_save(numrev.string);
+        } else {
+            num = tiprev();
+            if (!num)
+                error("no latest revision to associate with symbol %s",
+                      req->ssymbol
+                );
+        }
+        if (num)
+            VOID addsymbol(num, req->ssymbol, req->override);
+    }
+}
+
+
+
+ static void
+dolocks()
+/* Function: carry out all pending lock requests, in this order:
+ * 1. if unlockcaller is set, consult findlock(); when it yields 0 the
+ *    first entry of Locks is broken, when it yields 1 the unlock is reported;
+ * 2. break the locks named in rmvlocklst;
+ * 3. set the locks named in newlocklst;
+ * 4. if lockhead is set, lock Dbranch, or Head when Dbranch is unset.
+ */
+{
+    struct Lockrev const *req;
+    struct hshentry *target;
+
+    if (unlockcaller) {        /* find lock for caller */
+        if (!Head)
+            warn("can't unlock an empty tree");
+        else if (!Locks)
+            warn("No locks are set.");
+        else
+            switch (findlock(true, &target)) {
+                case 0:
+                    breaklock(Locks->delta); /* remove most recent lock */
+                    break;
+                case 1:
+                    diagnose("%s unlocked\n",target->num);
+                    break;
+            }
+    }
+
+    /* remove locks which are stored in rmvlocklst */
+    for (req = rmvlocklst; req; req = req->nextrev) {
+        if (!expandsym(req->revno, &numrev))
+            continue;
+        target = genrevs(numrev.string, (char *)nil, (char *)nil, (char *)nil, &gendeltas);
+        if (!target)
+            continue;
+        /* an even field count must be matched exactly by the delta found */
+        if (!(countnumflds(numrev.string)&1) && cmpnum(target->num,numrev.string))
+            error("can't unlock nonexisting revision %s",req->revno);
+        else
+            breaklock(target);  /* breaklock does its own diagnose */
+    }
+
+    /* add new locks which are stored in newlocklst */
+    for (req = newlocklst; req; req = req->nextrev)
+        setlock(req->revno);
+
+    if (lockhead) {            /* lock default branch or head */
+        if (Dbranch)
+            setlock(Dbranch);
+        else if (!Head)
+            warn("can't lock an empty tree");
+        else if (0 <= addlock(Head))
+            diagnose("%s locked\n",Head->num);
+    }
+}
+
+
+
+ static void
+setlock(rev)
+ char const *rev;
+/* Function: expand rev, look up the matching delta with genrevs()
+ * and lock it through addlock(), reporting success via diagnose().
+ * Nothing is locked when rev has an even number of fields and the
+ * delta found does not carry exactly that number.
+ */
+{
+    struct hshentry *found;
+
+    if (!expandsym(rev, &numrev))
+        return;
+    found = genrevs(numrev.string, (char*)nil, (char*)nil,
+        (char*)nil, &gendeltas);
+    if (!found)
+        return;
+    if ((countnumflds(numrev.string) & 1) || !cmpnum(found->num, numrev.string)) {
+        if (0 <= addlock(found))
+            diagnose("%s locked\n", found->num);
+    } else
+        error("can't lock nonexisting revision %s", numrev.string);
+}
+
+
+ static void
+domessages()
+/* Function: for every entry of messagelst whose revno expands and
+ * resolves to a delta, point that delta's log at the entry's message.
+ */
+{
+    struct Message *m;
+    struct hshentry *found;
+
+    m = messagelst;
+    while (m) {
+        if (expandsym(m->revno, &numrev)) {
+            found = genrevs(
+                numrev.string, (char*)0, (char*)0, (char*)0, &gendeltas
+            );
+            if (found)
+                found->log = m->message;
+        }
+        m = m->nextmessage;
+    }
+}
+
+
+ static void
+rcs_setstate(rev,status)
+ char const *rev, *status;
+/* Function: expand rev, look up the matching delta with genrevs()
+ * and store status in its state field.
+ * The state is left alone when rev has an even number of fields and
+ * the delta found does not carry exactly that number.
+ */
+{
+    struct hshentry *found;
+
+    if (!expandsym(rev, &numrev))
+        return;
+    found = genrevs(numrev.string, (char*)nil, (char*)nil,
+        (char*)nil, &gendeltas);
+    if (!found)
+        return;
+    if ((countnumflds(numrev.string) & 1) || !cmpnum(found->num, numrev.string))
+        found->state = status;
+    else
+        error("can't set state of nonexisting revision %s to %s",
+            numrev.string, status);
+}
+
+
+
+
+
+ static int
+buildeltatext(deltas)
+ struct hshentries const *deltas;
+/* Function: put the delta text on frewrite and make necessary
+ * change to delta text.
+ * deltas is walked with scanlogtext() until its first field is cuttail.
+ * When cuthead is set, snapshotedit() is called on a temporary file once
+ * cuthead is reached (presumably saving the edit state at that point --
+ * confirm against snapshotedit), DIFF is later run with that file and
+ * resultfile as arguments, and putdtext() is handed diffilename instead
+ * of resultfile.
+ * Yields the value of putdtext().
+ * NOTE(review): cuttail is dereferenced unconditionally, so callers
+ * presumably invoke this only when cuttail is non-nil -- confirm.
+ */
+{
+ register FILE *fcut; /* temporary file to rebuild delta tree */
+ char const *cutfilename, *diffilename;
+
+ cutfilename = nil;
+ cuttail->selector = false;
+ scanlogtext(deltas->first, false);
+ if ( cuthead ) {
+ cutfilename = maketemp(3);
+ if (!(fcut = fopen(cutfilename, FOPEN_W_WORK))) {
+ efaterror(cutfilename);
+ }
+ /* advance through deltas until cuthead has been scanned */
+ while (deltas->first != cuthead) {
+ deltas = deltas->rest;
+ scanlogtext(deltas->first, true);
+ }
+
+ snapshotedit(fcut);
+ Ofclose(fcut);
+ }
+ /* continue from the current position until cuttail has been scanned */
+ while (deltas->first != cuttail)
+ scanlogtext((deltas = deltas->rest)->first, true);
+ finishedit((struct hshentry *)nil, (FILE*)0, true);
+ Ozclose(&fcopy);
+
+ if ( cuthead ) {
+ diffilename = maketemp(0);
+ switch (run((char*)nil,diffilename,
+ DIFF DIFF_FLAGS, cutfilename, resultfile, (char*)nil
+ )) {
+ case DIFF_FAILURE: case DIFF_SUCCESS: break; /* both statuses are accepted */
+ default: faterror ("diff failed");
+ }
+ return putdtext(cuttail->num,cuttail->log,diffilename,frewrite,true);
+ } else
+ return putdtext(cuttail->num,cuttail->log,resultfile,frewrite,false);
+}
+
+
+
+ static void
+buildtree()
+/* Function: actually removes revisions whose selector field
+ * is false, and rebuilds the linkage of deltas.
+ * asks for reconfirmation if deleting last revision.
+ * With cuthead set, the link out of cuthead that led to delstrt (its
+ * next field, or the hsh of one of its branches entries) is redirected
+ * to cuttail; a branches entry is unlinked instead when cuttail is nil.
+ * Without cuthead, Head becomes cuttail; if cuttail is nil and quietflag
+ * is unset, the user is asked first and, on refusal, selector is reset
+ * to true along the next chain starting at delstrt.
+ * NOTE(review): this routine never reads selector itself; the cut is
+ * described entirely by cuthead, delstrt and cuttail.
+ */
+{
+ struct hshentry * Delta;
+ struct branchhead *pt, *pre;
+
+ if ( cuthead )
+ if ( cuthead->next == delstrt )
+ cuthead->next = cuttail;
+ else {
+ pre = pt = cuthead->branches;
+ while( pt && pt->hsh != delstrt ) {
+ pre = pt;
+ pt = pt->nextbranch;
+ }
+ if ( cuttail ) /* NOTE(review): pt is nil below if no branch head matched delstrt */
+ pt->hsh = cuttail;
+ else if ( pt == pre ) /* match was the first branches entry */
+ cuthead->branches = pt->nextbranch;
+ else
+ pre->nextbranch = pt->nextbranch;
+ }
+ else {
+ if ( cuttail == nil && !quietflag) {
+ if (!yesorno(false, "Do you really want to delete all revisions? [ny](n): ")) {
+ error("No revision deleted");
+ Delta = delstrt;
+ while( Delta) {
+ Delta->selector = true;
+ Delta = Delta->next;
+ }
+ return;
+ }
+ }
+ Head = cuttail;
+ }
+ return;
+}
+
+#if lint
+/* This lets us lint everything all at once.
+ * Compiled only when the lint macro is nonzero: go(p,e) declares one
+ * program entry point p and its exit routine e and references both, so
+ * that this main() mentions every pair listed below.
+ */
+
+char const cmdid[] = "";
+
+#define go(p,e) {int p P((int,char**)); void e P((void)); if(*argv)return p(argc,argv);if(*argv[1])e();}
+
+ int
+main(argc, argv)
+ int argc;
+ char **argv;
+{
+ go(ciId, ciExit);
+ go(coId, coExit);
+ go(identId, identExit);
+ go(mergeId, mergeExit);
+ go(rcsId, exiterr); /* this file's own exit routine */
+ go(rcscleanId, rcscleanExit);
+ go(rcsdiffId, rdiffExit);
+ go(rcsmergeId, rmergeExit);
+ go(rlogId, rlogExit);
+ return 0;
+}
+#endif
diff --git a/gnu/usr.bin/rcs/rcs/rcsfile.5 b/gnu/usr.bin/rcs/rcs/rcsfile.5
new file mode 100644
index 000000000000..d0dbbb80cbbb
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcs/rcsfile.5
@@ -0,0 +1,224 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: rcsfile.5,v 5.1 1991/08/19 03:13:55 eggert Exp $
+.ds r \s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH RCSFILE 5 \*(Dt GNU
+.SH NAME
+rcsfile \- format of RCS file
+.SH DESCRIPTION
+An \*r file's
+contents are described by the grammar
+below.
+.PP
+The text is free format: space, backspace, tab, newline, vertical
+tab, form feed, and carriage return (collectively,
+.IR "white space")
+have no significance except in strings.
+However, an \*r file must end in a newline character.
+.PP
+Strings are enclosed by
+.BR @ .
+If a string contains a
+.BR @ ,
+it must be doubled;
+otherwise, strings may contain arbitrary binary data.
+.PP
+The meta syntax uses the following conventions: `|' (bar) separates
+alternatives; `{' and `}' enclose optional phrases; `{' and `}*' enclose
+phrases that may be repeated zero or more times;
+`{' and `}+' enclose phrases that must appear at least once and may be
+repeated.
+Terminal symbols are in
+.BR boldface ;
+nonterminal symbols are in
+.IR italics .
+.LP
+.nr x \w'\f3branches\fP'
+.nr y \w'{ \f3comment\fP'
+.if \nx<\ny .nr x \ny
+.nr y \w'\f3{ branch\fP'
+.if \nx<\ny .nr x \ny
+.ta \w'\f2deltatext\fP 'u +\w'::= 'u +\nxu+\w' 'u
+.fc ~
+.nf
+\f2rcstext\fP ::= \f2admin\fP {\f2delta\fP}* \f2desc\fP {\f2deltatext\fP}*
+.LP
+\f2admin\fP ::= \f3head\fP {\f2num\fP}\f3;\fP
+ { \f3branch\fP {\f2num\fP}\f3;\fP }
+ \f3access\fP {\f2id\fP}*\f3;\fP
+ \f3symbols\fP {\f2id\fP \f3:\fP \f2num\fP}*\f3;\fP
+ \f3locks\fP {\f2id\fP \f3:\fP \f2num\fP}*\f3;\fP {\f3strict ;\fP}
+ { \f3comment\fP {\f2string\fP}\f3;\fP }
+ { \f3expand\fP {\f2string\fP}\f3;\fP }
+ { \f2newphrase\fP }*
+.LP
+\f2delta\fP ::= \f2num\fP
+ \f3date\fP \f2num\fP\f3;\fP
+ \f3author\fP \f2id\fP\f3;\fP
+ \f3state\fP {\f2id\fP}\f3;\fP
+ \f3branches\fP {\f2num\fP}*\f3;\fP
+ \f3next\fP {\f2num\fP}\f3;\fP
+ { \f2newphrase\fP }*
+.LP
+\f2desc\fP ::= \f3desc\fP \f2string\fP
+.LP
+\f2deltatext\fP ::= \f2num\fP
+ \f3log\fP \f2string\fP
+ { \f2newphrase\fP }*
+ \f3text\fP \f2string\fP
+.LP
+\f2num\fP ::= {\f2digit\fP{\f3.\fP}}+
+.LP
+\f2digit\fP ::= \f30\fP | \f31\fP | .\|.\|. | \f39\fP
+.LP
+\f2id\fP ::= \f2letter\fP{\f2idchar\fP}*
+.LP
+\f2letter\fP ::= any letter
+.LP
+\f2idchar\fP ::= any visible graphic character except \f2special\fP
+.LP
+\f2special\fP ::= \f3$\fP | \f3,\fP | \f3.\fP | \f3:\fP | \f3;\fP | \f3@\fP
+.LP
+\f2string\fP ::= \f3@\fP{any character, with \f3@\fP doubled}*\f3@\fP
+.LP
+\f2newphrase\fP ::= \f2id\fP \f2word\fP* \f3;\fP
+.LP
+\f2word\fP ::= \f2id\fP | \f2num\fP | \f2string\fP | \f3:\fP
+.fi
+.PP
+Identifiers are case sensitive. Keywords are in lower case only.
+The sets of keywords and identifiers may overlap.
+In most environments RCS uses the ISO 8859/1 encoding:
+letters are octal codes 101\-132, 141\-172, 300\-326, 330\-366 and 370\-377,
+visible graphic characters are codes 041\-176 and 240\-377,
+and white space characters are codes 010\-015 and 040.
+.PP
+The
+.I newphrase
+productions in the grammar are reserved for future extensions
+to the format of \*r files.
+No
+.I newphrase
+will begin with any keyword already in use.
+.PP
+The
+.I delta
+nodes form a tree. All nodes whose numbers
+consist of a single pair
+(e.g., 2.3, 2.1, 1.3, etc.)
+are on the trunk, and are linked through the
+.B next
+field in order of decreasing numbers.
+The
+.B head
+field in the
+.I admin
+node points to the head of that sequence (i.e., contains
+the highest pair).
+The
+.B branch
+field in the admin node indicates the default
+branch (or revision) for most \*r operations.
+If empty, the default
+branch is the highest branch on the trunk.
+.PP
+All
+.I delta
+nodes whose numbers consist of
+.RI 2 n
+fields
+.RI ( n >=2)
+(e.g., 3.1.1.1, 2.1.2.2, etc.)
+are linked as follows.
+All nodes whose first
+.RI 2 n \-1
+number fields are identical are linked through the
+.B next
+field in order of increasing numbers.
+For each such sequence,
+the
+.I delta
+node whose number is identical to the first
+.RI 2 n \-2
+number fields of the deltas on that sequence is called the branchpoint.
+The
+.B branches
+field of a node contains a list of the
+numbers of the first nodes of all sequences for which it is a branchpoint.
+This list is ordered in increasing numbers.
+.LP
+.nf
+.vs 12
+.ne 38
+Example:
+.if t .in +0.5i
+.cs 1 20
+.eo
+
+ Head
+ |
+ |
+ v / \
+ --------- / \
+ / \ / \ | | / \ / \
+ / \ / \ | 2.1 | / \ / \
+ / \ / \ | | / \ / \
+/1.2.1.3\ /1.3.1.1\ | | /1.2.2.2\ /1.2.2.1.1.1\
+--------- --------- --------- --------- -------------
+ ^ ^ | ^ ^
+ | | | | |
+ | | v | |
+ / \ | --------- / \ |
+ / \ | \ 1.3 / / \ |
+ / \ ---------\ / / \-----------
+/1.2.1.1\ \ / /1.2.2.1\
+--------- \ / ---------
+ ^ | ^
+ | | |
+ | v |
+ | --------- |
+ | \ 1.2 / |
+ ----------------------\ /---------
+ \ /
+ \ /
+ |
+ |
+ v
+ ---------
+ \ 1.1 /
+ \ /
+ \ /
+ \ /
+
+.ec
+.if t .in
+.cs 1
+.ce
+Fig. 1: A revision tree
+.vs
+.fi
+.PP
+.SH IDENTIFICATION
+.de VL
+\\$2
+..
+Author: Walter F. Tichy,
+Purdue University, West Lafayette, IN, 47907.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH SEE ALSO
+ci(1), co(1), ident(1), rcs(1), rcsdiff(1), rcsmerge(1), rlog(1),
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
diff --git a/gnu/usr.bin/rcs/rcs/rcsintro.1 b/gnu/usr.bin/rcs/rcs/rcsintro.1
new file mode 100644
index 000000000000..a76caa0ee2d9
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcs/rcsintro.1
@@ -0,0 +1,292 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: rcsintro.1,v 5.1 1991/04/21 12:00:46 eggert Exp $
+.ds r \&\s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.am SS
+.LP
+..
+.TH RCSINTRO 1 \*(Dt GNU
+.SH NAME
+rcsintro \- introduction to RCS commands
+.SH DESCRIPTION
+The Revision Control System (\*r) manages multiple revisions of files.
+\*r automates the storing, retrieval, logging, identification, and merging
+of revisions. \*r is useful for text that is revised frequently, for example
+programs, documentation, graphics, papers, and form letters.
+.PP
+The basic user interface is extremely simple. The novice only needs
+to learn two commands:
+.BR ci (1)
+and
+.BR co (1).
+.BR ci ,
+short for \*(lqcheck in\*(rq, deposits the contents of a
+file into an archival file called an \*r file. An \*r file
+contains all revisions of a particular file.
+.BR co ,
+short for \*(lqcheck out\*(rq, retrieves revisions from an \*r file.
+.SS "Functions of \*r"
+.IP \(bu
+Store and retrieve multiple revisions of text. \*r saves all old
+revisions in a space efficient way.
+Changes no longer destroy the original, because the
+previous revisions remain accessible. Revisions can be retrieved according to
+ranges of revision numbers, symbolic names, dates, authors, and
+states.
+.IP \(bu
+Maintain a complete history of changes.
+\*r logs all changes automatically.
+Besides the text of each revision, \*r stores the author, the date and time of
+check-in, and a log message summarizing the change.
+The logging makes it easy to find out
+what happened to a module, without having to compare
+source listings or having to track down colleagues.
+.IP \(bu
+Resolve access conflicts. When two or more programmers wish to
+modify the same revision, \*r alerts the programmers and prevents one
+modification from corrupting the other.
+.IP \(bu
+Maintain a tree of revisions. \*r can maintain separate lines of development
+for each module. It stores a tree structure that represents the
+ancestral relationships among revisions.
+.IP \(bu
+Merge revisions and resolve conflicts.
+Two separate lines of development of a module can be coalesced by merging.
+If the revisions to be merged affect the same sections of code, \*r alerts the
+user about the overlapping changes.
+.IP \(bu
+Control releases and configurations.
+Revisions can be assigned symbolic names
+and marked as released, stable, experimental, etc.
+With these facilities, configurations of modules can be
+described simply and directly.
+.IP \(bu
+Automatically identify each revision with name, revision number,
+creation time, author, etc.
+The identification is like a stamp that can be embedded at an appropriate place
+in the text of a revision.
+The identification makes it simple to determine which
+revisions of which modules make up a given configuration.
+.IP \(bu
+Minimize secondary storage. \*r needs little extra space for
+the revisions (only the differences). If intermediate revisions are
+deleted, the corresponding deltas are compressed accordingly.
+.SS "Getting Started with \*r"
+Suppose you have a file
+.B f.c
+that you wish to put under control of \*r.
+If you have not already done so, make an \*r directory with the command
+.IP
+.B "mkdir RCS"
+.LP
+Then invoke the check-in command
+.IP
+.B "ci f.c"
+.LP
+This command creates an \*r file in the
+.B RCS
+directory,
+stores
+.B f.c
+into it as revision 1.1, and
+deletes
+.BR f.c .
+It also asks you for a description. The description
+should be a synopsis of the contents of the file. All later check-in
+commands will ask you for a log entry, which should summarize the
+changes that you made.
+.PP
+Files in the \*r directory are called \*r files;
+the others are called working files.
+To get back the working file
+.B f.c
+in the previous example, use the check-out
+command
+.IP
+.B "co f.c"
+.LP
+This command extracts the latest revision from the \*r file
+and writes
+it into
+.BR f.c .
+If you want to edit
+.BR f.c ,
+you must lock it as you check it out with the command
+.IP
+.B "co \-l f.c"
+.LP
+You can now edit
+.BR f.c .
+.PP
+Suppose after some editing you want to know what changes you have made.
+The command
+.IP
+.B "rcsdiff f.c"
+.LP
+tells you the difference between the most recently checked-in version
+and the working file.
+You can check the file back in by invoking
+.IP
+.B "ci f.c"
+.LP
+This increments the revision number properly.
+.PP
+If
+.B ci
+complains with the message
+.IP
+.BI "ci error: no lock set by " "your name"
+.LP
+then you have tried to check in a file even though you did not
+lock it when you checked it out.
+Of course, it is too late now to do the check-out with locking, because
+another check-out would
+overwrite your modifications. Instead, invoke
+.IP
+.B "rcs \-l f.c"
+.LP
+This command will lock the latest revision for you, unless somebody
+else got ahead of you already. In this case, you'll have to negotiate with
+that person.
+.PP
+Locking assures that you, and only you, can check in the next update, and
+avoids nasty problems if several people work on the same file.
+Even if a revision is locked, it can still be checked out for
+reading, compiling, etc. All that locking
+prevents is a
+.I "check-in"
+by anybody but the locker.
+.PP
+If your \*r file is private, i.e., if you are the only person who is going
+to deposit revisions into it, strict locking is not needed and you
+can turn it off.
+If strict locking is turned off,
+the owner of the \*r file need not have a lock for check-in; all others
+still do. Turning strict locking off and on is done with the commands
+.IP
+.BR "rcs \-U f.c" " and " "rcs \-L f.c"
+.LP
+If you don't want to clutter your working directory with \*r files, create
+a subdirectory called
+.B RCS
+in your working directory, and move all your \*r
+files there. \*r commands will look first into that directory to find
+needed files. All the commands discussed above will still work, without any
+modification.
+(Actually, pairs of \*r and working files can be specified in three ways:
+(a) both are given, (b) only the working file is given, (c) only the
+\*r file is given. Both \*r and working files may have arbitrary path prefixes;
+\*r commands pair them up intelligently.)
+.PP
+To avoid the deletion of the working file during check-in (in case you want to
+continue editing or compiling), invoke
+.IP
+.BR "ci \-l f.c" " or " "ci \-u f.c"
+.LP
+These commands check in
+.B f.c
+as usual, but perform an implicit
+check-out. The first form also locks the checked in revision, the second one
+doesn't. Thus, these options save you one check-out operation.
+The first form is useful if you want to continue editing,
+the second one if you just want to read the file.
+Both update the identification markers in your working file (see below).
+.PP
+You can give
+.B ci
+the number you want assigned to a checked in
+revision. Assume all your revisions were numbered 1.1, 1.2, 1.3, etc.,
+and you would like to start release 2.
+The command
+.IP
+.BR "ci \-r2 f.c" " or " "ci \-r2.1 f.c"
+.LP
+assigns the number 2.1 to the new revision.
+From then on,
+.B ci
+will number the subsequent revisions
+with 2.2, 2.3, etc. The corresponding
+.B co
+commands
+.IP
+.BR "co \-r2 f.c" " and " "co \-r2.1 f.c"
+.PP
+retrieve the latest revision numbered
+.RI 2. x
+and the revision 2.1,
+respectively.
+.B co
+without a revision number selects
+the latest revision on the
+.IR trunk ,
+i.e. the highest
+revision with a number consisting of two fields. Numbers with more than two
+fields are needed for branches.
+For example, to start a branch at revision 1.3, invoke
+.IP
+.B "ci \-r1.3.1 f.c"
+.LP
+This command starts a branch numbered 1 at revision 1.3, and assigns
+the number 1.3.1.1 to the new revision. For more information about
+branches, see
+.BR rcsfile (5).
+.SS "Automatic Identification"
+\*r can put special strings for identification into your source and object
+code. To obtain such identification, place the marker
+.IP
+.B "$\&Id$"
+.LP
+into your text, for instance inside a comment.
+\*r will replace this marker with a string of the form
+.IP
+.BI $\&Id: " filename revision date time author state " $
+.LP
+With such a marker on the first page of each module, you can
+always see with which revision you are working.
+\*r keeps the markers up to date automatically.
+To propagate the markers into your object code, simply put
+them into literal character strings. In C, this is done as follows:
+.IP
+.ft 3
+static char rcsid[] = \&"$\&Id$\&";
+.ft
+.LP
+The command
+.B ident
+extracts such markers from any file, even object code
+and dumps.
+Thus,
+.B ident
+lets you find out
+which revisions of which modules were used in a given program.
+.PP
+You may also find it useful to put the marker
+.B $\&Log$
+into your text, inside a comment. This marker accumulates
+the log messages that are requested during check-in.
+Thus, you can maintain the complete history of your file directly inside it.
+There are several additional identification markers; see
+.BR co (1)
+for
+details.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH "SEE ALSO"
+ci(1), co(1), ident(1), rcs(1), rcsdiff(1), rcsintro(1), rcsmerge(1), rlog(1)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
+.br
diff --git a/gnu/usr.bin/rcs/rcsclean/Makefile b/gnu/usr.bin/rcs/rcsclean/Makefile
new file mode 100644
index 000000000000..fc0c62655275
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsclean/Makefile
@@ -0,0 +1,7 @@
+PROG= rcsclean
+# One source file; headers come from ../lib and -lrcs is searched for in ../lib/obj.
+SRCS= rcsclean.c
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/rcsclean/rcsclean.1 b/gnu/usr.bin/rcs/rcsclean/rcsclean.1
new file mode 100644
index 000000000000..07ed7228b679
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsclean/rcsclean.1
@@ -0,0 +1,177 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: rcsclean.1,v 1.8 1991/11/03 01:09:19 eggert Exp $
+.ds r \&\s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH RCSCLEAN 1 \*(Dt GNU
+.SH NAME
+rcsclean \- clean up working files
+.SH SYNOPSIS
+.B rcsclean
+.RI [ options "] [ " file " .\|.\|. ]"
+.SH DESCRIPTION
+.B rcsclean
+removes working files that were checked out and never modified.
+For each
+.I file
+given,
+.B rcsclean
+compares the working file and a revision in the corresponding
+\*r file. If it finds a difference, it does nothing.
+Otherwise, it first unlocks the revision if the
+.B \-u
+option is given,
+and then removes the working file
+unless the working file is writable and the revision is locked.
+It logs its actions by outputting the corresponding
+.B "rcs \-u"
+and
+.B "rm \-f"
+commands on the standard output.
+.PP
+If no
+.I file
+is given, all working files in the current directory are cleaned.
+Pathnames matching an \*r suffix denote \*r files;
+all others denote working files.
+Names are paired as explained in
+.BR ci (1).
+.PP
+The number of the revision to which the working file is compared
+may be attached to any of the options
+.BR \-n ,
+.BR \-q ,
+.BR \-r ,
+or
+.BR \-u .
+If no revision number is specified, then if the
+.B \-u
+option is given and the caller has one revision locked,
+.B rcsclean
+uses that revision; otherwise
+.B rcsclean
+uses the latest revision on the default branch, normally the root.
+.PP
+.B rcsclean
+is useful for
+.B clean
+targets in Makefiles.
+See also
+.BR rcsdiff (1),
+which prints out the differences,
+and
+.BR ci (1),
+which
+normally asks whether to check in a file
+if it was not changed.
+.SH OPTIONS
+.TP
+.BI \-k subst
+Use
+.I subst
+style keyword substitution when retrieving the revision for comparison.
+See
+.BR co (1)
+for details.
+.TP
+.BR \-n [\f2rev\fP]
+Do not actually remove any files or unlock any revisions.
+Using this option will tell you what
+.B rcsclean
+would do without actually doing it.
+.TP
+.BR \-q [\f2rev\fP]
+Do not log the actions taken on standard output.
+.TP
+.BR \-r [\f2rev\fP]
+This option has no effect other than specifying the revision for comparison.
+.TP
+.BR \-u [\f2rev\fP]
+Unlock the revision if it is locked and no difference is found.
+.TP
+.BI \-V n
+Emulate \*r version
+.IR n .
+See
+.BR co (1)
+for details.
+.TP
+.BI \-x "suffixes"
+Use
+.I suffixes
+to characterize \*r files.
+See
+.BR ci (1)
+for details.
+.SH EXAMPLES
+.LP
+.RS
+.ft 3
+rcsclean *.c *.h
+.ft
+.RE
+.LP
+removes all working files ending in
+.B .c
+or
+.B .h
+that were not changed
+since their checkout.
+.LP
+.RS
+.ft 3
+rcsclean
+.ft
+.RE
+.LP
+removes all working files in the current directory
+that were not changed since their checkout.
+.SH FILES
+.B rcsclean
+accesses files much as
+.BR ci (1)
+does.
+.SH ENVIRONMENT
+.TP
+.B \s-1RCSINIT\s0
+options prepended to the argument list, separated by spaces.
+A backslash escapes spaces within an option.
+The
+.B \s-1RCSINIT\s0
+options are prepended to the argument lists of most \*r commands.
+Useful
+.B \s-1RCSINIT\s0
+options include
+.BR \-q ,
+.BR \-V ,
+and
+.BR \-x .
+.SH DIAGNOSTICS
+The exit status is zero if and only if all operations were successful.
+Missing working files and \*r files are silently ignored.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH "SEE ALSO"
+ci(1), co(1), ident(1), rcs(1), rcsdiff(1), rcsintro(1), rcsmerge(1), rlog(1),
+rcsfile(5)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
+.SH BUGS
+At least one
+.I file
+must be given in older Unix versions that
+do not provide the needed directory scanning operations.
+.br
diff --git a/gnu/usr.bin/rcs/rcsclean/rcsclean.c b/gnu/usr.bin/rcs/rcsclean/rcsclean.c
new file mode 100644
index 000000000000..ba24ab77e0b3
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsclean/rcsclean.c
@@ -0,0 +1,297 @@
+/* rcsclean - clean up working files */
+
+/* Copyright 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+#include "rcsbase.h"
+
+#if has_dirent
+ static int get_directory P((char const*,char***));
+#endif
+
+static int unlock P((struct hshentry *));
+static void cleanup P((void));
+
+static RILE *workptr;
+static int exitstatus;
+
+mainProg(rcscleanId, "rcsclean", "$Id: rcsclean.c,v 5.1 1991/11/03 01:11:44 eggert Exp $")
+{ /* rcsclean entry point: parse the options, then for each file argument
+   * compare the working file with one revision of its RCS file.  When no
+   * difference is found, the "rcs -u" and "rm -f" actions are logged on
+   * stdout unless quietflag is set, and the working file is unlinked when
+   * perform is set.  Ends through exitmain(exitstatus).
+   */
+ static char const usage[] =
+ "\nrcsclean: usage: rcsclean [-ksubst] [-{nqru}[rev]] [-Vn] [-xsuffixes] [file ...]";
+
+ static struct buf revision;
+
+ char *a, **newargv;
+ char const *rev, *p;
+ int changelock, expmode, perform, unlocked, unlockflag, waslocked;
+ struct hshentries *deltas;
+ struct hshentry *delta;
+ struct stat workstat;
+
+ setrid();
+ /* defaults: no -k mode, no explicit revision, really perform, do not unlock */
+ expmode = -1;
+ rev = nil;
+ suffixes = X_DEFAULT;
+ perform = true;
+ unlockflag = false;
+
+ argc = getRCSINIT(argc, argv, &newargv);
+ argv = newargv;
+ for (;;) { /* option loop; left at the first argument that does not start with '-' */
+ if (--argc <= 0) { /* arguments exhausted before any file name was seen */
+# if has_dirent
+ argc = get_directory(".", &newargv);
+ argv = newargv;
+ break;
+# else
+ faterror("no file names specified");
+# endif
+ }
+ a = *++argv;
+ if (*a++ != '-')
+ break;
+ switch (*a++) {
+ case 'k':
+ if (0 <= expmode)
+ redefined('k');
+ if ((expmode = str2expmode(a)) < 0)
+ goto unknown;
+ break;
+
+ case 'n':
+ perform = false;
+ goto handle_revision;
+
+ case 'q':
+ quietflag = true;
+ /* fall into */
+ case 'r':
+ handle_revision:
+ if (*a) { /* a revision is attached to -n, -q, -r or -u */
+ if (rev)
+ warn("redefinition of revision number");
+ rev = a;
+ }
+ break;
+
+ case 'u':
+ unlockflag = true;
+ goto handle_revision;
+
+ case 'V':
+ setRCSversion(*argv);
+ break;
+
+ case 'x':
+ suffixes = a;
+ break;
+
+ default:
+ unknown:
+ faterror("unknown option: %s%s", *argv, usage);
+ }
+ }
+
+ do { /* once per file argument; every "continue" jumps to cleanup() in the loop condition */
+ ffree();
+
+ if (!(
+ 0 < pairfilenames(
+ argc, argv,
+ unlockflag&perform ? rcswriteopen : rcsreadopen, /* rcswriteopen only for -u without -n */
+ true, true
+ ) &&
+ (workptr = Iopen(workfilename,FOPEN_R_WORK,&workstat))
+ ))
+ continue;
+
+ gettree();
+ /* choose the revision: explicit rev; else, with Head set, per findlock() under -u, or Dbranch / "" */
+ p = 0;
+ if (rev) {
+ if (!fexpandsym(rev, &revision, workptr))
+ continue;
+ p = revision.string;
+ } else if (Head)
+ switch (unlockflag ? findlock(false,&delta) : 0) {
+ default:
+ continue;
+ case 0:
+ p = Dbranch ? Dbranch : "";
+ break;
+ case 1:
+ p = delta->num;
+ break;
+ }
+ delta = 0;
+ deltas = 0; /* Keep lint happy. */
+ if (p && !(delta = genrevs(p,(char*)0,(char*)0,(char*)0,&deltas)))
+ continue;
+ /* skip the file if its revision was locked, is not being unlocked, and the file is writable */
+ waslocked = delta && delta->lockedby;
+ locker_expansion = unlock(delta);
+ unlocked = locker_expansion & unlockflag;
+ changelock = unlocked & perform;
+ if (unlocked<waslocked && workstat.st_mode&(S_IWUSR|S_IWGRP|S_IWOTH))
+ continue;
+
+ if (!dorewrite(unlockflag, changelock))
+ continue;
+
+ if (0 <= expmode)
+ Expand = expmode;
+ else if (
+ waslocked &&
+ Expand == KEYVAL_EXPAND &&
+ WORKMODE(RCSstat.st_mode,true) == workstat.st_mode
+ )
+ Expand = KEYVALLOCK_EXPAND;
+
+ getdesc(false);
+ /* skip on a positive rcsfcmp() result, or on a nonempty working file when there is no delta */
+ if (
+ !delta ? workstat.st_size!=0 :
+ 0 < rcsfcmp(
+ workptr, &workstat,
+ buildrevision(deltas, delta, (FILE*)0, false),
+ delta
+ )
+ )
+ continue;
+
+ if (quietflag < unlocked) /* for 0/1 values: unlocked && !quietflag */
+ aprintf(stdout, "rcs -u%s %s\n", delta->num, RCSfilename);
+
+ if_advise_access(changelock && deltas->first != delta,
+ finptr, MADV_SEQUENTIAL
+ );
+ if (!donerewrite(changelock))
+ continue;
+
+ if (!quietflag)
+ aprintf(stdout, "rm -f %s\n", workfilename);
+ Izclose(&workptr);
+ if (perform && un_link(workfilename) != 0)
+ eerror(workfilename);
+
+ } while (cleanup(), ++argv, 0 < --argc); /* cleanup() runs after every file, skipped ones included */
+
+ tempunlink();
+ if (!quietflag)
+ Ofclose(stdout);
+ exitmain(exitstatus);
+}
+
+ static void
+cleanup() /* Called from the file loop's condition in mainProg after each file:
+           * records a failure in exitstatus once nerror is nonzero, closes
+           * finptr, workptr, fcopy and frewrite, then calls dirtempunlink().
+           */
+{
+ if (nerror) exitstatus = EXIT_FAILURE; /* exitstatus is never reset once set */
+ Izclose(&finptr);
+ Izclose(&workptr);
+ Ozclose(&fcopy);
+ Ozclose(&frewrite);
+ dirtempunlink();
+}
+
+#if lint
+# define exiterr rcscleanExit
+#endif
+ exiting void
+exiterr() /* Failure exit: calls dirtempunlink() and tempunlink(), then
+           * terminates with EXIT_FAILURE.  Under lint the function is
+           * renamed rcscleanExit by the #define above.
+           */
+{
+ dirtempunlink();
+ tempunlink();
+ _exit(EXIT_FAILURE); /* _exit rather than exit */
+}
+
+ static int
+unlock(delta)
+ struct hshentry *delta;
+/*
+ * If DELTA is locked by the caller (its lockedby equals getcaller()),
+ * unlink the entry for DELTA from Locks, clear DELTA's lockedby and
+ * yield true.  Yield false in every other case, including a nil DELTA.
+ */
+{
+    register struct lock **link, *cur;
+
+    if (!delta || !delta->lockedby)
+        return false;
+    if (strcmp(getcaller(),delta->lockedby) != 0)
+        return false;
+
+    link = &Locks;
+    while ((cur = *link)) {
+        if (cur->delta == delta) {
+            *link = cur->nextlock;
+            delta->lockedby = 0;
+            return true;
+        }
+        link = &cur->nextlock;
+    }
+    return false;
+}
+
+#if has_dirent
+ static int
+get_directory(dirname, aargv)
+ char const *dirname;
+ char ***aargv;
+/*
+ * Put a vector of all DIRNAME's directory entries names into *AARGV.
+ * Ignore names of RCS files (those for which rcssuffix() is nonzero).
+ * Yield the number of entries found. Terminate the vector with 0.
+ * Allocate the storage for the vector and entry names.
+ * Do not sort the names. Do not include '.' and '..'.
+ * Failure of opendir, readdir or closedir ends in efaterror(DIRNAME).
+ * All names share one character buffer; nothing here frees that buffer
+ * or the vector, so they stay allocated after the call.
+ */
+{
+ int i, entries = 0, entries_max = 64;
+ size_t chars = 0, chars_max = 1024;
+ size_t *offset = tnalloc(size_t, entries_max); /* start of each name within a */
+ char *a = tnalloc(char, chars_max), **p; /* all names, each with its terminating 0 */
+ DIR *d;
+ struct dirent *e;
+
+ if (!(d = opendir(dirname)))
+ efaterror(dirname);
+ while ((errno = 0, e = readdir(d))) { /* errno cleared first so the test after the loop sees only readdir's error */
+ char const *en = e->d_name;
+ size_t s = strlen(en) + 1;
+ if (en[0]=='.' && (!en[1] || en[1]=='.' && !en[2])) /* "." or ".." */
+ continue;
+ if (rcssuffix(en))
+ continue;
+ while (chars_max < s + chars) /* double the buffer until the name fits */
+ a = trealloc(char, a, chars_max<<=1);
+ if (entries == entries_max)
+ offset = trealloc(size_t, offset, entries_max<<=1);
+ offset[entries++] = chars; /* an offset, not a pointer: a is reassigned by trealloc */
+ VOID strcpy(a+chars, en);
+ chars += s;
+ }
+ if (errno || closedir(d) != 0)
+ efaterror(dirname);
+ if (chars)
+ a = trealloc(char, a, chars); /* trim to the space actually used */
+ else
+ tfree(a); /* no names were stored; a is not used below since entries is 0 */
+ *aargv = p = tnalloc(char*, entries+1);
+ for (i=0; i<entries; i++)
+ *p++ = a + offset[i];
+ *p = 0;
+ tfree(offset);
+ return entries;
+}
+#endif
diff --git a/gnu/usr.bin/rcs/rcsdiff/Makefile b/gnu/usr.bin/rcs/rcsdiff/Makefile
new file mode 100644
index 000000000000..837c241dbcf8
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsdiff/Makefile
@@ -0,0 +1,7 @@
+PROG= rcsdiff
+
+SRCS= rcsdiff.c
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/rcsdiff/rcsdiff.1 b/gnu/usr.bin/rcs/rcsdiff/rcsdiff.1
new file mode 100644
index 000000000000..b78bbdd17c1f
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsdiff/rcsdiff.1
@@ -0,0 +1,152 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: rcsdiff.1,v 5.3 1991/04/21 12:00:46 eggert Exp $
+.ds r \&\s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH RCSDIFF 1 \*(Dt GNU
+.SH NAME
+rcsdiff \- compare RCS revisions
+.SH SYNOPSIS
+.B rcsdiff
+[
+.BI \-k subst
+] [
+.B \-q
+] [
+.BI \-r rev1
+[
+.BI \-r rev2
+] ] [
+.BI \-V n
+] [
+.BI \-x suffixes
+] [
+.I "diff options"
+]
+.I "file .\|.\|."
+.SH DESCRIPTION
+.B rcsdiff
+runs
+.BR diff (1)
+to compare two revisions of each \*r file given.
+.PP
+Pathnames matching an \*r suffix denote \*r files;
+all others denote working files.
+Names are paired as explained in
+.BR ci (1).
+.PP
+The option
+.B \-q
+suppresses diagnostic output.
+Zero, one, or two revisions may be specified with
+.BR \-r .
+The option
+.BI \-k subst
+affects keyword substitution when extracting
+revisions, as described in
+.BR co (1);
+for example,
+.B "\-kk\ \-r1.1\ \-r1.2"
+ignores differences in keyword values when comparing revisions
+.B 1.1
+and
+.BR 1.2 .
+To avoid excess output from locker name substitution,
+.B \-kkvl
+is assumed if (1) at most one revision option is given,
+(2) no
+.B \-k
+option is given, (3)
+.B \-kkv
+is the default keyword substitution, and
+(4) the working file's mode would be produced by
+.BR "co\ \-l".
+See
+.BR co (1)
+for details
+about
+.B \-V
+and
+.BR \-x .
+Otherwise, all options of
+.BR diff (1)
+that apply to regular files are accepted, with the same meaning as for
+.BR diff .
+.PP
+If both
+.I rev1
+and
+.I rev2
+are omitted,
+.B rcsdiff
+compares the latest revision on the
+default branch (by default the trunk)
+with the contents of the corresponding working file. This is useful
+for determining what you changed since the last checkin.
+.PP
+If
+.I rev1
+is given, but
+.I rev2
+is omitted,
+.B rcsdiff
+compares revision
+.I rev1
+of the \*r file with
+the contents of the corresponding working file.
+.PP
+If both
+.I rev1
+and
+.I rev2
+are given,
+.B rcsdiff
+compares revisions
+.I rev1
+and
+.I rev2
+of the \*r file.
+.PP
+Both
+.I rev1
+and
+.I rev2
+may be given numerically or symbolically.
+.SH EXAMPLE
+The command
+.LP
+.B " rcsdiff f.c"
+.LP
+compares the latest revision on the default branch of the \*r file
+to the contents of the working file
+.BR f.c .
+.SH ENVIRONMENT
+.TP
+.B \s-1RCSINIT\s0
+options prepended to the argument list, separated by spaces.
+See
+.BR ci (1)
+for details.
+.SH DIAGNOSTICS
+Exit status is 0 for no differences during any comparison,
+1 for some differences, 2 for trouble.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH "SEE ALSO"
+ci(1), co(1), diff(1), ident(1), rcs(1), rcsintro(1), rcsmerge(1), rlog(1)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
+.br
diff --git a/gnu/usr.bin/rcs/rcsdiff/rcsdiff.c b/gnu/usr.bin/rcs/rcsdiff/rcsdiff.c
new file mode 100644
index 000000000000..7155c8d89b8c
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsdiff/rcsdiff.c
@@ -0,0 +1,422 @@
+/*
+ * RCS rcsdiff operation
+ */
+/*****************************************************************************
+ * generate difference between RCS revisions
+ *****************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+
+/* $Log: rcsdiff.c,v $
+ * Revision 5.10 1991/10/07 17:32:46 eggert
+ * Remove lint.
+ *
+ * Revision 5.9 1991/08/19 03:13:55 eggert
+ * Add RCSINIT, -r$. Tune.
+ *
+ * Revision 5.8 1991/04/21 11:58:21 eggert
+ * Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.7 1990/12/13 06:54:07 eggert
+ * GNU diff 1.15 has -u.
+ *
+ * Revision 5.6 1990/11/01 05:03:39 eggert
+ * Remove unneeded setid check.
+ *
+ * Revision 5.5 1990/10/04 06:30:19 eggert
+ * Accumulate exit status across files.
+ *
+ * Revision 5.4 1990/09/27 01:31:43 eggert
+ * Yield 1, not EXIT_FAILURE, when diffs are found.
+ *
+ * Revision 5.3 1990/09/11 02:41:11 eggert
+ * Simplify -kkvl test.
+ *
+ * Revision 5.2 1990/09/04 17:07:19 eggert
+ * Diff's argv was too small by 1.
+ *
+ * Revision 5.1 1990/08/29 07:13:55 eggert
+ * Add -kkvl.
+ *
+ * Revision 5.0 1990/08/22 08:12:46 eggert
+ * Add -k, -V. Don't use access(). Add setuid support.
+ * Remove compile-time limits; use malloc instead.
+ * Don't pass arguments with leading '+' to diff; GNU DIFF treats them as options.
+ * Add GNU diff's flags. Make lock and temp files faster and safer.
+ * Ansify and Posixate.
+ *
+ * Revision 4.6 89/05/01 15:12:27 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.5 88/08/09 19:12:41 eggert
+ * Use execv(), not system(); yield exit status like diff(1)s; allow cc -R.
+ *
+ * Revision 4.4 87/12/18 11:37:46 narten
+ * changes Jay Lepreau made in the 4.3 BSD version, to add support for
+ * "-i", "-w", and "-t" flags and to permit flags to be bundled together,
+ * merged in.
+ *
+ * Revision 4.3 87/10/18 10:31:42 narten
+ * Updating version numbers. Changes relative to 1.1 actually
+ * relative to 4.1
+ *
+ * Revision 1.3 87/09/24 13:59:21 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:15 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/05/03 22:13:19 wft
+ * Added default branch, option -q, exit status like diff.
+ * Added fterror() to replace faterror().
+ *
+ * Revision 3.6 83/01/15 17:52:40 wft
+ * Expanded mainprogram to handle multiple RCS files.
+ *
+ * Revision 3.5 83/01/06 09:33:45 wft
+ * Fixed passing of -c (context) option to diff.
+ *
+ * Revision 3.4 82/12/24 15:28:38 wft
+ * Added call to catchsig().
+ *
+ * Revision 3.3 82/12/10 16:08:17 wft
+ * Corrected checking of return code from diff; improved error msgs.
+ *
+ * Revision 3.2 82/12/04 13:20:09 wft
+ * replaced getdelta() with gettree(). Changed diagnostics.
+ *
+ * Revision 3.1 82/11/28 19:25:04 wft
+ * Initial revision.
+ *
+ */
+#include "rcsbase.h"
+
+#if DIFF_L
+static char const *setup_label P((struct buf*,char const*,char const[datesize]));
+#endif
+static void cleanup P((void));
+
+static int exitstatus;
+static RILE *workptr;
+static struct stat workstat;
+
+mainProg(rcsdiffId, "rcsdiff", "$Id: rcsdiff.c,v 5.10 1991/10/07 17:32:46 eggert Exp $")
+{ /* rcsdiff: for each file operand, extract the requested revision(s) with co and run diff on them. */
+ static char const cmdusage[] =
+ "\nrcsdiff usage: rcsdiff [-q] [-rrev1 [-rrev2]] [-Vn] [diff options] file ...";
+
+ int revnums; /* counter for revision numbers given */
+ char const *rev1, *rev2; /* revision numbers from command line */
+ char const *xrev1, *xrev2; /* expanded revision numbers */
+ char const *expandarg, *lexpandarg, *versionarg;
+#if DIFF_L
+ static struct buf labelbuf[2];
+ int file_labels;
+ char const **diff_label1, **diff_label2;
+ char date2[datesize];
+#endif
+ char const *cov[9]; /* argv for subsidiary co */
+ char const **diffv, **diffp; /* argv for subsidiary diff */
+ char const **pp, *p, *diffvstr;
+ struct buf commarg;
+ struct buf numericrev; /* expanded revision number */
+ struct hshentries *gendeltas; /* deltas to be generated */
+ struct hshentry * target;
+ char *a, *dcp, **newargv;
+ register c;
+
+ exitstatus = DIFF_SUCCESS;
+
+ bufautobegin(&commarg);
+ bufautobegin(&numericrev);
+ revnums = 0;
+ rev1 = rev2 = xrev2 = nil;
+#if DIFF_L
+ file_labels = 0;
+#endif
+ expandarg = versionarg = 0;
+ suffixes = X_DEFAULT;
+
+ argc = getRCSINIT(argc, argv, &newargv); /* first, so that argc also counts RCSINIT's options when diffv is sized */
+ argv = newargv;
+
+ /* Room for args + 2 i/o [+ 2 labels] + 1 file + 1 trailing null. */
+ diffp = diffv = tnalloc(char const*, argc + 4 + 2*DIFF_L);
+ *diffp++ = nil;
+ *diffp++ = nil;
+ *diffp++ = DIFF;
+ while (a = *++argv, 0<--argc && *a++=='-') {
+ dcp = a; /* dcp rewrites *argv in place, keeping only the letters that are passed on to diff */
+ while (c = *a++) switch (c) {
+ case 'r':
+ switch (++revnums) {
+ case 1: rev1=a; break;
+ case 2: rev2=a; break;
+ default: faterror("too many revision numbers");
+ }
+ goto option_handled;
+#if DIFF_L
+ case 'L':
+ if (++file_labels == 2)
+ faterror("too many -L options");
+ /* fall into */
+#endif
+ case 'C': case 'D': case 'F': case 'I':
+ *dcp++ = c;
+ if (*a)
+ do *dcp++ = *a++;
+ while (*a);
+ else {
+ if (!--argc)
+ faterror("-%c needs following argument%s",
+ c, cmdusage
+ );
+ *diffp++ = *argv++; /* pass the option itself; the code at option_handled then passes its separate argument */
+ }
+ break;
+ case 'B': case 'H': case 'T':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'h': case 'i': case 'n': case 'p':
+ case 't': case 'u': case 'w':
+ *dcp++ = c;
+ break;
+ case 'q':
+ quietflag=true;
+ break;
+ case 'x':
+ suffixes = *argv + 2;
+ goto option_handled;
+ case 'V':
+ versionarg = *argv;
+ setRCSversion(versionarg);
+ goto option_handled;
+ case 'k':
+ expandarg = *argv;
+ if (0 <= str2expmode(expandarg+2))
+ goto option_handled;
+ /* fall into */
+ default:
+ faterror("unknown option: %s%s", *argv, cmdusage);
+ };
+ option_handled:
+ if (dcp != *argv+1) { /* something was kept for diff */
+ *dcp = 0;
+ *diffp++ = *argv;
+ }
+ } /* end of option processing */
+
+ if (argc<1) faterror("no input file%s", cmdusage);
+
+ for (pp = diffv+3, c = 0; pp<diffp; ) /* diffvstr: the diff options joined, each preceded by a space, for diagnostics */
+ c += strlen(*pp++) + 1;
+ diffvstr = a = tnalloc(char, c + 1);
+ for (pp = diffv+3; pp<diffp; ) {
+ p = *pp++;
+ *a++ = ' ';
+ while ((*a = *p++))
+ a++;
+ }
+ *a = 0;
+
+#if DIFF_L
+ diff_label1 = diff_label2 = nil;
+ if (file_labels < 2) { /* reserve diffv slots for the labels generated per file */
+ if (!file_labels)
+ diff_label1 = diffp++;
+ diff_label2 = diffp++;
+ }
+#endif
+ diffp[2] = nil; /* diffp[0] and diffp[1] are set for each file below */
+
+ cov[0] = 0; /* cov[0], cov[1]: presumably runv's input/output slots, like diffv's first two */
+ cov[2] = CO;
+ cov[3] = "-q";
+
+ /* now handle all filenames */
+ do {
+ ffree();
+
+ if (pairfilenames(argc, argv, rcsreadopen, true, false) <= 0)
+ continue;
+ diagnose("===================================================================\nRCS file: %s\n",RCSfilename);
+ if (!rev2) {
+ /* Make sure work file is readable, and get its status. */
+ if (!(workptr = Iopen(workfilename,FOPEN_R_WORK,&workstat))) {
+ eerror(workfilename);
+ continue;
+ }
+ }
+
+
+ gettree(); /* reads in the delta tree */
+
+ if (Head==nil) {
+ error("no revisions present");
+ continue;
+ }
+ /* Choose this file's own default without overwriting rev1: later files need it as given. */
+ p = revnums==0 || !*rev1 ? (Dbranch ? Dbranch : Head->num) : rev1;
+
+ if (!fexpandsym(p, &numericrev, workptr)) continue;
+ if (!(target=genrevs(numericrev.string,(char *)nil,(char *)nil,(char *)nil,&gendeltas))) continue;
+ xrev1=target->num;
+#if DIFF_L
+ if (diff_label1)
+ *diff_label1 = setup_label(&labelbuf[0], target->num, target->date);
+#endif
+
+ lexpandarg = expandarg;
+ if (revnums==2) {
+ if (!fexpandsym(
+ *rev2 ? rev2 : Dbranch ? Dbranch : Head->num,
+ &numericrev,
+ workptr
+ ))
+ continue;
+ if (!(target=genrevs(numericrev.string,(char *)nil,(char *)nil,(char *)nil,&gendeltas))) continue;
+ xrev2=target->num;
+ } else if ( /* comparing against the working file: decide whether to assume -kkvl */
+ target->lockedby
+ && !lexpandarg
+ && Expand == KEYVAL_EXPAND
+ && WORKMODE(RCSstat.st_mode,true) == workstat.st_mode
+ )
+ lexpandarg = "-kkvl";
+ Izclose(&workptr);
+#if DIFF_L
+ if (diff_label2)
+ if (revnums == 2)
+ *diff_label2 = setup_label(&labelbuf[1], target->num, target->date);
+ else {
+ time2date(workstat.st_mtime, date2);
+ *diff_label2 = setup_label(&labelbuf[1], workfilename, date2);
+ }
+#endif
+
+ diagnose("retrieving revision %s\n", xrev1);
+ bufscpy(&commarg, "-p");
+ bufscat(&commarg, xrev1);
+
+ cov[1] = diffp[0] = maketemp(0); /* co's output file is diff's first operand */
+ pp = &cov[4];
+ *pp++ = commarg.string;
+ if (lexpandarg)
+ *pp++ = lexpandarg;
+ if (versionarg)
+ *pp++ = versionarg;
+ *pp++ = RCSfilename;
+ *pp = 0;
+
+ if (runv(cov)) {
+ error("co failed");
+ continue;
+ }
+ if (!rev2) {
+ diffp[1] = workfilename;
+ if (workfilename[0] == '+') {
+ /* Some diffs have options with leading '+'. */
+ char *dp = ftnalloc(char, strlen(workfilename)+3);
+ diffp[1] = dp;
+ *dp++ = '.';
+ *dp++ = SLASH;
+ VOID strcpy(dp, workfilename);
+ }
+ } else {
+ diagnose("retrieving revision %s\n",xrev2);
+ bufscpy(&commarg, "-p");
+ bufscat(&commarg, xrev2);
+ cov[1] = diffp[1] = maketemp(1);
+ cov[4] = commarg.string; /* the rest of cov is reused from the first co run */
+ if (runv(cov)) {
+ error("co failed");
+ continue;
+ }
+ }
+ if (!rev2)
+ diagnose("diff%s -r%s %s\n", diffvstr, xrev1, workfilename);
+ else
+ diagnose("diff%s -r%s -r%s\n", diffvstr, xrev1, xrev2);
+
+ switch (runv(diffv)) {
+ case DIFF_SUCCESS:
+ break;
+ case DIFF_FAILURE:
+ if (exitstatus == DIFF_SUCCESS)
+ exitstatus = DIFF_FAILURE;
+ break;
+ default:
+ error("diff failed");
+ }
+ } while (cleanup(),
+ ++argv, --argc >=1);
+
+
+ tempunlink();
+ exitmain(exitstatus);
+}
+
+ static void
+cleanup() /* Per-file cleanup; the main loop calls it from its loop condition after every file operand. */
+{
+ if (nerror) exitstatus = DIFF_TROUBLE; /* a nonzero nerror overrides whatever status the diffs produced */
+ Izclose(&finptr);
+ Izclose(&workptr);
+}
+
+#if lint
+# define exiterr rdiffExit
+#endif
+ exiting void
+exiterr() /* Call tempunlink() (presumably removing temporary files), then terminate with DIFF_TROUBLE. */
+{
+ tempunlink();
+ _exit(DIFF_TROUBLE); /* _exit() ends the process at once, without running exit()'s handlers */
+}
+
+#if DIFF_L
+ static char const *
+setup_label(b, name, date) /* Build in B a diff label option: "-L", NAME, a tab, then DATE's text; yield B's string. */
+ struct buf *b;
+ char const *name;
+ char const date[datesize];
+{
+ char *p;
+ size_t l = strlen(name) + 3; /* length of "-L" + NAME + the tab */
+ bufalloc(b, l+datesize); /* presumably ensures B can hold that prefix plus datesize more bytes */
+ p = b->string;
+ VOID sprintf(p, "-L%s\t", name);
+ VOID date2str(date, p+l); /* presumably formats DATE into the space right after the tab */
+ return p;
+}
+#endif
diff --git a/gnu/usr.bin/rcs/rcsfreeze/Makefile b/gnu/usr.bin/rcs/rcsfreeze/Makefile
new file mode 100644
index 000000000000..825d4bffec65
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsfreeze/Makefile
@@ -0,0 +1,7 @@
+# Do nothing for the following
+obj clean cleandir depend rcsfreeze all:
+ @echo No need to make $@ for rcsfreeze\; ignored
+
+install:
+ install -c -o bin -g bin -m 555 rcsfreeze.sh /usr/bin/rcsfreeze
+ install -c -o bin -g bin -m 444 rcsfreeze.1 /usr/share/man/man1
diff --git a/gnu/usr.bin/rcs/rcsfreeze/rcsfreeze.1 b/gnu/usr.bin/rcs/rcsfreeze/rcsfreeze.1
new file mode 100644
index 000000000000..be669a9f2a0d
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsfreeze/rcsfreeze.1
@@ -0,0 +1,68 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: rcsfreeze.1,v 4.4 1990/11/13 15:43:42 hammer Exp $
+.ds r \s-1RCS\s0
+.TH RCSFREEZE 1 \*(Dt GNU
+.SH NAME
+rcsfreeze \- freeze a configuration of sources checked in under RCS
+.SH SYNOPSIS
+.B rcsfreeze
+.RI [ "name" ]
+.SH DESCRIPTION
+.B rcsfreeze
+assigns a symbolic revision
+number to a set of \*r files that form a valid configuration.
+.PP
+The idea is to run
+.B rcsfreeze
+each time a new version is checked
+in. A unique symbolic name (\c
+.BI C_ number,
+where
+.I number
+is increased each time
+.B rcsfreeze
+is run) is then assigned to the most
+recent revision of each \*r file of the main trunk.
+.PP
+An optional
+.I name
+argument to
+.B rcsfreeze
+gives a symbolic name to the configuration.
+The unique identifier is still generated
+and is listed in the log file but it will not appear as
+part of the symbolic revision name in the actual \*r files.
+.PP
+A log message is requested from the user for future reference.
+.PP
+The shell script works only on all \*r files at one time.
+All changed files must be checked in already.
+Run
+.IR rcsclean (1)
+first and see whether any sources remain in the current directory.
+.SH FILES
+.TP
+.B RCS/.rcsfreeze.ver
+version number
+.TP
+.B RCS/.rcsfreeze.log
+log messages, most recent first
+.SH AUTHOR
+Stephan v. Bechtolsheim
+.SH "SEE ALSO"
+co(1), rcs(1), rcsclean(1), rlog(1)
+.SH BUGS
+.B rcsfreeze
+does not check whether any sources are checked out and modified.
+.PP
+Although both source file names and RCS file names are accepted,
+they are not paired as usual with RCS commands.
+.PP
+Error checking is rudimentary.
+.PP
+.B rcsfreeze
+is just an optional example shell script, and should not be taken too seriously.
+See \s-1CVS\s0 for a more complete solution.
diff --git a/gnu/usr.bin/rcs/rcsfreeze/rcsfreeze.sh b/gnu/usr.bin/rcs/rcsfreeze/rcsfreeze.sh
new file mode 100644
index 000000000000..421997946b76
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsfreeze/rcsfreeze.sh
@@ -0,0 +1,100 @@
+#! /bin/sh
+
+# rcsfreeze - assign a symbolic revision number to a configuration of RCS files
+
+# $Id: rcsfreeze.sh,v 4.4 1991/04/21 11:58:24 eggert Exp $
+
+# The idea is to run rcsfreeze each time a new version is checked
+# in. A unique symbolic revision number (C_[number], where number
+# is increased each time rcsfreeze is run) is then assigned to the most
+# recent revision of each RCS file of the main trunk.
+#
+# If the command is invoked with an argument, then this
+# argument is used as the symbolic name to freeze a configuration.
+# The unique identifier is still generated
+# and is listed in the log file but it will not appear as
+# part of the symbolic revision name in the actual RCS file.
+#
+# A log message is requested from the user which is saved for future
+# references.
+#
+# The shell script works only on all RCS files at one time.
+# It is important that all changed files are checked in (there are
+# no precautions against any error in this respect).
+# file names:
+# {RCS/}.rcsfreeze.ver version number
+# {RCS/}.rcsfreeze.log log messages, most recent first
+
+PATH=/usr/gnu/bin:/usr/local/bin:/bin:/usr/bin:/usr/ucb:$PATH
+export PATH
+
+DATE=`date` || exit
+# Check whether we have an RCS subdirectory, so we can have the right
+# prefix for our paths.
+if [ -d RCS ]
+then RCSDIR=RCS/
+else RCSDIR=
+fi
+
+# Version number stuff, log message file
+VERSIONFILE=${RCSDIR}.rcsfreeze.ver
+LOGFILE=${RCSDIR}.rcsfreeze.log
+# Initialize, rcsfreeze never run before in the current directory
+[ -r $VERSIONFILE ] || { echo 0 >$VERSIONFILE && >>$LOGFILE; } || exit
+
+# Get Version number, increase it, write back to file.
+VERSIONNUMBER=`cat $VERSIONFILE` &&
+VERSIONNUMBER=`expr $VERSIONNUMBER + 1` &&
+echo $VERSIONNUMBER >$VERSIONFILE || exit
+
+# Symbolic Revision Number
+SYMREV=C_$VERSIONNUMBER
+# Allow the user to give a meaningful symbolic name to the revision.
+SYMREVNAME=${1-$SYMREV}
+echo >&2 "rcsfreeze: symbolic revision number computed: \"${SYMREV}\"
+rcsfreeze: symbolic revision number used: \"${SYMREVNAME}\"
+rcsfreeze: the two differ only when rcsfreeze invoked with argument
+rcsfreeze: give log message, summarizing changes (end with EOF or single '.')" \
+ || exit
+
+# Stamp the logfile. Because we order the logfile the most recent
+# first we will have to save everything right now in a temporary file.
+TMPLOG=/tmp/rcsfrz$$
+trap 'rm -f $TMPLOG; exit 1' 1 2 13 15
+# Now ask for a log message, continuously add to the log file
+(
+ echo "Version: $SYMREVNAME($SYMREV), Date: $DATE
+-----------" || exit
+ while read MESS
+ do
+ case $MESS in
+ .) break
+ esac
+ echo " $MESS" || exit
+ done
+ echo "-----------
+" &&
+ cat $LOGFILE
+) >$TMPLOG &&
+
+# combine old and new logfiles
+cp $TMPLOG $LOGFILE &&
+rm -f $TMPLOG || exit
+trap 1 2 13 15
+
+# Now the real work begins by assigning a symbolic revision number
+# to each rcs file. Take the most recent version of the main trunk.
+
+status=
+
+for FILE in ${RCSDIR}*
+do
+# get the revision number of the most recent revision
+ HEAD=`rlog -h $FILE` &&
+ REV=`echo "$HEAD" | sed -n 's/^head:[ ]*//p'` &&
+# assign symbolic name to it.
+ echo >&2 "rcsfreeze: $REV $FILE" &&
+ rcs -q -n$SYMREVNAME:$REV $FILE || status=$?
+done
+
+exit $status
diff --git a/gnu/usr.bin/rcs/rcsmerge/Makefile b/gnu/usr.bin/rcs/rcsmerge/Makefile
new file mode 100644
index 000000000000..0c1f643891b8
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsmerge/Makefile
@@ -0,0 +1,7 @@
+PROG= rcsmerge
+
+SRCS= rcsmerge.c
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/rcsmerge/rcsmerge.1 b/gnu/usr.bin/rcs/rcsmerge/rcsmerge.1
new file mode 100644
index 000000000000..82871b033bfd
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsmerge/rcsmerge.1
@@ -0,0 +1,140 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: rcsmerge.1,v 5.3 1991/08/19 03:13:55 eggert Exp $
+.ds r \&\s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH RCSMERGE 1 \*(Dt GNU
+.SH NAME
+rcsmerge \- merge RCS revisions
+.SH SYNOPSIS
+.B rcsmerge
+.RI [ options ] " file"
+.SH DESCRIPTION
+.B rcsmerge
+incorporates the changes between two revisions
+of an \*r file into the corresponding working file.
+.PP
+Pathnames matching an \*r suffix denote \*r files;
+all others denote working files.
+Names are paired as explained in
+.BR ci (1).
+.PP
+At least one revision must be specified with one of the options
+described below, usually
+.BR \-r .
+At most two revisions may be specified.
+If only one revision is specified, the latest
+revision on the default branch (normally the highest branch on the trunk)
+is assumed for the second revision.
+Revisions may be specified numerically or symbolically.
+.PP
+.B rcsmerge
+prints a warning if there are overlaps, and delimits
+the overlapping regions as explained in
+.BR merge (1).
+The command is useful for incorporating changes into a checked-out revision.
+.SH OPTIONS
+.TP
+.BI \-k subst
+Use
+.I subst
+style keyword substitution.
+See
+.BR co (1)
+for details.
+For example,
+.B "\-kk\ \-r1.1\ \-r1.2"
+ignores differences in keyword values when merging the changes from
+.B 1.1
+to
+.BR 1.2 .
+.TP
+.BR \-p [\f2rev\fP]
+Send the result to standard output instead of overwriting the working file.
+.TP
+.BR \-q [\f2rev\fP]
+Run quietly; do not print diagnostics.
+.TP
+.BR \-r [\f2rev\fP]
+Merge with respect to revision
+.IR rev .
+Here an empty
+.I rev
+stands for the latest revision on the default branch, normally the head.
+.TP
+.BI \-V n
+Emulate \*r version
+.IR n .
+See
+.BR co (1)
+for details.
+.TP
+.BI \-x "suffixes"
+Use
+.I suffixes
+to characterize \*r files.
+See
+.BR ci (1)
+for details.
+.SH EXAMPLES
+Suppose you have released revision 2.8 of
+.BR f.c .
+Assume
+furthermore that after you complete an unreleased revision 3.4, you receive
+updates to release 2.8 from someone else.
+To combine the updates to 2.8 and your changes between 2.8 and 3.4,
+put the updates to 2.8 into file f.c and execute
+.LP
+.B " rcsmerge \-p \-r2.8 \-r3.4 f.c >f.merged.c"
+.PP
+Then examine
+.BR f.merged.c .
+Alternatively, if you want to save the updates to 2.8 in the \*r file,
+check them in as revision 2.8.1.1 and execute
+.BR "co \-j":
+.LP
+.B " ci \-r2.8.1.1 f.c"
+.br
+.B " co \-r3.4 \-j2.8:2.8.1.1 f.c"
+.PP
+As another example, the following command undoes the changes
+between revision 2.4 and 2.8 in your currently checked out revision
+in
+.BR f.c .
+.LP
+.B " rcsmerge \-r2.8 \-r2.4 f.c"
+.PP
+Note the order of the arguments, and that
+.B f.c
+will be
+overwritten.
+.SH ENVIRONMENT
+.TP
+.B \s-1RCSINIT\s0
+options prepended to the argument list, separated by spaces.
+See
+.BR ci (1)
+for details.
+.SH DIAGNOSTICS
+Exit status is 0 for no overlaps, 1 for some overlaps, 2 for trouble.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH "SEE ALSO"
+ci(1), co(1), ident(1), merge(1), rcs(1), rcsdiff(1), rcsintro(1), rlog(1),
+rcsfile(5)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
+.br
diff --git a/gnu/usr.bin/rcs/rcsmerge/rcsmerge.c b/gnu/usr.bin/rcs/rcsmerge/rcsmerge.c
new file mode 100644
index 000000000000..e5d439452789
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcsmerge/rcsmerge.c
@@ -0,0 +1,252 @@
+/*
+ * rcsmerge operation
+ */
+/*****************************************************************************
+ * join 2 revisions with respect to a third
+ *****************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+/* $Log: rcsmerge.c,v $
+ * Revision 5.7 1991/11/20 17:58:09 eggert
+ * Don't Iopen(f, "r+"); it's not portable.
+ *
+ * Revision 5.6 1991/08/19 03:13:55 eggert
+ * Add -r$. Tune.
+ *
+ * Revision 5.5 1991/04/21 11:58:27 eggert
+ * Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.4 1991/02/25 07:12:43 eggert
+ * Merging a revision to itself is no longer an error.
+ *
+ * Revision 5.3 1990/11/01 05:03:50 eggert
+ * Remove unneeded setid check.
+ *
+ * Revision 5.2 1990/09/04 08:02:28 eggert
+ * Check for I/O error when reading working file.
+ *
+ * Revision 5.1 1990/08/29 07:14:04 eggert
+ * Add -q. Pass -L options to merge.
+ *
+ * Revision 5.0 1990/08/22 08:13:41 eggert
+ * Propagate merge's exit status.
+ * Remove compile-time limits; use malloc instead.
+ * Make lock and temp files faster and safer. Ansify and Posixate. Add -V.
+ * Don't use access(). Tune.
+ *
+ * Revision 4.5 89/05/01 15:13:16 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.4 88/08/09 19:13:13 eggert
+ * Beware merging into a readonly file.
+ * Beware merging a revision to itself (no change).
+ * Use execv(), not system(); yield exit status like diff(1)'s.
+ *
+ * Revision 4.3 87/10/18 10:38:02 narten
+ * Updating version numbers. Changes relative to version 1.1
+ * actually relative to 4.1
+ *
+ * Revision 1.3 87/09/24 14:00:31 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:36 jenkins
+ * Port to suns
+ *
+ * Revision 4.1 83/03/28 11:14:57 wft
+ * Added handling of default branch.
+ *
+ * Revision 3.3 82/12/24 15:29:00 wft
+ * Added call to catchsig().
+ *
+ * Revision 3.2 82/12/10 21:32:02 wft
+ * Replaced getdelta() with gettree(); improved error messages.
+ *
+ * Revision 3.1 82/11/28 19:27:44 wft
+ * Initial revision.
+ *
+ */
+#include "rcsbase.h"
+
+static char const co[] = CO;
+
+mainProg(rcsmergeId, "rcsmerge", "$Id: rcsmerge.c,v 5.7 1991/11/20 17:58:09 eggert Exp $")
+{ /* rcsmerge: extract two revisions of the RCS file with co and merge their differences into the working file. */
+ static char const cmdusage[] =
+ "\nrcsmerge usage: rcsmerge -rrev1 [-rrev2] [-p] [-Vn] file";
+ static char const quietarg[] = "-q";
+
+ register int i;
+ char *a, **newargv;
+ char const *arg[3];
+ char const *rev[2]; /*revision numbers*/
+ char const *expandarg, *versionarg;
+ int tostdout;
+ int status;
+ RILE *workptr = 0; /* must start out null: `end' closes it even when no file was ever opened */
+ struct buf commarg;
+ struct buf numericrev; /* holds expanded revision number */
+ struct hshentries *gendeltas; /* deltas to be generated */
+ struct hshentry * target;
+
+ bufautobegin(&commarg);
+ bufautobegin(&numericrev);
+ rev[0] = rev[1] = nil;
+ status = 0; /* Keep lint happy. */
+ tostdout = false;
+ expandarg = versionarg = quietarg; /* i.e. a no-op */
+ suffixes = X_DEFAULT;
+
+ argc = getRCSINIT(argc, argv, &newargv);
+ argv = newargv;
+ while (a = *++argv, 0<--argc && *a++=='-') {
+ switch (*a++) {
+ case 'p':
+ tostdout=true;
+ goto revno;
+
+ case 'q':
+ quietflag = true;
+ revno:
+ if (!*a) /* -p or -q without an attached revision */
+ break;
+ /* falls into -r */
+ case 'r':
+ if (!rev[0])
+ rev[0] = a;
+ else if (!rev[1])
+ rev[1] = a;
+ else
+ faterror("too many revision numbers");
+ break;
+ case 'x':
+ suffixes = a;
+ break;
+ case 'V':
+ versionarg = *argv;
+ setRCSversion(versionarg);
+ break;
+
+ case 'k':
+ expandarg = *argv;
+ if (0 <= str2expmode(expandarg+2))
+ break;
+ /* fall into */
+ default:
+ faterror("unknown option: %s%s", *argv, cmdusage);
+ };
+ } /* end of option processing */
+
+ if (argc<1) faterror("no input file%s", cmdusage);
+ if (!rev[0]) faterror("no base revision number given");
+
+ /* now handle all filenames */
+
+ if (0 < pairfilenames(argc, argv, rcsreadopen, true, false)) {
+
+ if (argc>2 || (argc==2&&argv[1]!=nil))
+ warn("too many arguments");
+ diagnose("RCS file: %s\n", RCSfilename);
+ if (!(workptr = Iopen(workfilename,
+ FOPEN_R_WORK,
+ (struct stat*)0
+ )))
+ efaterror(workfilename);
+
+ gettree(); /* reads in the delta tree */
+
+ if (Head==nil) faterror("no revisions present");
+
+ if (!*rev[0]) /* an empty revision stands for the default branch, or else the head */
+ rev[0] = Dbranch ? Dbranch : Head->num;
+ if (!fexpandsym(rev[0], &numericrev, workptr))
+ goto end;
+ if (!(target=genrevs(numericrev.string, (char *)nil, (char *)nil, (char *)nil,&gendeltas))) goto end;
+ rev[0] = target->num;
+ if (!rev[1] || !*rev[1])
+ rev[1] = Dbranch ? Dbranch : Head->num;
+ if (!fexpandsym(rev[1], &numericrev, workptr))
+ goto end;
+ if (!(target=genrevs(numericrev.string, (char *)nil, (char *)nil, (char *)nil,&gendeltas))) goto end;
+ rev[1] = target->num;
+
+ if (strcmp(rev[0],rev[1]) == 0) { /* same revision twice: nothing to merge */
+ if (tostdout) {
+ FILE *o;
+# if text_equals_binary_stdio || text_work_stdio
+ o = stdout;
+# else
+ if (!(o=fdopen(STDOUT_FILENO,FOPEN_W_WORK)))
+ efaterror("stdout");
+# endif
+ fastcopy(workptr,o); /* -p still has to produce output: copy the working file as is */
+ Ofclose(o);
+ }
+ goto end;
+ }
+ Izclose(&workptr);
+
+ for (i=0; i<2; i++) {
+ diagnose("retrieving revision %s\n", rev[i]);
+ bufscpy(&commarg, "-p");
+ bufscat(&commarg, rev[i]);
+ if (run(
+ (char*)0,
+ /* Do not collide with merger.c maketemp(). */
+ arg[i+1] = maketemp(i+3),
+ co, quietarg, commarg.string, expandarg,
+ versionarg, RCSfilename, (char*)0
+ ))
+ faterror("co failed");
+ }
+ diagnose("Merging differences between %s and %s into %s%s\n",
+ rev[0], rev[1], workfilename,
+ tostdout?"; result to stdout":"");
+
+ arg[0] = rev[0] = workfilename;
+ status = merge(tostdout, rev, arg);
+ }
+
+end:
+ Izclose(&workptr);
+ tempunlink();
+ exitmain(nerror ? DIFF_TROUBLE : status);
+}
+
+#if lint
+# define exiterr rmergeExit
+#endif
+ exiting void
+exiterr() /* Call tempunlink() (presumably removing temporary files), then terminate with DIFF_TROUBLE. */
+{
+ tempunlink();
+ _exit(DIFF_TROUBLE); /* _exit() ends the process at once, without running exit()'s handlers */
+}
diff --git a/gnu/usr.bin/rcs/rcstest b/gnu/usr.bin/rcs/rcstest
new file mode 100755
index 000000000000..e0b6c828f7c5
--- /dev/null
+++ b/gnu/usr.bin/rcs/rcstest
@@ -0,0 +1,397 @@
+#!/bin/sh
+
+# Test RCS's functions.
+# The RCS commands are searched for in the PATH as usual;
+# to test the working directory's commands, prepend . to your PATH.
+
+# Test RCS by creating files RCS/a.* and RCS/a.c.
+# If all goes well, output nothing, and remove the temporary files.
+# Otherwise, send a message to standard output.
+# Exit status is 0 if OK, 1 if an RCS bug is found, and 2 if scaffolding fails.
+# With the -v option, output more debugging info.
+
+# If diff outputs `No differences encountered' when comparing identical files,
+# then rcstest may also output these noise lines; ignore them.
+
+# The current directory and ./RCS must be readable, writable, and searchable.
+
+# $Id: rcstest,v 5.8 1991/11/20 17:58:10 eggert Exp $
+
+
+# Copyright 1990, 1991 by Paul Eggert
+# Distributed under license by the Free Software Foundation, Inc.
+#
+# This file is part of RCS.
+#
+# RCS is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# RCS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with RCS; see the file COPYING. If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+# Report problems and direct all questions to:
+#
+# rcs-bugs@cs.purdue.edu
+
+# Options in RCSINIT are prepended to the argument list of every RCS command.
+# NOTE(review): -x with an empty suffix list is presumably what lets the
+# RCS file below be named RCS/a.c with no suffix -- confirm against ci(1).
+RCSINIT=-x
+export RCSINIT
+
+# Names of the RCS file under test, the alternate RCS file, and the lock file.
+SLASH=/
+RCSfile=RCS${SLASH}a.c
+RCS_alt=RCS${SLASH}a.d
+lockfile=RCS${SLASH}a._
+
+# -v: verbose (shell tracing on, RCS commands not quieted);
+# no argument: pass -q to the RCS commands; anything else is a usage error.
+case $1 in
+-v) q=; set -x;;
+'') q=-q;;
+*) echo >&2 "$0: usage: $0 [-v]"; exit 2
+esac
+
+test -d RCS || {
+ echo >&2 "$0: RCS: not a directory; please \`mkdir RCS' first."
+ exit 1
+}
+
+# Start from a clean slate and create the reference working files.
+rm -f a.* $RCSfile $RCS_alt $lockfile &&
+echo 1.1 >a.11 &&
+echo 1.1.1.1 >a.3x1 &&
+echo 1.2 >a.12 || { echo "#initialization failed"; exit 2; }
+
+# Prefer context diffs for diagnostics when the local diff supports -c.
+case `diff -c a.11 a.3x1` in
+*'! 1.1.1.1')
+ diff='diff -c';;
+*)
+ echo "#warning: diff -c does not work, so diagnostics may be cryptic"
+ diff=diff
+esac
+
+# Create an empty RCS file and check that it exists and is readable.
+rcs -i -L -ta.11 $q a.c &&
+<$RCSfile || {
+ echo "#rcs -i -L failed; perhaps RCS is not properly installed."
+ exit 1
+}
+
+rlog a.c >/dev/null || { echo "#rlog failed on empty RCS file"; exit 1; }
+rm -f $RCSfile || exit 2
+
+# Initial checkin, then checkout with and without locking.
+cp a.11 a.c &&
+ci -ta.11 -mm $q a.c &&
+<$RCSfile &&
+rcs -L $q a.c || { echo "#ci+rcs -L failed"; exit 1; }
+test ! -f a.c || { echo "#ci did not remove working file"; exit 1; }
+for l in '' '-l'
+do
+ co $l $q a.c &&
+ test -f a.c || { echo '#co' $l did not create working file; exit 1; }
+ $diff a.11 a.c || { echo '#ci' followed by co $l is not a no-op; exit 1; }
+done
+
+# Check in a second revision and read it back.
+cp a.12 a.c &&
+ci -mm $q a.c &&
+co $q a.c &&
+$diff a.12 a.c || { echo "#ci+co failed"; exit 1; }
+
+# The first revision must still be retrievable.
+co -r1.1 $q a.c &&
+$diff a.11 a.c || { echo "#can't retrieve first revision"; exit 1; }
+
+# Create a branch off revision 1.1.
+rm -f a.c &&
+cp a.3x1 a.c &&
+ci -r1.1.1 -mm $q a.c &&
+co -r1.1.1.1 $q a.c &&
+$diff a.3x1 a.c || { echo "#branches failed"; exit 1; }
+
+# Forced checkin of an unchanged file must create revision 1.3.
+co -l $q a.c &&
+ci -f -mm $q a.c &&
+co -r1.3 $q a.c &&
+$diff a.12 a.c || { echo "#(co -l; ci -f) failed"; exit 1; }
+
+# ci -l must keep the lock, so the following plain ci succeeds.
+co -l $q a.c &&
+echo 1.4 >a.c &&
+ci -l -mm $q a.c &&
+echo error >a.c &&
+ci -mm $q a.c || { echo "#ci -l failed"; exit 1; }
+
+# ci -u must leave a working file but release the lock,
+# so the following ci is expected to fail.
+co -l $q a.c &&
+echo 1.5 >a.c &&
+ci -u -mm $q a.c &&
+<a.c || { echo "#ci -u didn't create a working file"; exit 1; }
+rm -f a.c &&
+echo error >a.c || exit 2
+ci -mm $q a.c 2>/dev/null && { echo "#ci -u didn't unlock the file"; exit 1; }
+
+# co -u must release a lock set with rcs -l; the ci is again expected to fail.
+rm -f a.c &&
+rcs -l $q a.c &&
+co -u $q a.c || { echo "#rcs -l + co -u failed"; exit 1; }
+rm -f a.c &&
+echo error >a.c || exit 2
+ci -mm $q a.c 2>/dev/null && { echo "#co -u didn't unlock the file"; exit 1; }
+
+# co -f must overwrite an existing working file.
+rm -f a.c &&
+cp a.11 a.c &&
+co -f $q a.c || { echo "#co -f failed"; exit 1; }
+$diff a.11 a.c >/dev/null && { echo "#co -f had no effect"; exit 1; }
+
+# co -p writes the revision to standard output.
+co -p1.1 $q a.c >a.t &&
+$diff a.11 a.t || { echo "#co -p failed"; exit 1; }
+
+# ci -n and ci -N both assign the symbolic name "n" to the new revision.
+for n in n N
+do
+ rm -f a.c &&
+ co -l $q a.c &&
+ echo $n >a.$n &&
+ cp a.$n a.c &&
+ ci -${n}n -mm $q a.c &&
+ co -rn $q a.c &&
+ $diff a.$n a.c || { echo "#ci -$n failed"; exit 1; }
+done
+
+# Deduce the user's login name: LOGNAME, then USER, then `who am i'.
+case $LOGNAME in
+?*) me=$LOGNAME;;
+*)
+ case $USER in
+ ?*) me=$USER;;
+ *)
+ me=`who am i` || exit 2
+ me=`echo "$me" | sed -e 's/ .*//' -e 's/.*!//'`
+ case $me in
+ '') echo >&2 "$0: cannot deduce user name"; exit 2
+ esac
+ esac
+esac
+# Get the current time in UTC, trying successively more portable spellings,
+# then split it into positional parameters and build D (yyyy/mm/dd) and T.
+date=`date -u 2>/dev/null` ||
+date=`TZ=GMT0 date 2>/dev/null` ||
+date=`TZ= date` || exit 2
+set $date
+case $2 in
+Jan) m=01;; Feb) m=02;; Mar) m=03;; Apr) m=04;; May) m=05;; Jun) m=06;;
+Jul) m=07;; Aug) m=08;; Sep) m=09;; Oct) m=10;; Nov) m=11;; Dec) m=12;;
+*) echo >&2 "$0: $2: unknown month name"; exit 2
+esac
+# Pad a one-digit day of the month with a leading zero.
+case $3 in
+?) d=0$3;;
+*) d=$3
+esac
+# The year is the sixth word when it looks like one, otherwise the fifth.
+case $6 in
+[0-9][0-9][0-9][0-9]*) D=$6/$m/$d;;
+*)
+ case $5 in
+ [0-9][0-9][0-9][0-9]*) D=$5/$m/$d;;
+ *) echo >&2 "$0: bad date format: $date"; exit 2
+ esac
+esac
+T=$4
+# Keyword expansion tests.  The expected expansion a.kv is written with @ in
+# place of $ and converted by sed; the other a.* files are derived from it.
+case $PWD in
+'') PWD=`pwd`
+esac &&
+co -l $q a.c &&
+sed 's/@/$/g' >a.kv <<EOF
+@Author: w @
+@Date: $D $T @
+@Header: $PWD$SLASH$RCSfile 2.1 $D $T w s @
+@Id: a.c 2.1 $D $T w s @
+@Locker: @
+@Log: a.c @
+ * Revision 2.1 $D $T w
+ * m
+ *
+@RCSfile: a.c @
+@Revision: 2.1 @
+@Source: $PWD$SLASH$RCSfile @
+@State: s @
+EOF
+test $? = 0 &&
+sed 's/:.*\$/$/' a.kv >a.k &&
+sed -e 's/w s [$]/w s '"$me"' $/' -e 's/[$]Locker: /&'"$me/" a.kv >a.kvl &&
+sed -e '/^\$/!d' -e 's/\$$/: old $/' a.k >a.o &&
+sed -e 's/\$[^ ]*: //' -e 's/ \$//' a.kv >a.v &&
+cp a.o a.c &&
+ci -d"$date" -ss -ww -u2.1 -mm $q a.c &&
+$diff a.kv a.c || { echo "#keyword expansion failed"; exit 1; }
+# -ko must reproduce the keyword strings exactly as they were checked in.
+co -p -ko $q a.c >a.oo &&
+$diff a.o a.oo || { echo "#co -p -ko failed"; exit 1; }
+cp a.kv a.o || exit 2
+# Delete revision 2.1 and check it in again with ci -k.
+rcs -o2.1 $q a.c &&
+rcs -l $q a.c &&
+ci -k -u $q a.c &&
+$diff a.kv a.c || { echo "#ci -k failed"; exit 1; }
+# ident must report exactly the keyword lines of a.kv.
+sed '/^[^$]/d' a.kv >a.i &&
+ident a.c >a.i1 &&
+sed -e 1d -e 's/^[ ]*//' a.i1 >a.i2 &&
+$diff a.i a.i2 || { echo "#ident failed"; exit 1; }
+
+# rcs -i must refuse to initialize an RCS file that already exists.
+rcs -i $q a.c 2>/dev/null && { echo "#rcs -i permitted existing file"; exit 1; }
+
+# rcs -b sets and resets the default branch.
+co -l $q a.c &&
+echo 2.2 >a.c &&
+ci -mm $q a.c &&
+echo 1.1.1.2 >a.c &&
+rcs -l1.1.1 $q a.c &&
+ci -r1.1.1.2 -mm $q a.c &&
+rcs -b1.1.1 $q a.c &&
+test " `co -p $q a.c`" = ' 1.1.1.2' || { echo "#rcs -b1.1.1 failed"; exit 1; }
+rcs -b $q a.c &&
+test " `co -p $q a.c`" = ' 2.2' || { echo "#rcs -b failed"; exit 1; }
+
+# rcs -U turns strict locking off; rcs -L turns it on again,
+# after which a checkin without a lock is expected to fail.
+echo 2.3 >a.c || exit 2
+rcs -U $q a.c || { echo "#rcs -U failed"; exit 1; }
+ci -mm $q a.c || { echo "#rcs -U didn't unset strict locking"; exit 1; }
+rcs -L $q a.c || { echo "#rcs -L failed"; exit 1; }
+echo error >a.c || exit 2
+ci -mm $q a.c 2>/dev/null && { echo "#ci retest failed"; exit 1; }
+
+# Checking in an unchanged file must leave the rlog header unchanged.
+rm -f a.c &&
+log0=`rlog -h a.c` &&
+co -l $q a.c &&
+ci -mm $q a.c &&
+log1=`rlog -h a.c` &&
+test " $log0" = " $log1" || { echo "#unchanged ci didn't revert"; exit 1; }
+
+# rcs -n assigns a symbolic name.
+rm -f a.c &&
+rcs -nN:1.1 $q a.c &&
+co -rN $q a.c &&
+$diff a.11 a.c || { echo "#rcs -n failed"; exit 1; }
+
+# rcs -N reassigns a symbolic name that is already in use.
+rcs -NN:2.1 $q a.c &&
+co -rN $q a.c &&
+$diff a.kv a.c || { echo "#rcs -N failed"; exit 1; }
+
+# rcs -c sets the comment leader used when the Log keyword is expanded.
+co -l $q a.c &&
+rcs -c':::' $q a.c &&
+echo '$''Log$' >a.c &&
+ci -u -mm $q a.c &&
+test " `sed '$!d' a.c`" = ' :::' || { echo "#rcs -c failed"; exit 1; }
+
+# rcs -o2.2: deletes revision 2.2 and everything after it on that branch.
+rcs -o2.2: $q a.c &&
+co $q a.c &&
+$diff a.kv a.c || { echo "#rcs -o failed"; exit 1; }
+
+# rcsdiff must exit with status 1 (differences found)
+# and produce the same output as plain diff.
+rcsdiff -r1.1 -r2.1 $q a.c >a.0
+case $? in
+1) ;;
+*) echo "#rcsdiff bad status"; exit 1
+esac
+diff a.11 a.kv >a.1
+$diff a.0 a.1 || { echo "#rcsdiff failed"; exit 1; }
+
+# With each keyword substitution mode, rcsdiff -k must find no differences
+# against the correspondingly expanded working file.
+rcs -l2.1 $q a.c || { echo "#rcs -l2.1 failed"; exit 1; }
+for i in k kv kvl o v
+do
+ rm -f a.c &&
+ cp a.$i a.c &&
+ rcsdiff -k$i $q a.c || { echo "#rcsdiff -k$i failed"; exit 1; }
+done
+co -p1.1 -ko $q a.c >a.t &&
+$diff a.11 a.t || { echo "#co -p1.1 -ko failed"; exit 1; }
+rcs -u2.1 $q a.c || { echo "#rcs -u2.1 failed"; exit 1; }
+
+rm -f a.c &&
+co -l $q a.c &&
+cat >a.c <<'EOF'
+2.2
+a
+b
+c
+d
+EOF
+test $? = 0 &&
+ci -l -mm $q a.c &&
+co -p2.2 $q a.c | sed -e s/2.2/2.3/ -e s/b/b1/ >a.c &&
+ci -l -mm $q a.c &&
+co -p2.2 $q a.c | sed -e s/2.2/new/ -e s/d/d1/ >a.c || exit 2
+cat >a.0 <<'EOF'
+2.3
+a
+b1
+c
+d1
+EOF
+cat >a.1 <<'EOF'
+<<<<<<< a.c
+new
+=======
+2.3
+>>>>>>> 2.3
+a
+b1
+c
+d1
+EOF
+rcsmerge -r2.2 -r2.3 $q a.c
+case $? in
+0)
+ if $diff a.0 a.c >/dev/null
+ then echo "#warning: diff3 -E does not work, " \
+ "so merge and rcsmerge ignore overlaps and suppress overlap lines."
+ else
+ $diff a.1 a.c || { echo "#rcsmerge failed (status 0)"; exit 1; }
+ echo "#warning: The diff3 lib program exit status ignores overlaps," \
+ "so rcsmerge does not warn about overlap lines that it generates."
+ fi
+ ;;
+1)
+ $diff a.1 a.c || { echo "#rcsmerge failed (status 1)"; exit 1; }
+ ;;
+*)
+ echo "#rcsmerge bad status"; exit 1
+esac
+
+# Non-text data test: replace every newline by the byte \200 so that the
+# working file is a single line with no trailing newline, then check it in,
+# convert it back and compare.  A failure here only produces a warning.
+nl='
+'
+{
+ co -p $q a.c | tr "$nl" '\200' >a.24 &&
+ cp a.24 a.c &&
+ ciOut=`(ci -l -mm $q a.c 2>&1)` &&
+ case $ciOut in
+ ?*) echo >&2 "$ciOut"
+ esac &&
+ co -p $q a.c | tr '\200' "$nl" >a.c &&
+ rcsdiff -r2.3 $q a.c >/dev/null &&
+
+ echo 2.5 >a.c &&
+ ci -l -mm $q a.c &&
+ cp a.24 a.c &&
+ rcsdiff -r2.4 $q a.c >/dev/null
+} || echo "#warning: Traditional diff is used, so RCS is limited to text files."
+
+# Unlock and delete revision 2.4 and everything after it.
+rcs -u -o2.4: $q a.c || { echo "#rcs -u -o failed"; exit 1; }
+
+# Create a second RCS file with rcs -i, passing -A to name a.c.
+rcs -i -Aa.c -t- $q a.d || { echo "#rcs -i -A failed"; exit 1; }
+
+# The log of revision 2.1 must contain a "checked in with -k" line; apart
+# from that line, the rlog output must match the text below exactly.
+rlog -r2.1 a.c >a.t &&
+grep '^checked in with -k' a.t >/dev/null &&
+sed '/^checked in with -k/d' a.t >a.u &&
+$diff - a.u <<EOF
+
+RCS file: $RCSfile
+Working file: a.c
+head: 2.3
+branch:
+locks: strict
+access list:
+symbolic names:
+ N: 2.1
+ n: 1.8
+comment leader: ":::"
+keyword substitution: kv
+total revisions: 13; selected revisions: 1
+description:
+1.1
+----------------------------
+revision 2.1
+date: $D $T; author: w; state: s; lines: +13 -1
+=============================================================================
+EOF
+test $? = 0 || { echo "#rlog failed"; exit 1; }
+
+
+# No lock file may be left behind once all commands have finished.
+test ! -f $lockfile || { echo "#lock file not removed"; exit 1; }
+
+# Success: remove the scratch files; the exit status is that of rm.
+exec rm -f a.* $RCSfile $RCS_alt
diff --git a/gnu/usr.bin/rcs/rlog/Makefile b/gnu/usr.bin/rcs/rlog/Makefile
new file mode 100644
index 000000000000..b6a126865ad7
--- /dev/null
+++ b/gnu/usr.bin/rcs/rlog/Makefile
@@ -0,0 +1,7 @@
+# Build the rlog program from rlog.c.  It is linked against the RCS library
+# (-lrcs) found in ../lib/obj and compiled with the headers in ../lib.
+PROG= rlog
+
+SRCS= rlog.c
+LDADD= -L${.CURDIR}/../lib/obj -lrcs
+CFLAGS+= -I${.CURDIR}/../lib
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/rcs/rlog/rlog.1 b/gnu/usr.bin/rcs/rlog/rlog.1
new file mode 100644
index 000000000000..fa627ffdc172
--- /dev/null
+++ b/gnu/usr.bin/rcs/rlog/rlog.1
@@ -0,0 +1,260 @@
+.de Id
+.ds Rv \\$3
+.ds Dt \\$4
+..
+.Id $Id: rlog.1,v 5.3 1991/08/22 06:50:48 eggert Exp $
+.ds g \&\s-1UTC\s0
+.ds r \&\s-1RCS\s0
+.if n .ds - \%--
+.if t .ds - \(em
+.TH RLOG 1 \*(Dt GNU
+.SH NAME
+rlog \- print log messages and other information about RCS files
+.SH SYNOPSIS
+.B rlog
+.RI [ " options " ] " file " .\|.\|.
+.SH DESCRIPTION
+.B rlog
+prints information about \*r files.
+.PP
+Pathnames matching an \*r suffix denote \*r files;
+all others denote working files.
+Names are paired as explained in
+.BR ci (1).
+.PP
+.B rlog
+prints the following information for each
+\*r file: \*r pathname, working pathname, head (i.e., the number
+of the latest revision on the trunk), default branch, access list, locks,
+symbolic names, suffix, total number of revisions,
+number of revisions selected for printing, and
+descriptive text. This is followed by entries for the selected revisions in
+reverse chronological order for each branch. For each revision,
+.B rlog
+prints revision number, author, date/time, state, number of
+lines added/deleted (with respect to the previous revision),
+locker of the revision (if any), and log message.
+All times are displayed in Coordinated Universal Time (\*g).
+Without options,
+.B rlog
+prints complete information.
+The options below restrict this output.
+.nr n \w'\f3\-V\fP\f2n\fP '+1n-1/1n
+.TP \nn
+.B \-L
+Ignore \*r files that have no locks set.
+This is convenient in combination with
+.BR \-h ,
+.BR \-l ,
+and
+.BR \-R .
+.TP
+.B \-R
+Print only the name of the \*r file.
+This is convenient for translating a
+working pathname into an \*r pathname.
+.TP
+.B \-h
+Print only the \*r pathname, working pathname, head,
+default branch, access list, locks,
+symbolic names, and suffix.
+.TP
+.B \-t
+Print the same as
+.BR \-h ,
+plus the descriptive text.
+.TP
+.B \-b
+Print information about the revisions on the default branch, normally
+the highest branch on the trunk.
+.TP
+.BI \-d "dates"
+Print information about revisions with a checkin date/time in the ranges given by
+the semicolon-separated list of
+.IR dates .
+A range of the form
+.IB d1 < d2
+or
+.IB d2 > d1
+selects the revisions that were deposited between
+.I d1
+and
+.I d2
+inclusive.
+A range of the form
+.BI < d
+or
+.IB d >
+selects
+all revisions dated
+.I d
+or earlier.
+A range of the form
+.IB d <
+or
+.BI > d
+selects
+all revisions dated
+.I d
+or later.
+A range of the form
+.I d
+selects the single, latest revision dated
+.I d
+or earlier.
+The date/time strings
+.IR d ,
+.IR d1 ,
+and
+.I d2
+are in the free format explained in
+.BR co (1).
+Quoting is normally necessary, especially for
+.B <
+and
+.BR > .
+Note that the separator is
+a semicolon.
+.TP
+.BR \-l [\f2lockers\fP]
+Print information about locked revisions only.
+In addition, if the comma-separated list
+.I lockers
+of login names is given,
+ignore all locks other than those held by the
+.IR lockers .
+For example,
+.B "rlog\ \-L\ \-R\ \-lwft\ RCS/*"
+prints the name of \*r files locked by the user
+.BR wft .
+.TP
+.BR \-r [\f2revisions\fP]
+Print information about revisions given in the comma-separated list
+.I revisions
+of revisions and ranges.
+A range
+.IB rev1 : rev2
+means revisions
+.I rev1
+to
+.I rev2
+on the same branch,
+.BI : rev
+means revisions from the beginning of the branch up to and including
+.IR rev ,
+and
+.IB rev :
+means revisions starting with
+.I rev
+to the end of the branch containing
+.IR rev .
+An argument that is a branch means all
+revisions on that branch.
+A range of branches means all revisions
+on the branches in that range.
+A branch followed by a
+.B .\&
+means the latest revision in that branch.
+A bare
+.B \-r
+with no
+.I revisions
+means the latest revision on the default branch, normally the trunk.
+.TP
+.BI \-s states
+Print information about revisions whose state attributes match one of the
+states given in the comma-separated list
+.IR states .
+.TP
+.BR \-w [\f2logins\fP]
+Print information about revisions checked in by users with
+login names appearing in the comma-separated list
+.IR logins .
+If
+.I logins
+is omitted, the user's login is assumed.
+.TP
+.BI \-V n
+Emulate \*r version
+.I n
+when generating logs.
+See
+.BR co (1)
+for more.
+.TP
+.BI \-x "suffixes"
+Use
+.I suffixes
+to characterize \*r files.
+See
+.BR ci (1)
+for details.
+.PP
+.B rlog
+prints the intersection of the revisions selected with
+the options
+.BR \-d ,
+.BR \-l ,
+.BR \-s ,
+and
+.BR \-w ,
+intersected
+with the union of the revisions selected by
+.B \-b
+and
+.BR \-r .
+.SH EXAMPLES
+.LP
+.nf
+.B " rlog \-L \-R RCS/*"
+.B " rlog \-L \-h RCS/*"
+.B " rlog \-L \-l RCS/*"
+.B " rlog RCS/*"
+.fi
+.LP
+The first command prints the names of all \*r files in the subdirectory
+.B RCS
+that have locks. The second command prints the headers of those files,
+and the third prints the headers plus the log messages of the locked revisions.
+The last command prints complete information.
+.SH ENVIRONMENT
+.TP
+.B \s-1RCSINIT\s0
+options prepended to the argument list, separated by spaces.
+See
+.BR ci (1)
+for details.
+.SH DIAGNOSTICS
+The exit status is zero if and only if all operations were successful.
+.SH IDENTIFICATION
+Author: Walter F. Tichy.
+.br
+Revision Number: \*(Rv; Release Date: \*(Dt.
+.br
+Copyright \(co 1982, 1988, 1989 by Walter F. Tichy.
+.br
+Copyright \(co 1990, 1991 by Paul Eggert.
+.SH "SEE ALSO"
+ci(1), co(1), ident(1), rcs(1), rcsdiff(1), rcsintro(1), rcsmerge(1),
+rcsfile(5)
+.br
+Walter F. Tichy,
+\*r\*-A System for Version Control,
+.I "Software\*-Practice & Experience"
+.BR 15 ,
+7 (July 1985), 637-654.
+.SH BUGS
+The separator for revision ranges in the
+.B \-r
+option used to be
+.B \-
+instead of
+.BR : ,
+but this leads to confusion when symbolic names contain
+.BR \- .
+For backwards compatibility
+.B "rlog \-r"
+still supports the old
+.B \-
+separator, but it warns about this obsolete use.
+.br
diff --git a/gnu/usr.bin/rcs/rlog/rlog.c b/gnu/usr.bin/rcs/rlog/rlog.c
new file mode 100644
index 000000000000..b18b0c97ceb0
--- /dev/null
+++ b/gnu/usr.bin/rcs/rlog/rlog.c
@@ -0,0 +1,1204 @@
+/*
+ * RLOG operation
+ */
+/*****************************************************************************
+ * print contents of RCS files
+ *****************************************************************************
+ */
+
+/* Copyright (C) 1982, 1988, 1989 Walter Tichy
+ Copyright 1990, 1991 by Paul Eggert
+ Distributed under license by the Free Software Foundation, Inc.
+
+This file is part of RCS.
+
+RCS is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+RCS is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RCS; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Report problems and direct all questions to:
+
+ rcs-bugs@cs.purdue.edu
+
+*/
+
+
+
+
+/* $Log: rlog.c,v $
+ * Revision 5.9 1991/09/17 19:07:40 eggert
+ * Getscript() didn't uncache partial lines.
+ *
+ * Revision 5.8 1991/08/19 03:13:55 eggert
+ * Revision separator is `:', not `-'.
+ * Check for missing and duplicate logs. Tune.
+ * Permit log messages that do not end in newline (including empty logs).
+ *
+ * Revision 5.7 1991/04/21 11:58:31 eggert
+ * Add -x, RCSINIT, MS-DOS support.
+ *
+ * Revision 5.6 1991/02/26 17:07:17 eggert
+ * Survive RCS files with missing logs.
+ * strsave -> str_save (DG/UX name clash)
+ *
+ * Revision 5.5 1990/11/01 05:03:55 eggert
+ * Permit arbitrary data in logs and comment leaders.
+ *
+ * Revision 5.4 1990/10/04 06:30:22 eggert
+ * Accumulate exit status across files.
+ *
+ * Revision 5.3 1990/09/11 02:41:16 eggert
+ * Plug memory leak.
+ *
+ * Revision 5.2 1990/09/04 08:02:33 eggert
+ * Count RCS lines better.
+ *
+ * Revision 5.0 1990/08/22 08:13:48 eggert
+ * Remove compile-time limits; use malloc instead. Add setuid support.
+ * Switch to GMT.
+ * Report dates in long form, to warn about dates past 1999/12/31.
+ * Change "added/del" message to make room for the longer dates.
+ * Don't generate trailing white space. Add -V. Ansify and Posixate.
+ *
+ * Revision 4.7 89/05/01 15:13:48 narten
+ * changed copyright header to reflect current distribution rules
+ *
+ * Revision 4.6 88/08/09 19:13:28 eggert
+ * Check for memory exhaustion; don't access freed storage.
+ * Shrink stdio code size; remove lint.
+ *
+ * Revision 4.5 87/12/18 11:46:38 narten
+ * more lint cleanups (Guy Harris)
+ *
+ * Revision 4.4 87/10/18 10:41:12 narten
+ * Updating version numbers
+ * Changes relative to 1.1 actually relative to 4.2
+ *
+ * Revision 1.3 87/09/24 14:01:10 narten
+ * Sources now pass through lint (if you ignore printf/sprintf/fprintf
+ * warnings)
+ *
+ * Revision 1.2 87/03/27 14:22:45 jenkins
+ * Port to suns
+ *
+ * Revision 4.2 83/12/05 09:18:09 wft
+ * changed rewriteflag to external.
+ *
+ * Revision 4.1 83/05/11 16:16:55 wft
+ * Added -b, updated getnumericrev() accordingly.
+ * Replaced getpwuid() with getcaller().
+ *
+ * Revision 3.7 83/05/11 14:24:13 wft
+ * Added options -L and -R;
+ * Fixed selection bug with -l on multiple files.
+ * Fixed error on dates of the form -d'>date' (rewrote getdatepair()).
+ *
+ * Revision 3.6 82/12/24 15:57:53 wft
+ * shortened output format.
+ *
+ * Revision 3.5 82/12/08 21:45:26 wft
+ * removed call to checkaccesslist(); used DATEFORM to format all dates;
+ * removed unused variables.
+ *
+ * Revision 3.4 82/12/04 13:26:25 wft
+ * Replaced getdelta() with gettree(); removed updating of field lockedby.
+ *
+ * Revision 3.3 82/12/03 14:08:20 wft
+ * Replaced getlogin with getpwuid(), %02d with %.2d, fancydate with PRINTDATE.
+ * Fixed printing of nil, removed printing of Suffix,
+ * added shortcut if no revisions are printed, disambiguated struct members.
+ *
+ * Revision 3.2 82/10/18 21:09:06 wft
+ * call to curdir replaced with getfullRCSname(),
+ * fixed call to getlogin(), cosmetic changes on output,
+ * changed conflicting long identifiers.
+ *
+ * Revision 3.1 82/10/13 16:07:56 wft
+ * fixed type of variables receiving from getc() (char -> int).
+ */
+
+
+
+#include "rcsbase.h"
+
+/* Singly linked lists that hold the arguments of the selection options. */
+
+struct lockers { /* login names given in the -l option; */
+ char const * login; /* stored in lockerlist */
+ struct lockers * lockerlink; /* next entry, or nil */
+ } ;
+
+struct stateattri { /* states in state option; stored in */
+ char const * status; /* statelist */
+ struct stateattri * nextstate; /* next entry, or nil */
+ } ;
+
+struct authors { /* login names in author option; */
+ char const * login; /* stored in authorlist */
+ struct authors * nextauthor; /* next entry, or nil */
+ } ;
+
+struct Revpairs{ /* revision or branch range in -r */
+ unsigned numfld; /* option; stored in revlist */
+ /* NOTE(review): numfld is presumably the number of fields in the */
+ /* revision numbers of the range -- confirm in getrevpairs(). */
+ char const * strtrev; /* start of the range */
+ char const * endrev; /* end of the range */
+ struct Revpairs * rnext; /* next entry, or nil */
+ } ;
+
+struct Datepairs{ /* date range in -d option; stored in */
+ char strtdate[datesize]; /* duelst and datelist */
+ char enddate[datesize];
+ struct Datepairs * dnext; /* next entry, or nil */
+ };
+
+static char extractdelta P((struct hshentry const*));
+static int checkrevpair P((char const*,char const*));
+static struct hshentry const *readdeltalog P((void));
+static unsigned extdate P((struct hshentry*));
+static void cleanup P((void));
+static void exttree P((struct hshentry*));
+static void getauthor P((char*));
+static void getdatepair P((char*));
+static void getlocker P((char*));
+static void getnumericrev P((void));
+static void getrevpairs P((char*));
+static void getscript P((struct hshentry*));
+static void getstate P((char*));
+static void putabranch P((struct hshentry const*));
+static void putadelta P((struct hshentry const*,struct hshentry const*,int));
+static void putforest P((struct branchhead const*));
+static void putree P((struct hshentry const*));
+static void putrunk P((void));
+static void recentdate P((struct hshentry const*,struct Datepairs*));
+static void trunclocks P((void));
+
+/* printf format for the inserted/deleted line counts; chosen in the main */
+/* program according to the emulated RCS version, used by putadelta() */
+static char const *insDelFormat;
+static int branchflag; /*set on -b */
+/* becomes EXIT_FAILURE in cleanup() once nerror is nonzero */
+static int exitstatus;
+static int lockflag; /* set on -l */
+/* option argument lists; see the struct declarations above */
+static struct Datepairs *datelist, *duelst;
+static struct Revpairs *revlist, *Revlst;
+static struct authors *authorlist;
+static struct lockers *lockerlist;
+static struct stateattri *statelist;
+
+
+/*
+ * rlog main program.
+ *
+ * Parse the options (getRCSINIT() merges in those from RCSINIT), then for
+ * each remaining argument print the RCS file header and, unless suppressed,
+ * the description and the log entries of the selected revisions.
+ * cleanup() runs after every file, including files that are skipped with
+ * `continue'; the accumulated exitstatus is passed to exitmain().
+ */
+mainProg(rlogId, "rlog", "$Id: rlog.c,v 5.9 1991/09/17 19:07:40 eggert Exp $")
+{
+    /* Keep in step with the option switch below and with rlog(1):
+     * -r may be given without revisions, and -x is accepted. */
+    static char const cmdusage[] =
+        "\nrlog usage: rlog -{bhLRt} -ddates -l[lockers] -r[revs] -sstates -w[logins] -Vn -xsuffixes file ...";
+
+    register FILE *out;
+    char *a, **newargv;
+    struct Datepairs *currdate;
+    char const *accessListString, *accessFormat, *commentFormat;
+    char const *headFormat, *symbolFormat;
+    struct access const *curaccess;
+    struct assoc const *curassoc;
+    struct hshentry const *delta;
+    struct lock const *currlock;
+    int descflag, selectflag;
+    int onlylockflag;  /* print only files with locks */
+    int onlyRCSflag;  /* print only RCS file name */
+    unsigned revno;
+
+    descflag = selectflag = true;
+    onlylockflag = onlyRCSflag = false;
+    out = stdout;
+    suffixes = X_DEFAULT;
+
+    argc = getRCSINIT(argc, argv, &newargv);
+    argv = newargv;
+    /* a is left pointing just past the option letter, i.e. at its argument */
+    while (a = *++argv, 0<--argc && *a++=='-') {
+        switch (*a++) {
+
+            case 'L':
+                onlylockflag = true;
+                break;
+
+            case 'R':
+                onlyRCSflag =true;
+                break;
+
+            case 'l':
+                lockflag = true;
+                getlocker(a);
+                break;
+
+            case 'b':
+                branchflag = true;
+                break;
+
+            case 'r':
+                getrevpairs(a);
+                break;
+
+            case 'd':
+                getdatepair(a);
+                break;
+
+            case 's':
+                getstate(a);
+                break;
+
+            case 'w':
+                getauthor(a);
+                break;
+
+            case 'h':
+                descflag = false;
+                break;
+
+            case 't':
+                selectflag = false;
+                break;
+
+            case 'q':
+                /* This has no effect; it's here for consistency. */
+                quietflag = true;
+                break;
+
+            case 'x':
+                suffixes = a;
+                break;
+
+            case 'V':
+                setRCSversion(*argv);
+                break;
+
+            default:
+                faterror("unknown option: %s%s", *argv, cmdusage);
+
+        }
+    } /* end of option processing */
+
+    if (argc<1) faterror("no input file%s", cmdusage);
+
+    /* -h and -t were both given: -t wins. */
+    if (! (descflag|selectflag)) {
+        warn("-t overrides -h.");
+        descflag = true;
+    }
+
+    /* Select the output layout for the emulated RCS version. */
+    if (RCSversion < VERSION(5)) {
+        accessListString = "\naccess list:   ";
+        accessFormat = "  %s";
+        commentFormat = "\ncomment leader:  \"";
+        headFormat = "\nRCS file:        %s;   Working file:    %s\nhead:           %s%s\nbranch:         %s%s\nlocks:         ";
+        insDelFormat = "  lines added/del: %lu/%lu";
+        symbolFormat = "  %s: %s;";
+    } else {
+        accessListString = "\naccess list:";
+        accessFormat = "\n\t%s";
+        commentFormat = "\ncomment leader: \"";
+        headFormat = "\nRCS file: %s\nWorking file: %s\nhead:%s%s\nbranch:%s%s\nlocks:%s";
+        insDelFormat = "  lines: +%lu -%lu";
+        symbolFormat = "\n\t%s: %s";
+    }
+
+    /* now handle all filenames */
+    do {
+        ffree();
+
+        if (pairfilenames(argc, argv, rcsreadopen, true, false) <= 0)
+            continue;
+
+        /* now RCSfilename contains the name of the RCS file, and finptr
+         * the file descriptor.  Workfilename contains the name of the
+         * working file.
+         */
+
+        /* Keep only those locks given by -l.  */
+        if (lockflag)
+            trunclocks();
+
+        /* do nothing if -L is given and there are no locks*/
+        if (onlylockflag && !Locks)
+            continue;
+
+        if ( onlyRCSflag ) {
+            aprintf(out, "%s\n", RCSfilename);
+            continue;
+        }
+        /* print RCS filename , working filename and optional
+           administrative information                         */
+        /* could use getfullRCSname() here, but that is very slow */
+        /* The pre-version-5 headFormat has no conversion for the last
+         * argument; " strict" is then printed after the locks instead. */
+        aprintf(out, headFormat, RCSfilename, workfilename,
+            Head ? " " : "",  Head ? Head->num : "",
+            Dbranch ? " " : "",  Dbranch ? Dbranch : "",
+            StrictLocks ? " strict" : ""
+        );
+        currlock = Locks;
+        while( currlock ) {
+            aprintf(out, symbolFormat, currlock->login,
+                currlock->delta->num);
+            currlock = currlock->nextlock;
+        }
+        if (StrictLocks && RCSversion<VERSION(5))
+            aputs("  strict", out);
+
+        aputs(accessListString, out);      /*  print access list  */
+        curaccess = AccessList;
+        while(curaccess) {
+            aprintf(out, accessFormat, curaccess->login);
+            curaccess = curaccess->nextaccess;
+        }
+
+        aputs("\nsymbolic names:", out);   /*  print symbolic names   */
+        for (curassoc=Symbols; curassoc; curassoc=curassoc->nextassoc)
+            aprintf(out, symbolFormat, curassoc->symbol, curassoc->num);
+        aputs(commentFormat, out);
+        awrite(Comment.string, Comment.size, out);
+        aputs("\"\n", out);
+        if (VERSION(5)<=RCSversion  ||  Expand != KEYVAL_EXPAND)
+            aprintf(out, "keyword substitution: %s\n",
+                expand_names[Expand]
+            );
+
+        gettree();
+
+        aprintf(out, "total revisions: %u", TotalDeltas);
+
+        revno = 0;
+
+        /* Revisions are selected only when neither -h nor -t was given. */
+        if (Head  &&  selectflag & descflag) {
+
+            getnumericrev();    /* get numeric revision or branch names */
+
+            exttree(Head);
+
+            /* for each entry of duelst, find the most recent date */
+            currdate = duelst;
+            while( currdate) {
+                VOID sprintf(currdate->strtdate,DATEFORM,0,0,0,0,0,0);
+                recentdate(Head, currdate);
+                currdate = currdate->dnext;
+            }
+
+            revno = extdate(Head);
+
+            aprintf(out, ";\tselected revisions: %u", revno);
+        }
+
+        afputc('\n',out);
+        if (descflag) {
+            aputs("description:\n", out);
+            getdesc(true);
+        }
+        if (revno) {
+            /* Read delta logs until the last selected one has been read. */
+            while (! (delta = readdeltalog())->selector  ||  --revno)
+                ;
+            if (delta->next && countnumflds(delta->num)==2)
+                /* Read through delta->next to get its insertlns.  */
+                while (readdeltalog() != delta->next)
+                    ;
+            putrunk();
+            putree(Head);
+        }
+        aputs("=============================================================================\n",out);
+    } while (cleanup(),
+         ++argv, --argc >= 1);
+    Ofclose(out);
+    exitmain(exitstatus);
+}
+
+ static void
+cleanup()
+/* Per-file epilogue: record a failing exit status once any error has been
+ * counted in nerror, then close the current RCS input file. */
+{
+    if (nerror != 0)
+        exitstatus = EXIT_FAILURE;
+
+    Izclose(&finptr);
+}
+
+/*
+ * Error-exit routine for rlog: terminate at once through _exit() with
+ * status EXIT_FAILURE.  Under lint the function is renamed to rlogExit.
+ * NOTE(review): presumably called by the RCS library's fatal-error
+ * routines; the callers are not visible here -- confirm.
+ */
+#if lint
+# define exiterr rlogExit
+#endif
+ exiting void
+exiterr()
+{
+ _exit(EXIT_FAILURE);
+}
+
+
+
+ static void
+putrunk()
+/* Print the selected trunk revisions, walking from Head along the next
+ * links.  For each trunk revision the following revision is passed to
+ * putadelta() as the place where its edit script counts are stored. */
+{
+    register struct hshentry const *delta;
+
+    delta = Head;
+    while (delta) {
+        putadelta(delta, delta->next, true);
+        delta = delta->next;
+    }
+}
+
+
+
+ static void
+putree(root)
+ struct hshentry const *root;
+/* Print the branches hanging off the chain of revisions that starts at
+ * root.  The revisions of that chain itself are not printed here.  The
+ * recursion reaches the end of the chain first, so branches are handled
+ * in reverse order of the chain. */
+{
+    if (root != nil) {
+        putree(root->next);
+        putforest(root->branches);
+    }
+}
+
+
+
+
+ static void
+putforest(branchroot)
+ struct branchhead const *branchroot;
+/* Print all branches that share the same direct ancestor.  The list is
+ * handled last entry first; for each branch its own revisions are printed,
+ * followed by the branches that sprout from it. */
+{
+    if (branchroot != nil) {
+        putforest(branchroot->nextbranch);
+
+        putabranch(branchroot->hsh);
+        putree(branchroot->hsh);
+    }
+}
+
+
+
+
+ static void
+putabranch(root)
+ struct hshentry const *root;
+/* Print the revisions of one branch, last revision of the chain first.
+ * Each revision is passed to putadelta() as its own edit script holder. */
+{
+    if (root != nil) {
+        putabranch(root->next);
+        putadelta(root, root, false);
+    }
+}
+
+
+
+
+
+ static void
+putadelta(node,editscript,trunk)
+ register struct hshentry const *node, *editscript;
+ int trunk;
+/*
+ * Print the log entry of revision node on stdout; do nothing unless
+ * node->selector is set.
+ * editscript is the revision whose insertlns and deletelns counters are
+ * reported for node; if it is nil, no line counts are printed.
+ * trunk says whether node is on the trunk; for trunk revisions the two
+ * counters are printed in swapped order.
+ */
+{
+    static char emptych[] = EMPTYLOG;
+
+    register FILE *out;
+    struct branchhead const *br;
+    struct buf branchnum;
+    char datebuf[datesize];
+    char const *text;
+    size_t len;
+
+    if (!node->selector)
+        return;
+
+    out = stdout;
+    aprintf(out,
+        "----------------------------\nrevision %s", node->num
+    );
+    if (node->lockedby) {
+        aprintf(out, "\tlocked by: %s;", node->lockedby);
+    }
+
+    aprintf(out, "\ndate: %s;  author: %s;  state: %s;",
+        date2str(node->date, datebuf),
+        node->author, node->state
+    );
+
+    if (editscript) {
+        if (trunk) {
+            aprintf(out, insDelFormat,
+                editscript->deletelns, editscript->insertlns);
+        } else {
+            aprintf(out, insDelFormat,
+                editscript->insertlns, editscript->deletelns);
+        }
+    }
+
+    /* List the numbers of the branches that start at this revision. */
+    if (node->branches) {
+        bufautobegin(&branchnum);
+        aputs("\nbranches:", out);
+        for (br = node->branches;  br;  br = br->nextbranch) {
+            getbranchno(br->hsh->num, &branchnum);
+            aprintf(out, "  %s;", branchnum.string);
+        }
+        bufautoend(&branchnum);
+    }
+
+    afputc('\n', out);
+
+    /* An empty log message is replaced by the EMPTYLOG text. */
+    len = node->log.size;
+    if (len) {
+        text = node->log.string;
+    } else {
+        text = emptych;
+        len = sizeof(emptych)-1;
+    }
+    awrite(text, len, out);
+    /* Make sure that the entry ends with a newline. */
+    if (text[len-1] != '\n') {
+        afputc('\n', out);
+    }
+}
+
+
+
+
+
+ static struct hshentry const *
+readdeltalog()
+/* Function : get the log message and skip the text of a deltatext node.
+ * Return the delta found.
+ * Assumes the current lexeme is not yet in nexttok; does not
+ * advance nexttok.
+ * Terminates through fatserror() if the input is at its end, if the
+ * delta number cannot be read, or if the delta already has a log.
+ * As a side effect, the delta's insertlns and deletelns are reset and,
+ * for every delta other than Head, recounted from its edit script.
+ */
+{
+ register struct hshentry * Delta;
+ struct buf logbuf;
+ struct cbuf cb;
+
+ if (eoflex())
+ fatserror("missing delta log");
+ nextlex();
+ if (!(Delta = getnum()))
+ fatserror("delta number corrupted");
+ getkeystring(Klog);
+ /* exttree() clears log.string for every delta, so a non-nil value */
+ /* here means this delta's log has already been read for this file */
+ if (Delta->log.string)
+ fatserror("duplicate delta log");
+ bufautobegin(&logbuf);
+ cb = savestring(&logbuf);
+ Delta->log = bufremember(&logbuf, cb.size);
+
+ nextlex();
+ /* ignore any phrases between the log and the text keyword */
+ while (nexttok==ID && strcmp(NextString,Ktext)!=0)
+ ignorephrase();
+ getkeystring(Ktext);
+ Delta->insertlns = Delta->deletelns = 0;
+ /* Head's text is only read past; line counts are not taken from it. */
+ /* NOTE(review): presumably because Head holds full text rather than */
+ /* an edit script -- confirm against rcsfile(5). */
+ if ( Delta != Head)
+ getscript(Delta);
+ else
+ readstring();
+ return Delta;
+}
+
+
+ static void
+getscript(Delta)
+struct hshentry * Delta;
+/* function: read edit script of Delta and count how many lines added */
+/* and deleted in the script */
+/* The counts are added to Delta->insertlns and Delta->deletelns; */
+/* rcsline is advanced for every newline skipped in inserted text. */
+
+{
+ int ed; /* editor command */
+ declarecache;
+ register RILE *fin;
+ register int c;
+ register unsigned long i;
+ struct diffcmd dc;
+
+ fin = finptr;
+ setupcache(fin);
+ initdiffcmd(&dc);
+ /* getdiffcmd() returns a negative value when the script is finished */
+ while (0 <= (ed = getdiffcmd(fin,true,(FILE *)0,&dc)))
+ if (!ed)
+ /* ed == 0: a command that deletes dc.nlines lines */
+ Delta->deletelns += dc.nlines;
+ else {
+ /* skip scripted lines */
+ i = dc.nlines;
+ Delta->insertlns += i;
+ cache(fin);
+ do {
+ /* consume one line of inserted text */
+ for (;;) {
+ cacheget(c);
+ switch (c) {
+ default:
+ continue;
+ case SDELIM:
+ cacheget(c);
+ /* a doubled SDELIM is part of the text */
+ if (c == SDELIM)
+ continue;
+ /* a single SDELIM ends the script; this is */
+ /* accepted only within the last expected line */
+ if (--i)
+ fatserror("unexpected end to edit script");
+ nextc = c;
+ /* uncache before returning from a partial line */
+ uncache(fin);
+ return;
+ case '\n':
+ break;
+ }
+ break;
+ }
+ ++rcsline;
+ } while (--i);
+ uncache(fin);
+ }
+}
+
+
+
+
+
+
+
+ static void
+exttree(root)
+struct hshentry *root;
+/* function: select revisions, starting with root.
+ * Sets the selector of root to the result of extractdelta(), clears
+ * root's log string, and recurses over root->next and every branch
+ * hanging off root.  A nil root is ignored.
+ */
+
+{
+ struct branchhead const *newbranch;
+
+ if (root == nil) return;
+
+ root->selector = extractdelta(root);
+ root->log.string = nil;
+ exttree(root->next);
+
+ newbranch = root->branches;
+ while( newbranch ) {
+ exttree(newbranch->hsh);
+ newbranch = newbranch->nextbranch;
+ }
+}
+
+
+
+
+ static void
+getlocker(argv)
+char * argv;
+/* function : get the login names of lockers from command line */
+/* and store in lockerlist.
+ * Names are separated by ',', ' ', tab, newline or ';'.  Each name is
+ * terminated in place inside argv and pushed on the front of
+ * lockerlist; the list entries point into argv, which is modified.
+ * If argv holds no name, lockerlist is set to nil.
+ */
+
+{
+ register char c;
+ struct lockers * newlocker;
+ argv--; /* compensates for the pre-increment in the scan below */
+ while( ( c = (*++argv)) == ',' || c == ' ' || c == '\t' ||
+ c == '\n' || c == ';') ;
+ if ( c == '\0') {
+ lockerlist=nil;
+ return;
+ }
+
+ while( c != '\0' ) {
+ newlocker = talloc(struct lockers);
+ newlocker->lockerlink = lockerlist;
+ newlocker->login = argv;
+ lockerlist = newlocker;
+ while ( ( c = (*++argv)) != ',' && c != '\0' && c != ' '
+ && c != '\t' && c != '\n' && c != ';') ;
+ *argv = '\0'; /* terminate the name in place */
+ if ( c == '\0' ) return;
+ while( ( c = (*++argv)) == ',' || c == ' ' || c == '\t' ||
+ c == '\n' || c == ';') ;
+ }
+}
+
+
+
+ static void
+getauthor(argv)
+char *argv;
+/* function: get the author's name from command line */
+/* and store in authorlist.
+ * Names are separated by ',', ' ', tab, newline or ';'.  Each name is
+ * terminated in place inside argv and pushed on the front of
+ * authorlist; the list entries point into argv, which is modified.
+ * If argv holds no name, authorlist becomes a single entry whose login
+ * is the result of getusername(false).
+ */
+
+{
+ register c;
+ struct authors * newauthor;
+
+ argv--; /* compensates for the pre-increment in the scan below */
+ while( ( c = (*++argv)) == ',' || c == ' ' || c == '\t' ||
+ c == '\n' || c == ';') ;
+ if ( c == '\0' ) {
+ authorlist = talloc(struct authors);
+ authorlist->login = getusername(false);
+ authorlist->nextauthor = nil;
+ return;
+ }
+
+ while( c != '\0' ) {
+ newauthor = talloc(struct authors);
+ newauthor->nextauthor = authorlist;
+ newauthor->login = argv;
+ authorlist = newauthor;
+ while( ( c = *++argv) != ',' && c != '\0' && c != ' '
+ && c != '\t' && c != '\n' && c != ';') ;
+ * argv = '\0'; /* terminate the name in place */
+ if ( c == '\0') return;
+ while( ( c = (*++argv)) == ',' || c == ' ' || c == '\t' ||
+ c == '\n' || c == ';') ;
+ }
+}
+
+
+
+
+ static void
+getstate(argv)
+char * argv;
+/* function : get the states of revisions from command line */
+/* and store in statelist.
+ * States are separated by ',', ' ', tab, newline or ';'.  Each state is
+ * terminated in place inside argv and pushed on the front of
+ * statelist; the list entries point into argv, which is modified.
+ * If argv holds no state, a warning is issued and statelist is left
+ * unchanged.
+ */
+
+{
+ register char c;
+ struct stateattri *newstate;
+
+ argv--; /* compensates for the pre-increment in the scan below */
+ while( ( c = (*++argv)) == ',' || c == ' ' || c == '\t' ||
+ c == '\n' || c == ';') ;
+ if ( c == '\0'){
+ warn("missing state attributes after -s options");
+ return;
+ }
+
+ while( c != '\0' ) {
+ newstate = talloc(struct stateattri);
+ newstate->nextstate = statelist;
+ newstate->status = argv;
+ statelist = newstate;
+ while( (c = (*++argv)) != ',' && c != '\0' && c != ' '
+ && c != '\t' && c != '\n' && c != ';') ;
+ *argv = '\0'; /* terminate the state in place */
+ if ( c == '\0' ) return;
+ while( ( c = (*++argv)) == ',' || c == ' ' || c == '\t' ||
+ c == '\n' || c == ';') ;
+ }
+}
+
+
+
+ static void
+trunclocks()
+/* Function: Truncate the list of locks to those that are held by the */
+/* id's on lockerlist. Do not truncate if lockerlist empty.
+ * Nothing is done either when Locks is nil.
+ * Surviving locks are relinked onto the front of Locks one by one, so
+ * their relative order ends up reversed.
+ */
+
+{
+ struct lockers const *plocker;
+ struct lock * plocked, * nextlocked;
+
+ if ( (lockerlist == nil) || (Locks == nil)) return;
+
+ /* shorten Locks to those contained in lockerlist */
+ plocked = Locks;
+ Locks = nil;
+ while( plocked != nil) {
+ plocker = lockerlist;
+ while((plocker != nil) && ( strcmp(plocker->login, plocked->login)!=0))
+ plocker = plocker->lockerlink;
+ nextlocked = plocked->nextlock; /* saved because nextlock may be overwritten below */
+ if ( plocker != nil) { /* the lock's login was found on lockerlist */
+ plocked->nextlock = Locks;
+ Locks = plocked;
+ }
+ plocked = nextlocked;
+ }
+}
+
+
+
+ static void
+recentdate(root, pd)
+ struct hshentry const *root;
+ struct Datepairs *pd;
+/* function: Finds the delta that is closest to the cutoff date given by */
+/* pd among the revisions selected by exttree. */
+/* Successively narrows down the interval given by pd, */
+/* and sets the strtdate of pd to the date of the selected delta.
+ * Only deltas whose selector is set are considered.  A delta whose date
+ * lies within [pd->strtdate, pd->enddate] (compared with cmpnum) has
+ * its date copied into pd->strtdate.  The whole tree below root is
+ * visited: root->next first, then every branch.
+ */
+{
+ struct branchhead const *newbranch;
+
+ if ( root == nil) return;
+ if (root->selector) {
+ if ( cmpnum(root->date, pd->strtdate) >= 0 &&
+ cmpnum(root->date, pd->enddate) <= 0)
+ VOID strcpy(pd->strtdate, root->date);
+ }
+
+ recentdate(root->next, pd);
+ newbranch = root->branches;
+ while( newbranch) {
+ recentdate(newbranch->hsh, pd);
+ newbranch = newbranch->nextbranch;
+ }
+}
+
+
+
+
+
+
+ static unsigned
+extdate(root)
+struct hshentry * root;
+/* function: select revisions which are in the date range specified */
+/* in duelst and datelist, start at root */
+/* Yield number of revisions selected, including those already selected.
+ * This function only ever clears a selector; it never sets one.  When
+ * either list is non-empty, a delta keeps its selector if its date
+ * falls inside some datelist range (an empty strtdate or enddate leaves
+ * that side unbounded) or equals the strtdate of some duelst entry.
+ * A nil root yields 0.
+ */
+{
+ struct branchhead const *newbranch;
+ struct Datepairs const *pdate;
+ unsigned revno;
+
+ if (!root)
+ return 0;
+
+ if ( datelist || duelst) {
+ pdate = datelist;
+ while( pdate ) {
+ if ( (pdate->strtdate)[0] == '\0' || cmpnum(root->date,pdate->strtdate) >= 0){
+ if ((pdate->enddate)[0] == '\0' || cmpnum(pdate->enddate,root->date) >= 0)
+ break;
+ }
+ pdate = pdate->dnext;
+ }
+ if ( pdate == nil) { /* no datelist range matched; try the exact dates in duelst */
+ pdate = duelst;
+ for (;;) {
+ if (!pdate) {
+ root->selector = false;
+ break;
+ }
+ if ( cmpnum(root->date, pdate->strtdate) == 0)
+ break;
+ pdate = pdate->dnext;
+ }
+ }
+ }
+ revno = root->selector + extdate(root->next);
+
+ newbranch = root->branches;
+ while( newbranch ) {
+ revno += extdate(newbranch->hsh);
+ newbranch = newbranch->nextbranch;
+ }
+ return revno;
+}
+
+
+
+ static char
+extractdelta(pdelta)
+ struct hshentry const *pdelta;
+/* function: compare information of pdelta to the authorlist, Locks */
+/* (when lockflag is set), statelist and Revlst; yield true if pdelta is selected.
+ * Each non-empty list is a filter that pdelta must pass; an empty
+ * list imposes no restriction.  With lockflag set, pdelta must be the
+ * delta of some entry on Locks.
+ */
+
+{
+ struct lock const *plock;
+ struct stateattri const *pstate;
+ struct authors const *pauthor;
+ struct Revpairs const *prevision;
+ unsigned length;
+
+ if ((pauthor = authorlist)) /* only certain authors wanted */
+ while (strcmp(pauthor->login, pdelta->author) != 0)
+ if (!(pauthor = pauthor->nextauthor))
+ return false;
+ if ((pstate = statelist)) /* only certain states wanted */
+ while (strcmp(pstate->status, pdelta->state) != 0)
+ if (!(pstate = pstate->nextstate))
+ return false;
+ if (lockflag) /* only locked revisions wanted */
+ for (plock = Locks; ; plock = plock->nextlock)
+ if (!plock)
+ return false;
+ else if (plock->delta == pdelta)
+ break;
+ if ((prevision = Revlst)) /* only certain revs or branches wanted */
+ for (;;) {
+ length = prevision->numfld;
+ if (
+ countnumflds(pdelta->num) == length+(length&1) && /* length rounded up to an even field count */
+ 0 <= compartial(pdelta->num, prevision->strtrev, length) &&
+ 0 <= compartial(prevision->endrev, pdelta->num, length)
+ )
+ break;
+ if (!(prevision = prevision->rnext))
+ return false;
+ }
+ return true;
+}
+
+
+
+ static void
+getdatepair(argv)
+ char * argv;
+/* function: get time range from command line and store in datelist if */
+/* a time range specified or in duelst if a time spot specified.
+ * Entries are separated by ';'.  Accepted forms per entry, as handled
+ * below: "<date", ">date", "date", "date<", "date>", "date1<date2"
+ * and "date1>date2".  A missing bound is stored as an empty string.
+ * argv is modified: date strings are terminated in place before being
+ * handed to str2date().  New entries are pushed on the front of the
+ * respective list.  An argv without any date only produces a warning.
+ */
+
+{
+ register char c;
+ struct Datepairs * nextdate;
+ char const * rawdate;
+ int switchflag; /* true when '>' was used: the first date is the end, the second the start */
+
+ argv--;
+ while( ( c = (*++argv)) == ',' || c == ' ' || c == '\t' ||
+ c == '\n' || c == ';') ;
+ if ( c == '\0' ) {
+ warn("missing date/time after -d");
+ return;
+ }
+
+ while( c != '\0' ) {
+ switchflag = false;
+ nextdate = talloc(struct Datepairs);
+ if ( c == '<' ) { /* case: -d <date */
+ c = *++argv;
+ (nextdate->strtdate)[0] = '\0';
+ } else if (c == '>') { /* case: -d'>date' */
+ c = *++argv;
+ (nextdate->enddate)[0] = '\0';
+ switchflag = true;
+ } else {
+ rawdate = argv;
+ while( c != '<' && c != '>' && c != ';' && c != '\0')
+ c = *++argv;
+ *argv = '\0';
+ if ( c == '>' ) switchflag=true;
+ str2date(rawdate,
+ switchflag ? nextdate->enddate : nextdate->strtdate);
+ if ( c == ';' || c == '\0') { /* case: -d date */
+ VOID strcpy(nextdate->enddate,nextdate->strtdate);
+ nextdate->dnext = duelst;
+ duelst = nextdate;
+ goto end;
+ } else {
+ /* case: -d date< or -d date>; see switchflag */
+ while ( (c= *++argv) == ' ' || c=='\t' || c=='\n');
+ if ( c == ';' || c == '\0') {
+ /* second date missing */
+ if (switchflag)
+ *nextdate->strtdate= '\0';
+ else
+ *nextdate->enddate= '\0';
+ nextdate->dnext = datelist;
+ datelist = nextdate;
+ goto end;
+ }
+ }
+ }
+ rawdate = argv; /* remaining (second or only bounded) date of the range */
+ while( c != '>' && c != '<' && c != ';' && c != '\0')
+ c = *++argv;
+ *argv = '\0';
+ str2date(rawdate,
+ switchflag ? nextdate->strtdate : nextdate->enddate);
+ nextdate->dnext = datelist;
+ datelist = nextdate;
+ end:
+ if ( c == '\0') return;
+ while( (c = *++argv) == ';' || c == ' ' || c == '\t' || c =='\n');
+ }
+}
+
+
+
+ static void
+getnumericrev()
+/* function: get the numeric names of the revisions that are stored in revlist */
+/* and store those numeric names in Revlst */
+/* if branchflag, also add default branch.
+ * On input, numfld of a revlist entry is a form code (1: rev, 2: rev-,
+ * 3: -rev, otherwise rev1-rev2).  In the Revlst entries built here,
+ * numfld instead holds a field count obtained from countnumflds().
+ * Entries for which n stays 0 (for example when expandsym() or
+ * checkrevpair() fails) are dropped.  Revlst is rebuilt from nil.
+ */
+
+{
+ struct Revpairs * ptr, *pt;
+ unsigned n;
+ struct buf s, e;
+ char const *lrev;
+ struct buf const *rstart, *rend;
+
+ Revlst = nil;
+ ptr = revlist;
+ bufautobegin(&s);
+ bufautobegin(&e);
+ while( ptr ) {
+ n = 0;
+ rstart = &s;
+ rend = &e;
+
+ switch (ptr->numfld) {
+
+ case 1: /* -r rev */
+ if (expandsym(ptr->strtrev, &s)) {
+ rend = &s;
+ n = countnumflds(s.string);
+ if (!n && (lrev = tiprev())) { /* no fields: fall back to tiprev() if it yields one */
+ bufscpy(&s, lrev);
+ n = countnumflds(lrev);
+ }
+ }
+ break;
+
+ case 2: /* -r rev- */
+ if (expandsym(ptr->strtrev, &s)) {
+ bufscpy(&e, s.string);
+ n = countnumflds(s.string);
+ (n<2 ? e.string : strrchr(e.string,'.'))[0] = 0; /* end = start with its last field cut off */
+ }
+ break;
+
+ case 3: /* -r -rev */
+ if (expandsym(ptr->endrev, &e)) {
+ if ((n = countnumflds(e.string)) < 2)
+ bufscpy(&s, ".1");
+ else {
+ bufscpy(&s, e.string);
+ VOID strcpy(strrchr(s.string,'.'), ".1"); /* start = end with its last field replaced by 1 */
+ }
+ }
+ break;
+
+ default: /* -r rev1-rev2 */
+ if (
+ expandsym(ptr->strtrev, &s)
+ && expandsym(ptr->endrev, &e)
+ && checkrevpair(s.string, e.string)
+ ) {
+ n = countnumflds(s.string);
+ /* Swap if out of order. */
+ if (compartial(s.string,e.string,n) > 0) {
+ rstart = &e;
+ rend = &s;
+ }
+ }
+ break;
+ }
+
+ if (n) {
+ pt = ftalloc(struct Revpairs);
+ pt->numfld = n;
+ pt->strtrev = fstr_save(rstart->string);
+ pt->endrev = fstr_save(rend->string);
+ pt->rnext = Revlst;
+ Revlst = pt;
+ }
+ ptr = ptr->rnext;
+ }
+ /* Now take care of branchflag */
+ if (branchflag && (Dbranch||Head)) {
+ pt = ftalloc(struct Revpairs);
+ pt->strtrev = pt->endrev =
+ Dbranch ? Dbranch : fstr_save(partialno(&s,Head->num,1));
+ pt->rnext=Revlst; Revlst=pt;
+ pt->numfld = countnumflds(pt->strtrev);
+ }
+ bufautoend(&s);
+ bufautoend(&e);
+}
+
+
+
+ static int
+checkrevpair(num1,num2)
+ char const *num1, *num2;
+/* function: check whether num1, num2 are a legal pair, i.e.
+ they have the same number of fields and only the last field
+ differs (if length <= 2, the first field may differ as well).
+ Reports an error and yields false for an illegal pair;
+ yields true otherwise. */
+
+{
+ unsigned length = countnumflds(num1);
+
+ if (
+ countnumflds(num2) != length
+ || 2 < length && compartial(num1, num2, length-1) != 0
+ ) {
+ error("invalid branch or revision pair %s : %s", num1, num2);
+ return false;
+ }
+
+ return true;
+}
+
+
+
+ static void
+getrevpairs(argv)
+register char * argv;
+/* function: get revision or branch range from command line, and */
+/* store in revlist.
+ * Pairs are separated by ',' or ';'.  The range separator is ':' when
+ * argv contains a ':' anywhere, otherwise the older '-'.
+ * argv is modified: revision strings are terminated in place and the
+ * revlist entries point into it.  numfld of each entry is set to a
+ * form code: 1 for a single rev, 2 for "rev-", 3 for "-rev" and 4 for
+ * "rev1-rev2".  New entries are pushed on the front of revlist.
+ */
+
+{
+ register char c;
+ struct Revpairs * nextrevpair;
+ int separator;
+
+ c = *argv;
+
+ /* Support old ambiguous '-' syntax; this will go away. */
+ if (strchr(argv,':'))
+ separator = ':';
+ else {
+ if (strchr(argv,'-') && VERSION(5) <= RCSversion)
+ warn("`-' is obsolete in `-r%s'; use `:' instead", argv);
+ separator = '-';
+ }
+
+ for (;;) {
+ while (c==' ' || c=='\t' || c=='\n')
+ c = *++argv;
+ nextrevpair = talloc(struct Revpairs);
+ nextrevpair->rnext = revlist;
+ revlist = nextrevpair;
+ nextrevpair->numfld = 1;
+ nextrevpair->strtrev = argv;
+ for (;; c = *++argv) { /* scan the first revision */
+ switch (c) {
+ default:
+ continue;
+ case '\0': case ' ': case '\t': case '\n':
+ case ',': case ';':
+ break;
+ case ':': case '-':
+ if (c == separator) /* only the active separator ends the first revision */
+ break;
+ continue;
+ }
+ break;
+ }
+ *argv = '\0';
+ while (c==' ' || c=='\t' || c=='\n')
+ c = *++argv;
+ if (c == separator) {
+ while( (c =(*++argv)) == ' ' || c == '\t' || c =='\n') ;
+ nextrevpair->endrev = argv;
+ for (;; c = *++argv) { /* scan the second revision */
+ switch (c) {
+ default:
+ continue;
+ case '\0': case ' ': case '\t': case '\n':
+ case ',': case ';':
+ break;
+ case ':': case '-':
+ if (c == separator) /* NOTE(review): here the active separator is scanned over and the other one ends the revision - confirm intended */
+ continue;
+ break;
+ }
+ break;
+ }
+ *argv = '\0';
+ while (c==' ' || c=='\t' || c =='\n')
+ c = *++argv;
+ nextrevpair->numfld =
+ !nextrevpair->endrev[0] ? 2 /* -rrev- */ :
+ !nextrevpair->strtrev[0] ? 3 /* -r-rev */ :
+ 4 /* -rrev1-rev2 */;
+ }
+ if (!c)
+ break;
+ if (c!=',' && c!=';')
+ error("missing `,' near `%c%s'", c, argv+1);
+ }
+}
diff --git a/gnu/usr.bin/tar/COPYING b/gnu/usr.bin/tar/COPYING
new file mode 100644
index 000000000000..a43ea2126fb6
--- /dev/null
+++ b/gnu/usr.bin/tar/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/gnu/usr.bin/tar/ChangeLog b/gnu/usr.bin/tar/ChangeLog
new file mode 100644
index 000000000000..7934f2b1b2e3
--- /dev/null
+++ b/gnu/usr.bin/tar/ChangeLog
@@ -0,0 +1,1732 @@
+Thu Mar 25 13:32:40 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * version.c: Released version 1.11.2.
+
+ * Makefile.in (dist): Do the link differently; some of the
+ files have changed filesystems which makes it more complex.
+
+ * Makefile.in (dist, shar): Use gzip instead of compress.
+
+ * create.c (dump_file): Test for curdev==-1, not curdev<0.
+ Some losing NFS systems give negative device numbers
+ sometimes.
+
+Thu Mar 25 11:55:15 1993 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * level-0, level-1 (TAR_PART1): Use `--block-size', not just
+ `--block', which is now ambiguous.
+
+Wed Mar 24 22:12:51 1993 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * backup-specs (TAR): New variable.
+
+ * level-0, level-1 (TAR_PART1): Get path of GNU tar from `TAR'
+ variable, don't hardcode it.
+
+Sat Mar 20 00:20:05 1993 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * backup-specs (SLEEP_MESSAGE): put backslashes in front of nested
+ double quotes.
+
+ * level-0, level-1 (BACKUP_DIRS): Don't put in quotes.
+ (LOGFILE): Use sed to construct name, not awk.
+
+ * dump-remind (recipients): Replaced inefficient pipeline with a
+ single, simple sed script.
+ (volno): Deal with the possibility that VOLNO_FILE may not be
+ created yet.
+
+Fri Mar 19 15:05:15 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * backup-specs (VOLNO_FILE): Removed abusive comment by Noah.
+
+ * buffer.c (new_volume): Write the global volume number to the
+ volno file before running the info script, so that the script
+ can look at it.
+
+Thu Mar 18 20:11:54 1993 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * Makefile.in (AUX): Include `dump-remind' in distribution.
+
+ * backup-specs (SLEEP_MESSAGE): New variable.
+ level-0, level-1: Use it instead of external `dont_touch' file.
+
+ * level-0, level-1: Put most of the script in () and pipe
+ everything from the subshell through tee -a $LOGFILE. Since you
+ really want most of the output to go to the logfile anyway, and
+ since all those pipelines were preventing one from getting the
+ exit status of most commands, this seems like the right idea.
+
+ * level-0, level-1 (LOGFILE): Use YYYY-MM-DD (all numeric) format
+ for log file name, since that makes the file names sortable in a
+ coherent way. Suffix should always be `level-n' where n is the
+ dump level. level-0 script was just using `-full' instead.
+
+ * level-0, level-1 (DUMP_LEVEL): New variable. Set to `0' or `1'
+ in each script as appropriate.
+
+ * level-0, level-1 (HOST): Renamed to `localhost' for clarity.
+ (host): renamed to `remotehost' for clarity.
+
+ * level-0, level-1 (startdate): New variable. Use it in Subject
+ line of mailed report.
+
+ * level-0, level-1: Fixed all instances where sed is called with a
+ script on the command line to use `-e' option.
+
+ * level-0, level-1: Don't try to call logfile.sed to filter
+ LOGFILE. It's not distributed with tar and was never really used
+ anyway.
+
+ * level-0, level-1: Put quotes around most variable names (barring
+ those that are known to intentionally contain text that should be
+ expanded into multiple words, like `TAR_PART1').
+
+ * level-0, level-1: Got rid of annoying trailing backslashes in awk
+ scripts. They were gratuitous. Made them a little more readable
+ by adding some whitespace.
+
+Wed Mar 17 10:30:58 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * tar.c (describe, long_options): Changed --compress-block to
+ --block-compress.
+ (options): Fixed f_compress_block sanity check error message
+ to give the correct name of the option.
+
+Tue Mar 16 14:52:40 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * extract.c (extract_archive): case LF_DIR: Do chown when
+ necessary. Don't bother jumping to set_filestat for
+ f_modified; repeat the chmod code here. Replace `break',
+ deleted on 2 September 1992.
+
+ * tar.c (describe, long_options, options): Added gzip options
+ and use-compress-program option.
+ * tar.h: Added new compression options.
+ * buffer.c (child_open, open_archive): Use new compression options.
+
+ * create.c (start_header): Only mask off high bits when
+ creating old-style archives.
+ * list.c (decode_header): Mask off potentially misleading
+ high bits from the mode when reading headers.
+
+Mon Mar 15 11:34:34 1993 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * extract.c (extract_archive): Put arguments in the right
+ order for error message.
+
+ * create.c (deal_with_sparse): if the last byte was null, we
+ didn't write it out.
+
+ * gnu.c, create.c, extract.c, diffarch.c, list.c throughout:
+ Replace malloc calls with ck_malloc and realloc with ck_realloc.
+
+ * tar.c (describe): Improve doc for -L.
+
+ * tar.c (name_next): Don't apply exclusion to explicitly named
+ files.
+
+ * tar.c (long_options, describe): Added new-volume-script as
+ an alias for info-script.
+
+ * extract.c (extract_archive): LF_DUMPDIR case; misplaced paren.
+
+ * extract.c (extract_archive): extract_file case, first if,
+ include space for null in namelen computation.
+
+ * extract.c (extract_sparse_file): Use value returned by write
+ to properly create error message.
+
+ * create.c (create_archive): Don't assume we have anything to
+ dump.
+
+ * buffer.c (open_archive): Set current_file_name for the
+ volume header so that verbose listings work properly.
+
+ * Makefile.in (realclean): Added getdate.c.
+
+Thu Jan 14 23:38:44 1993 David J. MacKenzie (djm@kropotkin.gnu.ai.mit.edu)
+
+ * tar.c: Include fnmatch.h after port.h to make sure we get our FNM_*
+ (e.g. on HPUX 8).
+
+Tue Nov 24 08:30:54 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * tar.c (addname), gnu.c (read_dir_file): Use HAVE_GETCWD, not USG.
+
+ * port.h, rmt.h: Use HAVE_STRING_H, not USG.
+
+ * port.h: Add dir header decls.
+ * create.c, gnu.c: Use SYSNDIR, SYSDIR, and NDIR
+ instead of BSD42 and USG. Rename DP_NAMELEN to NLENGTH.
+ Use `struct dirent' instead of `struct direct'.
+ * create.c, gnu.c, tar.c: Remove dir header decls.
+
+Wed Nov 18 15:31:30 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * tar.c: Change FNM_TARPATH to FNM_LEADING_DIR to match change
+ in fnmatch.[ch].
+
+Wed Oct 21 00:52:24 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * level-0, level-1: put curly braces around variables for clarity.
+
+ * backup-specs (DUMP_REMIND_SCRIPT): define it (but commented out
+ so that distributed dump scripts won't use it by default).
+ level-0, level-1 (TAR_PART1): use --info-script if
+ DUMP_REMIND_SCRIPT is defined.
+ dump-remind: new file (intended as an example).
+
+Thu Oct 15 03:33:28 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * level-0, level-1: remove $LOGFILE.tmp files before exiting.
+
+Fri Oct 2 00:28:01 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * tar.c (describe): Fix some tab alignments.
+
+ * Makefile.in (SRC3): Add getdate.c, for systems without bison/yacc
+ (like MS-DOS).
+
+ * diffarch.c (diff_sparse_files): Add missing arg to fprintf calls.
+
+ * extract.c (extract_archive, restore_saved_dir_info),
+ buffer.c (child_open), list.c (decode_header, print_header):
+ Delete unused vars.
+
+ * port.c [__MSDOS__]: Have strstr, rename, and mkdir. Don't
+ define ck_pipe.
+
+ * buffer.c, tar.c (init_volume_number, closeout_volume_number),
+ create.c (write_long): Declare as void, not int, since they
+ don't return a value.
+
+Thu Sep 24 00:06:02 1992 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu)
+
+ * level-0, level-1 (TAR_PART1): remove --atime-preserve
+ because of a total screw.
+
+Tue Sep 22 14:15:48 1992 Michael I Bushnell (mib@wookumz.gnu.ai.mit.edu)
+
+ * buffer.c (close_archive): Removed leftover `break' from when
+ this was a switch.
+
+Tue Sep 22 08:33:16 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * create.c, port.h: indented #pragma directives with 1 space.
+
+Fri Sep 18 14:15:17 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * All source files: reindented using GNU indent.
+
+ * rtapelib.c (__rmt_read): Only read the amount left in the
+ buffer; otherwise a broken rmt server (which puts too much
+ data out) could overwrite past our buffer.
+
+Thu Sep 17 14:08:58 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * create.c: Throughout, use struct utimbuf rather than array
+ of longs.
+
+ * configure.in: Check for getpwuid and getgrgid.
+
+ * Makefile.in (SRC3, AUX): Move alloca.c to SRC3.
+ (OBJ3): Add @ALLOCA@.
+
+ * Makefile.in (getdate.c): Look in srcdir for getdate.y.
+
+ * buffer.c (close_archive): We can't check WTERMSIG
+ meaningfully unless we already know that WIFSIGNALED is true.
+ (There is no guarantee that WTERMSIG will return zero when
+ WIFSIGNALED is false.)
+ * port.c (rmdir, mkdir): Check WIFSIGNALED rather than
+ WTERMSIG.
+
+ * Makefile.in (getdate.c): Use $(YACC) instead of `yacc'.
+
+Tue Sep 15 14:49:48 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * version.c: Released version 1.11.1.
+
+ * Makefile (AUX): Added NEWS.
+
+ * Makefile.in (rmt): Added $(LIBS).
+ * configure.in: Added tests for libraries needed on Solaris.
+
+ * mangle.c (extract_mangle): Null terminate link name for
+ losing archives missing it.
+
+ * Makefile.in: added target and rule for getdate.c: getdate.y;
+ some makes don't have one built in.
+
+Mon Sep 14 16:23:15 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * tar.c (options, main): Advise use of --help rather than
+ +help.
+
+ * create.c (write_long): Using hstat here is a Bad Idea, and
+ totally unnecessary at that.
+
+ * list.c (read_header): Compute both signed and normal
+ checksums.
+
+ * configure.in: Define BSD in the presence of /sdmach or
+ /../../mach.
+
+ * diffarch.c, buffer.c: Declare valloc as void* rather than
+ char*.
+
+ * Makefile.in: Don't install info files.
+
+ * configure.in: Check for malloc was scrambled.
+
+ * port.h: Undefine index and rindex if necessary; some
+ string.h's define them for us.
+
+ * tar.c (addname): Missing braces after if.
+ * gnu.c (read_dir_file): Missing braces after if.
+
+ * names.c: Add include of <stdio.h>.
+
+ * create.c (start_header): Set current_file_name so that
+ print_header (used for verbose create) works properly.
+ (dump_file): Set current_link_name when setting up symlink
+ and hardlink records.
+
+Fri Sep 11 01:05:52 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * fnmatch.[ch]: New files.
+ * wildmat.c: File removed.
+ * tar.c: Include fnmatch.h and use fnmatch instead of wildmat.
+ * Makefile.in, makefile.pc: Replace wildmat.o(bj) with fnmatch.
+
+Thu Sep 10 23:19:30 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * buffer.c, tar.c: Remove redundant decls of getenv, rindex.
+
+ * Makefile.in: Add uninstall target.
+ Define libdir instead of hardcoding /etc for installing rmt.
+
+Thu Sep 10 13:06:03 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * list.c (read_header): On second thought, that doesn't work
+ either, so just store the names in malloced areas. Sigh.
+
+ * NEWS: New file.
+ * README: Removed things that belong in NEWS; point to it.
+
+ * list.c (read_header): current_file_name and
+ current_link_name need to be set to the arrays in head rather
+ than header; header is the actual read buffer and will change.
+
+ * extract.c (extract_archive):
+ * buffer.c (new_volume): `#' directives need to start in
+ column 1.
+
+Thu Sep 10 06:09:18 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * level-0, level-1 (TAR_PART1): put --atime-preserve inside quotes.
+
+Wed Sep 9 13:34:26 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * Makefile.in (AUX): Add getpagesize.h.
+ (AUX): Comment out manuals.
+ (all): Comment out dependency on tar.info.
+
+ * version.c: Release of version 1.11.
+
+ * level-0, level-1 (TAR_PART1): Use --atime-preserve.
+
+ * Makefile, configure.in: Arrange to use local malloc on HP-UX.
+
+ * port.h: Use the canonical Autoconf chunk for alloca instead
+ of just looking for gcc.
+
+Wed Sep 9 03:16:58 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * port.h: If compiling with gcc, use __builtin_alloca.
+
+Tue Sep 8 16:13:41 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * extract.c: Removed long name support from here.
+ * list.c (read_header): Understand and skip longname/longlink
+ headers here. Names for current file are stored in new global
+ variables. All source files except create.c changed to refer
+ to current_file_name and current_link_name instead of fields
+ directly from the current header.
+
+Thu Sep 3 12:41:08 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * create.c (write_long): New function.
+ (dump_file): When writing link records or symlink records, use
+ new write_long function instead of mangling when the link
+ target is too long.
+ (start_header): Use write_long instead of mangling for long
+ names.
+ * extract.c (saverec): Recognize LF_LONGNAME and LF_LONGLINK.
+ (saverec): Throughout, use longname and longlink if they are set.
+
+Wed Sep 2 14:41:13 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * mangle.c: This is now deprecated; retain extract_mangle for
+ backward compatibility.
+
+ * list.c (print_header): patch from Chris Arthur to prevent
+ printing 0 when the gid or uid is null.
+
+ * list.c (decode_header): patch from Chris Arthur to use the
+ gid field when the gid is empty, and similarly for uid.
+
+ * extract.c: saved_dir_info, saved_dir_info_head: new type and
+ var.
+ (extract_archive): When extracting directories, now save info
+ in saved_dir_info_head.
+ (restore_saved_dir_info): New function.
+ * list.c (read_and): Call restore_saved_dir_info at the end of
+ the run.
+ This patch is from Chris Arthur (csa@pennies.sw.stratus.com).
+
+Mon Aug 31 15:39:55 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * create.c (create_archive): If there are no names specified,
+ write nothing on the archive instead of dumping ".".
+
+ * buffer.c (open_archive): Useful error message.
+
+ * tar.c, tar.h: Recognize f_atime_preserve.
+ * create.c (dump_file): Implement f_atime_preserve.
+
+ * rmt.h (_remdev): Don't require /dev/ to be in remote archive
+ names; obey new force-local flag.
+ * tar.c, tar.h: Implement new force-local flag.
+
+ * tar.c (describe): same-owner and same-order were confused.
+
+ * create.c (dump_file): Check for toplevel had sense reversed.
+
+ * buffer.c (new_archive): Don't free old_name...when these
+ come from the command line, they aren't malloced, and it isn't
+ important to save this trivial amount of memory.
+
+ * tar.h: replace ar_file with ar_files, n_ar_files,
+ cur_ar_files.
+ * buffer.c (open_archive): multi-volume compressed archives
+ never worked; give an appropriate error. Change open of
+ ar_file to open of ar_files[0].
+ (writeerror, readerror, flush_archive): use
+ ar_files[cur_ar_file] instead of ar_file.
+ (new_archive): Necessary changes to support ar_files.
+ * tar.c (options): handle multiple tape drive arguments.
+
+Fri Aug 28 17:42:13 1992 Michael I Bushnell (mib@wookumz.gnu.ai.mit.edu)
+
+ * list.c (decode_header), create.c (start_header), tar.h (TMAGIC):
+ Undo djm's changes below; tar does not support the final
+ Posix.1 format; it's bad to make it look like it does.
+
+Sun Jul 19 02:13:46 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * port.h: Try to prevent redefining major.
+ * port.c: HAVE_BZERO -> minix. Fix a typo.
+
+ * list.c (decode_header): Recognize the final POSIX.1 magic as
+ well as the early draft magic for ustar.
+ * create.c (start_header): Create a final POSIX.1 magic string
+ instead of an early draft string for ustar.
+ * tar.h (TMAGIC): Remove the trailing blanks.
+
+ * rmt.c, rtapelib.c: Use POSIX and STDC headers if available.
+ * rmt.h: Declare the external functions defined in rtapelib.c.
+
+Tue Jul 14 00:44:37 1992 David J. MacKenzie (djm@apple-gunkies.gnu.ai.mit.edu)
+
+ * pathmax.h: New file.
+ * port.h: Include it.
+ * create.c (create_archive): Allocate PATH_MAX instead of
+ NAME_MAX for temporary buffer so we don't have to figure out
+ what NAME_MAX is (portably).
+
+Fri Jul 10 08:30:42 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * gnu.c (collect_and_sort_names): write_dir_file has no argument.
+
+ * level-0, level-1: Avoid silly Sun awk lossage.
+
+Mon Jul 6 20:11:32 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * port.c (rename): If unlinking the source at the end fails,
+ unlink the destination instead to avoid leaving a mess.
+
+Fri Jul 3 15:16:42 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * buffer.c, diffarch.c, update.c, rtapelib.c: Change NO_MTIO to
+ HAVE_SYS_MTIO_H.
+
+ * port.c, tar.h: Change FOO_MISSING to HAVE_FOO.
+
+Tue Jun 23 23:39:02 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * rmt.c: Add #ifdefs to work on ISC.
+
+Wed May 20 00:12:27 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
+
+ * port.h: Define major, minor, makedev if the system doesn't.
+
+Wed May 13 21:16:38 1992 Michael I Bushnell (mib@apple-gunkies.gnu.ai.mit.edu)
+
+ * gnu.c (add_dir_name): Store legitimate value into
+ dir_contents when get_dir_contents returns NULL.
+
+Thu May 7 23:44:35 1992 Michael I Bushnell (mib@apple-gunkies.gnu.ai.mit.edu)
+
+ * gnu.c (add_dir_name): Check for return of NULL from get_dir_contents;
+ see djm's change of Fri Jul 26 01:12:58 1991.
+
+Mon May 4 22:50:57 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
+
+ * tar.h: Make comments for option names say -- instead of +.
+
+Thu Apr 30 03:09:16 1992 Noah Friedman (friedman@nutrimat.gnu.ai.mit.edu)
+
+ * level-1: Added `$' before VOLNO_FILE in definition of TAR_PART1.
+ Added line to remove $VOLNO_FILE from any previous dump before
+ starting.
+
+ * level-0, level-1: Change long options to use `--' instead of `+'
+ (support for `+' will go away soon)
+
+Wed Apr 29 14:23:10 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * tar.c, tar.h: Added +volno-file option.
+ buffer.c: New functions init_volume_number,
+ closeout_volume_number.
+ tar.c (main): Call new functions in the right place.
+
+ * buffer.c (fl_write, fl_read): Mod to allow losing tape
+ drives which use short counts to indicate end of tape
+ correctly handle the multi-tape stuff. The read half won't
+ co-exist with f_reblock; there's no way to fix that, of
+ course.
+
+ * tar.c, tar.h: Added new option +show-omitted-dirs, from
+ Karl Berry.
+ list.c (read_and): Implemented show-omitted-dirs.
+
+ * tar.c, tar.h: Added new option +checkpoint.
+ buffer.c (fl_read, fl_write): Implemented +checkpoint lazily.
+
+ * create.c (dump_file): Added toplevel argument; some devices
+ can be negative, so the old method was bogus. All callers
+ changed.
+
+ * tar.c, tar.h: Added new option +ignore-failed-read.
+ create.c (dump_file): Implemented +ignore-failed-read.
+
+ * create.c (finish_sparse_file): Commented out debugging printf.
+
+ * tar.c, tar.h: Added new option +remove-files to delete files
+ after they are added to the archive.
+ create.c (dump_file): Implemented +remove-files for
+ everything but directories. I don't think they need it.
+
+Tue Apr 28 13:21:42 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * create.c: (dump_file): save_name needs to be set equal to p,
+ not something inside the header, because the header changes at
+ the first buffer flush.
+
+Fri Apr 24 10:41:13 1992 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * create.c: Djm incorrectly moved the include of port.h to
+ precede the include of sys/file.h; restored.
+
+ * tar.c (main): Cases CMD_EXTRACT and CMD_LIST: declare error
+ string with const.
+
+ * gnu.c (collect_and_sort_names): Leave if around
+ write_dir_file in place.
+
+Wed Apr 22 02:16:14 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
+
+ * rtapelib.c: SIGTYPE -> RETSIGTYPE.
+
+Mon Mar 9 22:42:05 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * rtapelib.c: Reformat and make comments more complete.
+ Rename a few variables for clarity.
+
+Thu Mar 5 14:07:34 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * tar.c (describe): Document long options as starting with --.
+
+Thu Jan 23 22:54:41 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * tar.c (options): Check get_date return value for error indication.
+
+Tue Dec 24 00:03:03 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * tar.c, gnu.c, extract.c, create.c, port.h, rmt.h: Change
+ POSIX ifdefs to HAVE_UNISTD_H and _POSIX_VERSION.
+
+Fri Dec 20 13:50:38 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * testpad.c (main): flush stderr so perror and fprintf
+ cooperate right.
+
+Wed Dec 18 16:52:42 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * port.h: Check MAJOR_IN_MKDEV and MAJOR_IN_SYSMACROS to find
+ where to get major, minor and makedev.
+ * create.c, list.c, update.c: Don't check USG to include
+ sys/sysmacros.h.
+
+Thu Dec 12 21:57:10 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * mangle.c (extract_mangle): Correctly null terminate name of
+ link target.
+
+Thu Nov 21 07:44:18 1991 Michael I Bushnell (mib at nutrimat)
+
+ * create.c (dump_file, at start of ISREG output loop): use
+ filename from header instead of real name to make sure that we
+ get the mangled version and not one that is too long and
+ overflows buffers.
+
+Sat Nov 16 01:37:45 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * tar.h: Use new criteria for STDC version of msg.
+
+Sat Nov 2 21:31:57 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * create.c, gnu.c, tar.c: Use DIRENT instead of NDIR to select
+ between dirent.h and ndir.h for USG.
+
+ * port.c: Rename WANT_FOO to FOO_MISSING to make sharing code
+ and configure script with other utilities easier. Use
+ VPRINTF_MISSING and DOPRNT_MISSING instead of FOO_MSG to
+ select error reporting routines.
+
+Thu Oct 17 20:19:02 1991 Michael I Bushnell (mib at churchy.gnu.ai.mit.edu)
+
+ * level-0: Repair damage from previous mod: stdin to rsh must
+ be the terminal or tar's questions lose.
+
+Sat Aug 31 15:05:27 1991 Noah Friedman (friedman at nutrimat.gnu.ai.mit.edu)
+
+ * level-0: Fixed several syntax errors associated with
+ stdout/stderr redirection.
+ Made sure remote host executes commands from sh where redirection
+ is necessary, since root's shell might be csh in some places and
+ the redirect syntax differs.
+
+Thu Aug 29 00:54:01 1991 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * tar.c (long_options): Fixed info-script long option.
+
+Mon Aug 26 16:53:50 1991 David J. MacKenzie (djm at pogo.gnu.ai.mit.edu)
+
+ * configure, Makefile.in: Only put $< in Makefiles if VPATH
+ is being used, because older makes don't understand it.
+
+Mon Aug 19 01:47:57 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * create.c: Indent '#pragma alloca' so non-ANSI compilers
+ don't choke on it.
+
+Wed Aug 14 14:10:43 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * list.c (UGSWIDTH): Increase from 11 (sort of like Unix tar) to
+ 18, so that with normal user and group names of <= 8 chars,
+ the columns never shift in a tar -t listing.
+
+Fri Aug 2 00:41:08 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * Makefile.in (dist): Include texinfo.tex and tar.info*.
+ (install): Install tar.info*.
+ * configure: Set INSTALLDATA.
+
+ * configure: Create config.status. Remove it and Makefile if
+ interrupted while creating them.
+
+ * configure: Check for +srcdir etc. arg and look for
+ Makefile.in in that directory. Set VPATH if srcdir is not `.'.
+ * Makefile.in: Add `prefix'.
+ (tar.info): New target.
+
+Tue Jul 30 17:08:04 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * configure: NEED_TZSET has become FTIME_MISSING.
+
+Mon Jul 29 19:23:10 1991 David J. MacKenzie (djm at wombat.gnu.ai.mit.edu)
+
+ * port.c [F_CHSIZE]: Additional version.
+
+Sat Jul 27 22:27:47 1991 David J. MacKenzie (djm at wombat.gnu.ai.mit.edu)
+
+ * rmt.h: Clean up ifdefs.
+
+ * makefile.pc: Fix typo.
+ port.h: Change MSDOS to __MSDOS__.
+ [__MSDOS__]: Define off_t. Include io.h and not sys/param.h.
+ [__TURBOC__]: Use void * and don't define const.
+
+Fri Jul 26 01:12:58 1991 David J. MacKenzie (djm at bleen)
+
+ * buffer.c: Rename `eof' to `hit_eof' to avoid conflict with an
+ MSDOS function.
+ * gnu.c (get_dir_contents): Return NULL, not "\0\0\0\0", on error.
+ * diffarch.c (diff_archive): Open files in binary mode.
+ Don't use or free a non-malloc'd return value from get_dir_contents.
+ * msd_dir.c [__TURBOC__]: Include stdlib.h.
+ * rmt.h: lseek returns off_t, not long.
+
+ * tar.c (describe): -X is +exclude-from, not +exclude.
+ (names_notfound): Free memory only if amiga, not !unix.
+
+ * tar.h, tar.c: Add +null option to make -T read
+ null-terminated filenames (such as those produced by GNU find
+ -print0), and disable -C option.
+ This guarantees that odd filenames will get archived.
+ * tar.c (read_name_from_file): New function.
+ (name_next): Call it instead of fgets.
+
+Wed Jul 24 11:17:48 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * create.c [_AIX]: Declare alloca.
+
+ * buffer.c (open_archive): Check for successful open before,
+ not after, fstatting the fd.
+
+Tue Jul 23 20:51:31 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * configure: Only define BSD42 if sys/file.h exists.
+ If alloca is missing and /usr/ucblib exists (SVR4), use it
+ instead of -lPW.
+
+ * port.h [!__STDC__]: #define const.
+ * gnu.c (dirent_cmp): Fix args to agree with ANSI C prototype.
+ * create.c: Declare ck_realloc.
+ * gnu.c, diffarch.c: Move check for symlinks to after port.h include.
+
+Sat Jul 20 00:03:54 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * msd_dir.[ch]: Use POSIX-style `struct dirent' instead of
+ `struct direct'.
+ * create.c, gnu.c, tar.c: Adjust callers.
+
+Thu Jul 18 00:05:01 1991 David J. MacKenzie (djm at bleen)
+
+ * port.c (ck_malloc, ck_realloc): Return PTR, not char *.
+ * gnu.c, create.c, tar.c: Fix decls.
+
+ * port.c: Don't use the preprocessor to guess missing
+ functions on Unix; let configure do it.
+ [WANT_GETWD] (getwd): Function removed; not needed because
+ getcwd is used if needed.
+ * gnu.c, tar.c: Use getcwd if POSIX.
+
+ * rtapelib.c: Use SIGTYPE instead of testing SIGNAL_VOID.
+ Default to void (more common these days) instead of int.
+
+ * tar.c, gnu.c, mangle.c: Remove VOIDSTAR defn. Use PTR instead.
+ * port.h: Define PTR.
+
+ * gnu.c, tar.c [__MSDOS__ || USG]: Remove incorrect getcwd
+ decl; put correct one in port.h [!POSIX].
+
+ * tar.c (describe): Print on stdout instead of stderr; it's
+ not so much a usage message (since you have to ask for it
+ explicitly) as on-line help, and you really need to be able to
+ page it because it's more than a screen long.
+
+ * Make #ifdefs for sys/file.h or fcntl.h, directory header,
+ sys/mtio.h consistent between files. Use NO_MTIO instead of
+ tricks with USG and HAVE_MTIO and NO_RMTIOCTL.
+ * Move decls of ANSI C and POSIX functions to port.h and
+ use standard headers to declare them if available
+ [STDC_HEADERS or POSIX].
+ * Add many missing function declarations and return types.
+ * Some places used __MSDOS__, some MSDOS; standardize on __MSDOS__.
+ * Change S_IF macros to S_IS for POSIX.
+ * port.h: Define appropriate S_IS macros if missing.
+ * port.h: Rename macros for testing exit status to conform to
+ POSIX; use the system's versions if available [POSIX].
+ * Use POSIX PATH_MAX and NAME_MAX instead of MAXPATHLEN and MAXNAMLEN.
+ * port.h: Define PATH_MAX and NAME_MAX.
+ * create.c, gnu.c, tar.c: Use ck_malloc and free instead of
+ auto arrays of size PATH_MAX or NAME_MAX, since with pathconf
+ they might not be constants.
+ * Move all definitions of O_* to port.h to reduce redundancy.
+ * Make all source files that now need to include port.h do so.
+ * port.c: Remove #undefs of WANT_* so you can use -DWANT_*
+ when compiling, instead of having to edit port.c.
+ [WANT_DUMB_GET_DATE] (get_date): Function removed.
+ Even systems without bison can get bison output and compile it.
+ [WANT_STRING] (index, rindex, bcopy, bzero, bcmp): Functions
+ removed; the translation is now done by macros in port.h.
+ * wildmat.c (wildmat): Use POSIX.2 '!' instead of '^' to negate
+ character classes.
+
+Mon Jul 15 13:47:45 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * testpad.c (main): Return type void.
+
+ * port.c [WANT_STRING]: Don't include memory.h if NO_MEMORY_H.
+
+ * create.c (dump_file) [AIX]: Fix typo, `allocate' for `alloca'.
+ * gnu.c (collect_and_sort_names): Move misplaced brace out of #ifdef.
+ From: Minh Tran-Le <TRANLE@intellicorp.com>.
+
+ * configure: Also look in sys/signal.h for signal decl.
+
+Wed Jul 10 01:42:55 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * Rename rtape_server.c to rmt.c and rtape_lib.c to rtapelib.c.
+
+ * configure, Makefile.in: $(INSTALLPROG) -> $(INSTALL).
+
+Tue Jul 9 01:38:37 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * Most files: Refer to GPL version 2.
+ * COPYING: Use version 2.
+
+ * port.c [__TURBOC__] (utime): New function.
+
+ * xmalloc: New function (just calls ck_malloc), for alloca.c
+ and bison.simple (in getdate.y output).
+
+ * Makefile.in (AUX): Include alloca.c and tcexparg.c, a
+ command line globber for Turbo C.
+
+Mon Jul 8 14:30:52 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * testpad.c: Open and write to testpad.h instead of stdout,
+ because some MS-DOS makes (Borland's at least) can't do
+ redirection in commands.
+ * Makefile.in: Don't redirect testpad output.
+
+Mon Jul 8 12:56:35 1991 Michael I Bushnell (mib at churchy.gnu.ai.mit.edu)
+
+ * buffer.c (fl_read): Missing \n in printf.
+
+Mon Jul 8 03:40:28 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * create.c, extract.c, gnu.c, diffarch.c, tar.c: Comment out
+ unused variables.
+
+ * tar.c (options): Cast get_date arg to VOIDSTAR instead of
+ `struct timeb *', since on some non-BSD systems the latter is
+ undefined.
+
+Sat Jul 6 04:53:14 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * Replace Makefile with configure, Makefile.in, and makefile.pc.
+ Update README with current compilation instructions.
+
+ * port.c [WANT_RENAME] (rename): New function.
+
+Wed Jul 3 18:10:52 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * testpad.c (main): Avoid warning from some compilers on array
+ address.
+
+ * rtape_server.c (sys_errlist): Should be declared extern.
+
+Mon Jul 1 14:14:06 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * Release of version 1.10; appropriate changes to README.
+
+ * create.c: Removed printf's about sparse files.
+
+ * Fix a misplaced quote in level-0 and change some >& into
+ 2>&1.
+
+Fri Jun 21 23:04:31 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * list.c (skip_extended_headers): Userec was being called in
+ the wrong place.
+
+Thu Jun 20 19:10:35 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * tar.h: Use ANSI prototypes for msg and msg_perror if
+ STDC_MSG is defined, even if BSD42 is also.
+
+ * Makefile: Replace DESTDIR with bindir.
+ (install): Don't install tar.texinfo. There's no standard
+ place for texinfo files, and /usr/local/man is inappropriate.
+ Add TAGS, distclean, and realclean targets and SHELL= line.
+
+ * version.c: Move old change history to bottom of ChangeLog.
+
+Wed Jun 12 12:43:58 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * rtape_lib.c (__rmt_write): #ifdef should reference
+ SIGNAL_VOID, not USG.
+
+Wed Jun 5 14:57:11 1991 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * tar.c (name_match, addname): Ugly hack to handle -C without
+ any files specified.
+ tar.h (struct name): New field for ugly hack.
+
+Mon Jun 3 14:46:46 1991 Michael I Bushnell (mib@geech.gnu.ai.mit.edu)
+
+ * testpad.c: New file to determine if we need special padding
+ in struct header in tar.h.
+
+ * tar.h (struct header): include padding if necessary, include
+ testpad.h.
+
+ * Makefile: rules to create testpad.h, etc.
+
+Wed May 22 16:02:35 1991 Michael I Bushnell (mib@churchy.gnu.ai.mit.edu)
+
+ * tar.c (options): -L takes an argument.
+
+ * rtape_lib.c (__rmt_open): add /usr/bin/nsh to the list of
+ remote shell programs.
+
+ * create.c: define MAXPATHLEN if we don't get it from a system
+ header file.
+
+ * create.c (deal_with_sparse): return a real return value if
+ we can't open the file.
+
+ * tar.c (long_options): +newer takes an argument.
+ (describe): fix printing in various trivial ways
+
+Tue May 21 17:15:19 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * tar.c (long_options): +get and +concatenate don't require arguments
+
+Mon May 20 15:55:30 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * create.c (write_eot): Don't try and write an EOF if we are
+ already at one.
+
+ * port.c (strstr): Looking for null string should return zero.
+
+Sun May 19 22:30:10 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * tar.c (options): -l doesn't take an argument
+
+ * Makefile: minor fix for SGI 4D defines from torda@scum.ethz.ch
+
+ * rtape_server.c (main.c): Suggested mod for 386/AIX from
+ Minh Tran-Le. I'm suspicious about this one.
+
+ * create.c (dump_file): Mods from Minh Tran-Le for hidden
+ files on AIX.
+ gnu.c (collect_and_sort_name, get_dir_contents): AIX hidden file mod.
+
+ * tar.c: (name_next): Mod from David Taylor to allow -C inside
+ a file list given to -T.
+
+ * Makefile: Comment describing presence of USE_REXEC.
+
+ * extract.c (extract_archive, case LF_SPARSE): zero check for
+ last element on numbytes needs to look at value after
+ converted from octal.
+
+ * port.c: Don't always demand strstr, check for HAVE_STRSTR
+ instead.
+ Makefile: Comment describing presence of HAVE_STRSTR option.
+
+Sun May 19 18:39:48 1991 David J. MacKenzie (djm at churchy.gnu.ai.mit.edu)
+
+ * port.c (get_date): Renamed from getdate, to avoid SVR4 conflict.
+ * tar.c: Call get_date instead of getdate.
+
+Fri May 10 02:58:17 1991 Noah Friedman (friedman at nutrimat)
+
+ * tar.c: added "\n\" to the end of some documentation strings
+ where they were left off.
+
+Thu May 9 17:28:54 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * Makefile: added level-0, level-1, and backup-specs to AUX.
+ * version.c: changed to 1.10 beta.
+ * README: updated for 1.10 beta release.
+
+Tue Apr 2 12:04:54 1991 Michael I Bushnell (mib at godwin)
+
+ * create.c (dump_file): HPUX's st_blocks is in 1024 byte units
+ instead of 512 like the rest of the world, so I special cased
+ it.
+ * tar.c: Undo Noah's changes.
+
+Mon Apr 1 17:49:28 1991 Noah Friedman (friedman at wookumz.gnu.ai.mit.edu)
+
+ (This ought to be temporary until things are fixed properly. )
+
+ * tar.c: (struct option long_options): flag for "sparse" zero if
+ compiling under hpux.
+ tar.c: (function options): case 'S' is a no-op if compiling under
+ hpux.
+
+Sat Mar 30 12:20:41 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * tar.h: new variable tape_length.
+
+ * tar.c (options): add new option +tape-length / -L.
+
+ * buffer.c (fl_write): Turn #ifdef TEST code for limited tape
+ length on always, for tape-length option.
+
+ * create.c (dump_file): avoid apollo lossage where S_IFIFO == S_IFSOCK.
+
+ * buffer.c: include regex.h
+ * buffer.c (fl_read, open_archive): Use regex routines for
+ volume header match.
+ * xmalloc.c: removed file; wasn't necessary.
+ * tar.c: (main) use ck_malloc instead of xmalloc.
+
+Thu Mar 28 04:05:05 1991 Noah Friedman (friedman at goldman)
+
+ * regex.c, regex.o: New links.
+ * tar.c: include regex.h.
+ * Makefile (OBJ2): Add regex.o.
+ (regex.o, tar.o): Depend on regex.h
+ (SRC2, AUX): Add the new files.
+
+Sat Mar 23 15:39:42 1991 Noah Friedman (friedman at wookumz.gnu.ai.mit.edu)
+
+ * Makefile: added default flags and options for compiling under
+ hpux.
+
+ * Added files alloca.c and xmalloc.c
+
+Sat Mar 23 14:35:31 1991 Michael I Bushnell (mib at geech.gnu.ai.mit.edu)
+
+ * port.c: Define WANT_VALLOC in HPUX.
+
+Fri Mar 15 06:20:15 1991 David J. MacKenzie (djm at geech.ai.mit.edu)
+
+ * rtape_lib.c: If USG and not HAVE_MTIO, define NO_RMTIOCTL
+ automatically.
+ (_rmt_rexec): Temporarily re-open stdin and stdout to
+ /dev/tty, to guarantee that rexec() can prompt and read the
+ login name and password from the user.
+ From pascal@cnam.cnam.fr (Pascal Meheut).
+ * Makefile: Mention -DUSE_REXEC.
+
+Fri Mar 8 20:15:11 1991 Michael I Bushnell (mib at wookumz.ai.mit.edu)
+
+ * tar.h, Makefile: Makefile CPP macro HAVE_SIZE_T might be
+ useful for some people.
+
+ * gnu.c: lstat->stat define where appropriate
+
+ * buffer.c (fl_write): keep track of amount written for +totals.
+ * tar.c, tar.h: set flag f_totals from +totals option
+ * tar.h (f_totals, tot_written): new variables
+ * tar.c (main): print total written with CMD_CREATE
+
+ * tar.c (main): return appropriate exit status
+
+Thu Jan 17 00:50:21 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * port.c: Remove a spurious `+' between functions (a remnant
+ of a context diff, apparently).
+
+Wed Jan 9 19:43:59 1991 Michael I Bushnell (mib at pogo.ai.mit.edu)
+
+ * create.c (where_is_data): Rewritten to be better, and then
+ #ifdef-ed out.
+ (deal_with_sparse): Severely pruned. Now we write or don't
+ write only complete blocks, not worrying about partial blocks.
+ This simplifies calculations, removes bugs, and elides the
+ second scan through the block. The first was zero_record, the
+ second was where_is_data.
+
+Mon Jan 7 17:13:29 1991 Michael I Bushnell (mib at wookumz.ai.mit.edu)
+
+ * create.c (deal_with_sparse): Second computation (for short
+ reads) of numbytes increment had subtraction backwards.
+ Need to handle calling where_is_data better when we did a
+ short read (it might go past the end of the read), also, set
+ sparsearray[...].offset in this case too.
+
+Fri Jan 4 12:24:38 EST 1991 Jay Fenlason (hack@ai.mit.edu)
+
+ * buffer.c Return a special error code if the archive you're
+ trying to read starts with a different label than the one specified
+ on the command line.
+
+Wed Jan 2 12:05:21 EST 1991 Jay Fenlason (hack@ai.mit.edu)
+
+ * gnu.c Prepend the current directory to the gnu_dumpfile, so that
+ -C's won't affect where the output goes. (sigh.)
+
+Tue Dec 18 18:05:59 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * (gnu.c) Don't complain if the gnudumpfile we're reading info
+ from doesn't exist.
+
+ * create.c Write out gnudumpfile after finishing writing the archive.
+
+ * tar.c Add +exclude FNAME, and make +exclude-from do what +exclude
+ used to.
+
+ Make +version an operation, not an option.
+
+ add +confirmation alias for +interactive.
+
+Tue Dec 4 13:28:08 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * tar.c (check_exclude) Don't let MUMBLE match MUMBLE.c or fooMUMBLE
+ but only foo/MUMBLE
+
+ * Add the name mangler (mangle.c, plus changes to create.c and
+ extract.c)
+
+ * extract.c Three small patches from Chip Salzenberg
+ (tct!chip@uunet.uu.net)
+
+ Don't complain when extracting a link, IFF it already exists.
+
+ Don't complain when extracting a directory IFF it already
+ exists.
+
+ Don't add u+wx to directories when running as root.
+
+ * gnu.c Some changes from Chip Salzenberg to make
+ +listed-incremental work.
+
+ * port.c Add the F_FREESP emulation of the ftruncate syscall.
+
+Wed Nov 21 15:57:07 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ Remove excess \n from lots of msg() calls.
+
+Mon Nov 19 14:09:43 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * tar.c Rename +volume to +label
+
+Fri Nov 16 15:43:44 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * tar.c (describe): Include the default values for -b and -f
+ (as set in the Makefile) in the message.
+
+Thu Nov 15 13:36:45 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * extract.c (extract_archive) Do the utime() call before the
+ chmod() call, 'cuz some versions of utime() trash the file's mode
+ bits.
+
+ * list.c (read_and) Call do_something on volume headers and
+ multivol files even if they don't match the names we're looking for,
+ etc. . .
+
+Tue Nov 6 13:51:46 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * port.c (un-quote-string) Don't try to write a null
+ if there's already one there.
+
+Thu Nov 1 14:58:57 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * buffer.c (new_volume) fflush(msg_file) before reading for
+ confirmation on new volume. On EOF or error, print error msg and
+ abort.
+
+Mon Oct 29 12:06:35 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * getdate.y Use new version of getdate().
+
+ * tar.c (name_add) Use sizeof(char *) instead of sizeof(int)
+
+ * README give the correct return address.
+
+Thu Oct 25 16:03:58 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ rtape_lib.c Change RMTIOCTL to NO_RMTIOCTL, so it is on by default.
+
+ rmt.h Add _isrmt() #define for NO_REMOTE case.
+
+ gnu.c Add forward reference for add_dir_name().
+
+Tue Oct 16 11:04:52 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ 1.09 New -G file implementation of gnu-dump stuff.
+
+ * tar.c (name_add) Get the calls to ck_realloc and ck_malloc right.
+
+Thu Oct 11 11:23:38 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * gnu.c Fix a couple of typos.
+
+Wed Sep 19 13:35:03 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * getdate.y [USG] (ftime): Use `daylight' unless
+ DAYLIGHT_MISSING is defined.
+
+Mon Sep 17 18:04:21 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * gnu.c (gnu_restore) Don't use a passed char* for the
+ file name, use skipcrud+head->header.name, just like everything
+ else does. This means that gnu_restore will still work with
+ small buffers, etc.
+
+Thu Sep 13 15:01:17 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * tar.c (add_exclude) Don't bus-error if the exclude file doesn't
+ end with a newline.
+
+Sun Sep 9 22:35:27 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * Makefile (dist): Remove .fname when done.
+
+Thu Sep 6 12:48:58 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * gnu.c (gnu_restore) Remember to skip_file() over the directory
+ contents, even if we don't have to do anything with them.
+
+ * create.c extract.c diffarch.c Free sparsearray after we're done
+ with it.
+
+Tue Sep 4 10:18:50 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * Makefile Include gnu.c in dist
+
+ * gnu.c move add_dir above read_dir_file so that cc doesn't complain
+ about add_dir returning void.
+
+Sun Sep 2 20:46:34 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * getdate.y: Declare some more functions and add storage
+ classes where omitted to shut compiler up.
+ [USG] (ftime): Don't use extern var `daylight'; appears that
+ some systems don't have it.
+
+Wed Aug 29 00:05:06 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * getdate.y (lookup): In the code that allows `Aug.' to be
+ recognized as `Aug', don't chop off the final `.' from words
+ like `a.m.', so they can be recognized.
+
+Thu Aug 16 11:34:07 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * buffer.c (open_archive) If -O, write verbosity to stderr
+ instead of stdout.
+
+Fri Aug 10 12:29:28 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * getdate.y Handle an explicit DST in the input string.
+ A dozen line patch from Per Foreby (perf@efd.lth.se).
+
+Mon Jul 16 13:05:11 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * tar.c rename -g -G +incremental, +listed-incremental, etc.
+
+Fri Jul 13 14:10:33 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * tar.c Make +newer and +newer-mtime work according to their names.
+
+ * gnu.c If +newer or +newer-mtime, use the time specified on the
+ command line.
+
+ * buffer.c, create.c Add test to see if dimwit is trying to
+ archive the archive.
+
+ * tar.c (long_options[]) re-ordered, so that groups of similar
+ options are next to each other. . . I think.
+
+ (describe) Modified to more closely reflect reality.
+
+Fri Jul 6 13:13:59 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * tar.c add compile-time option for SYS V (?) style
+ tape-drive names /dev/rmt/{n}[lmh]
+
+ * tar.c Fix getopt-style stuff so that -C always works correctly.
+
+ * gnu.c, tar.c make filename to -G optional.
+
+ * {all over}, replace some fprintf(stderr...) calls with calls
+ to msg().
+
+ * port.c Make -Dmumble_MSG option on command line override
+ internal assumptions.
+
+ * Makefile Mention -Dmumble_MSG options
+
+Fri Jul 6 02:35:31 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * tar.c (options): Don't change `c' if it is 0, as getopt now
+ handles that internally.
+
+Mon Jul 2 15:21:13 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * gnu.c (new file) Moved all the f_gnudump stuff here where we
+ can keep track of it easier. Also made -G take a file name where it
+ stores the inode information about directories so that we can
+ detect moved directories.
+
+ * create.c (dump_file) Changed slightly to work with the new
+ f_gnudump.
+
+ * tar.c Moved the f_gnudump stuff to gnu.c
+
+ * tar.c, extract.c added the +do-chown option, which forces tar
+ to always try to chown the created files to their original owners.
+
+ * version.c New version 1.09
+
+Sun Jun 24 14:26:28 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * create.c: Change ifdefs for directory library header
+ selection to be like the ones in tar.c.
+ * Makefile [Xenix]: Link with -ldir to get the dirent.h
+ directory library.
+
+Thu Jun 7 03:31:51 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * Makefile, buffer.c, diffarch.c: Change MTIO symbol to HAVE_MTIO
+ because SCO Xenix defines 'MTIO' for an incompatible tape driver
+ system in a file included by termio.h.
+ * tar.h: Don't define size_t for Xenix.
+
+Tue Jun 5 11:38:00 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * create.c (dump_file) Only print the
+ "... is on a different filesystem..." if f_verbose is on.
+ also add a case for S_IFSOCK and treat it like a FIFO.
+ (Not sure if that's the right thing to do or not, but it's better
+ than all those Unknown File Type msgs.)
+
+Thu May 31 19:25:36 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * port.c Use #ifdef sparc instead of #ifdef SPARC since
+ the lowercase version is defined, and the uppercase one isn't.
+
+Tue May 22 11:49:18 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * port.c (ck_malloc) if size==0 pretend size=1
+ (ck_realloc) if(!ptr) call ck_malloc instead.
+
+Tue May 15 12:05:45 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * diffarch.c (diff_archive) If not f_absolute_paths, and attempt to
+ open a file listed in the archive fails, try /filename also. This will
+ allow diff to open the wrong file if both /filename and filename exist,
+ but there's nothing we can do about that.
+
+Fri May 11 16:17:43 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * Makefile, Describe new -DMTIO option.
+
+ * buffer.c diffarch.c Change ifdefs slightly, so that
+ -DMTIO will include sys/mtio.h even if USG is defined.
+ This is for HPUX and similar BSD/USG crossovers.
+
+Tue May 8 13:14:54 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+ * update.c (update_archive) Call reset_eof() when appropriate.
+
+ * buffer.c (reset_eof) New function, that turns off EOF flag, and
+ re-sets the ar_record and ar_last pointers. This will allow
+ 'tar rf non-existent-file' to not core-dump.
+
+Fri May 4 14:05:31 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * tar.c: Recognize the +sparse option. It was documented, but
+ only the short form (-S) was actually recognized.
+
+Tue Apr 17 21:34:14 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * create.c Don't access location 0 if ->dir_contents is null.
+
+Wed Apr 11 17:30:03 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * buffer.c (flush_archive, close_archive, new_volume) Always check
+ the return value of rmtclose(), and only give a warning msg if it is
+ <0. Some device drivers (including Sun floppy disk, and HP
+ streaming tape) return -1 after an IO error (or something like that.)
+
+Fri Mar 23 00:06:30 1990 Jim Kingdon (kingdon at mole.ai.mit.edu)
+
+ * tar.c (long_options): Make it so +append +extract +list +update
+ +catenate and +delete don't take arguments.
+
+Mon Mar 12 13:33:53 EST 1990
+
+ * buffer.c (open_archive, fl_write) Set the mtime of the volume
+ header to the current time.
+
+Wed Mar 7 14:10:10 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * buffer.c Fix +compress-block A two character patch from
+ Juha Sarlin (juha@tds.kth.se)
+ Replace #ifdef __GNU__ with #ifdef __STDC__
+ (new_volume) If open of new archive fails, ask again
+ (Is probably user error.)
+
+ * tar.c Replace #ifdef __GNU__ with #ifdef __STDC__
+
+ * port.c Clean up #ifdef and #defines a bit.
+ (quote_copy_string) Sometimes the malloc'd buffer
+ would be up to two characters too short.
+
+ * extract.c (extract_archive) Don't declare ind static.
+
+ * create.c (dump_file) Don't declare index_offset static.
+
+ * diffarch.c Remove diff_name variable, and always use
+ head->header.name, which will always work, unlike diff_name, which
+ becomes trash when the next block is read in.
+
+Thu Mar 1 13:43:30 EST 1990 Jay Fenlason (hack@wookumz.ai.mit.edu)
+
+ * Makefile Mention the -NO_REMOTE option.
+ * port.c Fix typo, and define WANT_FTRUNCATE on i386 machines.
+
+Mon Feb 26 17:44:53 1990 Jim Kingdon (kingdon at pogo.ai.mit.edu)
+
+ * getdate.y: Declare yylex and yyerror as static.
+ #define yyparse to getdate_yyparse.
+
+Sun Feb 25 20:47:23 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * tar.c: Remove +old option, since it is a valid abbreviation of
+ +old-archive, which does the same thing.
+ (describe): A few small cleanups in message.
+
+Mon Feb 5 14:29:21 EST 1990 Jay Fenlason (hack@wookumz)
+
+ * port.c define LOSING_MSG on sparc, since doprnt_msg doesn't work.
+ Fix typo in #ifdef WANT_GETWD
+
+Fri Jan 26 16:11:20 EST 1990 Jay Fenlason (hack@wookumz)
+
+ 1.08 Sparse file support added. Also various other features.
+
+ * diffarch.c (compare_chunk) Include correct arguments in
+ a call to fprintf() for an error msg.
+ (compare_chunks, compare_dir) First argument is a long, not an int.
+
+ * tar.c (options) Use tar variable (argv[0]) as the name to print
+ in an error msg, instead of a constant "tar".
+ (confirm) Use external variable char TTY_NAME[] for name of file
+ to open for confirmation input.
+
+ * buffer.c (new_volume) Ditto.
+
+ * port.c Add declaration for TTY_NAME[].
+
+ * rmt.h Add long declarations for lseek() and __rmt_lseek();
+
+Tue Jan 23 14:06:21 EST 1990 Jay Fenlason (hack@wookumz)
+ * tar.c, create.c Create the +newer-mtime option, which is like
+ +newer, but only looks for files whose mtime is newer than the
+ given date.
+
+ * rtape_lib.c Make *both* instances of signal-handler stuff use
+ void (*foo)() on USG systems.
+
+Thu Jan 11 14:03:45 EST 1990 Jay Fenlason (hack@wookumz)
+
+ * getdate.y Parse European dates of the form YYMMDD.
+ In ftime() Init timezone by calling localtime(), and remember that
+ timezone is in seconds, but we want timeb->timezone to be in minutes.
+ This small patch from Joergen Haegg (jh@aahas.se)
+
+ * rtape_lib.c (__rmt_open) Also look for /usr/bsd/rsh.
+ Declare signal handler as returning void instead of int if USG is
+ defined.
+
+ * port.c Declare WANT_GETWD for SGI 4-D IRIS.
+
+ * Makefile Include defines for SGI 4D version. This is a simple
+ patch from Mike Muuss (mike@brl.mil).
+
+ * buffer.c (fl_read) Work properly on broken Ultrix systems where
+ read() returns -1 with errno==ENOSPC on end of tape. Correctly go
+ on to the next volume if f_multivol.
+
+ * list.c (list_archive,print_header) Flush msg_file after printing
+ messages.
+
+ * port.c Delete unused references to alloca().
+ Don't crash if malloc() returns zero in quote_copy_string.
+ Flush stderr in msg() and msg_perror().
+
+ * tar.c Flush msg_file after printing confirmation msg.
+
+Wed Jan 10 01:58:46 1990 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * tar.c (main): Change -help option and references to it to +help,
+ and remove suggestion to run info (which is unreleased, so not
+ likely to be of any help).
+
+Tue Jan 9 16:16:00 EST 1990 Jay Fenlason (hack @wookumz)
+
+ * create.c (dump_file) Close file descriptor if start_header()
+ fails.
+ (dump_file) Change test for ./ ness to not think that
+ .{any character} is a ./ These are both trivial changes from
+ Piercarlo "Peter" Grandi pcg%cs.aber.ac.uk@nsfnet-relay.ac.uk
+
+ * diffarch.c (diff_init) Print correct number of bytes in error
+ message.
+
+Tue Jan 9 03:19:49 1990 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * Makefile: Add comment at top noting that two source files also
+ contain #defines that might need to be changed by hand.
+
+ * create.c, diffarch.c, extract.c: Change L_SET to 0 in lseek
+ calls, because only BSD defines it.
+ * create.c (dump_file): Make sparse file checking code conditional
+ on BSD42 because it uses st_blocks, which the other systems lack.
+
+Tue Jan 2 13:35:56 EST 1990 Jay Fenlason (hack@gnu)
+
+ * port.c (quote_copy_string) Fix so it doesn't scramble memory if
+ the last character is non-printable. A trivial fix from Kian-Tat Lim
+ (ktl@wag240.caltech.edu).
+
+Tue Dec 19 11:19:37 1989 Jim Kingdon (kingdon at pogo)
+
+ * port.c [BSD42]: Define DOPRNT_MSG.
+ tar.h [BSD42]: Do not prototype msg{,_perror}.
+
+Fri Dec 8 11:02:47 EST 1989 Jay Fenlason (hack@gnu)
+
+ * create.c (dump_file) Remove typo in msg.
+
+Fri Dec 1 19:26:47 1989 David J. MacKenzie (djm at trix)
+
+ * Makefile: Remove comments referring to certain systems lacking
+ getopt, since it is now provided always and needed by all systems.
+
+ * port.c: Remove copy of getopt.c, as it is now linked in
+ separately to always get the current version.
+
+ * tar.c: Rename +cat-tars option to +catenate or +concatenate,
+ and +local-filesystem to +one-file-system (preferred by rms
+ and used in GNU cp for the same purpose).
+ (describe): Reflect changes.
+
+Tue Nov 28 04:28:26 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * port.c: Move declaration of alloca into #else /* sparc */
+ so it will compile on sparcs.
+
+Mon Nov 27 15:17:08 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * tar.c (options): Remove -version option (replaced by +version).
+ (describe): Mention long options.
+
+Sat Nov 25 04:25:23 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * getoldopt.c (getoldopt): Make `opt_index' argument a pointer to
+ an int, not char.
+
+ * tar.c: Modify long options per rms's suggestions:
+ Make preserve-permissions an alias for same-permissions.
+ Make preserve-order an alias for same-order.
+ Define preserve to mean both of those combined.
+ Make old an alias for old-archive.
+ Make portability an alias for old-archive, also.
+ Rename sym-links to dereference.
+ Rename gnudump to incremental.
+ Rename filename to file.
+ Make compare an alias for diff. Leave diff but prefer compare.
+ Rename blocking-factor to block-size.
+ Rename chdir to directory.
+ Make uncompress an alias for compress.
+ Rename confirm to interactive.
+ Make get an alias for extract.
+ Rename volume-header to volume.
+
+ Also make +version an alias for -version.
+
+ (options): Shorten code that interprets long options by using
+ the equivalent short options' code. This also makes it tons
+ easier to change the long options.
+
+ (describe): Make usage message more internally consistent
+ stylistically.
+
+Mon Nov 20 14:55:39 EST 1989 hack@ai.mit.edu
+
+ * list.c (read_and) Call check_exclude() to see if the files
+ should be skipped on extract or list.
+
+Thu Nov 9 18:59:32 1989 Jim Kingdon (kingdon at hobbes.ai.mit.edu)
+
+ * buffer.c (fl_read): Fix typos in error message
+ "tar EOF not on block boundary".
+
+Mon Oct 23 13:09:40 EDT 1989 (hack@ai.mit.edu)
+
+ * tar.c (long_options[]) Add an option for blocked compression.
+
+Thu Oct 19 13:38:16 EDT 1989 (hack@ai.mit.edu)
+
+ * buffer.c (writeerror) Print a more useful error msg.
+
+Wed Sep 27 18:33:41 EDT 1989 (hack@ai.mit.edu)
+
+ * tar.c (main) Mention "tar -help" if the luser types a non-workable
+ set of options.
+
+Mon Sep 11 15:03:29 EDT 1989 (hack@ai.mit.edu)
+
+ * tar.c (options) Have -F correctly set info_script.
+
+Tue Aug 29 12:58:06 EDT 1989 (hack@ai.mit.edu)
+
+ * Makefile Include ChangeLog in tar.tar and tar.tar.Z
+
+Mon Aug 28 17:42:24 EDT 1989 (hack@ai.mit.edu)
+
+ * tar.c (options) Made -F imply -M
+ Also remind tar that the -f option takes an argument!
+
+ * Modified -F option to make it do what (I think) it
+ should. e.g, if you say -F, tar won't send a msg to
+ msg_file and wait for a <return> It'll just run the program
+ it was given, and when the prog returns, the new tape had
+ *better* be ready. . .
+
+ * buffer.c (open_archive) Give error message and abort if
+ the luser didn't give an archive name.
+
+Fri Aug 25 20:05:27 EDT 1989 Joy Kendall (jak at hobbes)
+
+ * Added code to make a new option to run a specified script
+ at the end of each tape in a multi-volume backup. Changed:
+ tar.c: made new switch, -F, and new long-named option,
+ "info-script". Code is where you would expect.
+ tar.h: added flag f_run_script_at_end, and an extern char *
+ called info_script, which optarg gets set to.
+ buffer.c: line 1158 in new_volume(): if f_run_script_at_end
+ is set, we give info_script to system(), otherwise we do
+ what we've always done. **FIXME** I'm not sure if that's all
+ that has to be done here.
+
+Thu Aug 24 10:09:38 EDT 1989 Joy Kendall (jak at spiff)
+(These changes made over the course of 6/89 - 8/89)
+
+ * diffarch.c: diff_archive: Added switches for LF_SPARSE in the
+ case statements that needed it. Also, skip any extended headers
+ if we need to when we skip over a file. (need to change
+ the bit about, if the size doesn't agree AND the file is NOT
+ sparse, then there's a discrepancy, because I added another
+ field to the header which should be able to deal with the
+ sizes) If the file is sparse, call the added routine
+ "diff_sparse_files" to compare. Also added routine
+ "fill_in_sparse_array".
+
+ * extract.c: extract_archive: added the switch LF_SPARSE
+ to the case statement as needed, and code to treat the
+ sparse file. At label "again_file", modified opening the
+ file to see if we should have O_APPEND be one of the modes.
+ Added code at label "extract_file" to call the new routine
+ "extract_sparse_file" when we have an LF_SPARSE flag.
+
+ Note: really should erase the commented-out code in there,
+ because it's confusing.
+
+ * update.c: made sure that if a file needed to be "skipped"
+ over, it would check to see if the linkflag was sparse, and
+ if so, would then make sure to skip over any "extended
+ headers" that might come after the header itself. Do so by
+ calling "skip_extended_headers".
+
+ * create.c: create_archive: added code to detect a sparse
+ file when in the long case statement. Added ways to detect
+ extended headers, and label "extend" (ack! should get rid of
+ that, is atrocious). Call the new routine "finish_sparse_file"
+ if the linkflag is LF_SPARSE to write the info to the tape.
+ Also added routines "init_sparsearray", "deal_with_sparse",
+ "clear_buffer", "where_is_data", "zero_record", and
+ "find_new_file_size".
+
+ * tar.h: Added the #define's SPARSE_EXT_HDR and
+ SPARSE_IN_HDR. Added the struct sparse and the struct
+ sp_array. Added the linkflag LF_SPARSE. Changed the tar
+ header in several ways:
+ - added an array of struct sparse's SPARSE_IN_HDR long
+ - added a char flag isextended
+ - added a char string realsize to store the true
+ size of a sparse file
+ Added another choice to the union record called a
+ struct extended_header, which is an array of 21 struct
+ sparse's and a char isextended flag. Added flag
+ f_sparse_file to list of flags.
+
+ * tar.c: added long-named options to make tar compatible with
+ getopt_long, changed Makefile.
+
+... ... .. ..:..:.. ... .... Jay Fenlason (hack@ai.mit.edu)
+
+ 1.07 New version to go on beta tape with GCC 1.35
+ Better USG support. Also support for __builtin_alloca
+ if we're compiling with GCC.
+ diffarch.c: Include the correct header files so MTIOCTOP
+ is defined.
+ tar.c: Don't print the verbose list of options unless
+ given -help. The list of options is *way* too long.
+
+ 1.06 Use STDC_MSG if __STDC__ defined
+ ENXIO means end-of-volume in archive (for the UNIX PC)
+ Added break after volume-header case (line 440) extract.c
+ Added patch from arnold@unix.cc.emory.edu to rtape_lib.c
+ Added f_absolute_paths option.
+ Deleted references to UN*X manual sections (dump(8), etc)
+ Fixed to not core-dump on illegal options
+ Modified msg_perror to call perror("") instead of perror(0)
+ patch so -X - works
+ Fixed tar.c so 'tar cf - -C dir' doesn't core-dump
+ tar.c (name_match): Fixed to chdir() to the appropriate
+ directory if the matching name's change_dir is set. This
+ makes tar xv -C foo {files} work.
+
+ 1.05 A fix to make confirm() work when the archive is on stdin
+ include 'extern FILE *msg_file;' in pr_mkdir(), and fix
+ tar.h to work with __STDC__
+
+ Added to port.c: mkdir() ftruncate() Removed: lstat()
+ Fixed -G to work with -X
+ Another fix to tar.texinfo
+ Changed tar.c to say argv[0]":you must specify exactly ...
+ buffer.c: modified child_open() to keep tar from hanging when
+ it is done reading/writing a compressed archive
+ added fflush(msg_file) before printing error messages
+ create.c: fixed to make link_names non-absolute
+
+ 1.04 Added functions msg() and msg_perror() Modified all the
+ files to call them. Also checked that all (I hope)
+ calls to msg_perror() have a valid errno value
+ (modified anno() to leave errno alone), etc
+ Re-fixed the -X option. This time for sure. . .
+ re-modified the msg stuff. flushed anno() completely
+ Modified the directory stuff so it should work on sysV boxes
+ added ftime() to getdate.y
+ Fixed un_quote_string() so it won't wedge on \" Also fixed
+ \ddd (like \123, etc)
+ More fixes to tar.texinfo
+
+ 1.03 Fixed buffer.c so 'tar tzf NON_EXISTENT_FILE' returns an error
+ message instead of hanging forever
+ More fixes to tar.texinfo
+
+ 1.02 Fixed tar.c so 'tar -h' and 'tar -v' don't cause core dump
+ Also fixed the 'usage' message to be more up-to-date.
+ Fixed diffarch.c so verify should compile without MTIOCTOP
+ defined
+
+ 1.01 Fixed typos in tar.texinfo
+ Fixed a bug in the #define for rmtcreat()
+ Fixed the -X option to not call realloc() of 0.
+
+ Version 1.00: version.c added. -version option added
+ Installed new version of the remote-tape library
+ Added -help option
+
+Local Variables:
+mode: indented-text
+left-margin: 8
+version-control: never
+End:
diff --git a/gnu/usr.bin/tar/Makefile b/gnu/usr.bin/tar/Makefile
new file mode 100644
index 000000000000..810fe3b7a74d
--- /dev/null
+++ b/gnu/usr.bin/tar/Makefile
@@ -0,0 +1,14 @@
+PROG= tar
+SRCS= buffer.c create.c diffarch.c extract.c fnmatch.c getdate.y \
+ getoldopt.c getopt.c getopt1.c gnu.c list.c mangle.c names.c port.c \
+ regex.c rtapelib.c tar.c update.c version.c
+CFLAGS+= -DRETSIGTYPE=void -DDIRENT=1 -DHAVE_SYS_MTIO_H=1 -DHAVE_UNISTD_H=1
+CFLAGS+= -DHAVE_GETGRGID=1 -DHAVE_GETPWUID=1 -DHAVE_STRING_H=1
+CFLAGS+= -DHAVE_LIMITS_H=1 -DHAVE_STRSTR=1 -DHAVE_VALLOC=1 -DHAVE_MKDIR=1
+CFLAGS+= -DHAVE_MKNOD=1 -DHAVE_RENAME=1 -DHAVE_FTRUNCATE=1 -DHAVE_GETCWD=1
+CFLAGS+= -DHAVE_VPRINTF=1 -DNEEDPAD -I${.CURDIR}
+CFLAGS+= -DDEF_AR_FILE=\"/dev/rst0\" -DDEFBLOCKING=20
+NOMAN=noman
+
+.include <bsd.prog.mk>
+.include "../../usr.bin/Makefile.inc"
diff --git a/gnu/usr.bin/tar/Makefile.gnu b/gnu/usr.bin/tar/Makefile.gnu
new file mode 100644
index 000000000000..a03617ae3666
--- /dev/null
+++ b/gnu/usr.bin/tar/Makefile.gnu
@@ -0,0 +1,185 @@
+# Generated automatically from Makefile.in by configure.
+# Un*x Makefile for GNU tar program.
+# Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#### Start of system configuration section. ####
+
+srcdir = .
+VPATH = .
+
+# If you use gcc, you should either run the fixincludes script that
+# comes with it or else use gcc with the -traditional option. Otherwise
+# ioctl calls will be compiled incorrectly on some systems.
+CC = gcc
+YACC = bison -y
+INSTALL = /usr/local/bin/install -c
+INSTALL_PROGRAM = $(INSTALL)
+INSTALL_DATA = $(INSTALL) -m 644
+
+# Things you might add to DEFS:
+# -DSTDC_HEADERS If you have ANSI C headers and libraries.
+# -DHAVE_UNISTD_H If you have unistd.h.
+# -DHAVE_STRING_H If you don't have ANSI C headers but have string.h.
+# -DHAVE_LIMITS_H If you have limits.h.
+# -DBSD42 If you have sys/dir.h (unless you use -DPOSIX),
+# sys/file.h, and st_blocks in `struct stat'.
+# -DDIRENT If you have dirent.h.
+# -DSYSNDIR Old Xenix systems (sys/ndir.h).
+# -DSYSDIR Old BSD systems (sys/dir.h).
+# -DNDIR Old System V systems (ndir.h).
+# -DMAJOR_IN_MKDEV If major, minor, makedev defined in sys/mkdev.h.
+# -DMAJOR_IN_SYSMACROS If major, minor, makedev defined in sys/sysmacros.h.
+# -DRETSIGTYPE=int If your signal handlers return int, not void.
+# -DHAVE_SYS_MTIO_H If you have sys/mtio.h (magtape ioctls).
+# -DHAVE_SYS_GENTAPE_H If you have sys/gentape.h (ISC magtape ioctls).
+# -DHAVE_NETDB_H To use rexec for remote tape operations
+# instead of forking rsh or remsh.
+# -DNO_REMOTE If you have neither a remote shell nor rexec.
+# -DHAVE_VPRINTF If you have vprintf function.
+# -DHAVE_DOPRNT If you have _doprnt function (but lack vprintf).
+# -DHAVE_FTIME If you have ftime system call.
+# -DHAVE_STRSTR If you have strstr function.
+# -DHAVE_VALLOC If you have valloc function.
+# -DHAVE_MKDIR If you have mkdir and rmdir system calls.
+# -DHAVE_MKNOD If you have mknod system call.
+# -DHAVE_RENAME If you have rename system call.
+# -DHAVE_GETCWD If not POSIX.1 but have getcwd function.
+# -DHAVE_FTRUNCATE If you have ftruncate system call.
+# -DV7 On Version 7 Unix (not tested in a long time).
+# -DEMUL_OPEN3 If you lack a 3-argument version of open, and want
+# to emulate it with system calls you do have.
+# -DNO_OPEN3 If you lack the 3-argument open and want to
+# disable the tar -k option instead of emulating open.
+# -DXENIX If you have sys/inode.h and need it to be included.
+
+DEF_AR_FILE = /dev/rst0
+DEFBLOCKING = 20
+DEFS = -DRETSIGTYPE=void -DDIRENT=1 -DHAVE_SYS_MTIO_H=1 -DHAVE_UNISTD_H=1 -DHAVE_GETGRGID=1 -DHAVE_GETPWUID=1 -DHAVE_STRING_H=1 -DHAVE_LIMITS_H=1 -DHAVE_STRSTR=1 -DHAVE_VALLOC=1 -DHAVE_MKDIR=1 -DHAVE_MKNOD=1 -DHAVE_RENAME=1 -DHAVE_FTRUNCATE=1 -DHAVE_GETCWD=1 -DHAVE_VPRINTF=1 -DDEF_AR_FILE=\"$(DEF_AR_FILE)\" -DDEFBLOCKING=$(DEFBLOCKING)
+
+# Set this to rtapelib.o unless you defined NO_REMOTE, in which case
+# make it empty.
+RTAPELIB = rtapelib.o
+LIBS =
+
+CFLAGS = -g
+LDFLAGS = -g
+
+prefix = /usr/bin
+exec_prefix = $(prefix)
+
+# Prefix for each installed program, normally empty or `g'.
+binprefix =
+
+# The directory to install tar in.
+bindir = $(exec_prefix)/bin
+
+# Where to put the rmt executable.
+libdir = /sbin
+
+# The directory to install the info files in.
+infodir = $(prefix)/info
+
+#### End of system configuration section. ####
+
+SHELL = /bin/sh
+
+SRC1 = tar.c create.c extract.c buffer.c getoldopt.c update.c gnu.c mangle.c
+SRC2 = version.c list.c names.c diffarch.c port.c fnmatch.c getopt.c malloc.c
+SRC3 = getopt1.c regex.c getdate.y getdate.c alloca.c
+SRCS = $(SRC1) $(SRC2) $(SRC3)
+OBJ1 = tar.o create.o extract.o buffer.o getoldopt.o update.o gnu.o mangle.o
+OBJ2 = version.o list.o names.o diffarch.o port.o fnmatch.o getopt.o
+OBJ3 = getopt1.o regex.o getdate.o $(RTAPELIB)
+OBJS = $(OBJ1) $(OBJ2) $(OBJ3)
+AUX = README INSTALL NEWS COPYING ChangeLog Makefile.in makefile.pc \
+ configure configure.in \
+ tar.h fnmatch.h pathmax.h port.h open3.h getopt.h regex.h \
+ rmt.h rmt.c rtapelib.c \
+ msd_dir.h msd_dir.c tcexparg.c \
+ level-0 level-1 backup-specs dump-remind getpagesize.h
+# tar.texinfo tar.info* texinfo.tex \
+
+all: tar rmt
+# tar.info
+
+.c.o:
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) $(DEFS) -I$(srcdir) -I. $<
+
+tar: $(OBJS)
+ $(CC) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+
+rmt: rmt.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(srcdir)/rmt.c $(LIBS)
+
+tar.info: tar.texinfo
+ makeinfo $(srcdir)/tar.texinfo
+
+install: all
+ $(INSTALL_PROGRAM) tar $(bindir)/$(binprefix)tar
+ -test ! -f rmt || $(INSTALL_PROGRAM) rmt $(libdir)/rmt
+# for file in $(srcdir)/tar.info*; \
+# do $(INSTALL_DATA) $$file $(infodir)/$$file; \
+# done
+
+uninstall:
+ rm -f $(bindir)/$(binprefix)tar $(infodir)/tar.info*
+ -rm -f $(libdir)/rmt
+
+$(OBJS): tar.h pathmax.h port.h
+regex.o buffer.o tar.o: regex.h
+tar.o fnmatch.o: fnmatch.h
+
+getdate.c: getdate.y
+ $(YACC) $(srcdir)/getdate.y
+ mv y.tab.c getdate.c
+# getdate.y has 8 shift/reduce conflicts.
+
+TAGS: $(SRCS)
+ etags $(SRCS)
+
+clean:
+ rm -f *.o tar rmt core
+mostlyclean: clean
+
+distclean: clean
+ rm -f Makefile config.status
+
+realclean: distclean
+ rm -f TAGS *.info* getdate.c y.tab.c
+
+shar: $(SRCS) $(AUX)
+ shar $(SRCS) $(AUX) | gzip > tar-`sed -e '/version_string/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q version.c`.shar.z
+
+dist: $(SRCS) $(AUX)
+ echo tar-`sed -e '/version_string/!d' -e 's/[^0-9.]*\([0-9.]*\).*/\1/' -e q version.c` > .fname
+ -rm -rf `cat .fname`
+ mkdir `cat .fname`
+ for file in $(SRCS) $(AUX); do \
+ ln $$file `cat .fname` || cp $$file `cat .fname`; done
+ tar chzf `cat .fname`.tar.z `cat .fname`
+ -rm -rf `cat .fname` .fname
+
+tar.zoo: $(SRCS) $(AUX)
+ -rm -rf tmp.dir
+ -mkdir tmp.dir
+ -rm tar.zoo
+ for X in $(SRCS) $(AUX) ; do echo $$X ; sed 's/$$/ /' $$X > tmp.dir/$$X ; done
+ cd tmp.dir ; zoo aM ../tar.zoo *
+ -rm -rf tmp.dir
+
+# Prevent GNU make v3 from overflowing arg limit on SysV.
+.NOEXPORT:
diff --git a/gnu/usr.bin/tar/README b/gnu/usr.bin/tar/README
new file mode 100644
index 000000000000..4b577e78eb70
--- /dev/null
+++ b/gnu/usr.bin/tar/README
@@ -0,0 +1,40 @@
+Hey! Emacs! Yo! This is -*- Text -*- !!!
+
+This is GNU tar 1.11.2. Please send bug reports, etc., to
+bug-gnu-utils@prep.ai.mit.edu. This is a beta-test release. Please
+try it out. There is no manual; the release of version 1.12 will
+contain a manual.
+
+GNU tar is based heavily on John Gilmore's public domain tar, but with
+added features. The manual is currently being written.
+
+This distribution also includes rmt, the remote tape server (which
+normally must reside in /etc). The mt tape drive control program is
+in the GNU cpio distribution.
+
+See the file INSTALL for compilation and installation instructions for Unix.
+See the file NEWS for information on all that is new in this version
+of tar.
+
+makefile.pc is a makefile for Turbo C 2.0 on MS-DOS.
+
+Various people have been having problems using floppies on a NeXT. In
+order to have them work right, you need to kill the automounting
+program which tries to mount floppies as soon as they are added.
+
+If you want to do incremental dumps, use the distributed backup
+scripts. They are what we use at the FSF to do all our backups. Most
+importantly, do not use --incremental (-G) or --after-date (-N) or
+--newer-mtime to do incremental dumps. The only option that works
+correctly for this purpose is --listed-incremental. (When extracting
+incremental dumps, use --incremental (-G).)
+
+If your system needs to link with -lPW to get alloca, but has
+rename in the C library (so HAVE_RENAME is defined), -lPW might
+give you an incorrect version of rename. On HP-UX this manifests
+itself as an undefined data symbol called "Error" when linking cp, ln,
+and mv. If this happens, use `ar x' to extract alloca.o from libPW.a
+and `ar rc' to put it in a library liballoca.a, and put that in LIBS
+instead of -lPW. This problem does not occur when using gcc, which
+has alloca built in.
+
diff --git a/gnu/usr.bin/tar/buffer.c b/gnu/usr.bin/tar/buffer.c
new file mode 100644
index 000000000000..e0ffc2d28654
--- /dev/null
+++ b/gnu/usr.bin/tar/buffer.c
@@ -0,0 +1,1584 @@
+/* Buffer management for tar.
+ Copyright (C) 1988, 1992, 1993 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * Buffer management for tar.
+ *
+ * Written by John Gilmore, ihnp4!hoptoad!gnu, on 25 August 1985.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#ifndef STDC_HEADERS
+extern int errno;
+#endif
+#include <sys/types.h> /* For non-Berkeley systems */
+#include <signal.h>
+#include <time.h>
+time_t time ();
+
+#ifdef HAVE_SYS_MTIO_H
+#include <sys/ioctl.h>
+#include <sys/mtio.h>
+#endif
+
+#ifdef BSD42
+#include <sys/file.h>
+#else
+#ifndef V7
+#include <fcntl.h>
+#endif
+#endif
+
+#ifdef __MSDOS__
+#include <process.h>
+#endif
+
+#ifdef XENIX
+#include <sys/inode.h>
+#endif
+
+#include "tar.h"
+#include "port.h"
+#include "rmt.h"
+#include "regex.h"
+
+/* Either stdout or stderr: The thing we write messages (standard msgs, not
+ errors) to. Stdout unless we're writing a pipe, in which case stderr */
+FILE *msg_file = stdout;
+
+#define STDIN 0 /* Standard input file descriptor */
+#define STDOUT 1 /* Standard output file descriptor */
+
+#define PREAD 0 /* Read file descriptor from pipe() */
+#define PWRITE 1 /* Write file descriptor from pipe() */
+
+#define MAGIC_STAT 105 /* Magic status returned by child, if
+ it can't exec. We hope compress/sh
+ never return this status! */
+
+void *valloc ();
+
+void writeerror ();
+void readerror ();
+
+void ck_pipe ();
+void ck_close ();
+
+int backspace_output ();
+extern void finish_header ();
+void flush_archive ();
+int isfile ();
+int new_volume ();
+void verify_volume ();
+extern void to_oct ();
+
+#ifndef __MSDOS__
+/* Obnoxious test to see if dimwit is trying to dump the archive */
+dev_t ar_dev;
+ino_t ar_ino;
+#endif
+
+/*
+ * The record pointed to by save_rec should not be overlaid
+ * when reading in a new tape block. Copy it to record_save_area first, and
+ * change the pointer in *save_rec to point to record_save_area.
+ * Saved_recno records the record number at the time of the save.
+ * This is used by annofile() to print the record number of a file's
+ * header record.
+ */
+static union record **save_rec;
+union record record_save_area;
+static long saved_recno;
+
+/*
+ * PID of child program, if f_compress or remote archive access.
+ */
+static int childpid = 0;
+
+/*
+ * Record number of the start of this block of records
+ */
+long baserec;
+
+/*
+ * Error recovery stuff
+ */
+static int r_error_count;
+
+/*
+ * Have we hit EOF yet?
+ */
+static int hit_eof;
+
+/* Checkpointing counter */
+static int checkpoint;
+
+/* JF we're reading, but we just read the last record and it's time to update */
+extern time_to_start_writing;
+int file_to_switch_to = -1; /* If remote update, close archive, and use
+ this descriptor to write to */
+
+static int volno = 1; /* JF which volume of a multi-volume tape
+ we're on */
+static int global_volno = 1; /* Volume number to print in external messages. */
+
+char *save_name = 0; /* Name of the file we are currently writing */
+long save_totsize; /* total size of file we are writing. Only
+ valid if save_name is non_zero */
+long save_sizeleft; /* Where we are in the file we are writing.
+ Only valid if save_name is non-zero */
+
+int write_archive_to_stdout;
+
+/* Used by fl_read and fl_write to store the real info about saved names */
+static char real_s_name[NAMSIZ];
+static long real_s_totsize;
+static long real_s_sizeleft;
+
+/*
+ * Clear the EOF flag if it is set.  In that case the record pointers
+ * are also put back to cover the whole block buffer (ar_record at its
+ * start, ar_last one full block later) and ar_reading is cleared.
+ * Nothing is touched when the EOF flag is not set.
+ */
+void
+reset_eof ()
+{
+  if (!hit_eof)
+    return;
+
+  hit_eof = 0;
+  ar_reading = 0;
+  ar_record = ar_block;
+  ar_last = ar_block + blocking;
+}
+
+/*
+ * Return the location of the next available input or output record,
+ * flushing the archive buffer first when all of it has been used.
+ * Return NULL for EOF.  Once NULL has been returned it keeps being
+ * returned, so that we do not accidentally go on to the next file on
+ * the "tape".
+ */
+union record *
+findrec ()
+{
+  if (ar_record != ar_last)
+    return ar_record;
+
+  /* The buffer is used up.  */
+  if (hit_eof)
+    return (union record *) NULL;	/* EOF */
+
+  flush_archive ();
+  if (ar_record != ar_last)
+    return ar_record;
+
+  /* The flush left nothing to hand out: remember the EOF.  */
+  hit_eof++;
+  return (union record *) NULL;	/* EOF */
+}
+
+
+/*
+ * Mark all records up to and including REC as used, by stepping
+ * ar_record forward until it points past REC.
+ * (should the arg have an off-by-1? XXX FIXME)
+ */
+void
+userec (rec)
+     union record *rec;
+{
+  for (; ar_record <= rec; ar_record++)
+    continue;
+
+  /*
+   * The archive is deliberately NOT flushed here.  If it were, the
+   * same argument to userec() could mean the next record (when the
+   * input block is exactly one record long), which is not what is
+   * intended.
+   */
+  if (ar_last < ar_record)
+    abort ();
+}
+
+
+/*
+ * Return a pointer to the end of the current records buffer (ar_last).
+ * Everything from the record returned by findrec() up to this pointer
+ * may be filled with data, or have data taken from it.
+ */
+union record *
+endofrecs ()
+{
+  union record *buffer_end = ar_last;
+
+  return buffer_end;
+}
+
+
+/*
+ * Duplicate file descriptor FROM into slot TO, then close FROM.
+ * Equivalent to BSD "dup2" with error reporting: TO is closed first
+ * (EBADF from that close is tolerated) and dup() must then hand back
+ * exactly TO.  MSG names the descriptor in the error message.  Any
+ * failure is reported and exits with EX_SYSTEM.  Nothing is done when
+ * FROM and TO are already the same descriptor.
+ */
+void
+dupto (from, to, msg)
+     int from, to;
+     char *msg;
+{
+  int result;
+
+  if (from == to)
+    return;
+
+  result = close (to);
+  if (result < 0 && errno != EBADF)
+    {
+      msg_perror ("Cannot close descriptor %d", to);
+      exit (EX_SYSTEM);
+    }
+
+  result = dup (from);
+  if (result != to)
+    {
+      msg_perror ("cannot dup %s", msg);
+      exit (EX_SYSTEM);
+    }
+
+  ck_close (from);
+}
+
+#ifdef __MSDOS__
+/* MS-DOS variant: there is no child to open, so just report that
+   compressed and remote archives are unavailable and exit with
+   EX_ARGSBAD.  */
+void
+child_open ()
+{
+  (void) fprintf (stderr,
+		  "MS-DOS %s can't use compressed or remote archives\n",
+		  tar);
+  exit (EX_ARGSBAD);
+}
+
+#else
+void
+child_open ()
+{
+ int pipe[2];
+ int err = 0;
+
+ int kidpipe[2];
+ int kidchildpid;
+
+#define READ 0
+#define WRITE 1
+
+ ck_pipe (pipe);
+
+ childpid = fork ();
+ if (childpid < 0)
+ {
+ msg_perror ("cannot fork");
+ exit (EX_SYSTEM);
+ }
+ if (childpid > 0)
+ {
+ /* We're the parent. Clean up and be happy: keep only our end of
+ the pipe as the archive descriptor -- the read end (with
+ reblocking turned on) when reading, the write end when writing. */
+ /* This, at least, is easy */
+
+ if (ar_reading)
+ {
+ f_reblock++;
+ archive = pipe[READ];
+ ck_close (pipe[WRITE]);
+ }
+ else
+ {
+ archive = pipe[WRITE];
+ ck_close (pipe[READ]);
+ }
+ return;
+ }
+
+ /* We're the kid. Put our end of the pipe where the compression
+ program will find it: on stdout when the parent reads the
+ archive, on stdin when the parent writes it. */
+ if (ar_reading)
+ {
+ dupto (pipe[WRITE], STDOUT, "(child) pipe to stdout");
+ ck_close (pipe[READ]);
+ }
+ else
+ {
+ dupto (pipe[READ], STDIN, "(child) pipe to stdin");
+ ck_close (pipe[WRITE]);
+ }
+
+ /* We need a child tar only if
+ 1: we're reading/writing stdin/out (to force reblocking)
+ 2: the file is to be accessed by rmt (compress doesn't know how)
+ 3: the file is not a plain file */
+#ifdef NO_REMOTE
+ if (!(ar_files[0][0] == '-' && ar_files[0][1] == '\0') && isfile (ar_files[0]))
+#else
+ if (!(ar_files[0][0] == '-' && ar_files[0][1] == '\0') && !_remdev (ar_files[0]) && isfile (ar_files[0]))
+#endif
+ {
+ /* We don't need a child tar. Open the archive and make it the
+ stdin (when reading) or stdout (when writing) that the
+ compression program will inherit. */
+ if (ar_reading)
+ {
+ archive = open (ar_files[0], O_RDONLY | O_BINARY, 0666);
+ if (archive < 0)
+ {
+ msg_perror ("can't open archive %s", ar_files[0]);
+ exit (EX_BADARCH);
+ }
+ dupto (archive, STDIN, "archive to stdin");
+ /* close(archive); */
+ }
+ else
+ {
+ archive = creat (ar_files[0], 0666);
+ if (archive < 0)
+ {
+ msg_perror ("can't open archive %s", ar_files[0]);
+ exit (EX_BADARCH);
+ }
+ dupto (archive, STDOUT, "archive to stdout");
+ /* close(archive); */
+ }
+ }
+ else
+ {
+ /* We need a child tar: fork once more, with a second pipe
+ (kidpipe) between the process that will exec the compression
+ program and the process that reads/writes the archive. */
+ ck_pipe (kidpipe);
+
+ kidchildpid = fork ();
+ if (kidchildpid < 0)
+ {
+ msg_perror ("child can't fork");
+ exit (EX_SYSTEM);
+ }
+
+ if (kidchildpid > 0)
+ {
+ /* About to exec compress: set up the files */
+ if (ar_reading)
+ {
+ dupto (kidpipe[READ], STDIN, "((child)) pipe to stdin");
+ ck_close (kidpipe[WRITE]);
+ /* dup2(pipe[WRITE],STDOUT); */
+ }
+ else
+ {
+ /* dup2(pipe[READ],STDIN); */
+ dupto (kidpipe[WRITE], STDOUT, "((child)) pipe to stdout");
+ ck_close (kidpipe[READ]);
+ }
+ /* ck_close(pipe[READ]); */
+ /* ck_close(pipe[WRITE]); */
+ /* ck_close(kidpipe[READ]);
+ ck_close(kidpipe[WRITE]); */
+ }
+ else
+ {
+ /* Grandchild. Do the right thing, namely sit here and
+ read/write the archive, and feed stuff back to compress */
+ tar = "tar (child)";
+ if (ar_reading)
+ {
+ dupto (kidpipe[WRITE], STDOUT, "[child] pipe to stdout");
+ ck_close (kidpipe[READ]);
+ }
+ else
+ {
+ dupto (kidpipe[READ], STDIN, "[child] pipe to stdin");
+ ck_close (kidpipe[WRITE]);
+ }
+
+ if (ar_files[0][0] == '-' && ar_files[0][1] == '\0')
+ {
+ if (ar_reading)
+ archive = STDIN;
+ else
+ archive = STDOUT;
+ }
+ else /* This can't happen if (ar_reading==2)
+ archive = rmtopen(ar_files[0], O_RDWR|O_CREAT|O_BINARY, 0666);
+ else */ if (ar_reading)
+ archive = rmtopen (ar_files[0], O_RDONLY | O_BINARY, 0666);
+ else
+ archive = rmtcreat (ar_files[0], 0666);
+
+ if (archive < 0)
+ {
+ msg_perror ("can't open archive %s", ar_files[0]);
+ exit (EX_BADARCH);
+ }
+
+ if (ar_reading)
+ {
+ for (;;)
+ {
+ char *ptr;
+ int max, count;
+
+ r_error_count = 0;
+ error_loop:
+ err = rmtread (archive, ar_block->charptr, (int) (blocksize));
+ if (err < 0)
+ {
+ readerror ();
+ goto error_loop;
+ }
+ if (err == 0)
+ break;
+ ptr = ar_block->charptr;
+ max = err;
+ while (max)
+ {
+ count = (max < RECORDSIZE) ? max : RECORDSIZE;
+ err = write (STDOUT, ptr, count);
+ if (err != count)
+ {
+ if (err < 0)
+ {
+ msg_perror ("can't write to compression program");
+ exit (EX_SYSTEM);
+ }
+ else
+ msg ("write to compression program short %d bytes",
+ count - err);
+ count = (err < 0) ? 0 : err;
+ }
+ ptr += count;
+ max -= count;
+ }
+ }
+ }
+ else
+ {
+ for (;;)
+ {
+ int n;
+ char *ptr;
+
+ n = blocksize;
+ ptr = ar_block->charptr;
+ while (n)
+ {
+ err = read (STDIN, ptr, (n < RECORDSIZE) ? n : RECORDSIZE);
+ if (err <= 0)
+ break;
+ n -= err;
+ ptr += err;
+ }
+ /* EOF: write out the partial block, either cut down to the
+ bytes actually read or, with f_compress_block, padded
+ with zeros to a full block, then stop. */
+ if (err == 0)
+ {
+ if (!f_compress_block)
+ blocksize -= n;
+ else
+ bzero (ar_block->charptr + blocksize - n, n);
+ err = rmtwrite (archive, ar_block->charptr, blocksize);
+ if (err != (blocksize))
+ writeerror (err);
+ if (!f_compress_block)
+ blocksize += n;
+ break;
+ }
+ if (n)
+ {
+ msg_perror ("can't read from compression program");
+ exit (EX_SYSTEM);
+ }
+ err = rmtwrite (archive, ar_block->charptr, (int) blocksize);
+ if (err != blocksize)
+ writeerror (err);
+ }
+ }
+
+ /* close_archive(); */
+ exit (0);
+ }
+ }
+ /* So we should exec compress (-d when reading). Getting past the
+ execlp call means the exec itself failed. */
+ if (ar_reading)
+ execlp (f_compressprog, f_compressprog, "-d", (char *) 0);
+ else
+ execlp (f_compressprog, f_compressprog, (char *) 0);
+ msg_perror ("can't exec %s", f_compressprog);
+ _exit (EX_SYSTEM);
+}
+
+
+/* Return non-zero if P names a regular file, or if P cannot be
+   stat'ed at all (for instance because it does not exist yet).
+   Return zero when P exists but is anything other than a regular
+   file, such as a directory or a device.  */
+int
+isfile (p)
+     char *p;
+{
+  struct stat stbuf;
+
+  /* A name that cannot be stat'ed is treated like a plain file.  */
+  if (stat (p, &stbuf) < 0 || S_ISREG (stbuf.st_mode))
+    return 1;
+  return 0;
+}
+
+#endif
+
+/*
+ * Open an archive file.
+ *
+ * READING selects the mode: 0 creates the archive for writing, any
+ * non-zero value opens it for reading, and 2 (JF) opens it for reading
+ * and writing.
+ *
+ * The block buffer is allocated here (with two spare records in front
+ * of it for multi-volume archives) and ar_block, ar_record and ar_last
+ * are set up.  ar_files[0] is then opened: through child_open() when a
+ * compression program is in use, as stdin/stdout when the name is "-",
+ * and through rmtopen()/rmtcreat() otherwise.  If the archive is a
+ * local regular file, its device and inode are remembered in ar_dev
+ * and ar_ino.
+ *
+ * When reading, the first block is read in and, if f_volhdr is set,
+ * its label is matched against label_pattern.  When writing with
+ * f_volhdr set, a volume header record is built at the start of the
+ * buffer.
+ *
+ * Every failure is reported and terminates the program.
+ */
+void
+open_archive (reading)
+     int reading;
+{
+  msg_file = f_exstdout ? stderr : stdout;
+
+  if (blocksize == 0)
+    {
+      msg ("invalid value for blocksize");
+      exit (EX_ARGSBAD);
+    }
+
+  if (n_ar_files == 0)
+    {
+      msg ("No archive name given, what should I do?");
+      exit (EX_BADARCH);
+    }
+
+  /*NOSTRICT*/
+  if (f_multivol)
+    {
+      /* Two extra records in front of the block leave room for the
+         volume and continuation headers written by fl_write().  */
+      ar_block = (union record *) valloc ((unsigned) (blocksize + (2 * RECORDSIZE)));
+      if (ar_block)
+        ar_block += 2;
+    }
+  else
+    ar_block = (union record *) valloc ((unsigned) blocksize);
+  if (!ar_block)
+    {
+      msg ("could not allocate memory for blocking factor %d",
+           blocking);
+      exit (EX_ARGSBAD);
+    }
+
+  ar_record = ar_block;
+  ar_last = ar_block + blocking;
+  ar_reading = reading;
+
+  if (f_multivol && f_verify)
+    {
+      msg ("cannot verify multi-volume archives");
+      exit (EX_ARGSBAD);
+    }
+
+  if (f_compressprog)
+    {
+      if (reading == 2 || f_verify)
+        {
+          msg ("cannot update or verify compressed archives");
+          exit (EX_ARGSBAD);
+        }
+      if (f_multivol)
+        {
+          msg ("cannot use multi-volume compressed archives");
+          exit (EX_ARGSBAD);
+        }
+      child_open ();
+      if (!reading && ar_files[0][0] == '-' && ar_files[0][1] == '\0')
+        msg_file = stderr;
+      /* child_open(rem_host, rem_file); */
+    }
+  else if (ar_files[0][0] == '-' && ar_files[0][1] == '\0')
+    {
+      f_reblock++;		/* Could be a pipe, be safe */
+      if (f_verify)
+        {
+          msg ("can't verify stdin/stdout archive");
+          exit (EX_ARGSBAD);
+        }
+      if (reading == 2)
+        {
+          archive = STDIN;
+          msg_file = stderr;
+          write_archive_to_stdout++;
+        }
+      else if (reading)
+        archive = STDIN;
+      else
+        {
+          archive = STDOUT;
+          msg_file = stderr;
+        }
+    }
+  else if (reading == 2 || f_verify)
+    {
+      archive = rmtopen (ar_files[0], O_RDWR | O_CREAT | O_BINARY, 0666);
+    }
+  else if (reading)
+    {
+      archive = rmtopen (ar_files[0], O_RDONLY | O_BINARY, 0666);
+    }
+  else
+    {
+      archive = rmtcreat (ar_files[0], 0666);
+    }
+  if (archive < 0)
+    {
+      msg_perror ("can't open %s", ar_files[0]);
+      exit (EX_BADARCH);
+    }
+#ifndef __MSDOS__
+  if (!_isrmt (archive))
+    {
+      struct stat tmp_stat;
+
+      /* Only look at tmp_stat when fstat() actually filled it in;
+         otherwise ar_dev/ar_ino would be set from uninitialized
+         memory.  */
+      if (fstat (archive, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode))
+        {
+          ar_dev = tmp_stat.st_dev;
+          ar_ino = tmp_stat.st_ino;
+        }
+    }
+#endif
+
+#ifdef __MSDOS__
+  setmode (archive, O_BINARY);
+#endif
+
+  if (reading)
+    {
+      ar_last = ar_block;	/* Set up for 1st block = # 0 */
+      (void) findrec ();	/* Read it in, check for EOF */
+
+      if (f_volhdr)
+        {
+          union record *head;
+#if 0
+          char *ptr;
+
+          if (f_multivol)
+            {
+              ptr = malloc (strlen (f_volhdr) + 20);
+              sprintf (ptr, "%s Volume %d", f_volhdr, 1);
+            }
+          else
+            ptr = f_volhdr;
+#endif
+          head = findrec ();
+          if (!head)
+            {
+              msg ("Archive not labelled to match %s", f_volhdr);
+              exit (EX_BADVOL);
+            }
+          if (re_match (label_pattern, head->header.arch_name,
+                        strlen (head->header.arch_name), 0, 0) < 0)
+            {
+              msg ("Volume mismatch! %s!=%s", f_volhdr,
+                   head->header.arch_name);
+              exit (EX_BADVOL);
+            }
+#if 0
+          if (strcmp (ptr, head->header.name))
+            {
+              msg ("Volume mismatch! %s!=%s", ptr, head->header.name);
+              exit (EX_BADVOL);
+            }
+          if (ptr != f_volhdr)
+            free (ptr);
+#endif
+        }
+    }
+  else if (f_volhdr)
+    {
+      /* Writing a labelled archive: build the volume header in the
+         first record of the buffer.  */
+      bzero ((void *) ar_block, RECORDSIZE);
+      if (f_multivol)
+        sprintf (ar_block->header.arch_name, "%s Volume 1", f_volhdr);
+      else
+        strcpy (ar_block->header.arch_name, f_volhdr);
+      current_file_name = ar_block->header.arch_name;
+      ar_block->header.linkflag = LF_VOLHDR;
+      to_oct (time (0), 1 + 12, ar_block->header.mtime);
+      finish_header (ar_block);
+      /* ar_record++; */
+    }
+}
+
+
+/*
+ * Register POINTER (the address of a union record * variable) as
+ * referring to a record that must be kept while reading onward in the
+ * file, and note the number of the current record in saved_recno.
+ * Only one such pointer can be remembered at once, and it only works
+ * when reading an archive.
+ *
+ * The distance of ar_record from ar_block is computed into a variable
+ * of its own before it is added to baserec, because some compilers end
+ * up adding (baserec+ar_record), doing a 9-bit shift of baserec, then
+ * subtracting ar_block from that, shifting it back, losing the top 9
+ * bits.
+ */
+void
+saverec (pointer)
+     union record **pointer;
+{
+  long recs_into_block;
+
+  recs_into_block = ar_record - ar_block;
+  saved_recno = baserec + recs_into_block;
+  save_rec = pointer;
+}
+
+/*
+ * Perform a write to flush the buffer.
+ *
+ * Writes the current block (blocksize bytes starting at ar_block) to
+ * the archive. When tape_length is set and bytes_written has reached
+ * tape_length * 1024, no write is attempted and the volume is treated
+ * as full (errno = ENOSPC, zero bytes written).
+ *
+ * Without f_multivol, anything but a complete write is fatal (see
+ * writeerror()). With f_multivol, a complete write records the name
+ * and sizes of the file currently being written (save_name with any
+ * leading '/' skipped, save_totsize, save_sizeleft) in real_s_name,
+ * real_s_totsize and real_s_sizeleft. An incomplete write makes us
+ * call new_volume() and write the block again to the new volume,
+ * preceded by a volume header record when f_volhdr is set and by an
+ * LF_MULTIVOL continuation record when a file was in progress. The
+ * header records are placed in front of ar_block, so the same number
+ * of records from the end of the block (copy_back) does not fit into
+ * that write; they are copied to the start of the buffer afterwards
+ * and ar_record is advanced past them.
+ *
+ * NOTE(review): the checkpoint message below carries its own "\n",
+ * unlike the other msg() calls in this file -- confirm whether msg()
+ * already ends the line.
+ */
+
+/*send_buffer_to_file();
+ if(new_volume) {
+ deal_with_new_volume_stuff();
+ send_buffer_to_file();
+ }
+ */
+
+void
+fl_write ()
+{
+ int err;
+ int copy_back;
+ static long bytes_written = 0;
+
+ if (f_checkpoint && !(++checkpoint % 10))
+ msg ("Write checkpoint %d\n", checkpoint);
+ if (tape_length && bytes_written >= tape_length * 1024)
+ {
+ errno = ENOSPC;
+ err = 0;
+ }
+ else
+ err = rmtwrite (archive, ar_block->charptr, (int) blocksize);
+ if (err != blocksize && !f_multivol)
+ writeerror (err);
+ else if (f_totals)
+ tot_written += blocksize;
+
+ if (err > 0)
+ bytes_written += err;
+ if (err == blocksize)
+ {
+ if (f_multivol)
+ {
+ if (!save_name)
+ {
+ real_s_name[0] = '\0';
+ real_s_totsize = 0;
+ real_s_sizeleft = 0;
+ return;
+ }
+#ifdef __MSDOS__
+ if (save_name[1] == ':')
+ save_name += 2;
+#endif
+ while (*save_name == '/')
+ save_name++;
+
+ strcpy (real_s_name, save_name);
+ real_s_totsize = save_totsize;
+ real_s_sizeleft = save_sizeleft;
+ }
+ return;
+ }
+
+ /* We're multivol. Panic if we didn't get the right kind of response */
+ /* ENXIO is for the UNIX PC */
+ if (err < 0 && errno != ENOSPC && errno != EIO && errno != ENXIO)
+ writeerror (err);
+
+ /* If error indicates a short write, we just move to the next tape. */
+
+ if (new_volume (0) < 0)
+ return;
+ bytes_written = 0;
+ if (f_volhdr && real_s_name[0])
+ {
+ copy_back = 2;
+ ar_block -= 2;
+ }
+ else if (f_volhdr || real_s_name[0])
+ {
+ copy_back = 1;
+ ar_block--;
+ }
+ else
+ copy_back = 0;
+ if (f_volhdr)
+ {
+ bzero ((void *) ar_block, RECORDSIZE);
+ sprintf (ar_block->header.arch_name, "%s Volume %d", f_volhdr, volno);
+ to_oct (time (0), 1 + 12, ar_block->header.mtime);
+ ar_block->header.linkflag = LF_VOLHDR;
+ finish_header (ar_block);
+ }
+ if (real_s_name[0])
+ {
+ int tmp;
+
+ if (f_volhdr)
+ ar_block++;
+ bzero ((void *) ar_block, RECORDSIZE);
+ strcpy (ar_block->header.arch_name, real_s_name);
+ ar_block->header.linkflag = LF_MULTIVOL;
+ to_oct ((long) real_s_sizeleft, 1 + 12,
+ ar_block->header.size);
+ to_oct ((long) real_s_totsize - real_s_sizeleft,
+ 1 + 12, ar_block->header.offset);
+ tmp = f_verbose;
+ f_verbose = 0;
+ finish_header (ar_block);
+ f_verbose = tmp;
+ if (f_volhdr)
+ ar_block--;
+ }
+
+ err = rmtwrite (archive, ar_block->charptr, (int) blocksize);
+ if (err != blocksize)
+ writeerror (err);
+ else if (f_totals)
+ tot_written += blocksize;
+
+
+ bytes_written = blocksize;
+ if (copy_back)
+ {
+ ar_block += copy_back;
+ bcopy ((void *) (ar_block + blocking - copy_back),
+ (void *) ar_record,
+ copy_back * RECORDSIZE);
+ ar_record += copy_back;
+
+ if (real_s_sizeleft >= copy_back * RECORDSIZE)
+ real_s_sizeleft -= copy_back * RECORDSIZE;
+ else if ((real_s_sizeleft + RECORDSIZE - 1) / RECORDSIZE <= copy_back)
+ real_s_name[0] = '\0';
+ else
+ {
+#ifdef __MSDOS__
+ if (save_name[1] == ':')
+ save_name += 2;
+#endif
+ while (*save_name == '/')
+ save_name++;
+
+ strcpy (real_s_name, save_name);
+ real_s_sizeleft = save_sizeleft;
+ real_s_totsize = save_totsize;
+ }
+ copy_back = 0;
+ }
+}
+
+/*
+ * Handle write errors on the archive.  Write errors are always fatal:
+ * this function reports the problem and exits with EX_BADARCH.
+ *
+ * ERR is the value the failed write returned.  A negative ERR is
+ * reported together with the system error; any other value is
+ * reported as a short write of ERR out of blocksize bytes.
+ *
+ * Hitting the end of a volume does not cause a write error unless the
+ * write was the first block of the volume.
+ */
+void
+writeerror (err)
+     int err;
+{
+  if (err >= 0)
+    msg ("only wrote %u of %u bytes to %s", err, blocksize, ar_files[cur_ar_file]);
+  else
+    msg_perror ("can't write to %s", ar_files[cur_ar_file]);
+
+  exit (EX_BADARCH);
+}
+
+/*
+ * Handle read errors on the archive.
+ *
+ * The error is always reported and read_error_flag is bumped so that
+ * callers can cope if they want.  If the read should be retried,
+ * readerror() returns to the caller; otherwise it exits with
+ * EX_BADARCH.  It exits when the error is on the first block
+ * (baserec == 0), or when more than READ_ERROR_MAX errors had already
+ * been counted in r_error_count before this one.
+ */
+void
+readerror ()
+{
+# define READ_ERROR_MAX 10
+  int earlier_errors;
+
+  read_error_flag++;		/* Tell callers */
+
+  msg_perror ("read error on %s", ar_files[cur_ar_file]);
+
+  /* First block of tape.  Probably stupidity error */
+  if (baserec == 0)
+    exit (EX_BADARCH);
+
+  /* Read error in mid archive: count it, and give up on reading the
+     archive once there have been too many.  */
+  earlier_errors = r_error_count;
+  r_error_count = earlier_errors + 1;
+  if (earlier_errors > READ_ERROR_MAX)
+    {
+      msg ("Too many errors, quitting.");
+      exit (EX_BADARCH);
+    }
+}
+
+
+/*
+ * Perform a read to flush the buffer.
+ *
+ * Reads the next block of blocksize bytes from the archive into
+ * ar_block. Before the read:
+ * - a record registered through save_rec that lies between ar_record
+ * and ar_last is copied to record_save_area and the registered
+ * pointer is redirected there;
+ * - when write_archive_to_stdout is set and this is not the first
+ * block (baserec != 0), the block currently in the buffer is
+ * written to descriptor 1;
+ * - with f_multivol, the name and sizes of the file being read are
+ * recorded in real_s_name, real_s_totsize and real_s_sizeleft.
+ *
+ * With f_multivol, end of volume (a read of 0 bytes, ENOSPC, or a
+ * short read without f_reblock) makes us call new_volume() and read
+ * the first block of the next volume. Its volume header is matched
+ * against label_pattern when f_volhdr is set, and its LF_MULTIVOL
+ * record is checked against the file that was in progress; on any
+ * mismatch the volume counters are stepped back and another volume is
+ * requested. ar_record is left pointing past those header records.
+ *
+ * A short read that ends on a record boundary is accepted and ar_last
+ * is moved to the end of the data read. Otherwise the rest of the
+ * block is read when f_reblock is set, and it is a fatal error when
+ * it is not. Read errors go through readerror(), which returns if the
+ * read is to be retried.
+ *
+ * NOTE(review): the checkpoint message below carries its own "\n",
+ * unlike the other msg() calls in this file -- confirm whether msg()
+ * already ends the line.
+ */
+void
+fl_read ()
+{
+ int err; /* Result from system call */
+ int left; /* Bytes left */
+ char *more; /* Pointer to next byte to read */
+
+ if (f_checkpoint && !(++checkpoint % 10))
+ msg ("Read checkpoint %d\n", checkpoint);
+
+ /*
+ * Clear the count of errors. This only applies to a single
+ * call to fl_read. We leave read_error_flag alone; it is
+ * only turned off by higher level software.
+ */
+ r_error_count = 0; /* Clear error count */
+
+ /*
+ * If we are about to wipe out a record that
+ * somebody needs to keep, copy it out to a holding
+ * area and adjust somebody's pointer to it.
+ */
+ if (save_rec &&
+ *save_rec >= ar_record &&
+ *save_rec < ar_last)
+ {
+ record_save_area = **save_rec;
+ *save_rec = &record_save_area;
+ }
+ if (write_archive_to_stdout && baserec != 0)
+ {
+ err = rmtwrite (1, ar_block->charptr, blocksize);
+ if (err != blocksize)
+ writeerror (err);
+ }
+ if (f_multivol)
+ {
+ if (save_name)
+ {
+ if (save_name != real_s_name)
+ {
+#ifdef __MSDOS__
+ if (save_name[1] == ':')
+ save_name += 2;
+#endif
+ while (*save_name == '/')
+ save_name++;
+
+ strcpy (real_s_name, save_name);
+ save_name = real_s_name;
+ }
+ real_s_totsize = save_totsize;
+ real_s_sizeleft = save_sizeleft;
+
+ }
+ else
+ {
+ real_s_name[0] = '\0';
+ real_s_totsize = 0;
+ real_s_sizeleft = 0;
+ }
+ }
+
+error_loop:
+ err = rmtread (archive, ar_block->charptr, (int) blocksize);
+ if (err == blocksize)
+ return;
+
+ if ((err == 0 || (err < 0 && errno == ENOSPC) || (err > 0 && !f_reblock)) && f_multivol)
+ {
+ union record *head;
+
+ try_volume:
+ if (new_volume ((cmd_mode == CMD_APPEND || cmd_mode == CMD_CAT || cmd_mode == CMD_UPDATE) ? 2 : 1) < 0)
+ return;
+ vol_error:
+ err = rmtread (archive, ar_block->charptr, (int) blocksize);
+ if (err < 0)
+ {
+ readerror ();
+ goto vol_error;
+ }
+ if (err != blocksize)
+ goto short_read;
+
+ head = ar_block;
+
+ if (head->header.linkflag == LF_VOLHDR)
+ {
+ if (f_volhdr)
+ {
+#if 0
+ char *ptr;
+
+ ptr = (char *) malloc (strlen (f_volhdr) + 20);
+ sprintf (ptr, "%s Volume %d", f_volhdr, volno);
+#endif
+ if (re_match (label_pattern, head->header.arch_name,
+ strlen (head->header.arch_name),
+ 0, 0) < 0)
+ {
+ msg ("Volume mismatch! %s!=%s", f_volhdr,
+ head->header.arch_name);
+ --volno;
+ --global_volno;
+ goto try_volume;
+ }
+
+#if 0
+ if (strcmp (ptr, head->header.name))
+ {
+ msg ("Volume mismatch! %s!=%s", ptr, head->header.name);
+ --volno;
+ --global_volno;
+ free (ptr);
+ goto try_volume;
+ }
+ free (ptr);
+#endif
+ }
+ if (f_verbose)
+ fprintf (msg_file, "Reading %s\n", head->header.arch_name);
+ head++;
+ }
+ else if (f_volhdr)
+ {
+ msg ("Warning: No volume header!");
+ }
+
+ if (real_s_name[0])
+ {
+ long from_oct ();
+
+ if (head->header.linkflag != LF_MULTIVOL || strcmp (head->header.arch_name, real_s_name))
+ {
+ msg ("%s is not continued on this volume!", real_s_name);
+ --volno;
+ --global_volno;
+ goto try_volume;
+ }
+ if (real_s_totsize != from_oct (1 + 12, head->header.size) + from_oct (1 + 12, head->header.offset))
+ {
+ msg ("%s is the wrong size (%ld!=%ld+%ld)",
+ head->header.arch_name, save_totsize,
+ from_oct (1 + 12, head->header.size),
+ from_oct (1 + 12, head->header.offset));
+ --volno;
+ --global_volno;
+ goto try_volume;
+ }
+ if (real_s_totsize - real_s_sizeleft != from_oct (1 + 12, head->header.offset))
+ {
+ msg ("This volume is out of sequence");
+ --volno;
+ --global_volno;
+ goto try_volume;
+ }
+ head++;
+ }
+ ar_record = head;
+ return;
+ }
+ else if (err < 0)
+ {
+ readerror ();
+ goto error_loop; /* Try again */
+ }
+
+short_read:
+ more = ar_block->charptr + err;
+ left = blocksize - err;
+
+again:
+ if (0 == (((unsigned) left) % RECORDSIZE))
+ {
+ /* FIXME, for size=0, multi vol support */
+ /* On the first block, warn about the problem */
+ if (!f_reblock && baserec == 0 && f_verbose && err > 0)
+ {
+ /* msg("Blocksize = %d record%s",
+ err / RECORDSIZE, (err > RECORDSIZE)? "s": "");*/
+ msg ("Blocksize = %d records", err / RECORDSIZE);
+ }
+ ar_last = ar_block + ((unsigned) (blocksize - left)) / RECORDSIZE;
+ return;
+ }
+ if (f_reblock)
+ {
+ /*
+ * User warned us about this. Fix up.
+ */
+ if (left > 0)
+ {
+ error2loop:
+ err = rmtread (archive, more, (int) left);
+ if (err < 0)
+ {
+ readerror ();
+ goto error2loop; /* Try again */
+ }
+ if (err == 0)
+ {
+ msg ("archive %s EOF not on block boundary", ar_files[cur_ar_file]);
+ exit (EX_BADARCH);
+ }
+ left -= err;
+ more += err;
+ goto again;
+ }
+ }
+ else
+ {
+ msg ("only read %d bytes from archive %s", err, ar_files[cur_ar_file]);
+ exit (EX_BADARCH);
+ }
+}
+
+
+/*
+ * Hand the current block over to the archive (when writing) or fetch
+ * the next one from it (when reading), after resetting the record
+ * pointers to cover a whole block again.
+ */
+void
+flush_archive ()
+{
+  int rc;
+
+  baserec += ar_last - ar_block;   /* Account for the records just handled */
+  ar_record = ar_block;            /* Back to the first record of the block */
+  ar_last = ar_block + blocking;   /* One past the last record of the block */
+
+  if (!ar_reading)
+    {
+      fl_write ();
+      return;
+    }
+
+  if (!time_to_start_writing)
+    {
+      fl_read ();
+      return;
+    }
+
+  /* We were reading, but from now on the archive is written to.  */
+  time_to_start_writing = 0;
+  ar_reading = 0;
+
+  if (file_to_switch_to < 0)
+    (void) backspace_output ();
+  else
+    {
+      rc = rmtclose (archive);
+      if (rc < 0)
+        msg_perror ("Warning: can't close %s(%d,%d)", ar_files[cur_ar_file], archive, rc);
+
+      archive = file_to_switch_to;
+    }
+  fl_write ();
+}
+
+/* Move the archive descriptor back by one block's worth.
+   If it's a tape, MTIOCTOP is used.  If it's something else, we try to
+   seek on it.  Returns 1 when the tape operation worked, 3 when the
+   seek worked, and 2 when neither did (the part of the block before
+   output_start is then zeroed instead).  */
+int
+backspace_output ()
+{
+  long target;
+  extern char *output_start;
+
+#ifdef MTIOCTOP
+  struct mtop op;
+
+  op.mt_op = MTBSR;
+  op.mt_count = 1;
+  if ((rmtioctl (archive, MTIOCTOP, &op)) >= 0)
+    return 1;
+  /* A failure with EIO gets exactly one more attempt.  */
+  if (errno == EIO)
+    {
+      if ((rmtioctl (archive, MTIOCTOP, &op)) >= 0)
+        return 1;
+    }
+#endif
+
+  /* Work out where this block started and try to seek back there.  */
+  target = rmtlseek (archive, 0L, 1);
+  target -= blocksize;
+
+  if (rmtlseek (archive, target, 0) == target)
+    return 3;
+
+  /* The seek failed, so fall back to blanking the head of the block.  */
+  msg ("Couldn't backspace archive file. It may be unreadable without -i.");
+  if (ar_block->charptr != output_start)
+    bzero (ar_block->charptr, output_start - ar_block->charptr);
+  return 2;
+}
+
+
+/*
+ * Finish with the archive file: flush what is pending, cut the file
+ * off at the current position after a delete, verify if asked to,
+ * close the descriptor and collect the status of the child process,
+ * if there is one.
+ */
+void
+close_archive ()
+{
+  int pid;
+  int status;
+  int rc;
+
+  if (time_to_start_writing || !ar_reading)
+    flush_archive ();
+
+  if (cmd_mode == CMD_DELETE)
+    {
+      off_t end;
+
+      end = rmtlseek (archive, 0L, 1);
+#ifndef __MSDOS__
+      (void) ftruncate (archive, end);
+#else
+      (void) rmtwrite (archive, "", 0);
+#endif
+    }
+
+  if (f_verify)
+    verify_volume ();
+
+  rc = rmtclose (archive);
+  if (rc < 0)
+    msg_perror ("Warning: can't close %s(%d,%d)", ar_files[cur_ar_file], archive, rc);
+
+#ifndef __MSDOS__
+  if (!childpid)
+    return;
+
+  /* Keep reaping until our own child turns up or no children remain.  */
+  do
+    {
+      pid = wait (&status);
+    }
+  while (pid != childpid && pid != -1);
+
+  if (pid == -1)
+    return;
+
+  if (WIFSIGNALED (status))
+    {
+      /* SIGPIPE is OK, everything else is a problem. */
+      if (WTERMSIG (status) != SIGPIPE)
+        msg ("child died with signal %d%s", WTERMSIG (status),
+             WIFCOREDUMPED (status) ? " (core dumped)" : "");
+      return;
+    }
+
+  /* The child terminated voluntarily -- but why? */
+  if (WEXITSTATUS (status) == MAGIC_STAT)
+    exit (EX_SYSTEM);           /* Child had trouble */
+
+  /* SIGPIPE + 128 is what /bin/sh returns when its own child dies
+     with SIGPIPE; that is fine and is not reported.  */
+  if (WEXITSTATUS (status) != (SIGPIPE + 128) && WEXITSTATUS (status))
+    msg ("child returned status %d", WEXITSTATUS (status));
+#endif /* __MSDOS__ */
+}
+
+
+#ifdef DONTDEF
+/*
+ * Message management.
+ *
+ * anno writes a message prefix on stream (eg stdout, stderr).
+ *
+ * The specified prefix is normally output followed by a colon and a space.
+ * However, if other command line options are set, more output can come
+ * out, such as the record # within the archive.
+ *
+ * If the specified prefix is NULL, no output is produced unless the
+ * command line option(s) are set.
+ *
+ * If the third argument is 1, the "saved" record # is used; if 0, the
+ * "current" record # is used.
+ *
+ * errno is saved on entry and restored on exit, so the stdio calls made
+ * here do not disturb a value the caller is about to report.
+ */
+void
+anno (stream, prefix, savedp)
+     FILE *stream;
+     char *prefix;
+     int savedp;
+{
+# define MAXANNO 50
+  char buffer[MAXANNO];         /* Holds the formatted record number */
+# define ANNOWIDTH 13
+  int space;
+  long offset;
+  long recno;
+  int save_e;
+
+  save_e = errno;
+  /* Make sure previous output gets out in sequence */
+  if (stream == stderr)
+    fflush (stdout);
+  if (f_sayblock)
+    {
+      if (prefix)
+        {
+          fputs (prefix, stream);
+          putc (' ', stream);
+        }
+      offset = ar_record - ar_block;
+      /* Convert to long explicitly so the value matches the %ld below;
+         the expression involves a long and was formerly printed with %d.  */
+      recno = savedp ? (long) saved_recno : (long) (baserec + offset);
+      (void) sprintf (buffer, "rec %ld: ", recno);
+      fputs (buffer, stream);
+      /* Pad the record number out to a fixed column width.  */
+      space = ANNOWIDTH - (int) strlen (buffer);
+      if (space > 0)
+        {
+          fprintf (stream, "%*s", space, "");
+        }
+    }
+  else if (prefix)
+    {
+      fputs (prefix, stream);
+      fputs (": ", stream);
+    }
+  errno = save_e;
+}
+
+#endif
+
+/* Load the global volume number from the file named by f_volno_file.
+   A file that does not exist is not an error; any other failure to
+   open it is reported.  */
+void
+init_volume_number ()
+{
+  FILE *fp = fopen (f_volno_file, "r");
+
+  if (fp == 0)
+    {
+      if (errno != ENOENT)
+        msg_perror ("%s", f_volno_file);
+      return;
+    }
+
+  fscanf (fp, "%d", &global_volno);
+  fclose (fp);
+}
+
+/* Called to write out the closing global volume number to the file
+   named by f_volno_file.  Failure to open the file, or to get the
+   number flushed to it, is reported but is not fatal.  */
+void
+closeout_volume_number ()
+{
+  FILE *vf;
+
+  vf = fopen (f_volno_file, "w");
+  if (!vf)
+    {
+      msg_perror ("%s", f_volno_file);
+      return;
+    }
+
+  fprintf (vf, "%d\n", global_volno);
+  /* The number only reaches the file when the stream is flushed, so a
+     failing fclose means it was lost; say so instead of ignoring it.  */
+  if (fclose (vf) == EOF)
+    msg_perror ("%s", f_volno_file);
+}
+
+/* We've hit the end of the old volume.  Close it and open the next one.
+   Values for TYPE: 0: writing  1: reading  2: updating.
+   Returns 0 once the next volume is open, or -1 (before the current
+   archive is closed) when called while a verify pass is running.  */
+int
+new_volume (type)
+     int type;
+{
+  int c;
+  int prompt;
+  char inbuf[80];
+  char *p;
+  static FILE *read_file = 0;
+  extern int now_verifying;
+  extern char TTY_NAME[];
+  static int looped = 0;
+
+  /* Operator answers are read from stdin, unless the archive itself is
+     on descriptor 0, in which case the terminal is opened instead.  */
+  if (!read_file && !f_run_script_at_end)
+    read_file = (archive == 0) ? fopen (TTY_NAME, "r") : stdin;
+
+  if (now_verifying)
+    return -1;
+  if (f_verify)
+    verify_volume ();
+  if ((c = rmtclose (archive)) < 0)
+    msg_perror ("Warning: can't close %s(%d,%d)", ar_files[cur_ar_file], archive, c);
+
+  global_volno++;
+  volno++;
+  cur_ar_file++;
+  if (cur_ar_file == n_ar_files)
+    {
+      /* Every archive name has been used; start over with the first. */
+      cur_ar_file = 0;
+      looped = 1;
+    }
+  prompt = looped;
+
+tryagain:
+  if (prompt)
+    {
+      /* We have to prompt from now on. */
+      if (f_run_script_at_end)
+        {
+          closeout_volume_number ();
+          system (info_script);
+        }
+      else
+        for (;;)
+          {
+            fprintf (msg_file, "\007Prepare volume #%d for %s and hit return: ", global_volno, ar_files[cur_ar_file]);
+            fflush (msg_file);
+            /* read_file is null when the terminal could not be opened;
+               treat that like end of input rather than handing a null
+               stream to fgets.  */
+            if (read_file == 0
+                || fgets (inbuf, sizeof (inbuf), read_file) == 0)
+              {
+                fprintf (msg_file, "EOF? What does that mean?");
+                if (cmd_mode != CMD_EXTRACT && cmd_mode != CMD_LIST && cmd_mode != CMD_DIFF)
+                  msg ("Warning: Archive is INCOMPLETE!");
+                exit (EX_BADARCH);
+              }
+            if (inbuf[0] == '\n' || inbuf[0] == 'y' || inbuf[0] == 'Y')
+              break;
+
+            switch (inbuf[0])
+              {
+              case '?':
+                {
+                  fprintf (msg_file, "\
+ n [name] Give a new filename for the next (and subsequent) volume(s)\n\
+ q Abort tar\n\
+ ! Spawn a subshell\n\
+ ? Print this list\n");
+                }
+                break;
+
+              case 'q':         /* Quit */
+                fprintf (msg_file, "No new volume; exiting.\n");
+                if (cmd_mode != CMD_EXTRACT && cmd_mode != CMD_LIST && cmd_mode != CMD_DIFF)
+                  msg ("Warning: Archive is INCOMPLETE!");
+                exit (EX_BADARCH);
+
+              case 'n':         /* Get new file name */
+                {
+                  char *q, *r;
+
+                  /* Skip blanks after the 'n' and cut the newline off. */
+                  for (q = &inbuf[1]; *q == ' ' || *q == '\t'; q++)
+                    ;
+                  for (r = q; *r; r++)
+                    if (*r == '\n')
+                      *r = '\0';
+                  p = (char *) malloc ((unsigned) (strlen (q) + 2));
+                  if (p == 0)
+                    {
+                      msg ("Can't allocate memory for name");
+                      exit (EX_SYSTEM);
+                    }
+                  (void) strcpy (p, q);
+                  ar_files[cur_ar_file] = p;
+                }
+                break;
+
+              case '!':
+#ifdef __MSDOS__
+                spawnl (P_WAIT, getenv ("COMSPEC"), "-", (char *) 0);
+#else
+                /* JF this needs work! */
+                switch (fork ())
+                  {
+                  case -1:
+                    msg_perror ("can't fork!");
+                    break;
+                  case 0:
+                    p = getenv ("SHELL");
+                    if (p == 0)
+                      p = "/bin/sh";
+                    /* The argument list must end in a null pointer; a
+                       bare 0 is an int and need not have that size.  */
+                    execlp (p, "-sh", "-i", (char *) 0);
+                    msg_perror ("can't exec a shell %s", p);
+                    _exit (55);
+                  default:
+                    wait ((int *) 0);
+                    break;
+                  }
+#endif
+                break;
+              }
+          }
+    }
+
+
+  if (type == 2 || f_verify)
+    archive = rmtopen (ar_files[cur_ar_file], O_RDWR | O_CREAT, 0666);
+  else if (type == 1)
+    archive = rmtopen (ar_files[cur_ar_file], O_RDONLY, 0666);
+  else if (type == 0)
+    archive = rmtcreat (ar_files[cur_ar_file], 0666);
+  else
+    archive = -1;
+
+  if (archive < 0)
+    {
+      msg_perror ("can't open %s", ar_files[cur_ar_file]);
+      /* Retrying the same open with nothing changed would just fail
+         again, forever; ask the operator (or run the script) first.  */
+      prompt = 1;
+      goto tryagain;
+    }
+#ifdef __MSDOS__
+  setmode (archive, O_BINARY);
+#endif
+  return 0;
+}
+
+/* This is a useless function that takes a buffer returned by wantbytes
+ and does nothing with it. If the function called by wantbytes returns
+ an error indicator (non-zero), this function is called for the rest of
+ the file.
+ SIZE and DATA are the two arguments wantbytes hands to its callback;
+ both are ignored here and the result is always 0.
+ */
+int
+no_op (size, data)
+ int size;
+ char *data;
+{
+ return 0;
+}
+
+/* Some other routine wants SIZE bytes in the archive.  For each chunk of
+   the archive, call FUNC with the size of the chunk, and the address of
+   the chunk it can work with.  Once FUNC returns non-zero it is
+   replaced by no_op, so the remaining bytes are consumed but ignored.
+   Returns 0 when SIZE bytes have been consumed, or -1 if the archive
+   ran out first.  */
+int
+wantbytes (size, func)
+     long size;
+     int (*func) ();
+{
+  union record *rec;
+  char *data;
+  long data_size;
+
+  /* Stop at zero or below; a negative count could never be satisfied. */
+  while (size > 0)
+    {
+      /* Check the record pointer itself before looking inside it.  */
+      rec = findrec ();
+      if (rec == NULL)
+        {
+          msg ("Unexpected EOF on archive file");
+          return -1;
+        }
+      data = rec->charptr;
+      data_size = endofrecs ()->charptr - data;
+      if (data_size > size)
+        data_size = size;
+      if ((*func) (data_size, data))
+        func = no_op;
+      /* Mark everything up to the last byte handed out as used.  */
+      userec ((union record *) (data + data_size - 1));
+      size -= data_size;
+    }
+  return 0;
+}
diff --git a/gnu/usr.bin/tar/create.c b/gnu/usr.bin/tar/create.c
new file mode 100644
index 000000000000..62b9c51178f4
--- /dev/null
+++ b/gnu/usr.bin/tar/create.c
@@ -0,0 +1,1454 @@
+/* Create a tar archive.
+ Copyright (C) 1985, 1992, 1993 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * Create a tar archive.
+ *
+ * Written 25 Aug 1985 by John Gilmore, ihnp4!hoptoad!gnu.
+ */
+
+#ifdef _AIX
+ #pragma alloca
+#endif
+#include <sys/types.h>
+#include <stdio.h>
+#include <errno.h>
+#ifndef STDC_HEADERS
+extern int errno;
+#endif
+
+#ifdef BSD42
+#include <sys/file.h>
+#else
+#ifndef V7
+#include <fcntl.h>
+#endif
+#endif
+
+#include "tar.h"
+#include "port.h"
+
+#ifndef __MSDOS__
+#include <pwd.h>
+#include <grp.h>
+#endif
+
+#if defined (_POSIX_VERSION)
+#include <utime.h>
+#else
+struct utimbuf
+{
+ long actime;
+ long modtime;
+};
+
+#endif
+
+extern struct stat hstat; /* Stat struct corresponding */
+
+#ifndef __MSDOS__
+extern dev_t ar_dev;
+extern ino_t ar_ino;
+#endif
+
+/* JF */
+extern struct name *gnu_list_name;
+
+/*
+ * If there are no symbolic links, there is no lstat(). Use stat().
+ */
+#ifndef S_ISLNK
+#define lstat stat
+#endif
+
+extern void print_header ();
+
+union record *start_header ();
+void blank_name_list ();
+int check_exclude ();
+PTR ck_malloc ();
+PTR ck_realloc ();
+void clear_buffer ();
+void close_archive ();
+void collect_and_sort_names ();
+int confirm ();
+int deal_with_sparse ();
+void find_new_file_size ();
+void finish_header ();
+int finish_sparse_file ();
+void finduname ();
+void findgname ();
+int is_dot_or_dotdot ();
+void open_archive ();
+char *name_next ();
+void name_close ();
+void to_oct ();
+void dump_file ();
+void write_dir_file ();
+void write_eot ();
+void write_long ();
+int zero_record ();
+
+/* This code moved from tar.h since create.c is the only file that cares
+ about 'struct link's. This means that other files might not have to
+ include sys/types.h any more. */
+
+struct link
+  {
+    struct link *next;          /* Next entry in the list */
+    dev_t dev;                  /* Device and inode number that */
+    ino_t ino;                  /*   identify the file */
+    short linkcount;
+    char name[1];               /* Allocated large enough for the name */
+  };
+
+struct link *linklist;          /* Points to first link in list */
+
+static int nolinks;             /* Gets set if we run out of RAM */
+
+/*
+ * "Scratch" space to store the information about a sparse file before
+ * writing the info into the header or extended header
+ */
+/* struct sp_array *sparsearray;*/
+
+/* number of elts storable in the sparsearray */
+/*int sparse_array_size = 10;*/
+
+/*
+ * Write a complete archive: open it for writing, dump every name that
+ * was asked for, append the end-of-archive mark and close it again.
+ * With f_gnudump set, the names are dumped first and then, in a second
+ * pass over the name list, the directory entries flagged 'Y' in each
+ * name's dir_contents are dumped as well.
+ */
+void
+create_archive ()
+{
+  register char *p;
+  char *name_from_list ();
+
+  open_archive (0);             /* Open for writing */
+
+  if (f_gnudump)
+    {
+      int buflen = PATH_MAX;
+      char *buf = ck_malloc (buflen);
+      char *q;
+      int plen, dirlen, need;
+
+      collect_and_sort_names ();
+
+      while (p = name_from_list ())
+        dump_file (p, -1, 1);
+      /* if(!f_dironly) { */
+      blank_name_list ();
+      while (p = name_from_list ())
+        {
+          /* Room for the name, a '/' and the terminating null; names
+             are not limited to the initial PATH_MAX bytes.  */
+          plen = strlen (p);
+          if (plen + 2 > buflen)
+            {
+              buflen = plen + 2;
+              buf = ck_realloc (buf, buflen);
+            }
+          strcpy (buf, p);
+          /* Test the length first so an empty name is never indexed
+             at -1.  */
+          if (plen == 0 || p[plen - 1] != '/')
+            strcat (buf, "/");
+          dirlen = strlen (buf);
+          for (q = gnu_list_name->dir_contents; q && *q; q += strlen (q) + 1)
+            {
+              if (*q == 'Y')
+                {
+                  /* Grow the buffer before appending the entry name. */
+                  need = dirlen + strlen (q + 1) + 1;
+                  if (need > buflen)
+                    {
+                      buflen = need;
+                      buf = ck_realloc (buf, buflen);
+                    }
+                  strcpy (buf + dirlen, q + 1);
+                  dump_file (buf, -1, 1);
+                }
+            }
+        }
+      /* } */
+      free (buf);
+    }
+  else
+    {
+      while (p = name_next (1))
+        dump_file (p, -1, 1);
+    }
+
+  write_eot ();
+  close_archive ();
+  if (f_gnudump)
+    write_dir_file ();
+  name_close ();
+}
+
+/*
+ * Dump a single file.  If it's a directory, recurse.
+ * Nothing is returned.  A file that cannot be added bumps the global
+ * "errors" count, unless f_ignore_failed_read is set and the failure
+ * was not flagged as critical.
+ * Sets global "hstat" to stat() output for this file.
+ */
+void
+dump_file (p, curdev, toplevel)
+     char *p;                   /* File name to dump */
+     int curdev;                /* Device our parent dir was on */
+     int toplevel;              /* Whether we are a toplevel call */
+{
+  union record *header;
+  char type;
+  extern char *save_name;       /* JF for multi-volume support */
+  extern long save_totsize;
+  extern long save_sizeleft;
+  union record *exhdr;
+  char save_linkflag;
+  extern time_t new_time;
+  int critical_error = 0;
+  struct utimbuf restore_times;
+  /* int sparse_ind = 0;*/
+
+
+  if (f_confirm && !confirm ("add", p))
+    return;
+
+  /*
+   * Use stat if following (rather than dumping) 4.2BSD's
+   * symbolic links.  Otherwise, use lstat (which, on non-4.2
+   * systems, is #define'd to stat anyway.
+   */
+#ifdef STX_HIDDEN               /* AIX */
+  if (0 != f_follow_links ?
+      statx (p, &hstat, STATSIZE, STX_HIDDEN) :
+      statx (p, &hstat, STATSIZE, STX_HIDDEN | STX_LINK))
+#else
+  if (0 != f_follow_links ? stat (p, &hstat) : lstat (p, &hstat))
+#endif
+    {
+      /* These two labels are also jumped to from further down.  */
+    badperror:
+      msg_perror ("can't add file %s", p);
+    badfile:
+      if (!f_ignore_failed_read || critical_error)
+        errors++;
+      return;
+    }
+
+  /* Remember the times so they can be put back after reading.  */
+  restore_times.actime = hstat.st_atime;
+  restore_times.modtime = hstat.st_mtime;
+
+#ifdef S_ISHIDDEN
+  if (S_ISHIDDEN (hstat.st_mode))
+    {
+      char *new = (char *) alloca (strlen (p) + 2);
+      if (new)
+        {
+          strcpy (new, p);
+          strcat (new, "@");
+          p = new;
+        }
+    }
+#endif
+
+  /* See if we only want new files, and check if this one is too old to
+     put in the archive. */
+  if (f_new_files
+      && !f_gnudump
+      && new_time > hstat.st_mtime
+      && !S_ISDIR (hstat.st_mode)
+      && (f_new_files > 1 || new_time > hstat.st_ctime))
+    {
+      if (curdev == -1)
+        {
+          msg ("%s: is unchanged; not dumped", p);
+        }
+      return;
+    }
+
+#ifndef __MSDOS__
+  /* See if we are trying to dump the archive */
+  if (ar_dev && hstat.st_dev == ar_dev && hstat.st_ino == ar_ino)
+    {
+      msg ("%s is the archive; not dumped", p);
+      return;
+    }
+#endif
+  /*
+   * Check for multiple links.
+   *
+   * We maintain a list of all such files that we've written so
+   * far.  Any time we see another, we check the list and
+   * avoid dumping the data again if we've done it once already.
+   */
+  if (hstat.st_nlink > 1
+      && (S_ISREG (hstat.st_mode)
+#ifdef S_ISCTG
+          || S_ISCTG (hstat.st_mode)
+#endif
+#ifdef S_ISCHR
+          || S_ISCHR (hstat.st_mode)
+#endif
+#ifdef S_ISBLK
+          || S_ISBLK (hstat.st_mode)
+#endif
+#ifdef S_ISFIFO
+          || S_ISFIFO (hstat.st_mode)
+#endif
+      ))
+    {
+      register struct link *lp;
+
+      /* First quick and dirty.  Hashing, etc later FIXME */
+      for (lp = linklist; lp; lp = lp->next)
+        {
+          if (lp->ino == hstat.st_ino &&
+              lp->dev == hstat.st_dev)
+            {
+              char *link_name = lp->name;
+
+              /* We found a link. */
+              while (!f_absolute_paths && *link_name == '/')
+                {
+                  static int link_warn = 0;
+
+                  if (!link_warn)
+                    {
+                      msg ("Removing leading / from absolute links");
+                      link_warn++;
+                    }
+                  link_name++;
+                }
+              /* What matters is how long the name is, not how many
+                 leading slashes were just skipped.  */
+              if (strlen (link_name) >= NAMSIZ)
+                write_long (link_name, LF_LONGLINK);
+              current_link_name = link_name;
+
+              hstat.st_size = 0;
+              header = start_header (p, &hstat);
+              if (header == NULL)
+                {
+                  critical_error = 1;
+                  goto badfile;
+                }
+              strncpy (header->header.arch_linkname,
+                       link_name, NAMSIZ);
+
+              /* Force null truncated */
+              header->header.arch_linkname[NAMSIZ - 1] = 0;
+
+              header->header.linkflag = LF_LINK;
+              finish_header (header);
+              /* FIXME: Maybe remove from list after all links found? */
+              if (f_remove_files)
+                {
+                  if (unlink (p) == -1)
+                    msg_perror ("cannot remove %s", p);
+                }
+              return;           /* We dumped it */
+            }
+        }
+
+      /* Not found.  Add it to the list of possible links. */
+      lp = (struct link *) ck_malloc ((unsigned) (sizeof (struct link) + strlen (p)));
+      if (!lp)
+        {
+          if (!nolinks)
+            {
+              msg (
+               "no memory for links, they will be dumped as separate files");
+              nolinks++;
+            }
+        }
+      else
+        {
+          /* Only touch the new entry when there really is one.  */
+          lp->ino = hstat.st_ino;
+          lp->dev = hstat.st_dev;
+          strcpy (lp->name, p);
+          lp->next = linklist;
+          linklist = lp;
+        }
+    }
+
+  /*
+   * This is not a link to a previously dumped file, so dump it.
+   */
+  if (S_ISREG (hstat.st_mode)
+#ifdef S_ISCTG
+      || S_ISCTG (hstat.st_mode)
+#endif
+    )
+    {
+      int f;                    /* File descriptor */
+      long bufsize, count;
+      long sizeleft;
+      register union record *start;
+      int header_moved;
+      char isextended = 0;
+      int upperbound;
+      /* int end_nulls = 0; */
+
+      header_moved = 0;
+
+#ifdef BSD42
+      if (f_sparse_files)
+        {
+          /*
+           * JK - This is the test for sparseness: whether the
+           * "size" of the file matches the number of blocks
+           * allocated for it.  If there is a smaller number
+           * of blocks that would be necessary to accommodate
+           * a file of this size, we have a sparse file, i.e.,
+           * at least one of those records in the file is just
+           * a useless hole.
+           */
+#ifdef hpux                     /* Nice of HPUX to gratuitously change it, huh?  - mib */
+          if (hstat.st_size - (hstat.st_blocks * 1024) > 1024)
+#else
+          if (hstat.st_size - (hstat.st_blocks * RECORDSIZE) > RECORDSIZE)
+#endif
+            {
+              int filesize = hstat.st_size;
+              register int i;
+
+              header = start_header (p, &hstat);
+              if (header == NULL)
+                {
+                  critical_error = 1;
+                  goto badfile;
+                }
+              header->header.linkflag = LF_SPARSE;
+              header_moved++;
+
+              /*
+               * Call the routine that figures out the
+               * layout of the sparse file in question.
+               * UPPERBOUND is the index of the last
+               * element of the "sparsearray," i.e.,
+               * the number of elements it needed to
+               * describe the file.
+               */
+
+              upperbound = deal_with_sparse (p, header);
+
+              /*
+               * See if we'll need an extended header
+               * later
+               */
+              if (upperbound > SPARSE_IN_HDR - 1)
+                header->header.isextended++;
+              /*
+               * We store the "real" file size so
+               * we can show that in case someone wants
+               * to list the archive, i.e., tar tvf <file>.
+               * It might be kind of disconcerting if the
+               * shrunken file size was the one that showed
+               * up.
+               */
+              to_oct ((long) hstat.st_size, 1 + 12,
+                      header->header.realsize);
+
+              /*
+               * This will be the new "size" of the
+               * file, i.e., the size of the file
+               * minus the records of holes that we're
+               * skipping over.
+               */
+
+              find_new_file_size (&filesize, upperbound);
+              hstat.st_size = filesize;
+              to_oct ((long) filesize, 1 + 12,
+                      header->header.size);
+              /* to_oct((long) end_nulls, 1+12,
+                 header->header.ending_blanks);*/
+
+              for (i = 0; i < SPARSE_IN_HDR; i++)
+                {
+                  if (!sparsearray[i].numbytes)
+                    break;
+                  to_oct (sparsearray[i].offset, 1 + 12,
+                          header->header.sp[i].offset);
+                  to_oct (sparsearray[i].numbytes, 1 + 12,
+                          header->header.sp[i].numbytes);
+                }
+
+            }
+        }
+#else
+      upperbound = SPARSE_IN_HDR - 1;
+#endif
+
+      sizeleft = hstat.st_size;
+      /* Don't bother opening empty, world readable files. */
+      if (sizeleft > 0 || 0444 != (0444 & hstat.st_mode))
+        {
+          f = open (p, O_RDONLY | O_BINARY);
+          if (f < 0)
+            goto badperror;
+        }
+      else
+        {
+          f = -1;
+        }
+
+      /* If the file is sparse, we've already taken care of this */
+      if (!header_moved)
+        {
+          header = start_header (p, &hstat);
+          if (header == NULL)
+            {
+              if (f >= 0)
+                (void) close (f);
+              critical_error = 1;
+              goto badfile;
+            }
+        }
+#ifdef S_ISCTG
+      /* Mark contiguous files, if we support them */
+      if (f_standard && S_ISCTG (hstat.st_mode))
+        {
+          header->header.linkflag = LF_CONTIG;
+        }
+#endif
+      /* Save these two before finish_header is given the header.  */
+      isextended = header->header.isextended;
+      save_linkflag = header->header.linkflag;
+      finish_header (header);
+      if (isextended)
+        {
+          /* int sum = 0;*/
+          register int i;
+          /* register union record *exhdr;*/
+          /* int arraybound = SPARSE_EXT_HDR;*/
+          /* static */ int index_offset = SPARSE_IN_HDR;
+
+        extend:exhdr = findrec ();
+
+          if (exhdr == NULL)
+            {
+              critical_error = 1;
+              goto badfile;
+            }
+          bzero (exhdr->charptr, RECORDSIZE);
+          for (i = 0; i < SPARSE_EXT_HDR; i++)
+            {
+              if (i + index_offset > upperbound)
+                break;
+              to_oct ((long) sparsearray[i + index_offset].numbytes,
+                      1 + 12,
+                      exhdr->ext_hdr.sp[i].numbytes);
+              to_oct ((long) sparsearray[i + index_offset].offset,
+                      1 + 12,
+                      exhdr->ext_hdr.sp[i].offset);
+            }
+          userec (exhdr);
+          /* sum += i;
+             if (sum < upperbound)
+             goto extend;*/
+          if (index_offset + i <= upperbound)
+            {
+              index_offset += i;
+              exhdr->ext_hdr.isextended++;
+              goto extend;
+            }
+
+        }
+      if (save_linkflag == LF_SPARSE)
+        {
+          if (finish_sparse_file (f, &sizeleft, hstat.st_size, p))
+            goto padit;
+        }
+      else
+        while (sizeleft > 0)
+          {
+
+            if (f_multivol)
+              {
+                save_name = p;
+                save_sizeleft = sizeleft;
+                save_totsize = hstat.st_size;
+              }
+            start = findrec ();
+
+            bufsize = endofrecs ()->charptr - start->charptr;
+
+            if (sizeleft < bufsize)
+              {
+                /* Last read -- zero out area beyond */
+                bufsize = (int) sizeleft;
+                count = bufsize % RECORDSIZE;
+                if (count)
+                  bzero (start->charptr + sizeleft,
+                         (int) (RECORDSIZE - count));
+              }
+            count = read (f, start->charptr, bufsize);
+            if (count < 0)
+              {
+                /* Both numbers are long, so both are printed with %ld. */
+                msg_perror ("read error at byte %ld, reading %ld bytes, in file %s",
+                            (long) (hstat.st_size - sizeleft), bufsize, p);
+                goto padit;
+              }
+            sizeleft -= count;
+
+            /* This is nonportable (the type of userec's arg). */
+            userec (start + (count - 1) / RECORDSIZE);
+
+            if (count == bufsize)
+              continue;
+            msg ("file %s shrunk by %ld bytes, padding with zeros.", p, sizeleft);
+            goto padit;         /* Short read */
+          }
+
+      if (f_multivol)
+        save_name = 0;
+
+      if (f >= 0)
+        (void) close (f);
+
+      if (f_remove_files)
+        {
+          if (unlink (p) == -1)
+            msg_perror ("cannot remove %s", p);
+        }
+      if (f_atime_preserve)
+        utime (p, &restore_times);
+      return;
+
+      /*
+       * File shrunk or gave error, pad out tape to match
+       * the size we specified in the header.
+       */
+    padit:
+      while (sizeleft > 0)
+        {
+          save_sizeleft = sizeleft;
+          start = findrec ();
+          bzero (start->charptr, RECORDSIZE);
+          userec (start);
+          sizeleft -= RECORDSIZE;
+        }
+      if (f_multivol)
+        save_name = 0;
+      if (f >= 0)
+        (void) close (f);
+      if (f_atime_preserve)
+        utime (p, &restore_times);
+      return;
+    }
+
+#ifdef S_ISLNK
+  else if (S_ISLNK (hstat.st_mode))
+    {
+      int size;
+      char *buf = alloca (PATH_MAX + 1);
+
+      /* Read at most PATH_MAX bytes, so that the terminating null
+         stored below still fits in the PATH_MAX + 1 byte buffer.  */
+      size = readlink (p, buf, PATH_MAX);
+      if (size < 0)
+        goto badperror;
+      buf[size] = '\0';
+      if (size >= NAMSIZ)
+        write_long (buf, LF_LONGLINK);
+      current_link_name = buf;
+
+      hstat.st_size = 0;        /* Force 0 size on symlink */
+      header = start_header (p, &hstat);
+      if (header == NULL)
+        {
+          critical_error = 1;
+          goto badfile;
+        }
+      strncpy (header->header.arch_linkname, buf, NAMSIZ);
+      header->header.arch_linkname[NAMSIZ - 1] = '\0';
+      header->header.linkflag = LF_SYMLINK;
+      finish_header (header);   /* Nothing more to do to it */
+      if (f_remove_files)
+        {
+          if (unlink (p) == -1)
+            msg_perror ("cannot remove %s", p);
+        }
+      return;
+    }
+#endif
+
+  else if (S_ISDIR (hstat.st_mode))
+    {
+      register DIR *dirp;
+      register struct dirent *d;
+      char *namebuf;
+      int buflen;
+      register int len;
+      int our_device = hstat.st_dev;
+
+      /* Build new prototype name */
+      len = strlen (p);
+      buflen = len + NAMSIZ;
+      namebuf = ck_malloc (buflen + 1);
+      strncpy (namebuf, p, buflen);
+      while (len >= 1 && '/' == namebuf[len - 1])
+        len--;                  /* Delete trailing slashes */
+      namebuf[len++] = '/';     /* Now add exactly one back */
+      namebuf[len] = '\0';      /* Make sure null-terminated */
+
+      /*
+       * Output directory header record with permissions
+       * FIXME, do this AFTER files, to avoid R/O dir problems?
+       * If old archive format, don't write record at all.
+       */
+      if (!f_oldarch)
+        {
+          hstat.st_size = 0;    /* Force 0 size on dir */
+          /*
+           * If people could really read standard archives,
+           * this should be:            (FIXME)
+           header = start_header(f_standard? p: namebuf, &hstat);
+           * but since they'd interpret LF_DIR records as
+           * regular files, we'd better put the / on the name.
+           */
+          header = start_header (namebuf, &hstat);
+          if (header == NULL)
+            {
+              free (namebuf);   /* Not needed on the error path */
+              critical_error = 1;
+              goto badfile;     /* eg name too long */
+            }
+
+          if (f_gnudump)
+            header->header.linkflag = LF_DUMPDIR;
+          else if (f_standard)
+            header->header.linkflag = LF_DIR;
+
+          /* If we're gnudumping, we aren't done yet so don't close it. */
+          if (!f_gnudump)
+            finish_header (header);     /* Done with directory header */
+        }
+
+      if (f_gnudump)
+        {
+          int sizeleft;
+          int totsize;
+          int bufsize;
+          union record *start;
+          int count;
+          char *buf, *p_buf;
+
+          buf = gnu_list_name->dir_contents;    /* FOO */
+          /* Add up the length of every null-terminated entry, plus
+             one for the extra null that ends the list.  */
+          totsize = 0;
+          for (p_buf = buf; p_buf && *p_buf;)
+            {
+              int tmp;
+
+              tmp = strlen (p_buf) + 1;
+              totsize += tmp;
+              p_buf += tmp;
+            }
+          totsize++;
+          to_oct ((long) totsize, 1 + 12, header->header.size);
+          finish_header (header);
+          p_buf = buf;
+          sizeleft = totsize;
+          while (sizeleft > 0)
+            {
+              if (f_multivol)
+                {
+                  save_name = p;
+                  save_sizeleft = sizeleft;
+                  save_totsize = totsize;
+                }
+              start = findrec ();
+              bufsize = endofrecs ()->charptr - start->charptr;
+              if (sizeleft < bufsize)
+                {
+                  bufsize = sizeleft;
+                  count = bufsize % RECORDSIZE;
+                  if (count)
+                    bzero (start->charptr + sizeleft, RECORDSIZE - count);
+                }
+              bcopy (p_buf, start->charptr, bufsize);
+              sizeleft -= bufsize;
+              p_buf += bufsize;
+              userec (start + (bufsize - 1) / RECORDSIZE);
+            }
+          if (f_multivol)
+            save_name = 0;
+          if (f_atime_preserve)
+            utime (p, &restore_times);
+          free (namebuf);       /* Unused in this mode; don't leak it */
+          return;
+        }
+
+      /* Now output all the files in the directory */
+#if 0
+      if (f_dironly)
+        return;                 /* Unless the cmdline said not to */
+#endif
+      /*
+       * See if we are crossing from one file system to another,
+       * and avoid doing so if the user only wants to dump one file system.
+       */
+      if (f_local_filesys && !toplevel && curdev != hstat.st_dev)
+        {
+          if (f_verbose)
+            msg ("%s: is on a different filesystem; not dumped", p);
+          free (namebuf);
+          return;
+        }
+
+
+      errno = 0;
+      dirp = opendir (p);
+      if (!dirp)
+        {
+          if (errno)
+            {
+              msg_perror ("can't open directory %s", p);
+            }
+          else
+            {
+              msg ("error opening directory %s",
+                   p);
+            }
+          free (namebuf);
+          return;
+        }
+
+      /* Hack to remove "./" from the front of all the file names */
+      if (len == 2 && namebuf[0] == '.' && namebuf[1] == '/')
+        len = 0;
+
+      /* Should speed this up by cd-ing into the dir, FIXME */
+      while (NULL != (d = readdir (dirp)))
+        {
+          /* Skip . and .. */
+          if (is_dot_or_dotdot (d->d_name))
+            continue;
+
+          /* Grow the buffer when prefix plus entry name won't fit.  */
+          if (NLENGTH (d) + len >= buflen)
+            {
+              buflen = len + NLENGTH (d);
+              namebuf = ck_realloc (namebuf, buflen + 1);
+              /* namebuf[len]='\0';
+                 msg("file name %s%s too long",
+                 namebuf, d->d_name);
+                 continue; */
+            }
+          strcpy (namebuf + len, d->d_name);
+          if (f_exclude && check_exclude (namebuf))
+            continue;
+          dump_file (namebuf, our_device, 0);
+        }
+
+      closedir (dirp);
+      free (namebuf);
+      if (f_atime_preserve)
+        utime (p, &restore_times);
+      return;
+    }
+
+#ifdef S_ISCHR
+  else if (S_ISCHR (hstat.st_mode))
+    {
+      type = LF_CHR;
+    }
+#endif
+
+#ifdef S_ISBLK
+  else if (S_ISBLK (hstat.st_mode))
+    {
+      type = LF_BLK;
+    }
+#endif
+
+  /* Avoid screwy apollo lossage where S_IFIFO == S_IFSOCK */
+#if (_ISP__M68K == 0) && (_ISP__A88K == 0) && defined(S_ISFIFO)
+  else if (S_ISFIFO (hstat.st_mode))
+    {
+      type = LF_FIFO;
+    }
+#endif
+
+#ifdef S_ISSOCK
+  else if (S_ISSOCK (hstat.st_mode))
+    {
+      type = LF_FIFO;
+    }
+#endif
+  else
+    goto unknown;
+
+  /* Special files are only written when f_standard is set.  */
+  if (!f_standard)
+    goto unknown;
+
+  hstat.st_size = 0;            /* Force 0 size */
+  header = start_header (p, &hstat);
+  if (header == NULL)
+    {
+      critical_error = 1;
+      goto badfile;             /* eg name too long */
+    }
+
+  header->header.linkflag = type;
+#if defined(S_IFBLK) || defined(S_IFCHR)
+  if (type != LF_FIFO)
+    {
+      to_oct ((long) major (hstat.st_rdev), 8,
+              header->header.devmajor);
+      to_oct ((long) minor (hstat.st_rdev), 8,
+              header->header.devminor);
+    }
+#endif
+
+  finish_header (header);
+  if (f_remove_files)
+    {
+      if (unlink (p) == -1)
+        msg_perror ("cannot remove %s", p);
+    }
+  return;
+
+unknown:
+  msg ("%s: Unknown file type; file ignored.", p);
+}
+
+/*
+ * Copy the data parts of a sparse file, as listed in the global
+ * sparsearray, from descriptor FD into the archive.  *SIZELEFT is the
+ * number of bytes still to be written and is decreased as data goes
+ * out; FULLSIZE and NAME are only used in messages.
+ * Returns 0 when done (sparsearray is freed in that case), or 1 when
+ * reading failed or the file ended early.
+ */
+int
+finish_sparse_file (fd, sizeleft, fullsize, name)
+     int fd;
+     long *sizeleft, fullsize;
+     char *name;
+{
+  union record *start;
+  char tempbuf[RECORDSIZE];
+  int bufsize, sparse_ind = 0, count;
+
+  while (*sizeleft > 0)
+    {
+      start = findrec ();
+      bzero (start->charptr, RECORDSIZE);
+      bufsize = sparsearray[sparse_ind].numbytes;
+      if (!bufsize)
+        {                       /* we blew it, maybe */
+          msg ("Wrote %ld of %ld bytes to file %s",
+               fullsize - *sizeleft, fullsize, name);
+          break;
+        }
+      (void) lseek (fd, sparsearray[sparse_ind++].offset, 0);
+      /*
+       * If the number of bytes to be written here exceeds
+       * the size of the temporary buffer, do it in steps.
+       */
+      while (bufsize > RECORDSIZE)
+        {
+          /* store the data */
+          count = read (fd, start->charptr, RECORDSIZE);
+          if (count < 0)
+            {
+              msg_perror ("read error at byte %ld, reading %d bytes, in file %s",
+                          fullsize - *sizeleft, bufsize, name);
+              return 1;
+            }
+          if (count == 0)
+            {
+              /* End of file in the middle of a data chunk: bufsize
+                 would never shrink again, so give up here rather than
+                 loop forever.  */
+              msg ("file %s shrunk by %ld bytes, padding with zeros.",
+                   name, *sizeleft);
+              return 1;
+            }
+          bufsize -= count;
+          *sizeleft -= count;
+          userec (start);
+          start = findrec ();
+          bzero (start->charptr, RECORDSIZE);
+        }
+
+      /* The rest of the chunk fits in one record.  */
+      clear_buffer (tempbuf);
+      count = read (fd, tempbuf, bufsize);
+      /* Look at the result before the buffer is used for anything.  */
+      if (count < 0)
+        {
+          msg_perror ("read error at byte %ld, reading %d bytes, in file %s",
+                      fullsize - *sizeleft, bufsize, name);
+          return 1;
+        }
+      bcopy (tempbuf, start->charptr, RECORDSIZE);
+      *sizeleft -= count;
+      userec (start);
+
+    }
+  free (sparsearray);
+  return 0;
+
+}
+
+/* Allocate a fresh sparsearray with room for 10 entries and set the
+   offset and numbytes of every entry to zero.  */
+void
+init_sparsearray ()
+{
+  register struct sp_array *elt;
+  register struct sp_array *limit;
+
+  /* The scratch space always starts out 10 elements long.  */
+  sp_array_size = 10;
+  sparsearray = (struct sp_array *) ck_malloc (sp_array_size * sizeof (struct sp_array));
+
+  limit = sparsearray + sp_array_size;
+  for (elt = sparsearray; elt < limit; elt++)
+    {
+      elt->offset = 0;
+      elt->numbytes = 0;
+    }
+}
+
+
+
+/*
+ * Okay, we've got a sparse file on our hands -- now, what we need to do is
+ * make a pass through the file and carefully note where any data is, i.e.,
+ * we want to find how far into the file each instance of data is, and how
+ * many bytes are there. We store this information in the sparsearray,
+ * which will later be translated into header information. For now, we use
+ * the sparsearray as convenient storage.
+ *
+ * As a side note, this routine is a mess. If I could have found a cleaner
+ * way to do it, I would have. If anyone wants to find a nicer way to do
+ * this, feel free.
+ */
+
+/* There is little point in trimming small amounts of null data at the */
+/* head and tail of blocks -- it's ok if we only avoid dumping blocks */
+/* of complete null data */
+/*
+ * Scan the file NAME one RECORDSIZE buffer at a time and record each run
+ * of non-null records in the global sparsearray as an (offset, numbytes)
+ * pair.  HEADER only has its isextended flag cleared here; NULLS_AT_END
+ * is not used.
+ *
+ * Returns the index of the last sparsearray entry filled in, or 0 if the
+ * file cannot be opened (in which case sparsearray is not allocated).
+ * When the file does not end in data, a one-byte entry at the last byte
+ * of the file is appended.
+ */
+int
+deal_with_sparse (name, header, nulls_at_end)
+     char *name;
+     union record *header;
+     int nulls_at_end;
+{
+  long numbytes = 0;            /* length of the data run being measured */
+  long offset = 0;              /* file offset of the buffer just read */
+  int fd;
+  int sparse_ind = 0, cc;
+  char buf[RECORDSIZE];
+  int amidst_data = 0;          /* nonzero while inside a run of data */
+
+  header->header.isextended = 0;
+  /*
+   * Can't open the file -- this problem will be caught later on,
+   * so just return.
+   */
+  if ((fd = open (name, O_RDONLY)) < 0)
+    return 0;
+
+  init_sparsearray ();
+  clear_buffer (buf);
+
+  /*
+   * Stop at end of file (0) and also on a read error (-1).  Testing
+   * for "!= 0" let a failing read spin forever: -1 matched neither
+   * size test below and merely decremented offset on every pass.
+   */
+  while ((cc = read (fd, buf, sizeof buf)) > 0)
+    {
+      if (sparse_ind > sp_array_size - 1)
+        {
+          /*
+           * realloc the scratch area, since we've run out of room --
+           */
+          sparsearray = (struct sp_array *)
+            ck_realloc (sparsearray,
+                        2 * sp_array_size * (sizeof (struct sp_array)));
+          sp_array_size *= 2;
+        }
+
+      if (!zero_record (buf))
+        {
+          /* Data: extend the current run, or open a new one here. */
+          if (amidst_data)
+            numbytes += cc;
+          else
+            {
+              amidst_data = 1;
+              numbytes = cc;
+              sparsearray[sparse_ind].offset = offset;
+            }
+        }
+      else if (cc == sizeof buf && amidst_data)
+        {
+          /*
+           * A full record of nulls closes the current run.  A short
+           * (final) null read is left alone, as before; the run is
+           * closed after the loop instead.
+           */
+          sparsearray[sparse_ind++].numbytes = numbytes;
+          amidst_data = 0;
+        }
+      offset += cc;
+      clear_buffer (buf);
+    }
+
+  /*
+   * A run closed on the last pass can leave sparse_ind equal to
+   * sp_array_size, so make room before storing the final entry.
+   */
+  if (sparse_ind > sp_array_size - 1)
+    {
+      sparsearray = (struct sp_array *)
+        ck_realloc (sparsearray,
+                    2 * sp_array_size * (sizeof (struct sp_array)));
+      sp_array_size *= 2;
+    }
+
+  if (amidst_data)
+    sparsearray[sparse_ind++].numbytes = numbytes;
+  else
+    {
+      sparsearray[sparse_ind].offset = offset - 1;
+      sparsearray[sparse_ind++].numbytes = 1;
+    }
+  close (fd);
+
+  return sparse_ind - 1;
+}
+
+/*
+ * Overwrite the first RECORDSIZE bytes of BUF with NUL characters, so
+ * that data left over from an earlier read cannot be mistaken for file
+ * contents.
+ */
+void
+clear_buffer (buf)
+     char *buf;
+{
+  register char *cursor = buf;
+  register char *stop = buf + RECORDSIZE;
+
+  while (cursor < stop)
+    *cursor++ = '\0';
+}
+
+#if 0 /* I'm leaving this as a monument to Joy Kendall, who wrote it -mib */
+/*
+ * JK -
+ * This routine takes a character array, and tells where within that array
+ * the data can be found. It skips over any zeros, and sets the first
+ * non-zero point in the array to be the "start", and continues until it
+ * finds non-data again, which is marked as the "end." This routine is
+ * mainly for 1) seeing how far into a file we must lseek to data, given
+ * that we have a sparse file, and 2) determining the "real size" of the
+ * file, i.e., the number of bytes in the sparse file that are data, as
+ * opposed to the zeros we are trying to skip.
+ *
+ * This copy is compiled out by the surrounding #if 0.
+ * NOTE(review): the initial zero-skipping loop has no bound on i, so it
+ * looks like it would walk past the buffer for an all-zero record.  Also,
+ * the scanning loop only exits with i == RECORDSIZE, so the final
+ * "*to = save_to" branch appears unreachable.  Confirm both before ever
+ * re-enabling this.
+ */
+where_is_data (from, to, buffer)
+ int *from, *to;
+ char *buffer;
+{
+ register int i = 0;
+ register int save_to = *to;
+ int amidst_data = 0;
+
+
+ while (!buffer[i])
+ i++;
+ *from = i;
+
+ if (*from < 16) /* don't bother */
+ *from = 0;
+ /* keep going to make sure there isn't more real
+ data in this record */
+ while (i < RECORDSIZE)
+ {
+ if (!buffer[i])
+ {
+ if (amidst_data)
+ {
+ save_to = i;
+ amidst_data = 0;
+ }
+ i++;
+ }
+ else if (buffer[i])
+ {
+ if (!amidst_data)
+ amidst_data = 1;
+ i++;
+ }
+ }
+ if (i == RECORDSIZE)
+ *to = i;
+ else
+ *to = save_to;
+
+}
+
+#endif
+
+/* Note that this routine is only called if zero_record returned true */
+/* NOTE(review): both scans below stop only at a non-zero byte, so they */
+/* need a record that contains data; the note above presumably means */
+/* zero_record returned false (0) -- confirm.  On return *from is the */
+/* index of the first non-zero byte and *to is one past the last. */
+#if 0 /* But we actually don't need it at all. */
+where_is_data (from, to, buffer)
+ int *from, *to;
+ char *buffer;
+{
+ char *fp, *tp;
+
+ for (fp = buffer; !*fp; fp++)
+ ;
+ for (tp = buffer + RECORDSIZE - 1; !*tp; tp--)
+ ;
+ *from = fp - buffer;
+ *to = tp - buffer + 1;
+}
+
+#endif
+
+
+
+/*
+ * Report whether a record holds nothing but NUL bytes.  Returns 1 when
+ * all RECORDSIZE bytes of BUFFER are zero, and 0 as soon as a non-zero
+ * byte (i.e., useful data) is encountered.
+ */
+int
+zero_record (buffer)
+     char *buffer;
+{
+  register char *scan = buffer;
+  register int remaining = RECORDSIZE;
+
+  while (remaining-- > 0)
+    {
+      if (*scan++ != '\000')
+        return 0;
+    }
+  return 1;
+}
+
+/*
+ * Store in *FILESIZE the sum of the numbytes fields of the global
+ * sparsearray, taking entries 0 through HIGHEST_INDEX and stopping early
+ * at the first entry whose numbytes is zero.
+ */
+void
+find_new_file_size (filesize, highest_index)
+     int *filesize;
+     int highest_index;
+{
+  register int i;
+
+  *filesize = 0;
+  /*
+   * Test the index bound first, so that sparsearray[highest_index + 1],
+   * which may lie past the end of the table, is never read.
+   */
+  for (i = 0; i <= highest_index && sparsearray[i].numbytes; i++)
+    *filesize += sparsearray[i].numbytes;
+}
+
+/*
+ * Make a header block for the file NAME whose stat info is ST and return
+ * a pointer to it.
+ *
+ * A name of NAMSIZ or more characters is first written out in full with
+ * write_long (); the name stored in the header itself is cut to
+ * NAMSIZ - 1 characters.  Unless f_absolute_paths is set, leading
+ * slashes (and, on MS-DOS, a drive spec) are stripped, with a warning
+ * the first time.  current_file_name is left pointing at the resulting
+ * name.
+ */
+union record *
+start_header (name, st)
+     char *name;
+     register struct stat *st;
+{
+  register union record *header;
+  long mode_bits;
+
+  /* The long-name record has to go out ahead of this header. */
+  if (strlen (name) >= NAMSIZ)
+    write_long (name, LF_LONGNAME);
+
+  header = (union record *) findrec ();
+  bzero (header->charptr, sizeof (*header));    /* XXX speed up */
+
+  /*
+   * Check the file name and put it in the record.
+   */
+  if (!f_absolute_paths)
+    {
+      static int warned_once = 0;
+#ifdef __MSDOS__
+      if (name[1] == ':')
+        {
+          name += 2;
+          if (!warned_once++)
+            msg ("Removing drive spec from names in the archive");
+        }
+#endif
+      /* Force relative path */
+      for (; *name == '/'; name++)
+        {
+          if (!warned_once++)
+            msg ("Removing leading / from absolute path names in the archive.");
+        }
+    }
+  current_file_name = name;
+  strncpy (header->header.arch_name, name, NAMSIZ);
+  header->header.arch_name[NAMSIZ - 1] = '\0';
+
+  /* Old-format archives record only the permission bits of the mode. */
+  if (f_oldarch)
+    mode_bits = (long) (st->st_mode & 07777);
+  else
+    mode_bits = (long) st->st_mode;
+  to_oct (mode_bits, 8, header->header.mode);
+  to_oct ((long) st->st_uid, 8, header->header.uid);
+  to_oct ((long) st->st_gid, 8, header->header.gid);
+  to_oct ((long) st->st_size, 1 + 12, header->header.size);
+  to_oct ((long) st->st_mtime, 1 + 12, header->header.mtime);
+  /* header->header.linkflag is left as null */
+  if (f_gnudump)
+    {
+      to_oct ((long) st->st_atime, 1 + 12, header->header.atime);
+      to_oct ((long) st->st_ctime, 1 + 12, header->header.ctime);
+    }
+
+#ifndef NONAMES
+  /* Fill in new Unix Standard fields if desired. */
+  if (f_standard)
+    {
+      header->header.linkflag = LF_NORMAL;      /* New default */
+      strcpy (header->header.magic, TMAGIC);    /* Mark as Unix Std */
+      finduname (header->header.uname, st->st_uid);
+      findgname (header->header.gname, st->st_gid);
+    }
+#endif
+  return header;
+}
+
+/*
+ * Complete a filled-in header block: compute and store its checksum and
+ * hand the record to userec ().  When f_verbose is set, the entry is also
+ * printed through print_header ().
+ */
+void
+finish_header (header)
+     register union record *header;
+{
+  register char *scan;
+  register char *stop;
+  register int sum = 0;
+
+  /* The sum is taken with the checksum field holding CHKBLANKS. */
+  bcopy (CHKBLANKS, header->header.chksum, sizeof (header->header.chksum));
+
+  /*
+   * Each byte is masked with 0xFF instead of being read through an
+   * unsigned char, because of old compilers, e.g. V7.
+   */
+  stop = header->charptr + sizeof (*header);
+  for (scan = header->charptr; scan < stop; scan++)
+    sum += *scan & 0xFF;
+
+  /*
+   * The checksum field is laid out differently from the others:
+   * [6] digits, a null, then a space -- rather than digits, a space,
+   * then a null.  to_oct writes the digits and a space; the space is
+   * then overwritten with the null.  The final space is already there,
+   * from checksumming, and to_oct doesn't modify it.
+   *
+   * This is a fast way to do:
+   *   (void) sprintf(header->header.chksum, "%6o", sum);
+   */
+  to_oct ((long) sum, 8, header->header.chksum);
+  header->header.chksum[6] = '\0';      /* Zap the space */
+
+  userec (header);
+
+  if (f_verbose)
+    {
+      extern union record *head;        /* Points to current tape header */
+      extern int head_standard;         /* Tape header is in ANSI format */
+
+      /* These globals are parameters to print_header, sigh */
+      /* hstat is already set up */
+      head_standard = f_standard;
+      head = header;
+      print_header ();
+    }
+}
+
+
+/*
+ * Quick and dirty octal conversion.
+ * Writes the long VALUE as octal digits into the DIGS-character field at
+ * WHERE.  The last character of the field is reserved for a null and the
+ * one before it receives a space, so DIGS == 3 means 1 digit, a space,
+ * and room for a null.  Unused leading positions are filled with spaces;
+ * high-order digits that do not fit are dropped.
+ *
+ * The trailing null is assumed to be there already and is not written.
+ * start_header and finish_header rely on that, so don't change it!
+ *
+ * This should be equivalent to:
+ *   (void) sprintf(where, "%*lo ", digs-2, value);
+ * except that sprintf fills in the trailing null and we don't.
+ */
+void
+to_oct (value, digs, where)
+     register long value;
+     register int digs;
+     register char *where;
+{
+  /* where[digs - 1] is the null slot, which is left alone. */
+  register int pos = digs - 2;
+
+  where[pos] = ' ';             /* Put in the space, though */
+
+  /* Produce the digits, low-order first -- at least one */
+  do
+    {
+      pos--;
+      where[pos] = (char) ('0' + (value & 7));  /* one octal digit */
+      value >>= 3;
+    }
+  while (pos > 0 && value != 0);
+
+  /* Leading spaces, if necessary */
+  while (pos > 0)
+    where[--pos] = ' ';
+}
+
+
+/*
+ * Write the EOT record(s).
+ * Everything from the next free record through the end of the block is
+ * zeroed, so at least one zero record is written.
+ * Old tar writes garbage after two zeroed records -- and PDtar used to.
+ */
+void
+write_eot ()
+{
+  union record *rec;
+  int nbytes;
+
+  rec = findrec ();
+  if (!rec)
+    return;
+
+  nbytes = endofrecs ()->charptr - rec->charptr;
+  bzero (rec->charptr, nbytes);
+  userec (rec);
+}
+
+/*
+ * Write a LF_LONGLINK or LF_LONGNAME record: a header named
+ * "././@LongLink" whose linkflag is TYPE and whose size is the length of
+ * P including its terminating null, followed by the bytes of P itself.
+ */
+void
+write_long (p, type)
+     char *p;
+     char type;
+{
+  int remaining = strlen (p) + 1;
+  int room;
+  union record *rec;
+  struct stat foo;
+
+  /* Only st_size is meaningful; every other field goes out as zero. */
+  bzero (&foo, sizeof foo);
+  foo.st_size = remaining;
+
+  rec = start_header ("././@LongLink", &foo);
+  rec->header.linkflag = type;
+  finish_header (rec);
+
+  /* Fill whole buffers for as long as the name does not fit in one. */
+  for (;;)
+    {
+      rec = findrec ();
+      room = endofrecs ()->charptr - rec->charptr;
+      if (room >= remaining)
+        break;
+
+      bcopy (p, rec->charptr, room);
+      p += room;
+      remaining -= room;
+      userec (rec + (room - 1) / RECORDSIZE);
+    }
+
+  /* Copy the tail, zero the rest of the buffer, and mark as used only
+     the records that the tail occupies.  */
+  bcopy (p, rec->charptr, remaining);
+  bzero (rec->charptr + remaining, room - remaining);
+  userec (rec + (remaining - 1) / RECORDSIZE);
+}
diff --git a/gnu/usr.bin/tar/diffarch.c b/gnu/usr.bin/tar/diffarch.c
new file mode 100644
index 000000000000..ce47d9d6cab5
--- /dev/null
+++ b/gnu/usr.bin/tar/diffarch.c
@@ -0,0 +1,759 @@
+/* Diff files from a tar archive.
+ Copyright (C) 1988, 1992, 1993 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * Diff files from a tar archive.
+ *
+ * Written 30 April 1987 by John Gilmore, ihnp4!hoptoad!gnu.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#ifndef STDC_HEADERS
+extern int errno;
+#endif
+#include <sys/types.h>
+
+#ifdef BSD42
+#include <sys/file.h>
+#else
+#ifndef V7
+#include <fcntl.h>
+#endif
+#endif
+
+#ifdef HAVE_SYS_MTIO_H
+#include <sys/ioctl.h>
+#include <sys/mtio.h>
+#endif
+
+#include "tar.h"
+#include "port.h"
+#include "rmt.h"
+
+#ifndef S_ISLNK
+#define lstat stat
+#endif
+
+extern void *valloc ();
+
+extern union record *head; /* Points to current tape header */
+extern struct stat hstat; /* Stat struct corresponding */
+extern int head_standard; /* Tape header is in ANSI format */
+
+void decode_header ();
+void diff_sparse_files ();
+void fill_in_sparse_array ();
+void fl_read ();
+long from_oct ();
+int do_stat ();
+extern void print_header ();
+int read_header ();
+void saverec ();
+void sigh ();
+extern void skip_file ();
+extern void skip_extended_headers ();
+int wantbytes ();
+
+extern FILE *msg_file;
+
+int now_verifying = 0; /* Are we verifying at the moment? */
+
+int diff_fd; /* Descriptor of file we're diffing */
+
+char *diff_buf = 0; /* Pointer to area for reading
+ file contents into */
+
+char *diff_dir; /* Directory contents for LF_DUMPDIR */
+
+int different = 0; /* Count of differences found so far; bumped by
+ the comparison code in this file */
+
+/*struct sp_array *sparsearray;
+int sp_ar_size = 10;*/
+/*
+ * Initialize for a diff operation: allocate diff_buf, blocksize bytes
+ * long, with valloc ().  If the allocation fails, a message is issued
+ * and the program exits with EX_ARGSBAD.
+ */
+void
+diff_init ()
+{
+  /*NOSTRICT*/
+  diff_buf = (char *) valloc ((unsigned) blocksize);
+  if (diff_buf)
+    return;
+
+  msg ("could not allocate memory for diff buffer of %d bytes",
+       blocksize);
+  exit (EX_ARGSBAD);
+}
+
+/*
+ * Diff a file against the archive.
+ *
+ * Decodes the header that `head' points at and compares that archive
+ * member with the file system object named current_file_name, in the
+ * way its linkflag calls for.  Differences are reported on msg_file.
+ * Most of them also bump the global `different'; the ones reported
+ * through sigh () do not, except where noted below.
+ *
+ * Every path that gives up on a member with data must still consume
+ * that data (skip_file, preceded by skip_extended_headers when the
+ * header is extended).
+ */
+void
+diff_archive ()
+{
+  register char *data;
+  int check, namelen;
+  int err;
+  long offset;
+  struct stat filestat;
+  int compare_chunk ();
+  int compare_dir ();
+  int no_op ();
+#ifndef __MSDOS__
+  dev_t dev;
+  ino_t ino;
+#endif
+  char *get_dir_contents ();
+  long from_oct ();
+
+  errno = EPIPE;                /* FIXME, remove perrors */
+
+  saverec (&head);              /* Make sure it sticks around */
+  userec (head);                /* And go past it in the archive */
+  decode_header (head, &hstat, &head_standard, 1);      /* Snarf fields */
+
+  /* Print the record from 'head' and 'hstat' */
+  if (f_verbose)
+    {
+      if (now_verifying)
+        fprintf (msg_file, "Verify ");
+      print_header ();
+    }
+
+  switch (head->header.linkflag)
+    {
+
+    default:
+      msg ("Unknown file type '%c' for %s, diffed as normal file",
+           head->header.linkflag, current_file_name);
+      /* FALL THRU */
+
+    case LF_OLDNORMAL:
+    case LF_NORMAL:
+    case LF_SPARSE:
+    case LF_CONTIG:
+      /*
+       * Appears to be a file.
+       * See if it's really a directory.
+       */
+      namelen = strlen (current_file_name) - 1;
+      if (current_file_name[namelen] == '/')
+        goto really_dir;
+
+
+      if (do_stat (&filestat))
+        {
+          if (head->header.isextended)
+            skip_extended_headers ();
+          skip_file ((long) hstat.st_size);
+          different++;
+          goto quit;
+        }
+
+      if (!S_ISREG (filestat.st_mode))
+        {
+          fprintf (msg_file, "%s: not a regular file\n",
+                   current_file_name);
+          /* Extended headers precede the data, as on the other
+             skip paths of this case.  */
+          if (head->header.isextended)
+            skip_extended_headers ();
+          skip_file ((long) hstat.st_size);
+          different++;
+          goto quit;
+        }
+
+      filestat.st_mode &= 07777;
+      if (filestat.st_mode != hstat.st_mode)
+        sigh ("mode");
+      if (filestat.st_uid != hstat.st_uid)
+        sigh ("uid");
+      if (filestat.st_gid != hstat.st_gid)
+        sigh ("gid");
+      if (filestat.st_mtime != hstat.st_mtime)
+        sigh ("mod time");
+      if (head->header.linkflag != LF_SPARSE &&
+          filestat.st_size != hstat.st_size)
+        {
+          sigh ("size");
+          skip_file ((long) hstat.st_size);
+          goto quit;
+        }
+
+      diff_fd = open (current_file_name, O_NDELAY | O_RDONLY | O_BINARY);
+
+      if (diff_fd < 0 && !f_absolute_paths)
+        {
+          /*
+           * Retry with a leading slash.  The scratch name is sized
+           * from the actual name, since a fixed NAMSIZ + 2 buffer
+           * overflows when the name is longer than NAMSIZ.
+           */
+          char *tmpbuf;
+          int saved_errno;
+
+          tmpbuf = (char *) ck_malloc (strlen (current_file_name) + 2);
+          tmpbuf[0] = '/';
+          strcpy (&tmpbuf[1], current_file_name);
+          diff_fd = open (tmpbuf, O_NDELAY | O_RDONLY);
+          /* Keep open's errno for the msg_perror below. */
+          saved_errno = errno;
+          free (tmpbuf);
+          errno = saved_errno;
+        }
+      if (diff_fd < 0)
+        {
+          msg_perror ("cannot open %s", current_file_name);
+          if (head->header.isextended)
+            skip_extended_headers ();
+          skip_file ((long) hstat.st_size);
+          different++;
+          goto quit;
+        }
+      /*
+       * Need to treat sparse files completely differently here.
+       */
+      if (head->header.linkflag == LF_SPARSE)
+        diff_sparse_files (hstat.st_size);
+      else
+        wantbytes ((long) (hstat.st_size), compare_chunk);
+
+      check = close (diff_fd);
+      if (check < 0)
+        msg_perror ("Error while closing %s", current_file_name);
+
+    quit:
+      break;
+
+#ifndef __MSDOS__
+    case LF_LINK:
+      if (do_stat (&filestat))
+        break;
+      dev = filestat.st_dev;
+      ino = filestat.st_ino;
+      err = stat (current_link_name, &filestat);
+      if (err < 0)
+        {
+          if (errno == ENOENT)
+            {
+              fprintf (msg_file, "%s: does not exist\n", current_file_name);
+            }
+          else
+            {
+              msg_perror ("cannot stat file %s", current_file_name);
+            }
+          different++;
+          break;
+        }
+      if (filestat.st_dev != dev || filestat.st_ino != ino)
+        {
+          fprintf (msg_file, "%s not linked to %s\n", current_file_name, current_link_name);
+          /* Count it, like the other mismatches in this case. */
+          different++;
+          break;
+        }
+      break;
+#endif
+
+#ifdef S_ISLNK
+    case LF_SYMLINK:
+      {
+        char linkbuf[NAMSIZ + 3];
+        check = readlink (current_file_name, linkbuf,
+                          (sizeof linkbuf) - 1);
+
+        if (check < 0)
+          {
+            if (errno == ENOENT)
+              {
+                fprintf (msg_file,
+                         "%s: no such file or directory\n",
+                         current_file_name);
+              }
+            else
+              {
+                msg_perror ("cannot read link %s", current_file_name);
+              }
+            different++;
+            break;
+          }
+
+        linkbuf[check] = '\0';  /* Null-terminate it */
+        /* NOTE(review): only the first `check' characters are compared,
+           so a link whose target is a proper prefix of the archived
+           name looks identical -- confirm whether that is intended.  */
+        if (strncmp (current_link_name, linkbuf, check) != 0)
+          {
+            fprintf (msg_file, "%s: symlink differs\n",
+                     current_link_name);
+            different++;
+          }
+      }
+      break;
+#endif
+
+#ifdef S_IFCHR
+    case LF_CHR:
+      hstat.st_mode |= S_IFCHR;
+      goto check_node;
+#endif
+
+#ifdef S_IFBLK
+      /* If local system doesn't support block devices, use default case */
+    case LF_BLK:
+      hstat.st_mode |= S_IFBLK;
+      goto check_node;
+#endif
+
+#ifdef S_ISFIFO
+      /* If local system doesn't support FIFOs, use default case */
+    case LF_FIFO:
+#ifdef S_IFIFO
+      hstat.st_mode |= S_IFIFO;
+#endif
+      hstat.st_rdev = 0;        /* FIXME, do we need this? */
+      goto check_node;
+#endif
+
+    check_node:
+      /* FIXME, deal with umask */
+      if (do_stat (&filestat))
+        break;
+      if (hstat.st_rdev != filestat.st_rdev)
+        {
+          fprintf (msg_file, "%s: device numbers changed\n", current_file_name);
+          different++;
+          break;
+        }
+#ifdef S_IFMT
+      if (hstat.st_mode != filestat.st_mode)
+#else /* POSIX lossage */
+      if ((hstat.st_mode & 07777) != (filestat.st_mode & 07777))
+#endif
+        {
+          fprintf (msg_file, "%s: mode or device-type changed\n", current_file_name);
+          different++;
+          break;
+        }
+      break;
+
+    case LF_DUMPDIR:
+      data = diff_dir = get_dir_contents (current_file_name, 0);
+      if (data)
+        {
+          wantbytes ((long) (hstat.st_size), compare_dir);
+          free (data);
+        }
+      else
+        wantbytes ((long) (hstat.st_size), no_op);
+      /* FALL THROUGH */
+
+    case LF_DIR:
+      /* Check for trailing / */
+      namelen = strlen (current_file_name) - 1;
+    really_dir:
+      while (namelen && current_file_name[namelen] == '/')
+        current_file_name[namelen--] = '\0';    /* Zap / */
+
+      if (do_stat (&filestat))
+        break;
+      if (!S_ISDIR (filestat.st_mode))
+        {
+          fprintf (msg_file, "%s is no longer a directory\n", current_file_name);
+          different++;
+          break;
+        }
+      if ((filestat.st_mode & 07777) != (hstat.st_mode & 07777))
+        sigh ("mode");
+      break;
+
+    case LF_VOLHDR:
+      break;
+
+    case LF_MULTIVOL:
+      namelen = strlen (current_file_name) - 1;
+      if (current_file_name[namelen] == '/')
+        goto really_dir;
+
+      if (do_stat (&filestat))
+        break;
+
+      if (!S_ISREG (filestat.st_mode))
+        {
+          fprintf (msg_file, "%s: not a regular file\n",
+                   current_file_name);
+          skip_file ((long) hstat.st_size);
+          different++;
+          break;
+        }
+
+      filestat.st_mode &= 07777;
+      offset = from_oct (1 + 12, head->header.offset);
+      if (filestat.st_size != hstat.st_size + offset)
+        {
+          sigh ("size");
+          skip_file ((long) hstat.st_size);
+          different++;
+          break;
+        }
+
+      diff_fd = open (current_file_name, O_NDELAY | O_RDONLY | O_BINARY);
+
+      if (diff_fd < 0)
+        {
+          msg_perror ("cannot open file %s", current_file_name);
+          skip_file ((long) hstat.st_size);
+          different++;
+          break;
+        }
+      err = lseek (diff_fd, offset, 0);
+      if (err != offset)
+        {
+          msg_perror ("cannot seek to %ld in file %s", offset, current_file_name);
+          /* Don't leak the descriptor, and consume the member's data
+             as the other failure paths of this case do.  */
+          close (diff_fd);
+          skip_file ((long) hstat.st_size);
+          different++;
+          break;
+        }
+
+      wantbytes ((long) (hstat.st_size), compare_chunk);
+
+      check = close (diff_fd);
+      if (check < 0)
+        {
+          msg_perror ("Error while closing %s", current_file_name);
+        }
+      break;
+
+    }
+
+  /* We don't need to save it any longer. */
+  saverec ((union record **) 0);        /* Unsave it */
+}
+
+/*
+ * Compare BYTES bytes of archive data at BUFFER with the next BYTES
+ * bytes read from diff_fd (read into diff_buf).
+ *
+ * Returns 0 when they are identical.  On a read error, a short read, or
+ * a mismatch, a message is issued, `different' is incremented, and -1
+ * is returned.
+ */
+int
+compare_chunk (bytes, buffer)
+     long bytes;
+     char *buffer;
+{
+  int err;
+
+  err = read (diff_fd, diff_buf, bytes);
+  if (err != bytes)
+    {
+      if (err < 0)
+        {
+          msg_perror ("can't read %s", current_file_name);
+        }
+      else
+        {
+          /* BYTES is a long, so it must be printed with %ld. */
+          fprintf (msg_file, "%s: could only read %d of %ld bytes\n", current_file_name, err, bytes);
+        }
+      different++;
+      return -1;
+    }
+  if (bcmp (buffer, diff_buf, bytes))
+    {
+      fprintf (msg_file, "%s: data differs\n", current_file_name);
+      different++;
+      return -1;
+    }
+  return 0;
+}
+
+/*
+ * Compare BYTES bytes of archive data at BUFFER with the directory
+ * contents at diff_dir.  On a match, diff_dir is advanced past the
+ * compared bytes and 0 is returned; otherwise a message is issued,
+ * `different' is incremented, and -1 is returned.
+ */
+int
+compare_dir (bytes, buffer)
+     long bytes;
+     char *buffer;
+{
+  if (bcmp (buffer, diff_dir, bytes) == 0)
+    {
+      diff_dir += bytes;
+      return 0;
+    }
+
+  fprintf (msg_file, "%s: data differs\n", current_file_name);
+  different++;
+  return -1;
+}
+
+/*
+ * Sigh about something that differs.
+ *
+ * Prints "<current_file_name>: <what> differs" on msg_file.  It only
+ * reports: the `different' counter is not touched here.
+ */
+void
+sigh (what)
+ char *what;
+{
+
+ fprintf (msg_file, "%s: %s differs\n",
+ current_file_name, what);
+}
+
+/*
+ * Verify the archive: reposition it, then read each header in turn and
+ * diff the member it describes with diff_archive ().
+ *
+ * ar_reading and now_verifying are set for the duration of the pass and
+ * cleared afterwards.  diff_buf is allocated first if it has not been.
+ */
+void
+verify_volume ()
+{
+ int status;
+#ifdef MTIOCTOP
+ struct mtop t;
+ int er;
+#endif
+
+ if (!diff_buf)
+ diff_init ();
+#ifdef MTIOCTOP
+ /* Try a tape "backspace file" first.  If it fails with EIO it is
+ tried once more; if it fails otherwise, or fails again, fall back
+ to the rmtlseek below. */
+ t.mt_op = MTBSF;
+ t.mt_count = 1;
+ if ((er = rmtioctl (archive, MTIOCTOP, &t)) < 0)
+ {
+ if (errno != EIO || (er = rmtioctl (archive, MTIOCTOP, &t)) < 0)
+ {
+#endif
+ if (rmtlseek (archive, 0L, 0) != 0)
+ {
+ /* Lseek failed. Try a different method */
+ /* NOTE(review): no other method is attempted here; the
+ verify pass is simply abandoned. */
+ msg_perror ("Couldn't rewind archive file for verify");
+ return;
+ }
+#ifdef MTIOCTOP
+ }
+ }
+#endif
+ ar_reading = 1;
+ now_verifying = 1;
+ fl_read ();
+ for (;;)
+ {
+ status = read_header ();
+ if (status == 0)
+ {
+ /* A status of 0 is counted as an invalid header: keep reading
+ until something else comes back, then report how many. */
+ unsigned n;
+
+ n = 0;
+ do
+ {
+ n++;
+ status = read_header ();
+ }
+ while (status == 0);
+ msg ("VERIFY FAILURE: %d invalid header%s detected!", n, n == 1 ? "" : "s");
+ }
+ /* 2 and EOF both end the pass (presumably end-of-archive marker
+ and physical end of file -- confirm against read_header). */
+ if (status == 2 || status == EOF)
+ break;
+ diff_archive ();
+ }
+ ar_reading = 0;
+ now_verifying = 0;
+
+}
+
+/*
+ * Stat current_file_name into *STATP, using stat () when f_follow_links
+ * is set and lstat () otherwise.  Returns 0 on success.  On failure a
+ * message is issued and 1 is returned; skipping the member's data and
+ * counting the difference are left to the caller.
+ */
+int
+do_stat (statp)
+     struct stat *statp;
+{
+  int err;
+
+  if (f_follow_links)
+    err = stat (current_file_name, statp);
+  else
+    err = lstat (current_file_name, statp);
+
+  if (err >= 0)
+    return 0;
+
+  if (errno == ENOENT)
+    fprintf (msg_file, "%s: does not exist\n", current_file_name);
+  else
+    msg_perror ("can't stat file %s", current_file_name);
+  return 1;
+}
+
+/*
+ * JK
+ * Diff'ing a sparse file with its counterpart on the tar file is a
+ * bit of a different story than a normal file.  First, we must know
+ * what areas of the file to skip through, i.e., we need to construct
+ * a sparsearray, which will hold all the information we need.  We must
+ * compare small amounts of data at a time as we find it.
+ *
+ * FILESIZE is the number of data bytes the archive holds for the file.
+ * For each sparsearray entry we seek diff_fd to the entry's offset and
+ * compare the entry's bytes, one archive record at a time.  A read
+ * error, short read, or mismatch ends the comparison and increments
+ * `different'; "data differs" is printed only when this file's
+ * contents mismatched.  sparsearray is freed before returning.
+ */
+
+void
+diff_sparse_files (filesize)
+     int filesize;
+
+{
+  int sparse_ind = 0;
+  char *buf;
+  union record *datarec = 0;    /* stays null if no record is fetched */
+  int err;
+  long numbytes;
+  long chunk;
+  int size = filesize;
+  int data_differs = 0;         /* this file's contents mismatched */
+  int give_up = 0;              /* stop comparing this file */
+
+  /* One record is all that is ever compared at a time. */
+  buf = (char *) ck_malloc (RECORDSIZE * sizeof (char));
+
+  fill_in_sparse_array ();
+
+  while (size > 0 && !give_up)
+    {
+      datarec = findrec ();
+      if (!sparsearray[sparse_ind].numbytes)
+        break;
+
+      /*
+       * 'numbytes' is nicer to write than
+       * 'sparsearray[sparse_ind].numbytes' all the time ...
+       */
+      numbytes = sparsearray[sparse_ind].numbytes;
+
+      lseek (diff_fd, sparsearray[sparse_ind].offset, 0);
+
+      /* Full records first; the last piece may be shorter. */
+      while (numbytes > 0)
+        {
+          chunk = numbytes > RECORDSIZE ? RECORDSIZE : numbytes;
+
+          err = read (diff_fd, buf, chunk);
+          if (err != chunk)
+            {
+              if (err < 0)
+                msg_perror ("can't read %s", current_file_name);
+              else
+                fprintf (msg_file, "%s: could only read %d of %ld bytes\n",
+                         current_file_name, err, chunk);
+              /* Counted as a difference, as compare_chunk does. */
+              different++;
+              give_up = 1;
+              break;
+            }
+          if (bcmp (buf, datarec->charptr, chunk))
+            {
+              different++;
+              data_differs = 1;
+              give_up = 1;
+              break;
+            }
+          numbytes -= chunk;
+          size -= chunk;
+          userec (datarec);
+          if (numbytes > 0)
+            datarec = findrec ();
+        }
+      sparse_ind++;
+    }
+
+  /* As before, the last record looked at is marked used on the way
+     out -- but only if one was actually fetched.  */
+  if (datarec)
+    userec (datarec);
+  free (sparsearray);
+  free (buf);
+  /* Keyed on this file only; `different' is cumulative over the run. */
+  if (data_differs)
+    fprintf (msg_file, "%s: data differs\n", current_file_name);
+
+}
+
+/*
+ * JK
+ * This routine should be used more often than it is ... look into
+ * that.  Anyhow, what it does is translate the sparse information
+ * on the header, and in any subsequent extended headers, into an
+ * array of structures with true numbers, as opposed to character
+ * strings.  It simply makes our life much easier, doing so many
+ * comparisons and such.
+ *
+ * The result is left in the global sparsearray (sp_array_size entries
+ * long).  Entries not filled in from a header are zero, so a zero
+ * numbytes marks the end of the list.  Extended header records are
+ * consumed from the archive as they are read.
+ */
+void
+fill_in_sparse_array ()
+{
+  int ind;
+
+  /*
+   * allocate space for our scratch space; it's initially
+   * 10 elements long, but can change in this routine if
+   * necessary
+   */
+  sp_array_size = 10;
+  sparsearray = (struct sp_array *) ck_malloc (sp_array_size * sizeof (struct sp_array));
+  /* Clear it: readers stop at the first entry with numbytes == 0. */
+  for (ind = 0; ind < sp_array_size; ind++)
+    {
+      sparsearray[ind].offset = 0;
+      sparsearray[ind].numbytes = 0;
+    }
+
+  /*
+   * the header itself holds up to SPARSE_IN_HDR of these
+   * structures; read these in first
+   */
+  for (ind = 0; ind < SPARSE_IN_HDR; ind++)
+    {
+      if (!head->header.sp[ind].numbytes)
+        break;
+      sparsearray[ind].offset =
+        from_oct (1 + 12, head->header.sp[ind].offset);
+      sparsearray[ind].numbytes =
+        from_oct (1 + 12, head->header.sp[ind].numbytes);
+    }
+  /*
+   * if the header's extended, we gotta read in exhdr's till
+   * we're done
+   */
+  if (head->header.isextended)
+    {
+      /*
+       * how far into the sparsearray we are 'so far'.  This must
+       * start over for every file: as a static it kept its value
+       * from the previous extended file and indexed the fresh
+       * table at the wrong place.
+       */
+      int so_far_ind = SPARSE_IN_HDR;
+      union record *exhdr;
+      int fresh;
+
+      for (;;)
+        {
+          exhdr = findrec ();
+          for (ind = 0; ind < SPARSE_EXT_HDR; ind++)
+            {
+              if (ind + so_far_ind > sp_array_size - 1)
+                {
+                  /*
+                   * we just ran out of room in our
+                   * scratch area - realloc it
+                   */
+                  sparsearray = (struct sp_array *)
+                    ck_realloc (sparsearray,
+                                sp_array_size * 2 * sizeof (struct sp_array));
+                  /* keep the new half zeroed, too */
+                  for (fresh = sp_array_size; fresh < sp_array_size * 2; fresh++)
+                    {
+                      sparsearray[fresh].offset = 0;
+                      sparsearray[fresh].numbytes = 0;
+                    }
+                  sp_array_size *= 2;
+                }
+              /*
+               * convert the character strings into longs
+               */
+              sparsearray[ind + so_far_ind].offset =
+                from_oct (1 + 12, exhdr->ext_hdr.sp[ind].offset);
+              sparsearray[ind + so_far_ind].numbytes =
+                from_oct (1 + 12, exhdr->ext_hdr.sp[ind].numbytes);
+            }
+          /*
+           * if this is the last extended header for this
+           * file, we can stop
+           */
+          if (!exhdr->ext_hdr.isextended)
+            break;
+
+          so_far_ind += SPARSE_EXT_HDR;
+          userec (exhdr);
+        }
+      /* be sure to skip past the last one */
+      userec (exhdr);
+    }
+}
diff --git a/gnu/usr.bin/tar/extract.c b/gnu/usr.bin/tar/extract.c
new file mode 100644
index 000000000000..d162cab04ec6
--- /dev/null
+++ b/gnu/usr.bin/tar/extract.c
@@ -0,0 +1,907 @@
+/* Extract files from a tar archive.
+ Copyright (C) 1988, 1992, 1993 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * Extract files from a tar archive.
+ *
+ * Written 19 Nov 1985 by John Gilmore, ihnp4!hoptoad!gnu.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#ifndef STDC_HEADERS
+extern int errno;
+#endif
+#include <sys/types.h>
+#include <time.h>
+time_t time ();
+
+#ifdef BSD42
+#include <sys/file.h>
+#else
+#ifndef V7
+#include <fcntl.h>
+#endif
+#endif
+
+#ifdef NO_OPEN3
+/* We need the #define's even though we don't use them. */
+#include "open3.h"
+#endif
+
+#ifdef EMUL_OPEN3
+/* Simulated 3-argument open for systems that don't have it */
+#include "open3.h"
+#endif
+
+#include "tar.h"
+#include "port.h"
+
+#if defined(_POSIX_VERSION)
+#include <utime.h>
+#else
+struct utimbuf
+{
+ long actime;
+ long modtime;
+};
+
+#endif
+
+extern FILE *msg_file;
+
+extern union record *head; /* Points to current tape header */
+extern struct stat hstat; /* Stat struct corresponding */
+extern int head_standard; /* Tape header is in ANSI format */
+
+extern char *save_name;
+extern long save_totsize;
+extern long save_sizeleft;
+
+int confirm ();
+void decode_header ();
+void extract_mangle ();
+void extract_sparse_file ();
+long from_oct ();
+void gnu_restore ();
+extern void print_header ();
+extern void skip_file ();
+extern void skip_extended_headers ();
+extern void pr_mkdir ();
+void saverec ();
+
+int make_dirs (); /* Makes required directories */
+
+static time_t now = 0;		/* Current time */
+static int we_are_root = 0;	/* True if our effective uid == 0 */
+static int notumask = ~0;	/* Masks out bits user doesn't want */
+
+/*
+ * "Scratch" space to store the information about a sparse file before
+ * writing the info into the header or extended header
+ */
+/*struct sp_array *sparsearray;*/
+
+/* number of elts storable in the sparsearray */
+/*int sp_array_size = 10;*/
+
+/*
+ * One extracted directory whose access/modification times and mode
+ * are applied later by restore_saved_dir_info().  The times are kept
+ * as time_t, the type of the stat fields they are copied from, so that
+ * they are not narrowed on systems where time_t is wider than int.
+ */
+struct saved_dir_info
+{
+  char *path;			/* allocated copy of the directory name */
+  int mode;			/* mode taken from the archive header */
+  time_t atime;			/* access time taken from the archive header */
+  time_t mtime;			/* modification time taken from the archive header */
+  struct saved_dir_info *next;
+};
+
+/* Head of the list of saved directories; new entries are pushed at the front. */
+struct saved_dir_info *saved_dir_info_head;
+
+/*
+ * Prepare for extraction: remember whether we run with an effective
+ * uid of 0, note the time at which extraction starts, and work out
+ * which permission bits the umask lets through.
+ */
+void
+extr_init ()
+{
+  int saved_umask;
+
+  if (geteuid () == 0)
+    we_are_root = 1;
+  now = time ((time_t *) 0);
+
+  /* The umask can only be read by setting it, so clear it and keep
+     what comes back. */
+  saved_umask = umask (0);
+
+  /* With f_use_protection the kernel umask stays at 0 and "notumask"
+     keeps its value, so modes from the archive are used unaltered. */
+  if (f_use_protection)
+    return;
+
+  /* Otherwise put the umask back and let it override permissions. */
+  (void) umask (saved_umask);
+  notumask = ~saved_umask;
+}
+
+
+/*
+ * Extract a file from the archive.
+ *
+ * Works on the member whose header is in "head": the header is saved
+ * and decoded into "hstat", the user is asked for confirmation when
+ * f_confirm is set, and the member is then created according to its
+ * linkflag (regular or sparse file, hard link, symlink, device node,
+ * FIFO or directory).  For files, nodes and FIFOs the owner, times and
+ * mode are set right after creation; for directories the times and
+ * mode are queued on saved_dir_info_head instead, unless f_modified
+ * is set.
+ */
+void
+extract_archive ()
+{
+ register char *data;
+ int fd, check, namelen, written, openflag;
+ long size;
+ struct utimbuf acc_upd_times;
+ register int skipcrud;
+ register int i;
+ /* int sparse_ind = 0;*/
+ union record *exhdr;
+ struct saved_dir_info *tmp;
+ /* int end_nulls; */
+
+ saverec (&head); /* Make sure it sticks around */
+ userec (head); /* And go past it in the archive */
+ decode_header (head, &hstat, &head_standard, 1); /* Snarf fields */
+
+ if (f_confirm && !confirm ("extract", current_file_name))
+ {
+ if (head->header.isextended)
+ skip_extended_headers ();
+ skip_file ((long) hstat.st_size);
+ saverec ((union record **) 0);
+ return;
+ }
+
+ /* Print the record from 'head' and 'hstat' */
+ if (f_verbose)
+ print_header ();
+
+ /*
+ * Check for fully specified pathnames and other atrocities.
+ *
+ * Note, we can't just make a pointer to the new file name,
+ * since saverec() might move the header and adjust "head".
+ * We have to start from "head" every time we want to touch
+ * the header record.
+ */
+ skipcrud = 0;
+ while (!f_absolute_paths
+ && '/' == current_file_name[skipcrud])
+ {
+ static int warned_once = 0;
+
+ skipcrud++; /* Force relative path */
+ if (!warned_once++)
+ {
+ msg ("Removing leading / from absolute path names in the archive.");
+ }
+ }
+
+ switch (head->header.linkflag)
+ {
+
+ default:
+ msg ("Unknown file type '%c' for %s, extracted as normal file",
+ head->header.linkflag, skipcrud + current_file_name);
+ /* FALL THRU */
+ /*
+ * NOTE(review): the fall-through lands in the LF_SPARSE code, so
+ * an unknown type also has its header read as a sparse map and
+ * sparsearray allocated before it is extracted as a normal file --
+ * confirm that this is intended.
+ */
+
+ /*
+ * JK - What we want to do if the file is sparse is loop through
+ * the array of sparse structures in the header and read in
+ * and translate the character strings representing 1) the offset
+ * at which to write and 2) how many bytes to write into numbers,
+ * which we store into the scratch array, "sparsearray". This
+ * array makes our life easier the same way it did in creating
+ * the tar file that had to deal with a sparse file.
+ *
+ * After we read in the first five (at most) sparse structures,
+ * we check to see if the file has an extended header, i.e.,
+ * if more sparse structures are needed to describe the contents
+ * of the new file. If so, we read in the extended headers
+ * and continue to store their contents into the sparsearray.
+ */
+ case LF_SPARSE:
+ sp_array_size = 10;
+ sparsearray = (struct sp_array *) ck_malloc (sp_array_size * sizeof (struct sp_array));
+ for (i = 0; i < SPARSE_IN_HDR; i++)
+ {
+ sparsearray[i].offset =
+ from_oct (1 + 12, head->header.sp[i].offset);
+ sparsearray[i].numbytes =
+ from_oct (1 + 12, head->header.sp[i].numbytes);
+ if (!sparsearray[i].numbytes)
+ break;
+ }
+
+ /* end_nulls = from_oct(1+12, head->header.ending_blanks);*/
+
+ if (head->header.isextended)
+ {
+ /* read in the list of extended headers
+ and translate them into the sparsearray
+ as before */
+
+ /* static */ int ind = SPARSE_IN_HDR;
+
+ for (;;)
+ {
+
+ exhdr = findrec ();
+ for (i = 0; i < SPARSE_EXT_HDR; i++)
+ {
+
+ if (i + ind > sp_array_size - 1)
+ {
+ /*
+ * realloc the scratch area
+ * since we've run out of room --
+ */
+ sparsearray = (struct sp_array *)
+ ck_realloc (sparsearray,
+ 2 * sp_array_size * (sizeof (struct sp_array)));
+ sp_array_size *= 2;
+ }
+ /*
+ * NOTE(review): numbytes is passed to from_oct() as a
+ * string below, so this looks like a test of the field's
+ * address, not of the converted count as in the loop over
+ * the main header -- confirm against tar.h.
+ */
+ if (!exhdr->ext_hdr.sp[i].numbytes)
+ break;
+ sparsearray[i + ind].offset =
+ from_oct (1 + 12, exhdr->ext_hdr.sp[i].offset);
+ sparsearray[i + ind].numbytes =
+ from_oct (1 + 12, exhdr->ext_hdr.sp[i].numbytes);
+ }
+ if (!exhdr->ext_hdr.isextended)
+ break;
+ else
+ {
+ ind += SPARSE_EXT_HDR;
+ userec (exhdr);
+ }
+ }
+ userec (exhdr);
+ }
+
+ /* FALL THRU */
+ case LF_OLDNORMAL:
+ case LF_NORMAL:
+ case LF_CONTIG:
+ /*
+ * Appears to be a file.
+ * See if it's really a directory.
+ */
+ namelen = strlen (skipcrud + current_file_name) - 1;
+ if (current_file_name[skipcrud + namelen] == '/')
+ goto really_dir;
+
+ /* FIXME, deal with protection issues */
+ again_file:
+ openflag = (f_keep ?
+ O_BINARY | O_NDELAY | O_WRONLY | O_CREAT | O_EXCL :
+ O_BINARY | O_NDELAY | O_WRONLY | O_CREAT | O_TRUNC)
+ | ((head->header.linkflag == LF_SPARSE) ? 0 : O_APPEND);
+ /*
+ * JK - The last | is a kludge to solve the problem
+ * the O_APPEND flag causes with files we are
+ * trying to make sparse: when a file is opened
+ * with O_APPEND, it writes to the last place
+ * that something was written, thereby ignoring
+ * any lseeks that we have done. We add this
+ * extra condition to make it able to lseek when
+ * a file is sparse, i.e., we don't open the new
+ * file with this flag. (Grump -- this bug caused
+ * me to waste a good deal of time, I might add)
+ */
+
+ if (f_exstdout)
+ {
+ fd = 1;
+ goto extract_file;
+ }
+#ifdef O_CTG
+ /*
+ * Contiguous files (on the Masscomp) have to specify
+ * the size in the open call that creates them.
+ */
+ if (head->header.linkflag == LF_CONTIG)
+ fd = open ((longname ? longname : head->header.name)
+ + skipcrud,
+ openflag | O_CTG,
+ hstat.st_mode, hstat.st_size);
+ else
+#endif
+ {
+#ifdef NO_OPEN3
+ /*
+ * On raw V7 we won't let them specify -k (f_keep), but
+ * we just bull ahead and create the files.
+ */
+ fd = creat ((longname
+ ? longname
+ : head->header.name) + skipcrud,
+ hstat.st_mode);
+#else
+ /*
+ * With 3-arg open(), we can do this up right.
+ */
+ fd = open (skipcrud + current_file_name,
+ openflag, hstat.st_mode);
+#endif
+ }
+
+ if (fd < 0)
+ {
+ if (make_dirs (skipcrud + current_file_name))
+ goto again_file;
+ msg_perror ("Could not create file %s",
+ skipcrud + current_file_name);
+ if (head->header.isextended)
+ skip_extended_headers ();
+ skip_file ((long) hstat.st_size);
+ goto quit;
+ }
+
+ extract_file:
+ if (head->header.linkflag == LF_SPARSE)
+ {
+ char *name;
+ int namelen;
+
+ /*
+ * Kludge alert. NAME is assigned to header.name
+ * because during the extraction, the space that
+ * contains the header will get scribbled on, and
+ * the name will get munged, so any error messages
+ * that happen to contain the filename will look
+ * REAL interesting unless we do this.
+ */
+ namelen = strlen (skipcrud + current_file_name) + 1;
+ name = (char *) ck_malloc ((sizeof (char)) * namelen);
+ bcopy (skipcrud + current_file_name, name, namelen);
+ size = hstat.st_size;
+ extract_sparse_file (fd, &size, hstat.st_size, name);
+ /*
+ * The copy was only there for the messages printed during
+ * the extraction; release it instead of leaking one buffer
+ * per sparse file.
+ */
+ free (name);
+ }
+ else
+ for (size = hstat.st_size;
+ size > 0;
+ size -= written)
+ {
+
+ /* long offset,
+ numbytes;*/
+
+ if (f_multivol)
+ {
+ save_name = current_file_name;
+ save_totsize = hstat.st_size;
+ save_sizeleft = size;
+ }
+
+ /*
+ * Locate data, determine max length
+ * writeable, write it, record that
+ * we have used the data, then check
+ * if the write worked.
+ */
+ data = findrec ()->charptr;
+ if (data == NULL)
+ { /* Check it... */
+ msg ("Unexpected EOF on archive file");
+ break;
+ }
+ /*
+ * JK - If the file is sparse, use the sparsearray
+ * that we created before to lseek into the new
+ * file the proper amount, and to see how many
+ * bytes we want to write at that position.
+ */
+ /* if (head->header.linkflag == LF_SPARSE) {
+ off_t pos;
+
+ pos = lseek(fd, (off_t) sparsearray[sparse_ind].offset, 0);
+ printf("%d at %d\n", (int) pos, sparse_ind);
+ written = sparsearray[sparse_ind++].numbytes;
+ } else*/
+ written = endofrecs ()->charptr - data;
+ if (written > size)
+ written = size;
+ errno = 0;
+ check = write (fd, data, written);
+ /*
+ * The following is in violation of strict
+ * typing, since the arg to userec
+ * should be a struct rec *. FIXME.
+ */
+ userec ((union record *) (data + written - 1));
+ if (check == written)
+ continue;
+ /*
+ * Error in writing to file.
+ * Print it, skip to next file in archive.
+ */
+ if (check < 0)
+ msg_perror ("couldn't write to file %s",
+ skipcrud + current_file_name);
+ else
+ msg ("could only write %d of %d bytes to file %s",
+ check, written, skipcrud + current_file_name);
+ skip_file ((long) (size - written));
+ break; /* Still do the close, mod time, chmod, etc */
+ }
+
+ if (f_multivol)
+ save_name = 0;
+
+ /* If writing to stdout, don't try to do anything
+ to the filename; it doesn't exist, or we don't
+ want to touch it anyway */
+ if (f_exstdout)
+ break;
+
+ /* if (head->header.isextended) {
+ register union record *exhdr;
+ register int i;
+
+ for (i = 0; i < 21; i++) {
+ long offset;
+
+ if (!exhdr->ext_hdr.sp[i].numbytes)
+ break;
+ offset = from_oct(1+12,
+ exhdr->ext_hdr.sp[i].offset);
+ written = from_oct(1+12,
+ exhdr->ext_hdr.sp[i].numbytes);
+ lseek(fd, offset, 0);
+ check = write(fd, data, written);
+ if (check == written) continue;
+
+ }
+
+
+ }*/
+ check = close (fd);
+ if (check < 0)
+ {
+ msg_perror ("Error while closing %s",
+ skipcrud + current_file_name);
+ }
+
+
+ set_filestat:
+
+ /*
+ * If we are root, set the owner and group of the extracted
+ * file. This does what is wanted both on real Unix and on
+ * System V. If we are running as a user, we extract as that
+ * user; if running as root, we extract as the original owner.
+ */
+ if (we_are_root || f_do_chown)
+ {
+ if (chown (skipcrud + current_file_name,
+ hstat.st_uid, hstat.st_gid) < 0)
+ {
+ msg_perror ("cannot chown file %s to uid %d gid %d",
+ skipcrud + current_file_name,
+ hstat.st_uid, hstat.st_gid);
+ }
+ }
+
+ /*
+ * Set the modified time of the file.
+ *
+ * Note that we set the accessed time to "now", which
+ * is really "the time we started extracting files".
+ * unless f_gnudump is used, in which case .st_atime is used
+ */
+ if (!f_modified)
+ {
+ /* fixme if f_gnudump should set ctime too, but how? */
+ if (f_gnudump)
+ acc_upd_times.actime = hstat.st_atime;
+ else
+ acc_upd_times.actime = now; /* Accessed now */
+ acc_upd_times.modtime = hstat.st_mtime; /* Mod'd */
+ if (utime (skipcrud + current_file_name,
+ &acc_upd_times) < 0)
+ {
+ msg_perror ("couldn't change access and modification times of %s", skipcrud + current_file_name);
+ }
+ }
+ /* We do the utime before the chmod because some versions of
+ utime are broken and trash the modes of the file. Since
+ we then change the mode anyway, we don't care. . . */
+
+ /*
+ * If '-k' is not set, open() or creat() could have saved
+ * the permission bits from a previously created file,
+ * ignoring the ones we specified.
+ * Even if -k is set, if the file has abnormal
+ * mode bits, we must chmod since writing or chown() has
+ * probably reset them.
+ *
+ * If -k is set, we know *we* created this file, so the mode
+ * bits were set by our open(). If the file is "normal", we
+ * skip the chmod. This works because we did umask(0) if -p
+ * is set, so umask will have left the specified mode alone.
+ */
+ if ((!f_keep)
+ || (hstat.st_mode & (S_ISUID | S_ISGID | S_ISVTX)))
+ {
+ if (chmod (skipcrud + current_file_name,
+ notumask & (int) hstat.st_mode) < 0)
+ {
+ /* the message prints the mode with %ld, so hand it a long */
+ msg_perror ("cannot change mode of file %s to %ld",
+ skipcrud + current_file_name,
+ (long) (notumask & (int) hstat.st_mode));
+ }
+ }
+
+ quit:
+ break;
+
+ case LF_LINK:
+ again_link:
+ {
+ struct stat st1, st2;
+
+ check = link (current_link_name, skipcrud + current_file_name);
+
+ if (check == 0)
+ break;
+ if (make_dirs (skipcrud + current_file_name))
+ goto again_link;
+ if (f_gnudump && errno == EEXIST)
+ break;
+ if (stat (current_link_name, &st1) == 0
+ && stat (current_file_name + skipcrud, &st2) == 0
+ && st1.st_dev == st2.st_dev
+ && st1.st_ino == st2.st_ino)
+ break;
+ msg_perror ("Could not link %s to %s",
+ skipcrud + current_file_name,
+ current_link_name);
+ }
+ break;
+
+#ifdef S_ISLNK
+ case LF_SYMLINK:
+ again_symlink:
+ check = symlink (current_link_name,
+ skipcrud + current_file_name);
+ /* FIXME, don't worry uid, gid, etc... */
+ if (check == 0)
+ break;
+ if (make_dirs (current_file_name + skipcrud))
+ goto again_symlink;
+ msg_perror ("Could not create symlink to %s",
+ current_link_name);
+ break;
+#endif
+
+#ifdef S_IFCHR
+ case LF_CHR:
+ hstat.st_mode |= S_IFCHR;
+ goto make_node;
+#endif
+
+#ifdef S_IFBLK
+ case LF_BLK:
+ hstat.st_mode |= S_IFBLK;
+#endif
+#if defined(S_IFCHR) || defined(S_IFBLK)
+ make_node:
+ check = mknod (current_file_name + skipcrud,
+ (int) hstat.st_mode, (int) hstat.st_rdev);
+ if (check != 0)
+ {
+ if (make_dirs (skipcrud + current_file_name))
+ goto make_node;
+ msg_perror ("Could not make %s",
+ current_file_name + skipcrud);
+ break;
+ };
+ goto set_filestat;
+#endif
+
+#ifdef S_ISFIFO
+ /* If local system doesn't support FIFOs, use default case */
+ case LF_FIFO:
+ make_fifo:
+ check = mkfifo (current_file_name + skipcrud,
+ (int) hstat.st_mode);
+ if (check != 0)
+ {
+ if (make_dirs (current_file_name + skipcrud))
+ goto make_fifo;
+ msg_perror ("Could not make %s",
+ skipcrud + current_file_name);
+ break;
+ };
+ goto set_filestat;
+#endif
+
+ case LF_DIR:
+ case LF_DUMPDIR:
+ namelen = strlen (current_file_name + skipcrud) - 1;
+ really_dir:
+ /* Check for trailing /, and zap as many as we find. */
+ while (namelen
+ && current_file_name[skipcrud + namelen] == '/')
+ current_file_name[skipcrud + namelen--] = '\0';
+ if (f_gnudump)
+ { /* Read the entry and delete files
+ that aren't listed in the archive */
+ gnu_restore (skipcrud);
+
+ }
+ else if (head->header.linkflag == LF_DUMPDIR)
+ skip_file ((long) (hstat.st_size));
+
+
+ again_dir:
+ check = mkdir (skipcrud + current_file_name,
+ (we_are_root ? 0 : 0300) | (int) hstat.st_mode);
+ if (check != 0)
+ {
+ struct stat st1;
+
+ if (make_dirs (skipcrud + current_file_name))
+ goto again_dir;
+ /* If we're trying to create '.', let it be. */
+ if (current_file_name[skipcrud + namelen] == '.' &&
+ (namelen == 0 ||
+ current_file_name[skipcrud + namelen - 1] == '/'))
+ goto check_perms;
+ if (errno == EEXIST
+ && stat (skipcrud + current_file_name, &st1) == 0
+ && (S_ISDIR (st1.st_mode)))
+ break;
+ msg_perror ("Could not create directory %s", skipcrud + current_file_name);
+ break;
+ }
+
+ check_perms:
+ if (!we_are_root && 0300 != (0300 & (int) hstat.st_mode))
+ {
+ hstat.st_mode |= 0300;
+ msg ("Added write and execute permission to directory %s",
+ skipcrud + current_file_name);
+ }
+
+ /*
+ * If we are root, set the owner and group of the extracted
+ * file. This does what is wanted both on real Unix and on
+ * System V. If we are running as a user, we extract as that
+ * user; if running as root, we extract as the original owner.
+ */
+ if (we_are_root || f_do_chown)
+ {
+ if (chown (skipcrud + current_file_name,
+ hstat.st_uid, hstat.st_gid) < 0)
+ {
+ msg_perror ("cannot chown file %s to uid %d gid %d",
+ skipcrud + current_file_name,
+ hstat.st_uid, hstat.st_gid);
+ }
+ }
+
+ if (!f_modified)
+ {
+ /* Queue times and mode; restore_saved_dir_info() applies them. */
+ tmp = ((struct saved_dir_info *)
+ ck_malloc (sizeof (struct saved_dir_info)));
+ tmp->path = (char *) ck_malloc (strlen (skipcrud
+ + current_file_name) + 1);
+ strcpy (tmp->path, skipcrud + current_file_name);
+ tmp->mode = hstat.st_mode;
+ tmp->atime = hstat.st_atime;
+ tmp->mtime = hstat.st_mtime;
+ tmp->next = saved_dir_info_head;
+ saved_dir_info_head = tmp;
+ }
+ else
+ {
+ /* This functions exactly as the code for set_filestat above. */
+ if ((!f_keep)
+ || (hstat.st_mode & (S_ISUID | S_ISGID | S_ISVTX)))
+ {
+ if (chmod (skipcrud + current_file_name,
+ notumask & (int) hstat.st_mode) < 0)
+ {
+ /* the message prints the mode with %ld, so hand it a long */
+ msg_perror ("cannot change mode of file %s to %ld",
+ skipcrud + current_file_name,
+ (long) (notumask & (int) hstat.st_mode));
+ }
+ }
+ }
+ break;
+
+ case LF_VOLHDR:
+ if (f_verbose)
+ {
+ printf ("Reading %s\n", current_file_name);
+ }
+ break;
+
+ case LF_NAMES:
+ extract_mangle (head);
+ break;
+
+ case LF_MULTIVOL:
+ msg ("Can't extract '%s'--file is continued from another volume\n", current_file_name);
+ skip_file ((long) hstat.st_size);
+ break;
+
+ case LF_LONGNAME:
+ case LF_LONGLINK:
+ msg ("Visible long name error\n");
+ skip_file ((long) hstat.st_size);
+ break;
+ }
+
+ /* We don't need to save it any longer. */
+ saverec ((union record **) 0);/* Unsave it */
+}
+
+/*
+ * Called after creating a file, link, symlink or directory has
+ * failed.  When the failure was ENOENT, walk PATHNAME and try to
+ * create every directory named before one of its slashes.  Returns
+ * the number of directories created, so a nonzero result tells the
+ * caller that a retry is worthwhile; the caller's errno is put back
+ * before returning.
+ */
+int
+make_dirs (pathname)
+     char *pathname;
+{
+  int callers_errno = errno;	/* Value to hand back to the caller */
+  int created = 0;		/* Directories made so far */
+  char *slash;			/* Separator currently looked at */
+
+  if (callers_errno != ENOENT)
+    return 0;			/* Not our problem */
+
+  slash = index (pathname, '/');
+  while (slash != NULL)
+    {
+      /*
+       * Nothing to create for an empty component (leading or doubled
+       * '/') nor for a component that is just '.'.
+       */
+      if (slash != pathname
+          && slash[-1] != '/'
+          && !(slash[-1] == '.'
+               && (slash == pathname + 1 || slash[-2] == '/')))
+        {
+          int made;
+
+          *slash = 0;		/* Cut the path off at this separator */
+          made = (mkdir (pathname, 0777) == 0);
+          if (made)
+            {
+              /* Fix ownership */
+              if (we_are_root
+                  && chown (pathname, hstat.st_uid, hstat.st_gid) < 0)
+                {
+                  msg_perror ("cannot change owner of %s to uid %d gid %d", pathname, hstat.st_uid, hstat.st_gid);
+                }
+              pr_mkdir (pathname, slash - pathname, notumask & 0777);
+              created++;
+            }
+          *slash = '/';		/* Put the separator back */
+
+          /*
+           * A directory that is already there is fine; any other
+           * mkdir failure ends the walk.
+           */
+          if (!made && errno != EEXIST)
+            break;
+        }
+      slash = index (slash + 1, '/');
+    }
+
+  errno = callers_errno;
+  return created;
+}
+
+/*
+ * Write the data of a sparse archive member to FD.
+ *
+ * Each entry of the global "sparsearray" gives an offset in the output
+ * file and a byte count; the data for an entry is taken from the
+ * archive one record at a time.
+ *
+ *   fd         descriptor of the file being created
+ *   sizeleft   number of data bytes still to be written; decreased as
+ *              data is written (assumed to start out as totalsize)
+ *   totalsize  size of the member; kept for the callers, not used here
+ *   name       file name used in messages
+ *
+ * "sparsearray" is freed before returning, whichever way the function
+ * is left.  When the archive ends early or a write fails or comes up
+ * short, the problem is reported once and extraction of this member
+ * stops; after a write problem skip_file() is called with what is
+ * left of *sizeleft.
+ */
+void
+extract_sparse_file (fd, sizeleft, totalsize, name)
+     int fd;
+     long *sizeleft, totalsize;
+     char *name;
+{
+  union record *datarec = NULL;
+  int sparse_ind = 0;
+  int written;			/* bytes of the current entry still to write */
+  int wanted;			/* bytes asked of the current write() */
+  int count;			/* what that write() returned */
+
+  /* assuming sizeleft is initially totalsize */
+
+  while (*sizeleft > 0)
+    {
+      datarec = findrec ();
+      if (datarec == NULL)
+        goto unexpected_eof;
+
+      lseek (fd, sparsearray[sparse_ind].offset, 0);
+      written = sparsearray[sparse_ind++].numbytes;
+
+      /* whole records first ... */
+      while (written > RECORDSIZE)
+        {
+          wanted = RECORDSIZE;
+          count = write (fd, datarec->charptr, wanted);
+          if (count < 0 || count != wanted)
+            goto write_failed;
+          written -= count;
+          *sizeleft -= count;
+          userec (datarec);
+          datarec = findrec ();
+          if (datarec == NULL)
+            goto unexpected_eof;
+        }
+
+      /* ... then what remains of this entry */
+      wanted = written;
+      count = write (fd, datarec->charptr, wanted);
+      if (count < 0 || count != wanted)
+        goto write_failed;
+      *sizeleft -= count;
+      userec (datarec);
+    }
+
+  free (sparsearray);
+  /* if (end_nulls) {
+     register int i;
+
+     printf("%d\n", (int) end_nulls);
+     for (i = 0; i < end_nulls; i++)
+     write(fd, "\000", 1);
+     }*/
+  /* datarec is still NULL when there was no data at all */
+  if (datarec != NULL)
+    userec (datarec);
+  return;
+
+unexpected_eof:
+  msg ("Unexpected EOF on archive file");
+  free (sparsearray);
+  return;
+
+write_failed:
+  if (count < 0)
+    msg_perror ("couldn't write to file %s", name);
+  else
+    msg ("could only write %d of %d bytes to file %s", count,
+         wanted, name);
+  skip_file ((long) (*sizeleft));
+  free (sparsearray);
+}
+
+/* Set back the utime and mode for all the extracted directories.
+   Every entry of the saved_dir_info_head list is applied and then
+   released together with its path, so the list is empty afterwards.
+   Failures of utime() or chmod() are reported and do not stop the
+   walk.  */
+void
+restore_saved_dir_info ()
+{
+  struct utimbuf acc_upd_times;
+  struct saved_dir_info *done;
+
+  while (saved_dir_info_head != NULL)
+    {
+      /* fixme if f_gnudump should set ctime too, but how? */
+      if (f_gnudump)
+        acc_upd_times.actime = saved_dir_info_head->atime;
+      else
+        acc_upd_times.actime = now;	/* Accessed now */
+      acc_upd_times.modtime = saved_dir_info_head->mtime;	/* Mod'd */
+      if (utime (saved_dir_info_head->path, &acc_upd_times) < 0)
+        {
+          msg_perror ("couldn't change access and modification times of %s",
+                      saved_dir_info_head->path);
+        }
+      if ((!f_keep) || (saved_dir_info_head->mode & (S_ISUID | S_ISGID | S_ISVTX)))
+        {
+          if (chmod (saved_dir_info_head->path,
+                     notumask & saved_dir_info_head->mode) < 0)
+            {
+              /* the message prints the mode with %ld, so hand it a long */
+              msg_perror ("cannot change mode of file %s to %ld",
+                          saved_dir_info_head->path,
+                          (long) (notumask & saved_dir_info_head->mode));
+            }
+        }
+
+      /* Unlink the entry before releasing it and its path. */
+      done = saved_dir_info_head;
+      saved_dir_info_head = done->next;
+      free (done->path);
+      free (done);
+    }
+}
diff --git a/gnu/usr.bin/tar/fnmatch.c b/gnu/usr.bin/tar/fnmatch.c
new file mode 100644
index 000000000000..ed8c9eea7e35
--- /dev/null
+++ b/gnu/usr.bin/tar/fnmatch.c
@@ -0,0 +1,173 @@
+/* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <errno.h>
+#include <fnmatch.h>
+
+#if !defined(__GNU_LIBRARY__) && !defined(STDC_HEADERS)
+extern int errno;
+#endif
+
+/* Match STRING against the filename pattern PATTERN, returning zero if
+   it matches, nonzero if not.
+
+   FLAGS is a combination of the FNM_* bits; when it contains any bit
+   outside __FNM_FLAGS, errno is set to EINVAL and -1 is returned.
+   Otherwise the result is 0 for a match and FNM_NOMATCH for none.
+
+   A pattern that ends in an unfinished escape (a backslash with
+   nothing after it, also inside a bracket expression) or in an
+   unterminated bracket expression does not match.  */
+int
+fnmatch (pattern, string, flags)
+     const char *pattern;
+     const char *string;
+     int flags;
+{
+  register const char *p = pattern, *n = string;
+  register char c;
+
+  if ((flags & ~__FNM_FLAGS) != 0)
+    {
+      errno = EINVAL;
+      return -1;
+    }
+
+  while ((c = *p++) != '\0')
+    {
+      switch (c)
+        {
+        case '?':
+          if (*n == '\0')
+            return FNM_NOMATCH;
+          else if ((flags & FNM_PATHNAME) && *n == '/')
+            return FNM_NOMATCH;
+          else if ((flags & FNM_PERIOD) && *n == '.' &&
+                   (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
+            return FNM_NOMATCH;
+          break;
+
+        case '\\':
+          if (!(flags & FNM_NOESCAPE))
+            {
+              c = *p++;
+              if (c == '\0')
+                /* Trailing \ loses; going on would read past the
+                   end of the pattern.  */
+                return FNM_NOMATCH;
+            }
+          if (*n != c)
+            return FNM_NOMATCH;
+          break;
+
+        case '*':
+          if ((flags & FNM_PERIOD) && *n == '.' &&
+              (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
+            return FNM_NOMATCH;
+
+          /* Collapse the wildcards that follow.  Only a ? has to use
+             up a character of the string; a further * matches the
+             empty string as well, so it must not advance N.  */
+          for (c = *p++; c == '?' || c == '*'; c = *p++)
+            {
+              if ((flags & FNM_PATHNAME) && *n == '/')
+                return FNM_NOMATCH;
+              if (c == '?')
+                {
+                  if (*n == '\0')
+                    return FNM_NOMATCH;
+                  ++n;
+                }
+            }
+
+          if (c == '\0')
+            return 0;
+
+          {
+            /* Try the rest of the pattern at every position of the
+               string whose character can start a match.  */
+            char c1 = (!(flags & FNM_NOESCAPE) && c == '\\') ? *p : c;
+            for (--p; *n != '\0'; ++n)
+              if ((c == '[' || *n == c1) &&
+                  fnmatch (p, n, flags & ~FNM_PERIOD) == 0)
+                return 0;
+            return FNM_NOMATCH;
+          }
+
+        case '[':
+          {
+            /* Nonzero if the sense of the character class is inverted.  */
+            register int not;
+
+            if (*n == '\0')
+              return FNM_NOMATCH;
+
+            if ((flags & FNM_PERIOD) && *n == '.' &&
+                (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
+              return FNM_NOMATCH;
+
+            not = (*p == '!' || *p == '^');
+            if (not)
+              ++p;
+
+            c = *p++;
+            for (;;)
+              {
+                register char cstart = c, cend = c;
+
+                if (!(flags & FNM_NOESCAPE) && c == '\\')
+                  {
+                    if (*p == '\0')
+                      /* [\ (unterminated) loses.  */
+                      return FNM_NOMATCH;
+                    cstart = cend = *p++;
+                  }
+
+                if (c == '\0')
+                  /* [ (unterminated) loses.  */
+                  return FNM_NOMATCH;
+
+                c = *p++;
+
+                if ((flags & FNM_PATHNAME) && c == '/')
+                  /* [/] can never match.  */
+                  return FNM_NOMATCH;
+
+                if (c == '-' && *p != ']')
+                  {
+                    cend = *p++;
+                    if (!(flags & FNM_NOESCAPE) && cend == '\\')
+                      cend = *p++;
+                    if (cend == '\0')
+                      return FNM_NOMATCH;
+                    c = *p++;
+                  }
+
+                if (*n >= cstart && *n <= cend)
+                  goto matched;
+
+                if (c == ']')
+                  break;
+              }
+            if (!not)
+              return FNM_NOMATCH;
+            break;
+
+          matched:;
+            /* Skip the rest of the [...] that already matched.  */
+            while (c != ']')
+              {
+                if (c == '\0')
+                  /* [... (unterminated) loses.  */
+                  return FNM_NOMATCH;
+
+                c = *p++;
+                if (!(flags & FNM_NOESCAPE) && c == '\\')
+                  {
+                    if (*p == '\0')
+                      /* Nothing left to escape: unterminated.  */
+                      return FNM_NOMATCH;
+                    /* 1003.2d11 is unclear if this is right.  %%%  */
+                    ++p;
+                  }
+              }
+            if (not)
+              return FNM_NOMATCH;
+          }
+          break;
+
+        default:
+          if (c != *n)
+            return FNM_NOMATCH;
+        }
+
+      ++n;
+    }
+
+  if (*n == '\0')
+    return 0;
+
+  if ((flags & FNM_LEADING_DIR) && *n == '/')
+    /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
+    return 0;
+
+  return FNM_NOMATCH;
+}
diff --git a/gnu/usr.bin/tar/fnmatch.h b/gnu/usr.bin/tar/fnmatch.h
new file mode 100644
index 000000000000..d4150a9a9969
--- /dev/null
+++ b/gnu/usr.bin/tar/fnmatch.h
@@ -0,0 +1,62 @@
+/* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#ifndef _FNMATCH_H
+
+#define _FNMATCH_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
+#undef __P
+#define __P(args) args
+#else /* Not C++ or ANSI C. */
+#undef __P
+#define __P(args) ()
+#undef const
+#define const
+#endif /* C++ or ANSI C. */
+
+/* Bits set in the FLAGS argument to `fnmatch'.
+   FNM_PATHNAME is undefined first when something included earlier
+   has already defined it, so that the value given here is the one in
+   effect.  __FNM_FLAGS is the union of the flag bits.
+   NOTE(review): __FNM_FLAGS names FNM_LEADING_DIR, which is defined
+   only inside the conditional further down; where that conditional is
+   false, any use of __FNM_FLAGS expands to an undefined name --
+   confirm how this header is meant to be used in that configuration. */
+#ifdef FNM_PATHNAME /* Because it is already defined in <unistd.h> */
+#undef FNM_PATHNAME
+#endif
+#define FNM_PATHNAME (1 << 0)/* No wildcard can ever match `/'. */
+#define FNM_NOESCAPE (1 << 1)/* Backslashes don't quote special chars. */
+#define FNM_PERIOD (1 << 2)/* Leading `.' is matched only explicitly. */
+#define __FNM_FLAGS (FNM_PATHNAME|FNM_NOESCAPE|FNM_PERIOD|FNM_LEADING_DIR)
+
+#if !defined (_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 2 || defined (_BSD_SOURCE)
+#define FNM_LEADING_DIR (1 << 3)/* Ignore `/...' after a match. */
+#define FNM_FILE_NAME FNM_PATHNAME
+#endif
+
+/* Value returned by `fnmatch' if STRING does not match PATTERN. */
+#define FNM_NOMATCH 1
+
+/* Match STRING against the filename pattern PATTERN,
+   returning zero if it matches, FNM_NOMATCH if not.
+   __P keeps the parameter list for C++ and ANSI C and replaces it
+   with an empty one otherwise (see its two definitions above). */
+extern int fnmatch __P ((const char *__pattern, const char *__string,
+ int __flags));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* fnmatch.h */
diff --git a/gnu/usr.bin/tar/getdate.y b/gnu/usr.bin/tar/getdate.y
new file mode 100644
index 000000000000..7b0ac79924f4
--- /dev/null
+++ b/gnu/usr.bin/tar/getdate.y
@@ -0,0 +1,969 @@
+%{
+/* $Revision: 2.1 $
+**
+** Originally written by Steven M. Bellovin <smb@research.att.com> while
+** at the University of North Carolina at Chapel Hill. Later tweaked by
+** a couple of people on Usenet. Completely overhauled by Rich $alz
+** <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990;
+** send any email to Rich.
+**
+** This grammar has eight shift/reduce conflicts.
+**
+** This code is in the public domain and has no copyright.
+*/
+/* SUPPRESS 287 on yaccpar_sccsid *//* Unused static variable */
+/* SUPPRESS 288 on yyerrlab *//* Label unused */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#else
+#ifdef _AIX /* for Bison */
+ #pragma alloca
+#else
+char *alloca ();
+#endif
+#endif
+#endif
+
+#include <stdio.h>
+#include <ctype.h>
+
+/* The code at the top of get_date which figures out the offset of the
+ current time zone checks various CPP symbols to see if special
+ tricks are needed, but defaults to using the gettimeofday system call.
+ Include <sys/time.h> if that will be used. */
+
+#if !defined (USG) && !defined (sgi) && !defined (__386BSD__)
+#include <sys/time.h>
+#endif
+
+#if defined(vms)
+
+#include <types.h>
+#include <time.h>
+
+#else
+
+#include <sys/types.h>
+
+#if defined(USG) || !defined(HAVE_FTIME)
+/*
+** If you need to do a tzset() call to set the
+** timezone, and don't have ftime().
+** This local declaration stands in for the one in <sys/timeb.h>;
+** get_date fills in only the time and timezone members.
+*/
+struct timeb {
+ time_t time; /* Seconds since the epoch */
+ unsigned short millitm; /* Field not used */
+ short timezone; /* Zone offset in minutes; get_date copies it into yyTimezone */
+ short dstflag; /* Field not used */
+};
+
+#else
+
+#include <sys/timeb.h>
+
+#endif /* defined(USG) || !defined(HAVE_FTIME) */
+
+#if defined(BSD4_2) || defined(BSD4_1C) || (defined (hp9000) && !defined (hpux))
+#include <sys/time.h>
+#else
+#if defined(_AIX)
+#include <sys/time.h>
+#endif
+#include <time.h>
+#endif /* defined(BSD4_2) */
+
+#endif /* defined(vms) */
+
+#if defined (STDC_HEADERS) || defined (USG)
+#include <string.h>
+#endif
+
+#if sgi
+#undef timezone
+#endif
+
+extern struct tm *localtime();
+
+#define yyparse getdate_yyparse
+#define yylex getdate_yylex
+#define yyerror getdate_yyerror
+
+#if !defined(lint) && !defined(SABER)
+static char RCS[] =
+ "$Header: str2date.y,v 2.1 90/09/06 08:15:06 cronan Exp $";
+#endif /* !defined(lint) && !defined(SABER) */
+
+
+#define EPOCH 1970
+#define HOUR(x) ((time_t)(x) * 60)
+#define SECSPERDAY (24L * 60L * 60L)
+
+
+/*
+** An entry in one of the lexical lookup tables searched by LookupWord.
+** Every table is terminated by an entry whose name is NULL.
+*/
+typedef struct _TABLE {
+ char *name; /* Word to match; LookupWord lower-cases its input first */
+ int type; /* Token code handed to the parser on a match */
+ time_t value; /* Stored in yylval.Number on a match */
+} TABLE;
+
+
+/*
+** Daylight-savings mode: on, off, or not yet known.
+*/
+typedef enum _DSTMODE {
+ DSTon, DSToff, DSTmaybe
+} DSTMODE;
+
+/*
+** Meridian: am, pm, or 24-hour style.
+*/
+typedef enum _MERIDIAN {
+ MERam, MERpm, MER24
+} MERIDIAN;
+
+
+/*
+** Global variables. We could get rid of most of these by using a good
+** union as the yacc stack. (This routine was originally written before
+** yacc had the %union construct.) Maybe someday; right now we only use
+** the %union very rarely.
+*/
+static char *yyInput;
+static DSTMODE yyDSTmode;
+static time_t yyDayOrdinal;
+static time_t yyDayNumber;
+static int yyHaveDate;
+static int yyHaveDay;
+static int yyHaveRel;
+static int yyHaveTime;
+static int yyHaveZone;
+static time_t yyTimezone;
+static time_t yyDay;
+static time_t yyHour;
+static time_t yyMinutes;
+static time_t yyMonth;
+static time_t yySeconds;
+static time_t yyYear;
+static MERIDIAN yyMeridian;
+static time_t yyRelMonth;
+static time_t yyRelSeconds;
+
+%}
+
+%union {
+ time_t Number;
+ enum _MERIDIAN Meridian;
+}
+
+%token tAGO tDAY tDAYZONE tID tMERIDIAN tMINUTE_UNIT tMONTH tMONTH_UNIT
+%token tSEC_UNIT tSNUMBER tUNUMBER tZONE tDST
+
+%type <Number> tDAY tDAYZONE tMINUTE_UNIT tMONTH tMONTH_UNIT
+%type <Number> tSEC_UNIT tSNUMBER tUNUMBER tZONE
+%type <Meridian> tMERIDIAN o_merid
+
+%%
+
+spec : /* NULL -- a date string is any sequence of items, possibly empty */
+ | spec item
+ ;
+
+item : time { /* each counter records how many items of that kind were seen */
+ yyHaveTime++;
+ }
+ | zone {
+ yyHaveZone++;
+ }
+ | date {
+ yyHaveDate++;
+ }
+ | day {
+ yyHaveDay++;
+ }
+ | rel { /* get_date does not limit this counter, unlike the four above */
+ yyHaveRel++;
+ }
+ | number /* bare number; the `number' rule updates the counters itself */
+ ;
+
+time : tUNUMBER tMERIDIAN { /* hour plus am/pm, e.g. "5 pm" */
+ yyHour = $1;
+ yyMinutes = 0;
+ yySeconds = 0;
+ yyMeridian = $2;
+ }
+ | tUNUMBER ':' tUNUMBER o_merid { /* hh:mm with optional am/pm */
+ yyHour = $1;
+ yyMinutes = $3;
+ yySeconds = 0;
+ yyMeridian = $4;
+ }
+ | tUNUMBER ':' tUNUMBER tSNUMBER { /* hh:mm followed by a signed hhmm zone, e.g. "10:30 -0500" */
+ yyHour = $1;
+ yyMinutes = $3;
+ yyMeridian = MER24;
+ yyDSTmode = DSToff;
+ yyTimezone = - ($4 % 100 + ($4 / 100) * 60); /* hhmm -> minutes, sign flipped to match the zone tables */
+ }
+ | tUNUMBER ':' tUNUMBER ':' tUNUMBER o_merid { /* hh:mm:ss with optional am/pm */
+ yyHour = $1;
+ yyMinutes = $3;
+ yySeconds = $5;
+ yyMeridian = $6;
+ }
+ | tUNUMBER ':' tUNUMBER ':' tUNUMBER tSNUMBER { /* hh:mm:ss followed by a signed hhmm zone */
+ yyHour = $1;
+ yyMinutes = $3;
+ yySeconds = $5;
+ yyMeridian = MER24;
+ yyDSTmode = DSToff;
+ yyTimezone = - ($6 % 100 + ($6 / 100) * 60); /* same conversion as above */
+ }
+ ;
+
+zone : tZONE { /* named zone, e.g. "est": use its offset with DST off */
+ yyTimezone = $1;
+ yyDSTmode = DSToff;
+ }
+ | tDAYZONE { /* named daylight zone, e.g. "edt": same offset with DST on */
+ yyTimezone = $1;
+ yyDSTmode = DSTon;
+ }
+ |
+ tZONE tDST { /* zone followed by the word "dst", e.g. "est dst" */
+ yyTimezone = $1;
+ yyDSTmode = DSTon;
+ }
+ ;
+
+day : tDAY { /* weekday name alone, e.g. "monday" */
+ yyDayOrdinal = 1;
+ yyDayNumber = $1;
+ }
+ | tDAY ',' { /* weekday name with a trailing comma */
+ yyDayOrdinal = 1;
+ yyDayNumber = $1;
+ }
+ | tUNUMBER tDAY { /* count plus weekday, e.g. "third monday" (ordinal words lex as tUNUMBER) */
+ yyDayOrdinal = $1;
+ yyDayNumber = $2;
+ }
+ ;
+
+date : tUNUMBER '/' tUNUMBER { /* month/day, e.g. "6/18" */
+ yyMonth = $1;
+ yyDay = $3;
+ }
+ | tUNUMBER '/' tUNUMBER '/' tUNUMBER { /* month/day/year */
+ yyMonth = $1;
+ yyDay = $3;
+ yyYear = $5;
+ }
+ | tUNUMBER tSNUMBER tSNUMBER {
+ /* ISO 8601 format. yyyy-mm-dd. The lexer reads "-mm" and "-dd"
+ as negative numbers, hence the negations below. */
+ yyYear = $1;
+ yyMonth = -$2;
+ yyDay = -$3;
+ }
+ | tMONTH tUNUMBER { /* e.g. "june 18" */
+ yyMonth = $1;
+ yyDay = $2;
+ }
+ | tMONTH tUNUMBER ',' tUNUMBER { /* e.g. "june 18, 1993" */
+ yyMonth = $1;
+ yyDay = $2;
+ yyYear = $4;
+ }
+ | tUNUMBER tMONTH { /* e.g. "18 june" */
+ yyMonth = $2;
+ yyDay = $1;
+ }
+ | tUNUMBER tMONTH tUNUMBER { /* e.g. "18 june 1993" */
+ yyMonth = $2;
+ yyDay = $1;
+ yyYear = $3;
+ }
+ ;
+
+rel : relunit tAGO { /* "ago" negates the running totals, including offsets added by earlier items */
+ yyRelSeconds = -yyRelSeconds;
+ yyRelMonth = -yyRelMonth;
+ }
+ | relunit
+ ;
+
+relunit : tUNUMBER tMINUTE_UNIT { /* e.g. "3 days"; the unit's value is its length in minutes */
+ yyRelSeconds += $1 * $2 * 60L;
+ }
+ | tSNUMBER tMINUTE_UNIT { /* signed count, e.g. "-3 days" */
+ yyRelSeconds += $1 * $2 * 60L;
+ }
+ | tMINUTE_UNIT { /* unit alone, e.g. "day" or "tomorrow" */
+ yyRelSeconds += $1 * 60L;
+ }
+ | tSNUMBER tSEC_UNIT { /* signed seconds; only the count is used */
+ yyRelSeconds += $1;
+ }
+ | tUNUMBER tSEC_UNIT { /* e.g. "30 seconds" */
+ yyRelSeconds += $1;
+ }
+ | tSEC_UNIT { /* "second" alone means one second */
+ yyRelSeconds++;
+ }
+ | tSNUMBER tMONTH_UNIT { /* signed months or years; the unit's value is its length in months */
+ yyRelMonth += $1 * $2;
+ }
+ | tUNUMBER tMONTH_UNIT { /* e.g. "2 years" */
+ yyRelMonth += $1 * $2;
+ }
+ | tMONTH_UNIT { /* unit alone, e.g. "month" */
+ yyRelMonth += $1;
+ }
+ ;
+
+number : tUNUMBER {
+        /*
+         * A bare number.  Once both a time and a date have been seen
+         * (and no relative item), it is taken as the year.  Otherwise
+         * it is a time of day: "hh", "hhmm", or a combined
+         * "yyyymmddhhmm" stamp when it is larger than 10000.
+         */
+        if (yyHaveTime && yyHaveDate && !yyHaveRel)
+            yyYear = $1;
+        else {
+            if ($1 > 10000) {
+                time_t date_part;
+                time_t clock_part;
+
+                /* Leading digits are the date, last four the time. */
+                date_part = $1 / 10000;
+                clock_part = $1 % 10000;
+                yyHaveDate++;
+                yyDay = date_part % 100;
+                yyMonth = (date_part / 100) % 100;
+                yyYear = date_part / 10000;
+                /* Split only the trailing hhmm; using the whole number
+                   here produced an hour that was always out of range. */
+                yyHour = clock_part / 100;
+                yyMinutes = clock_part % 100;
+            }
+            else if ($1 < 100) {
+                yyHour = $1;
+                yyMinutes = 0;
+            }
+            else {
+                yyHour = $1 / 100;
+                yyMinutes = $1 % 100;
+            }
+            yyHaveTime++;
+            yySeconds = 0;
+            yyMeridian = MER24;
+        }
+    }
+    ;
+
+o_merid : /* NULL -- no am/pm given, so the hour is on the 24-hour clock */ {
+ $$ = MER24;
+ }
+ | tMERIDIAN { /* pass the am/pm marker through */
+ $$ = $1;
+ }
+ ;
+
+%%
+
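+/* Month and day table. Months are numbered 1-12 and weekdays 0 (Sunday)
+ through 6 (Saturday). Three-letter abbreviations are matched against the
+ first three characters of these names by LookupWord. */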
+static TABLE const MonthDayTable[] = {
+ { "january", tMONTH, 1 },
+ { "february", tMONTH, 2 },
+ { "march", tMONTH, 3 },
+ { "april", tMONTH, 4 },
+ { "may", tMONTH, 5 },
+ { "june", tMONTH, 6 },
+ { "july", tMONTH, 7 },
+ { "august", tMONTH, 8 },
+ { "september", tMONTH, 9 },
+ { "sept", tMONTH, 9 },
+ { "october", tMONTH, 10 },
+ { "november", tMONTH, 11 },
+ { "december", tMONTH, 12 },
+ { "sunday", tDAY, 0 },
+ { "monday", tDAY, 1 },
+ { "tuesday", tDAY, 2 },
+ { "tues", tDAY, 2 },
+ { "wednesday", tDAY, 3 },
+ { "wednes", tDAY, 3 },
+ { "thursday", tDAY, 4 },
+ { "thur", tDAY, 4 },
+ { "thurs", tDAY, 4 },
+ { "friday", tDAY, 5 },
+ { "saturday", tDAY, 6 },
+ { NULL }
+};
+
+/* Time units table. A tMONTH_UNIT value is the unit's length in months and
+ a tMINUTE_UNIT value is its length in minutes. The grammar ignores the
+ value of a tSEC_UNIT. LookupWord also tries these names with a trailing
+ `s' removed, so plurals need no entries of their own. */
+static TABLE const UnitsTable[] = {
+ { "year", tMONTH_UNIT, 12 },
+ { "month", tMONTH_UNIT, 1 },
+ { "fortnight", tMINUTE_UNIT, 14 * 24 * 60 },
+ { "week", tMINUTE_UNIT, 7 * 24 * 60 },
+ { "day", tMINUTE_UNIT, 1 * 24 * 60 },
+ { "hour", tMINUTE_UNIT, 60 },
+ { "minute", tMINUTE_UNIT, 1 },
+ { "min", tMINUTE_UNIT, 1 },
+ { "second", tSEC_UNIT, 1 },
+ { "sec", tSEC_UNIT, 1 },
+ { NULL }
+};
+
+/* Assorted relative-time words. tMINUTE_UNIT entries are offsets in minutes.
+ tUNUMBER entries are counts and ordinals, which the grammar treats exactly
+ like a number typed in digits. */
+static TABLE const OtherTable[] = {
+ { "tomorrow", tMINUTE_UNIT, 1 * 24 * 60 },
+ { "yesterday", tMINUTE_UNIT, -1 * 24 * 60 },
+ { "today", tMINUTE_UNIT, 0 },
+ { "now", tMINUTE_UNIT, 0 },
+ { "last", tUNUMBER, -1 },
+ { "this", tMINUTE_UNIT, 0 },
+ { "next", tUNUMBER, 2 },
+ { "first", tUNUMBER, 1 },
+/* { "second", tUNUMBER, 2 }, -- disabled: "second" is found first in UnitsTable as a time unit */
+ { "third", tUNUMBER, 3 },
+ { "fourth", tUNUMBER, 4 },
+ { "fifth", tUNUMBER, 5 },
+ { "sixth", tUNUMBER, 6 },
+ { "seventh", tUNUMBER, 7 },
+ { "eighth", tUNUMBER, 8 },
+ { "ninth", tUNUMBER, 9 },
+ { "tenth", tUNUMBER, 10 },
+ { "eleventh", tUNUMBER, 11 },
+ { "twelfth", tUNUMBER, 12 },
+ { "ago", tAGO, 1 },
+ { NULL }
+};
+
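+/* The timezone table. Each value is the zone's offset in minutes (see HOUR):
+ positive for zones west of Greenwich, negative for zones east of it. */
+/* Zones with a fractional-hour offset are disabled with #if 0 because a
+ time_t can't store a float. */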
+static TABLE const TimezoneTable[] = {
+ { "gmt", tZONE, HOUR( 0) }, /* Greenwich Mean */
+ { "ut", tZONE, HOUR( 0) }, /* Universal (Coordinated) */
+ { "utc", tZONE, HOUR( 0) },
+ { "wet", tZONE, HOUR( 0) }, /* Western European */
+ { "bst", tDAYZONE, HOUR( 0) }, /* British Summer */
+ { "wat", tZONE, HOUR( 1) }, /* West Africa */
+ { "at", tZONE, HOUR( 2) }, /* Azores */
+#if 0
+ /* For completeness. BST is also British Summer, and GST is
+ * also Guam Standard. */
+ { "bst", tZONE, HOUR( 3) }, /* Brazil Standard */
+ { "gst", tZONE, HOUR( 3) }, /* Greenland Standard */
+#endif
+#if 0
+ { "nft", tZONE, HOUR(3.5) }, /* Newfoundland */
+ { "nst", tZONE, HOUR(3.5) }, /* Newfoundland Standard */
+ { "ndt", tDAYZONE, HOUR(3.5) }, /* Newfoundland Daylight */
+#endif
+ { "ast", tZONE, HOUR( 4) }, /* Atlantic Standard */
+ { "adt", tDAYZONE, HOUR( 4) }, /* Atlantic Daylight */
+ { "est", tZONE, HOUR( 5) }, /* Eastern Standard */
+ { "edt", tDAYZONE, HOUR( 5) }, /* Eastern Daylight */
+ { "cst", tZONE, HOUR( 6) }, /* Central Standard */
+ { "cdt", tDAYZONE, HOUR( 6) }, /* Central Daylight */
+ { "mst", tZONE, HOUR( 7) }, /* Mountain Standard */
+ { "mdt", tDAYZONE, HOUR( 7) }, /* Mountain Daylight */
+ { "pst", tZONE, HOUR( 8) }, /* Pacific Standard */
+ { "pdt", tDAYZONE, HOUR( 8) }, /* Pacific Daylight */
+ { "yst", tZONE, HOUR( 9) }, /* Yukon Standard */
+ { "ydt", tDAYZONE, HOUR( 9) }, /* Yukon Daylight */
+ { "hst", tZONE, HOUR(10) }, /* Hawaii Standard */
+ { "hdt", tDAYZONE, HOUR(10) }, /* Hawaii Daylight */
+ { "cat", tZONE, HOUR(10) }, /* Central Alaska */
+ { "ahst", tZONE, HOUR(10) }, /* Alaska-Hawaii Standard */
+ { "nt", tZONE, HOUR(11) }, /* Nome */
+ { "idlw", tZONE, HOUR(12) }, /* International Date Line West */
+ { "cet", tZONE, -HOUR(1) }, /* Central European */
+ { "met", tZONE, -HOUR(1) }, /* Middle European */
+ { "mewt", tZONE, -HOUR(1) }, /* Middle European Winter */
+ { "mest", tDAYZONE, -HOUR(1) }, /* Middle European Summer */
+ { "swt", tZONE, -HOUR(1) }, /* Swedish Winter */
+ { "sst", tDAYZONE, -HOUR(1) }, /* Swedish Summer */
+ { "fwt", tZONE, -HOUR(1) }, /* French Winter */
+ { "fst", tDAYZONE, -HOUR(1) }, /* French Summer */
+ { "eet", tZONE, -HOUR(2) }, /* Eastern Europe, USSR Zone 1 */
+ { "bt", tZONE, -HOUR(3) }, /* Baghdad, USSR Zone 2 */
+#if 0
+ { "it", tZONE, -HOUR(3.5) },/* Iran */
+#endif
+ { "zp4", tZONE, -HOUR(4) }, /* USSR Zone 3 */
+ { "zp5", tZONE, -HOUR(5) }, /* USSR Zone 4 */
+#if 0
+ { "ist", tZONE, -HOUR(5.5) },/* Indian Standard */
+#endif
+ { "zp6", tZONE, -HOUR(6) }, /* USSR Zone 5 */
+#if 0
+ /* For completeness. NST is also Newfoundland Standard, and SST is
+ * also Swedish Summer. */
+ { "nst", tZONE, -HOUR(6.5) },/* North Sumatra */
+ { "sst", tZONE, -HOUR(7) }, /* South Sumatra, USSR Zone 6 */
+#endif /* 0 */
+ { "wast", tZONE, -HOUR(7) }, /* West Australian Standard */
+ { "wadt", tDAYZONE, -HOUR(7) }, /* West Australian Daylight */
+#if 0
+ { "jt", tZONE, -HOUR(7.5) },/* Java (3pm in Cronusland!) */
+#endif
+ { "cct", tZONE, -HOUR(8) }, /* China Coast, USSR Zone 7 */
+ { "jst", tZONE, -HOUR(9) }, /* Japan Standard, USSR Zone 8 */
+#if 0
+ { "cast", tZONE, -HOUR(9.5) },/* Central Australian Standard */
+ { "cadt", tDAYZONE, -HOUR(9.5) },/* Central Australian Daylight */
+#endif
+ { "east", tZONE, -HOUR(10) }, /* Eastern Australian Standard */
+ { "eadt", tDAYZONE, -HOUR(10) }, /* Eastern Australian Daylight */
+ { "gst", tZONE, -HOUR(10) }, /* Guam Standard, USSR Zone 9 */
+ { "nzt", tZONE, -HOUR(12) }, /* New Zealand */
+ { "nzst", tZONE, -HOUR(12) }, /* New Zealand Standard */
+ { "nzdt", tDAYZONE, -HOUR(12) }, /* New Zealand Daylight */
+ { "idle", tZONE, -HOUR(12) }, /* International Date Line East */
+ { NULL }
+};
+
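+/* Military timezone table: single-letter zone names (there is no entry
+ for "j"). Values are offsets in minutes, built with HOUR like the
+ entries of the timezone table. */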
+static TABLE const MilitaryTable[] = {
+ { "a", tZONE, HOUR( 1) },
+ { "b", tZONE, HOUR( 2) },
+ { "c", tZONE, HOUR( 3) },
+ { "d", tZONE, HOUR( 4) },
+ { "e", tZONE, HOUR( 5) },
+ { "f", tZONE, HOUR( 6) },
+ { "g", tZONE, HOUR( 7) },
+ { "h", tZONE, HOUR( 8) },
+ { "i", tZONE, HOUR( 9) },
+ { "k", tZONE, HOUR( 10) },
+ { "l", tZONE, HOUR( 11) },
+ { "m", tZONE, HOUR( 12) },
+ { "n", tZONE, HOUR(- 1) },
+ { "o", tZONE, HOUR(- 2) },
+ { "p", tZONE, HOUR(- 3) },
+ { "q", tZONE, HOUR(- 4) },
+ { "r", tZONE, HOUR(- 5) },
+ { "s", tZONE, HOUR(- 6) },
+ { "t", tZONE, HOUR(- 7) },
+ { "u", tZONE, HOUR(- 8) },
+ { "v", tZONE, HOUR(- 9) },
+ { "w", tZONE, HOUR(-10) },
+ { "x", tZONE, HOUR(-11) },
+ { "y", tZONE, HOUR(-12) },
+ { "z", tZONE, HOUR( 0) },
+ { NULL }
+};
+
+
+
+
+/* ARGSUSED */ /* Parser error hook: the message S is ignored and 0 is returned, so nothing is printed. */
+static int
+yyerror(s)
+ char *s;
+{
+ return 0;
+}
+
+
+/*
+ * Convert a time of day to seconds since midnight.
+ *
+ * Hours is 0-23 when Meridian is MER24, or 1-12 when it is MERam or
+ * MERpm.  Minutes and Seconds must both be 0-59.
+ * Returns the second count, or -1 if any field is out of range or
+ * Meridian is not a known value.
+ */
+static time_t
+ToSeconds(Hours, Minutes, Seconds, Meridian)
+    time_t Hours;
+    time_t Minutes;
+    time_t Seconds;
+    MERIDIAN Meridian;
+{
+    if (Minutes < 0 || Minutes > 59 || Seconds < 0 || Seconds > 59)
+        return -1;
+
+    switch (Meridian) {
+    case MER24:
+        if (Hours < 0 || Hours > 23)
+            return -1;
+        break;
+    case MERam:
+        if (Hours < 1 || Hours > 12)
+            return -1;
+        /* 12am is midnight, not noon. */
+        if (Hours == 12)
+            Hours = 0;
+        break;
+    case MERpm:
+        if (Hours < 1 || Hours > 12)
+            return -1;
+        /* 12pm is noon; only 1pm-11pm move to the afternoon. */
+        if (Hours != 12)
+            Hours += 12;
+        break;
+    default:
+        /* Unknown meridian: fail rather than fall off the end. */
+        return -1;
+    }
+    return (Hours * 60L + Minutes) * 60L + Seconds;
+}
+
+
+/*
+ * Convert a calendar date and time of day to seconds since the epoch.
+ *
+ * Month is 1-12 and Day is 1-based.  Year may be a full year, a
+ * two-digit year (00-68 mean 2000-2068, 69-99 mean 1969-1999), or a
+ * count of years since 1900 as found in struct tm, which is what
+ * get_date and RelativeMonth pass in.  Hours, Minutes, Seconds and
+ * Meridian are checked by ToSeconds.
+ *
+ * The zone offset in yyTimezone (minutes) is added to the result, and
+ * one hour is subtracted when DSTmode is DSTon, or when it is DSTmaybe
+ * and localtime reports daylight time for the computed instant.
+ *
+ * Returns -1 if any field is out of range.  Years outside 1970-2037
+ * are rejected so that the result fits a 32-bit time_t.
+ */
+static time_t
+Convert(Month, Day, Year, Hours, Minutes, Seconds, Meridian, DSTmode)
+    time_t Month;
+    time_t Day;
+    time_t Year;
+    time_t Hours;
+    time_t Minutes;
+    time_t Seconds;
+    MERIDIAN Meridian;
+    DSTMODE DSTmode;
+{
+    static int DaysInMonth[12] = {
+        31, 0, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+    };
+    time_t tod;
+    time_t Julian;
+    int i;
+
+    if (Year < 0)
+        Year = -Year;
+    /* Expand short years.  Anything still below the epoch is taken as
+       years since 1900, so tm_year values of 100 and up (2000 onward)
+       are accepted instead of being rejected. */
+    if (Year < 69)
+        Year += 2000;
+    else if (Year < EPOCH)
+        Year += 1900;
+    if (Year < EPOCH || Year > 2037 || Month < 1 || Month > 12)
+        return -1;
+
+    /* February's length depends on the year being converted. */
+    DaysInMonth[1] = Year % 4 == 0 && (Year % 100 != 0 || Year % 400 == 0)
+        ? 29 : 28;
+    Month--;                    /* Zero-based from here on. */
+    /* Lint fluff: "conversion from long may lose accuracy" */
+    if (Day < 1 || Day > DaysInMonth[(int)Month])
+        return -1;
+
+    /* Days since the epoch: days before this month, plus whole years. */
+    for (Julian = Day - 1, i = 0; i < Month; i++)
+        Julian += DaysInMonth[i];
+    /* Every fourth year is a leap year throughout 1970-2037. */
+    for (i = EPOCH; i < Year; i++)
+        Julian += 365 + (i % 4 == 0);
+    Julian *= SECSPERDAY;
+    Julian += yyTimezone * 60L;
+    if ((tod = ToSeconds(Hours, Minutes, Seconds, Meridian)) < 0)
+        return -1;
+    Julian += tod;
+    if (DSTmode == DSTon
+        || (DSTmode == DSTmaybe && localtime(&Julian)->tm_isdst))
+        Julian -= 60 * 60;
+    return Julian;
+}
+
+
+/*
+ * Return the distance in seconds from Start to Future, adjusted by
+ * the difference between the local hour of day at the two instants.
+ * When both fall on the same local hour the result is simply
+ * Future - Start.
+ */
+static time_t
+DSTcorrect(Start, Future)
+    time_t Start;
+    time_t Future;
+{
+    time_t hourAtStart;
+    time_t hourAtFuture;
+    time_t elapsed;
+
+    /* Each localtime result is consumed before the next call reuses
+       its storage. */
+    hourAtStart = (localtime(&Start)->tm_hour + 1) % 24;
+    hourAtFuture = (localtime(&Future)->tm_hour + 1) % 24;
+    elapsed = Future - Start;
+    return elapsed + (hourAtStart - hourAtFuture) * 3600L;
+}
+
+
+/*
+ * Return the offset in seconds from Start to the requested weekday.
+ *
+ * DayNumber is the weekday (0 = Sunday) and DayOrdinal selects which
+ * occurrence: 1 is the first one on or after Start, larger values
+ * move forward a week at a time, and zero or negative values move
+ * back DayOrdinal weeks from that first occurrence.
+ */
+static time_t
+RelativeDate(Start, DayOrdinal, DayNumber)
+    time_t Start;
+    time_t DayOrdinal;
+    time_t DayNumber;
+{
+    struct tm *tm;
+    time_t target;
+    time_t daysAhead;
+    time_t weeks;
+
+    target = Start;
+    tm = localtime(&target);
+
+    /* Days until the wanted weekday; 0 if Start already falls on it. */
+    daysAhead = (DayNumber - tm->tm_wday + 7) % 7;
+    if (DayOrdinal <= 0)
+        weeks = DayOrdinal;
+    else
+        weeks = DayOrdinal - 1;
+
+    target += SECSPERDAY * daysAhead;
+    target += 7 * SECSPERDAY * weeks;
+    return DSTcorrect(Start, target);
+}
+
+
+/*
+ * Return the offset in seconds from Start to the same day of the
+ * month and time of day RelMonth months later (earlier if negative).
+ * A zero RelMonth yields 0 without consulting the clock.
+ *
+ * NOTE: the value from Convert is passed on unchecked, so a -1 error
+ * return (for example a day that does not exist in the target month)
+ * is treated as a time.
+ */
+static time_t
+RelativeMonth(Start, RelMonth)
+    time_t Start;
+    time_t RelMonth;
+{
+    struct tm *tm;
+    time_t months;
+    time_t target;
+
+    if (RelMonth == 0)
+        return 0;
+
+    tm = localtime(&Start);
+    /* Months counted from the struct tm year base, then shifted. */
+    months = 12 * tm->tm_year + tm->tm_mon + RelMonth;
+    target = Convert(months % 12 + 1, (time_t)tm->tm_mday, months / 12,
+                     (time_t)tm->tm_hour, (time_t)tm->tm_min,
+                     (time_t)tm->tm_sec, MER24, DSTmaybe);
+    return DSTcorrect(Start, target);
+}
+
+
+/* Return the entry of TABLE whose name equals WORD, or NULL if none. */
+static const TABLE *
+FindWord(table, word)
+    const TABLE *table;
+    const char *word;
+{
+    for ( ; table->name; table++)
+        if (strcmp(word, table->name) == 0)
+            return table;
+    return NULL;
+}
+
+
+/*
+ * Classify the word in BUFF and return its token code, storing any
+ * semantic value in yylval.  BUFF is modified in place: it is
+ * lower-cased, and periods may be removed from it.
+ *
+ * The lookup order is: am/pm markers, month and day names (with
+ * three-letter abbreviations), timezone names, "dst", time units,
+ * time units with a trailing `s' removed, the relative-time words,
+ * single-letter military zones, and finally timezone names again with
+ * periods removed.  Returns tID if nothing matches.
+ */
+static int
+LookupWord(buff)
+    char *buff;
+{
+    register char *p;
+    register char *q;
+    register const TABLE *tp;
+    int i;
+    int abbrev;
+
+    /* Make it lowercase.  The casts keep <ctype.h> defined for
+       characters with the high bit set where char is signed. */
+    for (p = buff; *p; p++)
+        if (isupper((unsigned char)*p))
+            *p = tolower((unsigned char)*p);
+
+    if (strcmp(buff, "am") == 0 || strcmp(buff, "a.m.") == 0) {
+        yylval.Meridian = MERam;
+        return tMERIDIAN;
+    }
+    if (strcmp(buff, "pm") == 0 || strcmp(buff, "p.m.") == 0) {
+        yylval.Meridian = MERpm;
+        return tMERIDIAN;
+    }
+
+    /* See if we have an abbreviation for a month. */
+    if (strlen(buff) == 3)
+        abbrev = 1;
+    else if (strlen(buff) == 4 && buff[3] == '.') {
+        abbrev = 1;
+        buff[3] = '\0';
+    }
+    else
+        abbrev = 0;
+
+    for (tp = MonthDayTable; tp->name; tp++) {
+        /* An abbreviation only has to match the first three letters. */
+        if (abbrev ? strncmp(buff, tp->name, 3) == 0
+                   : strcmp(buff, tp->name) == 0) {
+            yylval.Number = tp->value;
+            return tp->type;
+        }
+    }
+
+    if ((tp = FindWord(TimezoneTable, buff)) != NULL) {
+        yylval.Number = tp->value;
+        return tp->type;
+    }
+
+    if (strcmp(buff, "dst") == 0)
+        return tDST;
+
+    if ((tp = FindWord(UnitsTable, buff)) != NULL) {
+        yylval.Number = tp->value;
+        return tp->type;
+    }
+
+    /* Strip off any plural and try the units table again. */
+    i = strlen(buff) - 1;
+    if (buff[i] == 's') {
+        buff[i] = '\0';
+        if ((tp = FindWord(UnitsTable, buff)) != NULL) {
+            yylval.Number = tp->value;
+            return tp->type;
+        }
+        buff[i] = 's';          /* Put back for "this" in OtherTable. */
+    }
+
+    if ((tp = FindWord(OtherTable, buff)) != NULL) {
+        yylval.Number = tp->value;
+        return tp->type;
+    }
+
+    /* Military timezones. */
+    if (buff[1] == '\0' && isalpha((unsigned char)*buff)) {
+        if ((tp = FindWord(MilitaryTable, buff)) != NULL) {
+            yylval.Number = tp->value;
+            return tp->type;
+        }
+    }
+
+    /* Drop out any periods and try the timezone table again. */
+    for (i = 0, p = q = buff; *q; q++)
+        if (*q != '.')
+            *p++ = *q;
+        else
+            i++;
+    *p = '\0';
+    if (i && (tp = FindWord(TimezoneTable, buff)) != NULL) {
+        yylval.Number = tp->value;
+        return tp->type;
+    }
+
+    return tID;
+}
+
+
+/*
+ * Lexer for the date grammar; reads from yyInput.
+ *
+ * Returns tUNUMBER or tSNUMBER for a run of digits (signed if it was
+ * preceded by `+' or `-'), the token chosen by LookupWord for a run of
+ * letters and periods, the character itself for any other character,
+ * and 0 at the end of the string.  White space, a sign that is not
+ * followed by a digit, and parenthesized comments (which may nest)
+ * are skipped.
+ */
+static int
+yylex()
+{
+    register char c;
+    register char *p;
+    char buff[20];
+    int Count;
+    int sign;
+
+    for ( ; ; ) {
+        /* Characters are cast to unsigned char for <ctype.h>, which
+           is undefined for negative values other than EOF. */
+        while (isspace((unsigned char)*yyInput))
+            yyInput++;
+
+        c = *yyInput;
+        if (isdigit((unsigned char)c) || c == '-' || c == '+') {
+            if (c == '-' || c == '+') {
+                sign = c == '-' ? -1 : 1;
+                if (!isdigit((unsigned char)*++yyInput))
+                    /* skip the '-' sign */
+                    continue;
+            }
+            else
+                sign = 0;
+            for (yylval.Number = 0; isdigit((unsigned char)*yyInput); yyInput++)
+                yylval.Number = 10 * yylval.Number + *yyInput - '0';
+            if (sign < 0)
+                yylval.Number = -yylval.Number;
+            return sign ? tSNUMBER : tUNUMBER;
+        }
+        if (isalpha((unsigned char)c)) {
+            /* Words longer than the buffer are silently truncated. */
+            for (p = buff;
+                 isalpha((unsigned char)(c = *yyInput)) || c == '.';
+                 yyInput++)
+                if (p < &buff[sizeof buff - 1])
+                    *p++ = c;
+            *p = '\0';
+            return LookupWord(buff);
+        }
+        /* End of input: stay on the terminator so that a further call
+           cannot read past the end of the string. */
+        if (c == '\0')
+            return 0;
+        /* Return other characters as non-negative codes; a negative
+           token would be taken by the parser as end of input. */
+        if (c != '(')
+            return (unsigned char)*yyInput++;
+        Count = 0;
+        do {
+            c = *yyInput++;
+            if (c == '\0') {
+                /* Unterminated comment: back up onto the terminator. */
+                yyInput--;
+                return 0;
+            }
+            if (c == '(')
+                Count++;
+            else if (c == ')')
+                Count--;
+        } while (Count > 0);
+    }
+}
+
+/* Parse the date string P and return it as seconds since the epoch.
+ * NOW supplies the reference time and zone offset; if it is NULL the
+ * current time is looked up instead.
+ * Returns -1 if P cannot be parsed or converted. A result that would
+ * legitimately be -1 is returned as 0 so it is not mistaken for an error. */
+time_t
+get_date(p, now)
+ char *p;
+ struct timeb *now;
+{
+ struct tm *tm;
+ struct timeb ftz;
+ time_t Start;
+ time_t tod;
+
+ yyInput = p;
+ if (now == NULL) {
+ now = &ftz; /* no reference given: fill in a local one */
+#if !defined(HAVE_FTIME)
+ (void)time(&ftz.time);
+ /* Set the timezone global. */
+ tzset();
+ {
+#if sgi
+ ftz.timezone = (int) _timezone / 60;
+#else /* not sgi */
+#ifdef __386BSD__
+ ftz.timezone = 0; /* NOTE(review): zone offset is hard-wired to 0 on 386BSD -- confirm this is intended */
+#else /* neither sgi nor 386BSD */
+#if defined (USG)
+ extern time_t timezone;
+
+ ftz.timezone = (int) timezone / 60; /* seconds -> minutes */
+#else /* neither sgi nor 386BSD nor USG */
+ struct timeval tv;
+ struct timezone tz;
+
+ gettimeofday (&tv, &tz);
+ ftz.timezone = (int) tz.tz_minuteswest;
+#endif /* neither sgi nor 386BSD nor USG */
+#endif /* neither sgi nor 386BSD */
+#endif /* not sgi */
+ }
+#else /* HAVE_FTIME */
+ (void)ftime(&ftz);
+#endif /* HAVE_FTIME */
+ }
+
+ tm = localtime(&now->time);
+ yyYear = tm->tm_year; /* defaults come from the reference time; tm_year as reported by localtime */
+ yyMonth = tm->tm_mon + 1;
+ yyDay = tm->tm_mday;
+ yyTimezone = now->timezone;
+ yyDSTmode = DSTmaybe;
+ yyHour = 0;
+ yyMinutes = 0;
+ yySeconds = 0;
+ yyMeridian = MER24;
+ yyRelSeconds = 0;
+ yyRelMonth = 0;
+ yyHaveDate = 0;
+ yyHaveDay = 0;
+ yyHaveRel = 0;
+ yyHaveTime = 0;
+ yyHaveZone = 0;
+
+ if (yyparse() /* nonzero means a syntax error; the counters catch an item kind given twice */
+ || yyHaveTime > 1 || yyHaveZone > 1 || yyHaveDate > 1 || yyHaveDay > 1)
+ return -1;
+
+ if (yyHaveDate || yyHaveTime || yyHaveDay) {
+ Start = Convert(yyMonth, yyDay, yyYear, yyHour, yyMinutes, yySeconds,
+ yyMeridian, yyDSTmode);
+ if (Start < 0)
+ return -1;
+ }
+ else {
+ Start = now->time;
+ if (!yyHaveRel) /* nothing but perhaps a zone was given: drop the local time of day */
+ Start -= ((tm->tm_hour * 60L + tm->tm_min) * 60L) + tm->tm_sec;
+ }
+
+ Start += yyRelSeconds; /* apply relative offsets: seconds first, then months */
+ Start += RelativeMonth(Start, yyRelMonth);
+
+ if (yyHaveDay && !yyHaveDate) { /* a weekday without a date: move to that weekday */
+ tod = RelativeDate(Start, yyDayOrdinal, yyDayNumber);
+ Start += tod;
+ }
+
+ /* Have to do *something* with a legitimate -1 so it's distinguishable
+ * from the error return value. (Alternately could set errno on error.) */
+ return Start == -1 ? 0 : Start;
+}
+
+
+#if defined(TEST)
+
+/*
+ * Interactive test driver, built only when TEST is defined.
+ * Reads date strings from standard input, one per line, and prints
+ * each one converted with get_date, or a complaint if it cannot be
+ * converted.  A blank line or end of file ends the program.
+ */
+/* ARGSUSED */
+int
+main(ac, av)
+    int ac;
+    char *av[];
+{
+    char buff[128];
+    char *nl;
+    time_t d;
+
+    (void)printf("Enter date, or blank line to exit.\n\t> ");
+    (void)fflush(stdout);
+    /* fgets bounds the read to the buffer, which gets() could not. */
+    while (fgets(buff, sizeof buff, stdin) != NULL) {
+        /* Drop the newline that fgets keeps and gets() used to remove. */
+        for (nl = buff; *nl != '\0' && *nl != '\n'; nl++)
+            continue;
+        *nl = '\0';
+        if (buff[0] == '\0')
+            break;
+        d = get_date(buff, (struct timeb *)NULL);
+        if (d == -1)
+            (void)printf("Bad format - couldn't convert.\n");
+        else
+            (void)printf("%s", ctime(&d));
+        (void)printf("\t> ");
+        (void)fflush(stdout);
+    }
+    return 0;
+}
+#endif /* defined(TEST) */
diff --git a/gnu/usr.bin/tar/getoldopt.c b/gnu/usr.bin/tar/getoldopt.c
new file mode 100644
index 000000000000..27511b94b342
--- /dev/null
+++ b/gnu/usr.bin/tar/getoldopt.c
@@ -0,0 +1,96 @@
+/* Replacement for getopt() that can be used by tar.
+ Copyright (C) 1988 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * Plug-compatible replacement for getopt() for parsing tar-like
+ * arguments. If the first argument begins with "-", it uses getopt;
+ * otherwise, it uses the old rules used by tar, dump, and ps.
+ *
+ * Written 25 August 1985 by John Gilmore (ihnp4!hoptoad!gnu)
+ */
+
+#include <stdio.h>
+#include "getopt.h"
+#include "tar.h" /* For msg() declaration if STDC_MSG. */
+#include <sys/types.h>
+#include "port.h"
+
+/* Return the next option letter from ARGV, in the manner of getopt.
+   If the first argument starts with `-' or `+', every call is handed
+   to getopt_long.  Otherwise the first argument is read as a string
+   of old-style key letters, and the arguments those letters require
+   are taken in order from the words that follow it.
+   Returns EOF when there are no more options and `?' after reporting
+   an unknown letter or a missing argument.  */
+int
+getoldopt (argc, argv, optstring, long_options, opt_index)
+     int argc;
+     char **argv;
+     char *optstring;
+     struct option *long_options;
+     int *opt_index;
+{
+  extern char *optarg;		/* Points to next arg */
+  extern int optind;		/* Global argv index */
+  static char *key;		/* Points to next keyletter */
+  static char use_getopt;	/* !=0 if argv[1][0] was '-' */
+  char letter;
+  char *match;
+
+  optarg = NULL;
+
+  /* The first call decides which style of arguments we were given.  */
+  if (key == NULL)
+    {
+      if (argc < 2)
+	return EOF;
+      key = argv[1];
+      switch (*key)
+	{
+	case '-':
+	case '+':
+	  use_getopt++;
+	  break;
+	default:
+	  /* Old style: option arguments start after the key word.  */
+	  optind = 2;
+	  break;
+	}
+    }
+
+  if (use_getopt)
+    return getopt_long (argc, argv, optstring,
+			long_options, opt_index);
+
+  /* Stay on the terminator so that later calls keep returning EOF.  */
+  letter = *key;
+  if (letter == '\0')
+    return EOF;
+  key++;
+
+  match = index (optstring, letter);
+  if (match == NULL || letter == ':')
+    {
+      msg ("unknown option %c", letter);
+      return ('?');
+    }
+
+  /* A letter that is not followed by `:' takes no argument.  */
+  if (match[1] != ':')
+    return (letter);
+
+  if (optind >= argc)
+    {
+      msg ("%c argument missing", letter);
+      return ('?');
+    }
+  optarg = argv[optind];
+  optind++;
+  return (letter);
+}
diff --git a/gnu/usr.bin/tar/getopt.c b/gnu/usr.bin/tar/getopt.c
new file mode 100644
index 000000000000..3db9abf121b9
--- /dev/null
+++ b/gnu/usr.bin/tar/getopt.c
@@ -0,0 +1,712 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
+ before changing it!
+
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 1993
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* NOTE!!! AIX requires this to be the first thing in the file.
+ Do not put ANYTHING before it! */
+#if !defined (__GNUC__) && defined (_AIX)
+ #pragma alloca
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if defined (HAVE_ALLOCA_H) || (defined(sparc) && (defined(sun) || (!defined(USG) && !defined(SVR4) && !defined(__svr4__))))
+#include <alloca.h>
+#else
+#ifndef _AIX
+char *alloca ();
+#endif
+#endif /* alloca.h */
+#endif /* not __GNUC__ */
+
+#if !__STDC__ && !defined(const) && IN_GCC
+#define const
+#endif
+
+#include <stdio.h>
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#undef alloca
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+#include <stdlib.h>
+#else /* Not GNU C library. */
+#define __alloca alloca
+#endif /* GNU C library. */
+
+/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a
+ long-named option. Because this is not POSIX.2 compliant, it is
+ being phased out. */
+/* #define GETOPT_COMPAT */
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg = 0;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* XXX 1003.2 says this must be 1 before any call. */
+int optind = 0;
+
+/* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we scan,
+ so that eventually all the non-options are at the end. This allows options
+ to be given in any order, even with programs that were not written to
+ expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were written
+ to expect options and other ARGV-elements in any order and that care about
+ the ordering of the two. We describe each non-option ARGV-element
+ as if it were the argument of an option with character code 1.
+ Using `-' as the first character of the list of option characters
+ selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return EOF with `optind' != ARGC. */
+
+static enum
+{
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+#ifdef __GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+   because there are many ways it can cause trouble.
+   On some systems, it contains special magic macros that don't work
+   in GCC. */
+#include <string.h>
+#define my_index strchr
+/* `exchange' moves a block of ARGV down onto a region that may overlap
+   it.  memcpy is undefined for overlapping regions, so use memmove.  */
+#define my_bcopy(src, dst, n) memmove ((dst), (src), (n))
+#else
+
+/* Avoid depending on library functions or files
+   whose names are inconsistent. */
+
+char *getenv ();
+
+/* Return a pointer to the first occurrence of CHR in STRING, or 0 if
+   there is none.  The terminating null character is never matched.  */
+static char *
+my_index (string, chr)
+     char *string;
+     int chr;
+{
+  while (*string)
+    {
+      if (*string == chr)
+        return string;
+      string++;
+    }
+  return 0;
+}
+
+/* Copy SIZE bytes from FROM to TO, lowest address first.  The forward
+   order keeps the copy correct when TO overlaps FROM from below, which
+   is the only kind of overlap `exchange' produces.  */
+static void
+my_bcopy (from, to, size)
+     char *from, *to;
+     int size;
+{
+  int i;
+  for (i = 0; i < size; i++)
+    to[i] = from[i];
+}
+#endif /* GNU C library. */
+
+/* Handle permutation of arguments. */
+
+/* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
+ `last_nonopt' is the index after the last of them. */
+
+static int first_nonopt;
+static int last_nonopt;
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+static void
+exchange (argv)
+     char **argv;
+{
+  /* Sizes of the two adjacent runs being swapped.  */
+  int n_nonopts = last_nonopt - first_nonopt;
+  int n_opts = optind - last_nonopt;
+  int nonopts_bytes = n_nonopts * sizeof (char *);
+  char **saved = (char **) __alloca (nonopts_bytes);
+
+  /* Park the skipped non-options, slide the options down into their
+     place, then drop the non-options in right behind the options.  */
+  my_bcopy ((char *) (argv + first_nonopt), (char *) saved, nonopts_bytes);
+  my_bcopy ((char *) (argv + last_nonopt), (char *) (argv + first_nonopt),
+            n_opts * sizeof (char *));
+  my_bcopy ((char *) saved, (char *) (argv + first_nonopt + n_opts),
+            nonopts_bytes);
+
+  /* The non-options now sit in [first_nonopt + n_opts, optind).  */
+  first_nonopt += n_opts;
+  last_nonopt = optind;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns `EOF'.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+ const struct option *longopts;
+ int *longind;
+ int long_only;
+{ /* Common scanner called by getopt, getopt_long and getopt_long_only. */
+ int option_index;
+
+ optarg = 0; /* No argument is reported unless one is found below. */
+
+ /* Initialize the internal data when the first call is made.
+ Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ if (optind == 0)
+ {
+ first_nonopt = last_nonopt = optind = 1;
+
+ nextchar = NULL;
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (getenv ("POSIXLY_CORRECT") != NULL)
+ ordering = REQUIRE_ORDER;
+ else
+ ordering = PERMUTE;
+ }
+
+ if (nextchar == NULL || *nextchar == '\0') /* Current ARGV-element is used up: choose the next one. */
+ {
+ if (ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (last_nonopt != optind)
+ first_nonopt = optind;
+
+ /* Now skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (optind < argc
+ && (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#ifdef GETOPT_COMPAT
+ && (longopts == NULL
+ || argv[optind][0] != '+' || argv[optind][1] == '\0')
+#endif /* GETOPT_COMPAT */
+ )
+ optind++;
+ last_nonopt = optind;
+ }
+
+ /* Special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (optind != argc && !strcmp (argv[optind], "--"))
+ {
+ optind++;
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (first_nonopt == last_nonopt)
+ first_nonopt = optind;
+ last_nonopt = argc;
+
+ optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (first_nonopt != last_nonopt)
+ optind = first_nonopt;
+ return EOF;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if ((argv[optind][0] != '-' || argv[optind][1] == '\0')
+#ifdef GETOPT_COMPAT
+ && (longopts == NULL
+ || argv[optind][0] != '+' || argv[optind][1] == '\0')
+#endif /* GETOPT_COMPAT */
+ )
+ {
+ if (ordering == REQUIRE_ORDER)
+ return EOF;
+ optarg = argv[optind++];
+ return 1; /* Non-option reported as the argument of option code 1. */
+ }
+
+ /* We have found another option-ARGV-element.
+ Start decoding its characters, skipping the leading `-' (and a
+ second `-' as well when a long-option table was supplied). */
+
+ nextchar = (argv[optind] + 1
+ + (longopts != NULL && argv[optind][1] == '-'));
+ }
+
+ if (longopts != NULL
+ && ((argv[optind][0] == '-'
+ && (argv[optind][1] == '-' || long_only))
+#ifdef GETOPT_COMPAT
+ || argv[optind][0] == '+'
+#endif /* GETOPT_COMPAT */
+ ))
+ {
+ const struct option *p;
+ char *s = nextchar;
+ int exact = 0;
+ int ambig = 0;
+ const struct option *pfound = NULL;
+ int indfound; /* Only read when pfound is non-null, which is when it has been set. */
+
+ while (*s && *s != '=') /* S stops at the end of the option name. */
+ s++;
+
+ /* Test all options for either exact match or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name;
+ p++, option_index++)
+ if (!strncmp (p->name, nextchar, s - nextchar))
+ {
+ if (s - nextchar == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' is ambiguous\n",
+ argv[0], argv[optind]);
+ nextchar += strlen (nextchar); /* Consume the rest of this ARGV-element. */
+ optind++;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ optind++;
+ if (*s)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = s + 1; /* The text after the `='. */
+ else
+ {
+ if (opterr)
+ {
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ "%s: option `--%s' doesn't allow an argument\n",
+ argv[0], pfound->name);
+ else
+ /* +option or -option */
+ fprintf (stderr,
+ "%s: option `%c%s' doesn't allow an argument\n",
+ argv[0], argv[optind - 1][0], pfound->name);
+ }
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (optind < argc)
+ optarg = argv[optind++];
+ else
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' requires an argument\n",
+ argv[0], argv[optind - 1]);
+ nextchar += strlen (nextchar);
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag) /* Store VAL through FLAG and return 0 instead of VAL. */
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[optind][1] == '-'
+#ifdef GETOPT_COMPAT
+ || argv[optind][0] == '+'
+#endif /* GETOPT_COMPAT */
+ || my_index (optstring, *nextchar) == NULL)
+ {
+ if (opterr)
+ {
+ if (argv[optind][1] == '-')
+ /* --option */
+ fprintf (stderr, "%s: unrecognized option `--%s'\n",
+ argv[0], nextchar);
+ else
+ /* +option or -option */
+ fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+ argv[0], argv[optind][0], nextchar);
+ }
+ nextchar = (char *) ""; /* Empty string: the next call moves on to the next ARGV-element. */
+ optind++;
+ return '?';
+ }
+ }
+
+ /* Look at and handle the next option-character. */
+
+ {
+ char c = *nextchar++;
+ char *temp = my_index (optstring, c);
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*nextchar == '\0')
+ ++optind;
+
+ if (temp == NULL || c == ':')
+ {
+ if (opterr)
+ {
+#if 0
+ if (c < 040 || c >= 0177)
+ fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
+ argv[0], c);
+ else
+ fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c);
+#else
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
+#endif
+ }
+ optopt = c; /* Let the caller see the offending character. */
+ return '?';
+ }
+ if (temp[1] == ':')
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ optind++;
+ }
+ else
+ optarg = 0;
+ nextchar = NULL;
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ optind++;
+ }
+ else if (optind == argc)
+ {
+ if (opterr)
+ {
+#if 0
+ fprintf (stderr, "%s: option `-%c' requires an argument\n",
+ argv[0], c);
+#else
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, "%s: option requires an argument -- %c\n",
+ argv[0], c);
+#endif
+ }
+ optopt = c;
+ if (optstring[0] == ':') /* A leading `:' in OPTSTRING selects ':' as the missing-argument code. */
+ c = ':';
+ else
+ c = '?';
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ optarg = argv[optind++];
+ nextchar = NULL;
+ }
+ }
+ return c;
+ }
+}
+
+/* Classic short-option entry point: run the shared scanner with no
+   long-option table, no index slot, and long_only switched off.  */
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Typed null pointers, so the unprototyped call passes pointer-sized
+     arguments.  */
+  const struct option *no_longopts = (const struct option *) 0;
+  int *no_longind = (int *) 0;
+
+  return _getopt_internal (argc, argv, optstring,
+                           no_longopts, no_longind, 0);
+}
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver: parse the command line with `getopt' and print every
+   option found, followed by the remaining non-option arguments.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* ARGV index of the element this option comes from; `optind' is
+         still zero before the first call.  */
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == EOF)
+        break;
+
+      switch (c)
+        {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+          /* The option string accepts `d:', so report it like `c'
+             instead of letting it fall into the default case.  */
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* Returning from main avoids calling `exit', which has no declaration
+     in scope when stdlib.h is not included.  */
+  return 0;
+}
+
+#endif /* TEST */
diff --git a/gnu/usr.bin/tar/getopt.h b/gnu/usr.bin/tar/getopt.h
new file mode 100644
index 000000000000..93a5cf77816e
--- /dev/null
+++ b/gnu/usr.bin/tar/getopt.h
@@ -0,0 +1,125 @@
+/* Declarations for getopt.
+ Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+   for unrecognized options. */
+
+extern int opterr;
+
+/* Set by `getopt' to the option character that was unrecognized or
+   that lacked its required argument.  */
+
+extern int optopt;
+
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+struct option
+{
+#if __STDC__
+ const char *name; /* Long option name; zero in the element that terminates the vector. */
+#else
+ char *name;
+#endif
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ int has_arg; /* no_argument, required_argument or optional_argument. */
+ int *flag; /* If not NULL, variable that is set to `val' when the option is found. */
+ int val; /* Returned by `getopt' when `flag' is zero; otherwise stored in *flag. */
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+#define no_argument 0
+#define required_argument 1
+#define optional_argument 2
+
+#if __STDC__
+#if defined(__GNU_LIBRARY__)
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* not __GNU_LIBRARY__ */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+ const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind,
+ int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* not __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _GETOPT_H */
diff --git a/gnu/usr.bin/tar/getopt1.c b/gnu/usr.bin/tar/getopt1.c
new file mode 100644
index 000000000000..c3582cfa5caf
--- /dev/null
+++ b/gnu/usr.bin/tar/getopt1.c
@@ -0,0 +1,161 @@
+/* Getopt for GNU.
+ Copyright (C) 1987, 88, 89, 90, 91, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "getopt.h"
+
+#if !__STDC__ && !defined(const) && IN_GCC
+#define const
+#endif
+
+#include <stdio.h>
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#else
+char *getenv ();
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Parse short options plus long options introduced by `--'; a single
+   `-' never starts a long option here.  */
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 0;
+
+  return _getopt_internal (argc, argv, options,
+                           long_options, opt_index, long_only);
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+ If an option that starts with '-' (not '--') doesn't match a long option,
+ but does match a short option, it is parsed as a short option
+ instead. */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  /* Nonzero asks the shared scanner to try `-name' as a long option
+     before treating it as short options.  */
+  int long_only = 1;
+
+  return _getopt_internal (argc, argv, options,
+                           long_options, opt_index, long_only);
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver: parse the command line with `getopt_long' and print each
+   option found, followed by the remaining non-option arguments.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  /* Every entry has a zero `flag' and a zero `val', so a long option
+     always comes back from getopt_long as 0.  */
+  static struct option long_options[] =
+  {
+    {"add", 1, 0, 0},
+    {"append", 0, 0, 0},
+    {"delete", 1, 0, 0},
+    {"verbose", 0, 0, 0},
+    {"create", 0, 0, 0},
+    {"file", 1, 0, 0},
+    {0, 0, 0, 0}
+  };
+  int digit_optind = 0;
+  int c;
+
+  for (;;)
+    {
+      int this_option_optind = optind ? optind : 1;
+      int option_index = 0;
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+                       long_options, &option_index);
+      if (c == EOF)
+        break;
+
+      if (c == 0)
+        {
+          /* A long option; name it and show its argument, if any.  */
+          printf ("option %s", long_options[option_index].name);
+          if (optarg)
+            printf (" with arg %s", optarg);
+          printf ("\n");
+          continue;
+        }
+
+      if (c >= '0' && c <= '9')
+        {
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          continue;
+        }
+
+      switch (c)
+        {
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          /* getopt_long has already reported the problem.  */
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      for (; optind < argc; optind++)
+        printf ("%s ", argv[optind]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/gnu/usr.bin/tar/getpagesize.h b/gnu/usr.bin/tar/getpagesize.h
new file mode 100644
index 000000000000..2d43f262c79d
--- /dev/null
+++ b/gnu/usr.bin/tar/getpagesize.h
@@ -0,0 +1,38 @@
+/* Provide a getpagesize() macro on systems without the function.
+   BSD systems other than 4.1 are taken to have the real thing.  */
+#ifdef BSD
+#ifndef BSD4_1
+#define HAVE_GETPAGESIZE
+#endif
+#endif
+
+#ifndef HAVE_GETPAGESIZE
+
+#ifdef VMS
+#define getpagesize() 512
+#endif
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef _SC_PAGESIZE
+#define getpagesize() sysconf(_SC_PAGESIZE)
+#else
+
+#include <sys/param.h>
+
+#ifdef EXEC_PAGESIZE
+#define getpagesize() EXEC_PAGESIZE
+#else
+#ifdef NBPG
+#ifndef CLSIZE
+#define CLSIZE 1
+#endif /* no CLSIZE */
+/* Parenthesized so the product stays intact when the macro is used
+   inside a larger expression such as `n % getpagesize ()'.  */
+#define getpagesize() (NBPG * CLSIZE)
+#else /* no NBPG */
+#define getpagesize() NBPC
+#endif /* no NBPG */
+#endif /* no EXEC_PAGESIZE */
+#endif /* no _SC_PAGESIZE */
+
+#endif /* not HAVE_GETPAGESIZE */
+
diff --git a/gnu/usr.bin/tar/gnu.c b/gnu/usr.bin/tar/gnu.c
new file mode 100644
index 000000000000..ef51f2b5fedd
--- /dev/null
+++ b/gnu/usr.bin/tar/gnu.c
@@ -0,0 +1,677 @@
+/* GNU dump extensions to tar.
+ Copyright (C) 1988, 1992, 1993 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <ctype.h>
+#include <errno.h>
+#ifndef STDC_HEADERS
+extern int errno;
+#endif
+#include <time.h>
+time_t time ();
+
+#include "tar.h"
+#include "port.h"
+
+#ifndef S_ISLNK
+#define lstat stat
+#endif
+
+extern time_t new_time;
+extern FILE *msg_file;
+
+void addname ();
+int check_exclude ();
+extern PTR ck_malloc ();
+extern PTR ck_realloc ();
+int confirm ();
+extern PTR init_buffer ();
+extern char *get_buffer ();
+int is_dot_or_dotdot ();
+extern void add_buffer ();
+extern void flush_buffer ();
+void name_gather ();
+int recursively_delete ();
+void skip_file ();
+char *un_quote_string ();
+
+extern char *new_name ();
+
+static void add_dir_name ();
+
+/* One directory known to the incremental-dump code.  Entries are chained
+   through `next' into the file-level list dir_list (see add_dir).  */
+struct dirname
+ {
+ struct dirname *next; /* next entry on dir_list, or 0 */
+ char *name; /* malloc'd copy of the directory name (made by add_dir) */
+ char *dir_text; /* as passed to add_dir; entries with a null dir_text are not written back by write_dir_file */
+ int dev; /* device number recorded for the directory */
+ int ino; /* inode number recorded for the directory */
+ int allnew; /* set to 1 by get_dir_contents for new or renamed directories */
+ };
+static struct dirname *dir_list;
+static time_t this_time;
+
+/* Record directory NAME, with device DEV, inode INO and contents TEXT,
+   at the head of dir_list.  NAME is copied; TEXT is stored as given.  */
+void
+add_dir (name, dev, ino, text)
+     char *name;
+     char *text;
+     dev_t dev;
+     ino_t ino;
+{
+  struct dirname *entry;
+
+  entry = (struct dirname *) ck_malloc (sizeof (struct dirname));
+  if (entry == 0)
+    abort ();
+
+  /* Push the new entry on the front of the list.  */
+  entry->next = dir_list;
+  dir_list = entry;
+
+  entry->allnew = 0;
+  entry->dir_text = text;
+  entry->ino = ino;
+  entry->dev = dev;
+  entry->name = ck_malloc (strlen (name) + 1);
+  strcpy (entry->name, name);
+}
+
+/* Load the directory list saved by a previous incremental dump from
+   gnu_dumpfile.  A relative gnu_dumpfile is first made absolute using the
+   current directory.  The first line of the file is a time stamp, which
+   becomes new_time unless f_new_files was already set; each following
+   line is "DEV INO NAME" and is passed to add_dir.  A missing or empty
+   file is not an error: nothing is loaded.  */
+void
+read_dir_file ()
+{
+  int dev;
+  int ino;
+  int linelen;
+  char *strp;
+  FILE *fp;
+  char buf[512];
+  static char *path = 0;
+
+  if (path == 0)
+    path = ck_malloc (PATH_MAX);
+  time (&this_time);
+  if (gnu_dumpfile[0] != '/')
+    {
+#if defined(__MSDOS__) || defined(HAVE_GETCWD) || defined(_POSIX_VERSION)
+      if (!getcwd (path, PATH_MAX))
+        {
+          msg ("Couldn't get current directory.");
+          exit (EX_SYSTEM);
+        }
+#else
+      char *getwd ();
+
+      if (!getwd (path))
+        {
+          msg ("Couldn't get current directory: %s", path);
+          exit (EX_SYSTEM);
+        }
+#endif
+      /* Directory, '/', file name and the terminating NUL must all fit
+         in the PATH_MAX bytes allocated for path.  */
+      if (strlen (path) + 1 + strlen (gnu_dumpfile) + 1 > PATH_MAX)
+        {
+          msg ("Path name of %s is too long.", gnu_dumpfile);
+          exit (EX_SYSTEM);
+        }
+      strcat (path, "/");
+      strcat (path, gnu_dumpfile);
+      gnu_dumpfile = path;
+    }
+
+  fp = fopen (gnu_dumpfile, "r");
+  if (fp == 0)
+    {
+      /* A dump file that does not exist yet is the normal first-run case.  */
+      if (errno != ENOENT)
+        msg_perror ("Can't open %s", gnu_dumpfile);
+      return;
+    }
+
+  /* An empty file carries no time stamp; treat it like a missing file
+     instead of parsing whatever happens to be in buf.  */
+  if (fgets (buf, sizeof (buf), fp) == 0)
+    {
+      fclose (fp);
+      return;
+    }
+  if (!f_new_files)
+    {
+      f_new_files++;
+      new_time = atol (buf);
+    }
+
+  while (fgets (buf, sizeof (buf), fp))
+    {
+      linelen = strlen (buf);
+      if (linelen > 0 && buf[linelen - 1] == '\n')
+        buf[linelen - 1] = '\0';
+
+      strp = buf;
+      dev = atol (strp);
+      while (isdigit (*strp))
+        strp++;
+      ino = atol (strp);
+      while (isspace (*strp))
+        strp++;
+      while (isdigit (*strp))
+        strp++;
+
+      /* A line that ends before the name is malformed; skip it rather
+         than step past the terminating NUL.  */
+      if (*strp == '\0')
+        continue;
+      strp++;                   /* the separator in front of the name */
+      add_dir (un_quote_string (strp), dev, ino, (char *) 0);
+    }
+  fclose (fp);
+}
+
+/* Rewrite gnu_dumpfile: first the time stamp taken by read_dir_file,
+   then one "DEV INO NAME" line for every entry on dir_list that has a
+   non-null dir_text.  Names are written in quoted form when
+   quote_copy_string returns one.  */
+void
+write_dir_file ()
+{
+  FILE *fp;
+  struct dirname *dp;
+  char *str;
+  extern char *quote_copy_string ();
+
+  fp = fopen (gnu_dumpfile, "w");
+  if (fp == 0)
+    {
+      msg_perror ("Can't write to %s", gnu_dumpfile);
+      return;
+    }
+
+  /* Casts make the arguments match the conversion specifications.  */
+  fprintf (fp, "%lu\n", (unsigned long) this_time);
+  for (dp = dir_list; dp; dp = dp->next)
+    {
+      if (!dp->dir_text)
+        continue;
+      str = quote_copy_string (dp->name);
+      fprintf (fp, "%u %u %s\n", (unsigned) dp->dev, (unsigned) dp->ino,
+               str ? str : dp->name);
+      if (str)
+        free (str);
+    }
+
+  /* Buffered output is only known to have reached the file once fclose
+     succeeds.  */
+  if (fclose (fp) != 0)
+    msg_perror ("Can't write to %s", gnu_dumpfile);
+}
+
+/* Return the dir_list entry whose name equals NAME, or 0 if there is
+   none.  */
+struct dirname *
+get_dir (name)
+     char *name;
+{
+  struct dirname *entry = dir_list;
+
+  while (entry != 0 && strcmp (entry->name, name) != 0)
+    entry = entry->next;
+  return entry;
+}
+
+
+/* Collect all the names from argv[] (or whatever), then expand them into
+ a directory tree, and put all the directories at the beginning.
+ When gnu_dumpfile is set, the saved directory list is read before the
+ expansion and written back afterwards. */
+void
+collect_and_sort_names ()
+{
+ struct name *n, *n_next;
+ int num_names;
+ struct stat statbuf;
+ int name_cmp ();
+ char *merge_sort ();
+
+ name_gather ();
+
+ if (gnu_dumpfile)
+ read_dir_file ();
+ /* With no names given, default to the current directory. */
+ if (!namelist)
+ addname (".");
+ for (n = namelist; n; n = n_next)
+ {
+ /* Saved before add_dir_name runs, since that call adds names to the list. */
+ n_next = n->next;
+ if (n->found || n->dir_contents)
+ continue;
+ if (n->regexp) /* FIXME just skip regexps for now */
+ continue;
+ if (n->change_dir)
+ if (chdir (n->change_dir) < 0)
+ {
+ msg_perror ("can't chdir to %s", n->change_dir);
+ continue;
+ }
+
+#ifdef AIX
+ if (statx (n->name, &statbuf, STATSIZE, STX_HIDDEN | STX_LINK))
+#else
+ if (lstat (n->name, &statbuf) < 0)
+#endif /* AIX */
+ {
+ msg_perror ("can't stat %s", n->name);
+ continue;
+ }
+ /* Directories are marked found and expanded recursively. */
+ if (S_ISDIR (statbuf.st_mode))
+ {
+ n->found++;
+ add_dir_name (n->name, statbuf.st_dev);
+ }
+ }
+
+ num_names = 0;
+ for (n = namelist; n; n = n->next)
+ num_names++;
+ /* The third argument is the byte offset of the `next' link inside struct name. */
+ namelist = (struct name *) merge_sort ((PTR) namelist, num_names, (char *) (&(namelist->next)) - (char *) namelist, name_cmp);
+
+ /* `found' was only a sort key here (see name_cmp); clear it again. */
+ for (n = namelist; n; n = n->next)
+ {
+ n->found = 0;
+ }
+ if (gnu_dumpfile)
+ write_dir_file ();
+}
+
+/* Ordering function for the name list: names with `found' set sort
+   ahead of those without it; within each group names are ordered by
+   strcmp.  */
+int
+name_cmp (n1, n2)
+     struct name *n1, *n2;
+{
+  int first_found = n1->found != 0;
+  int second_found = n2->found != 0;
+
+  if (first_found != second_found)
+    return first_found ? -1 : 1;
+  return strcmp (n1->name, n2->name);
+}
+
+/* qsort comparison for directory entries.  P1 and P2 point to string
+   pointers; the first character of each string is skipped and the rest
+   compared with strcmp.  */
+int
+dirent_cmp (p1, p2)
+     const PTR p1;
+     const PTR p2;
+{
+  char **left = (char **) p1;
+  char **right = (char **) p2;
+
+  return strcmp (*left + 1, *right + 1);
+}
+
+/* Read directory P and return its contents as one malloc'd block of
+   NUL-terminated entries, sorted by file name and ended by an empty
+   entry.  Each entry is a flag character followed by the name: 'D' for a
+   directory, 'N' for a file that is excluded, lies on a device other
+   than DEVICE while f_local_filesys is set, or is older than new_time,
+   and 'Y' otherwise.  Sub-directories are also entered in (or updated
+   on) dir_list.  Returns NULL when P cannot be opened or has no entries.  */
+char *
+get_dir_contents (p, device)
+     char *p;
+     int device;
+{
+  DIR *dirp;
+  register struct dirent *d;
+  char *new_buf;
+  char *namebuf;
+  int bufsiz;
+  int len;
+  PTR the_buffer;
+  char *buf;
+  size_t n_strs;
+  char *p_buf;
+  char **vec, **p_vec;
+
+  extern int errno;
+
+  errno = 0;
+  dirp = opendir (p);
+  bufsiz = strlen (p) + NAMSIZ;
+  namebuf = ck_malloc (bufsiz + 2);
+  if (!dirp)
+    {
+      if (errno)
+        msg_perror ("can't open directory %s", p);
+      else
+        msg ("error opening directory %s", p);
+      new_buf = NULL;
+    }
+  else
+    {
+      struct dirname *dp;
+      int all_children;
+
+      dp = get_dir (p);
+      all_children = dp ? dp->allnew : 0;
+      (void) strcpy (namebuf, p);
+      if (p[strlen (p) - 1] != '/')
+        (void) strcat (namebuf, "/");
+      len = strlen (namebuf);
+
+      the_buffer = init_buffer ();
+      while ((d = readdir (dirp)) != 0)
+        {
+          struct stat hs;
+
+          /* Skip . and .. */
+          if (is_dot_or_dotdot (d->d_name))
+            continue;
+          if (NLENGTH (d) + len >= bufsiz)
+            {
+              /* An entry name may be longer than one NAMSIZ step, so
+                 keep growing until the whole path fits.  */
+              while (NLENGTH (d) + len >= bufsiz)
+                bufsiz += NAMSIZ;
+              namebuf = ck_realloc (namebuf, bufsiz + 2);
+            }
+          (void) strcpy (namebuf + len, d->d_name);
+#ifdef AIX
+          if (0 != f_follow_links ?
+              statx (namebuf, &hs, STATSIZE, STX_HIDDEN) :
+              statx (namebuf, &hs, STATSIZE, STX_HIDDEN | STX_LINK))
+#else
+          if (0 != f_follow_links ? stat (namebuf, &hs) : lstat (namebuf, &hs))
+#endif
+            {
+              msg_perror ("can't stat %s", namebuf);
+              continue;
+            }
+          if ((f_local_filesys && device != hs.st_dev)
+              || (f_exclude && check_exclude (namebuf)))
+            add_buffer (the_buffer, "N", 1);
+#ifdef AIX
+          else if (S_ISHIDDEN (hs.st_mode))
+            {
+              add_buffer (the_buffer, "D", 1);
+              strcat (d->d_name, "A");
+              d->d_namlen++;
+            }
+#endif /* AIX */
+          else if (S_ISDIR (hs.st_mode))
+            {
+              if ((dp = get_dir (namebuf)) != 0)
+                {
+                  /* Known name but different device/inode: treat the
+                     whole directory as new.  */
+                  if (dp->dev != hs.st_dev
+                      || dp->ino != hs.st_ino)
+                    {
+                      if (f_verbose)
+                        msg ("directory %s has been renamed.", namebuf);
+                      dp->allnew = 1;
+                      dp->dev = hs.st_dev;
+                      dp->ino = hs.st_ino;
+                    }
+                  dp->dir_text = "";
+                }
+              else
+                {
+                  if (f_verbose)
+                    msg ("Directory %s is new", namebuf);
+                  add_dir (namebuf, hs.st_dev, hs.st_ino, "");
+                  dp = get_dir (namebuf);
+                  dp->allnew = 1;
+                }
+              /* Everything below an all-new directory is all-new too.  */
+              if (all_children)
+                dp->allnew = 1;
+
+              add_buffer (the_buffer, "D", 1);
+            }
+          else if (!all_children
+                   && f_new_files
+                   && new_time > hs.st_mtime
+                   && (f_new_files > 1
+                       || new_time > hs.st_ctime))
+            add_buffer (the_buffer, "N", 1);
+          else
+            add_buffer (the_buffer, "Y", 1);
+          add_buffer (the_buffer, d->d_name, (int) (NLENGTH (d) + 1));
+        }
+      add_buffer (the_buffer, "\000\000", 2);
+      closedir (dirp);
+
+      /* Well, we've read in the contents of the dir, now sort them */
+      buf = get_buffer (the_buffer);
+      if (buf[0] == '\0')
+        {
+          flush_buffer (the_buffer);
+          new_buf = NULL;
+        }
+      else
+        {
+          /* Count the entries, build a vector of pointers to them, sort
+             the vector, then copy the entries out in sorted order.  */
+          n_strs = 0;
+          for (p_buf = buf; *p_buf;)
+            {
+              int tmp;
+
+              tmp = strlen (p_buf) + 1;
+              n_strs++;
+              p_buf += tmp;
+            }
+          vec = (char **) ck_malloc (sizeof (char *) * (n_strs + 1));
+          for (p_vec = vec, p_buf = buf; *p_buf; p_buf += strlen (p_buf) + 1)
+            *p_vec++ = p_buf;
+          *p_vec = 0;
+          qsort ((PTR) vec, n_strs, sizeof (char *), dirent_cmp);
+          new_buf = (char *) ck_malloc (p_buf - buf + 2);
+          for (p_vec = vec, p_buf = new_buf; *p_vec; p_vec++)
+            {
+              char *p_tmp;
+
+              for (p_tmp = *p_vec; (*p_buf++ = *p_tmp++) != '\0';)
+                ;
+            }
+          *p_buf++ = '\0';
+          free (vec);
+          flush_buffer (the_buffer);
+        }
+    }
+  free (namebuf);
+  return new_buf;
+}
+
+/* P is a directory.  Attach its contents listing (from get_dir_contents)
+   to P's entry on the name list, then add every sub-directory named in
+   that listing to the name list and recurse into it.  DEVICE is passed
+   through to get_dir_contents.  */
+static void
+add_dir_name (p, device)
+     char *p;
+     int device;
+{
+  char *new_buf;
+  char *p_buf;
+
+  char *namebuf;
+  int buflen;
+  register int len;
+  int sublen;
+
+  struct name *n;
+
+  new_buf = get_dir_contents (p, device);
+
+  for (n = namelist; n; n = n->next)
+    {
+      if (!strcmp (n->name, p))
+        {
+          /* A directory with no listing still gets a non-null, empty
+             dir_contents.  */
+          n->dir_contents = new_buf ? new_buf : "\0\0\0\0";
+          break;
+        }
+    }
+
+  if (new_buf)
+    {
+      len = strlen (p);
+      buflen = NAMSIZ <= len ? len + NAMSIZ : NAMSIZ;
+      namebuf = ck_malloc (buflen + 1);
+
+      (void) strcpy (namebuf, p);
+      if (namebuf[len - 1] != '/')
+        {
+          namebuf[len++] = '/';
+          namebuf[len] = '\0';
+        }
+      for (p_buf = new_buf; *p_buf; p_buf += sublen + 1)
+        {
+          sublen = strlen (p_buf);
+          if (*p_buf == 'D')
+            {
+              if (len + sublen >= buflen)
+                {
+                  /* One NAMSIZ step may not cover a long entry name;
+                     grow until the joined path fits.  */
+                  while (len + sublen >= buflen)
+                    buflen += NAMSIZ;
+                  namebuf = ck_realloc (namebuf, buflen + 1);
+                }
+              /* p_buf + 1 skips the flag character in front of the name.  */
+              (void) strcpy (namebuf + len, p_buf + 1);
+              addname (namebuf);
+              add_dir_name (namebuf, device);
+            }
+        }
+      free (namebuf);
+    }
+}
+
+/* Return 1 when P is exactly "." or "..", 0 for any other name.  */
+int
+is_dot_or_dotdot (p)
+     char *p;
+{
+  if (p[0] != '.')
+    return 0;
+  if (p[1] == '\0')
+    return 1;
+  return p[1] == '.' && p[2] == '\0';
+}
+
+
+
+
+
+
+/* Bring the directory named by current_file_name (past its first
+   SKIPCRUD characters) in line with the directory listing stored in the
+   archive: every file found on disk but not named in the listing is
+   deleted, after asking first when f_confirm is set.  If the directory
+   does not exist, or the listing cannot be read completely, nothing is
+   deleted.  */
+void
+gnu_restore (skipcrud)
+     int skipcrud;
+{
+  char *current_dir;
+  char *archive_dir;
+  PTR the_buffer;
+  char *p;
+  DIR *dirp;
+  struct dirent *d;
+  char *cur, *arc;
+  extern struct stat hstat;     /* Stat struct corresponding */
+  long size, copied;
+  char *from, *to;
+
+  dirp = opendir (skipcrud + current_file_name);
+
+  if (!dirp)
+    {
+      /* The directory doesn't exist now.  It'll be created.
+         In any case, we don't have to delete any files out
+         of it */
+      skip_file ((long) hstat.st_size);
+      return;
+    }
+
+  /* Names currently on disk, as NUL-terminated strings ended by an
+     empty string.  */
+  the_buffer = init_buffer ();
+  while ((d = readdir (dirp)) != 0)
+    {
+      if (is_dot_or_dotdot (d->d_name))
+        continue;
+
+      add_buffer (the_buffer, d->d_name, (int) (NLENGTH (d) + 1));
+    }
+  closedir (dirp);
+  add_buffer (the_buffer, "", 1);
+
+  current_dir = get_buffer (the_buffer);
+
+  /* Two spare bytes let the listing be terminated below even if the
+     archive data is empty or lacks its final NULs.  */
+  size = (long) hstat.st_size;
+  if (size < 0)
+    size = 0;
+  archive_dir = (char *) ck_malloc (size + 2);
+  if (archive_dir == 0)
+    {
+      msg ("Can't allocate %ld bytes for restore", (long) hstat.st_size);
+      flush_buffer (the_buffer);
+      skip_file ((long) hstat.st_size);
+      return;
+    }
+  to = archive_dir;
+  for (; size > 0; size -= copied)
+    {
+      from = findrec ()->charptr;
+      if (!from)
+        {
+          /* With only part of the listing read, files named in the
+             missing part would look stale and be deleted.  Give up
+             instead.  */
+          msg ("Unexpected EOF in archive\n");
+          flush_buffer (the_buffer);
+          free (archive_dir);
+          return;
+        }
+      copied = endofrecs ()->charptr - from;
+      if (copied > size)
+        copied = size;
+      bcopy ((PTR) from, (PTR) to, (int) copied);
+      to += copied;
+      userec ((union record *) (from + copied - 1));
+    }
+  to[0] = '\0';
+  to[1] = '\0';
+
+  for (cur = current_dir; *cur; cur += strlen (cur) + 1)
+    {
+      /* Each archive entry is a flag character followed by the name.  */
+      for (arc = archive_dir; *arc; arc += strlen (arc) + 1)
+        {
+          arc++;
+          if (!strcmp (arc, cur))
+            break;
+        }
+      if (*arc == '\0')
+        {
+          /* Not in the archive's listing: remove it.  */
+          p = new_name (skipcrud + current_file_name, cur);
+          if (f_confirm && !confirm ("delete", p))
+            {
+              free (p);
+              continue;
+            }
+          if (f_verbose)
+            fprintf (msg_file, "%s: deleting %s\n", tar, p);
+          if (recursively_delete (p))
+            {
+              msg ("%s: Error while deleting %s\n", tar, p);
+            }
+          free (p);
+        }
+
+    }
+  flush_buffer (the_buffer);
+  free (archive_dir);
+}
+
+/* Remove PATH; when it is a directory, remove everything beneath it
+   first.  Returns 0 on success and 1 as soon as any step fails.  */
+int
+recursively_delete (path)
+     char *path;
+{
+  struct stat sbuf;
+  DIR *dirp;
+  struct dirent *dp;
+  char *path_buf;
+  int failed;
+
+  if (lstat (path, &sbuf) < 0)
+    return 1;
+
+  /* Anything that is not a directory is simply unlinked.  */
+  if (!S_ISDIR (sbuf.st_mode))
+    return unlink (path) < 0 ? 1 : 0;
+
+  dirp = opendir (path);
+  if (dirp == 0)
+    return 1;
+  while ((dp = readdir (dirp)) != 0)
+    {
+      if (is_dot_or_dotdot (dp->d_name))
+        continue;
+      path_buf = new_name (path, dp->d_name);
+      failed = recursively_delete (path_buf);
+      free (path_buf);
+      if (failed)
+        {
+          closedir (dirp);
+          return 1;
+        }
+    }
+  closedir (dirp);
+
+  return rmdir (path) < 0 ? 1 : 0;
+}
diff --git a/gnu/usr.bin/tar/list.c b/gnu/usr.bin/tar/list.c
new file mode 100644
index 000000000000..a0c65a334bf2
--- /dev/null
+++ b/gnu/usr.bin/tar/list.c
@@ -0,0 +1,881 @@
+/* List a tar archive.
+ Copyright (C) 1988, 1992, 1993 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * List a tar archive.
+ *
+ * Also includes support routines for reading a tar archive.
+ *
+ * this version written 26 Aug 1985 by John Gilmore (ihnp4!hoptoad!gnu).
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <errno.h>
+#ifndef STDC_HEADERS
+extern int errno;
+#endif
+#include <time.h>
+
+#ifdef BSD42
+#include <sys/file.h>
+#else
+#ifndef V7
+#include <fcntl.h>
+#endif
+#endif
+
+#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
+
+#include "tar.h"
+#include "port.h"
+
+extern FILE *msg_file;
+
+long from_oct (); /* Decode octal number */
+void demode (); /* Print file mode */
+void restore_saved_dir_info ();
+PTR ck_malloc ();
+
+union record *head; /* Points to current archive header */
+struct stat hstat; /* Stat struct corresponding */
+int head_standard; /* Tape header is in ANSI format */
+
+int check_exclude ();
+void close_archive ();
+void decode_header ();
+int findgid ();
+int finduid ();
+void name_gather ();
+int name_match ();
+void names_notfound ();
+void open_archive ();
+void print_header ();
+int read_header ();
+void saverec ();
+void skip_file ();
+void skip_extended_headers ();
+
+extern char *quote_copy_string ();
+
+
+/*
+ * Main loop for reading an archive.
+ *
+ * Reads header after header with read_header () and calls DO_SOMETHING
+ * for each member that passes the name, time and exclude tests; members
+ * that fail them are skipped.  The loop ends at end of archive, or at a
+ * record of zeroes unless f_ignorez is set.
+ */
+void
+read_and (do_something)
+ void (*do_something) ();
+{
+ int status = 3; /* Initial status at start of archive */
+ int prev_status;
+ extern time_t new_time;
+ char save_linkflag;
+
+ name_gather (); /* Gather all the names */
+ open_archive (1); /* Open for reading */
+
+ for (;;)
+ {
+ prev_status = status;
+ status = read_header ();
+ switch (status)
+ {
+
+ case 1: /* Valid header */
+ /* We should decode next field (mode) first... */
+ /* Ensure incoming names are null terminated. */
+
+ /* Member not wanted: unmatched name, too old, or excluded. */
+ if (!name_match (current_file_name)
+ || (f_new_files && hstat.st_mtime < new_time)
+ || (f_exclude && check_exclude (current_file_name)))
+ {
+
+ int isextended = 0;
+
+ /* These header types are handed to do_something even when the name test fails. */
+ if (head->header.linkflag == LF_VOLHDR
+ || head->header.linkflag == LF_MULTIVOL
+ || head->header.linkflag == LF_NAMES)
+ {
+ (*do_something) ();
+ continue;
+ }
+ if (f_show_omitted_dirs
+ && head->header.linkflag == LF_DIR)
+ msg ("Omitting %s\n", current_file_name);
+ /* Skip past it in the archive */
+ if (head->header.isextended)
+ isextended = 1;
+ /* Copied out before userec (head) so it is not read from the header afterwards. */
+ save_linkflag = head->header.linkflag;
+ userec (head);
+ if (isextended)
+ {
+ /* register union record *exhdr;
+
+ for (;;) {
+ exhdr = findrec();
+ if (!exhdr->ext_hdr.isextended) {
+ userec(exhdr);
+ break;
+ }
+ }
+ userec(exhdr);*/
+ skip_extended_headers ();
+ }
+ /* Skip to the next header on the archive */
+ if (save_linkflag != LF_DIR)
+ skip_file ((long) hstat.st_size);
+ continue;
+
+ }
+
+ (*do_something) ();
+ continue;
+
+ /*
+ * If the previous header was good, tell them
+ * that we are skipping bad ones.
+ */
+ case 0: /* Invalid header */
+ userec (head);
+ switch (prev_status)
+ {
+ case 3: /* Error on first record */
+ msg ("Hmm, this doesn't look like a tar archive.");
+ /* FALL THRU */
+ case 2: /* Error after record of zeroes */
+ case 1: /* Error after header rec */
+ msg ("Skipping to next file header...");
+ case 0: /* Error after error */
+ break;
+ }
+ continue;
+
+ case 2: /* Record of zeroes */
+ userec (head);
+ status = prev_status; /* If error after 0's */
+ if (f_ignorez)
+ continue;
+ /* FALL THRU */
+ case EOF: /* End of archive */
+ break;
+ }
+ /* Reached only from the `break' statements above: leave the read loop. */
+ break;
+ };
+
+ restore_saved_dir_info ();
+ close_archive ();
+ names_notfound (); /* Print names not found */
+}
+
+
+/*
+ * Print a header record, based on tar options, then move past the
+ * member's data.  For a dump-directory member (when f_gnudump is set)
+ * the data itself is copied to msg_file instead of being skipped.
+ */
+void
+list_archive ()
+{
+  extern char *save_name;
+  int isextended = 0;           /* Flag to remember if head is extended */
+
+  /* Save the record */
+  saverec (&head);
+
+  /* Print the header record */
+  if (f_verbose)
+    {
+      if (f_verbose > 1)
+        decode_header (head, &hstat, &head_standard, 0);
+      print_header ();
+    }
+
+  if (f_gnudump && head->header.linkflag == LF_DUMPDIR)
+    {
+      size_t size, written, check;
+      char *data;
+      extern long save_totsize;
+      extern long save_sizeleft;
+
+      userec (head);
+      if (f_multivol)
+        {
+          save_name = current_file_name;
+          save_totsize = hstat.st_size;
+        }
+      for (size = hstat.st_size; size > 0; size -= written)
+        {
+          if (f_multivol)
+            save_sizeleft = size;
+          data = findrec ()->charptr;
+          if (data == NULL)
+            {
+              msg ("EOF in archive file?");
+              break;
+            }
+          written = endofrecs ()->charptr - data;
+          if (written > size)
+            written = size;
+          errno = 0;
+          check = fwrite (data, sizeof (char), written, msg_file);
+          userec ((union record *) (data + written - 1));
+          if (check != written)
+            {
+              /* The casts make the size_t values match the %ld
+                 conversions and keep the subtraction signed.  */
+              msg_perror ("only wrote %ld of %ld bytes to file %s",
+                          (long) check, (long) written, current_file_name);
+              skip_file ((long) size - (long) written);
+              break;
+            }
+        }
+      if (f_multivol)
+        save_name = 0;
+      saverec ((union record **) 0);    /* Unsave it */
+      fputc ('\n', msg_file);
+      fflush (msg_file);
+      return;
+
+    }
+  saverec ((union record **) 0);        /* Unsave it */
+  /* Check to see if we have an extended header to skip over also */
+  if (head->header.isextended)
+    isextended = 1;
+
+  /* Skip past the header in the archive */
+  userec (head);
+
+  /*
+   * If we needed to skip any extended headers, do so now, by
+   * reading extended headers and skipping past them in the
+   * archive.
+   */
+  if (isextended)
+    skip_extended_headers ();
+
+  if (f_multivol)
+    save_name = current_file_name;
+  /* Skip to the next header on the archive */
+
+  skip_file ((long) hstat.st_size);
+
+  if (f_multivol)
+    save_name = 0;
+}
+
+
+/*
+ * Read a record that's supposed to be a header record.
+ * Return its address in "head", and if it is good, the file's
+ * size in hstat.st_size.
+ *
+ * Return 1 for success, 0 if the checksum is bad, EOF on eof,
+ * 2 for a record full of zeros (EOF marker).
+ *
+ * LF_LONGNAME and LF_LONGLINK records are consumed here: their data is
+ * kept and used as the name or link name of the next ordinary header,
+ * which is the one reported to the caller.  On success
+ * current_file_name and current_link_name hold fresh malloc'd copies.
+ *
+ * You must always userec(head) to skip past the header which this
+ * routine reads.
+ */
+int
+read_header ()
+{
+  register int i;
+  register long sum, signed_sum, recsum;
+  register char *p;
+  register union record *header;
+  long from_oct ();
+  char **longp;
+  char *bp, *data;
+  int size, written;
+  static char *next_long_name, *next_long_link;
+  char *name;
+
+recurse:
+
+  header = findrec ();
+  head = header;                /* This is our current header */
+  if (NULL == header)
+    return EOF;
+
+  recsum = from_oct (8, header->header.chksum);
+
+  /* Both sums must start from zero; the signed one is kept so that
+     checksums computed with signed chars are accepted as well.  */
+  sum = 0;
+  signed_sum = 0;
+  p = header->charptr;
+  for (i = sizeof (*header); --i >= 0;)
+    {
+      /*
+       * We can't use unsigned char here because of old compilers,
+       * e.g. V7.
+       */
+      signed_sum += *p;
+      sum += 0xFF & *p++;
+    }
+
+  /* Adjust checksum to count the "chksum" field as blanks. */
+  for (i = sizeof (header->header.chksum); --i >= 0;)
+    {
+      sum -= 0xFF & header->header.chksum[i];
+      signed_sum -= (char) header->header.chksum[i];
+    }
+  sum += ' ' * sizeof header->header.chksum;
+  signed_sum += ' ' * sizeof header->header.chksum;
+
+  if (sum == 8 * ' ')
+    {
+      /*
+       * This is a zeroed record...whole record is 0's except
+       * for the 8 blanks we faked for the checksum field.
+       */
+      return 2;
+    }
+
+  if (sum != recsum && signed_sum != recsum)
+    return 0;
+
+  /*
+   * Good record.  Decode file size and return.
+   */
+  if (header->header.linkflag == LF_LINK)
+    hstat.st_size = 0;          /* Links 0 size on tape */
+  else
+    hstat.st_size = from_oct (1 + 12, header->header.size);
+
+  header->header.arch_name[NAMSIZ - 1] = '\0';
+  if (header->header.linkflag == LF_LONGNAME
+      || header->header.linkflag == LF_LONGLINK)
+    {
+      longp = ((header->header.linkflag == LF_LONGNAME)
+               ? &next_long_name
+               : &next_long_link);
+
+      userec (header);
+      if (*longp)
+        free (*longp);
+
+      /* One byte more than the data, so the name is NUL-terminated even
+         if the archive data is not (or the size field was invalid).  */
+      size = hstat.st_size;
+      if (size < 0)
+        size = 0;
+      bp = *longp = (char *) ck_malloc (size + 1);
+
+      for (; size > 0; size -= written)
+        {
+          data = findrec ()->charptr;
+          if (data == NULL)
+            {
+              msg ("Unexpected EOF on archive file");
+              break;
+            }
+          written = endofrecs ()->charptr - data;
+          if (written > size)
+            written = size;
+
+          bcopy (data, bp, written);
+          bp += written;
+          userec ((union record *) (data + written - 1));
+        }
+      *bp = '\0';
+      goto recurse;
+    }
+  else
+    {
+      name = (next_long_name
+              ? next_long_name
+              : head->header.arch_name);
+      if (current_file_name)
+        free (current_file_name);
+      current_file_name = ck_malloc (strlen (name) + 1);
+      strcpy (current_file_name, name);
+
+      name = (next_long_link
+              ? next_long_link
+              : head->header.arch_linkname);
+      if (current_link_name)
+        free (current_link_name);
+      current_link_name = ck_malloc (strlen (name) + 1);
+      strcpy (current_link_name, name);
+
+      /* The long names have been copied; release them instead of just
+         dropping the pointers.  */
+      if (next_long_name)
+        free (next_long_name);
+      if (next_long_link)
+        free (next_long_link);
+      next_long_link = next_long_name = 0;
+      return 1;
+    }
+}
+
+
+/*
+ * Fill in *st from the fields of a tar header record, and set *stdp to 1
+ * for a "Unix Standard" header (magic equal to TMAGIC) or to 0 for an
+ * old-format one.
+ *
+ * The checksum and length are not touched; read_header() has dealt with
+ * those already.
+ *
+ * For Unix Standard headers the uid and gid are decoded only when
+ * wantug is non-zero (extraction needs them, listing does not).
+ *
+ * Do not call this twice on one record with different wantug values:
+ * the uid/gid left in *st could then differ between the calls.  Callers
+ * that need uid/gid must decode with wantug set before anything else
+ * (such as print_header) relies on the decoded data.
+ */
+void
+decode_header (header, st, stdp, wantug)
+     register union record *header;
+     register struct stat *st;
+     int *stdp;
+     int wantug;
+{
+  long from_oct ();
+
+  st->st_mode = from_oct (8, header->header.mode) & 07777;
+  st->st_mtime = from_oct (1 + 12, header->header.mtime);
+  if (f_gnudump)
+    {
+      st->st_atime = from_oct (1 + 12, header->header.atime);
+      st->st_ctime = from_oct (1 + 12, header->header.ctime);
+    }
+
+  if (strcmp (header->header.magic, TMAGIC) != 0)
+    {
+      /* Old fashioned tar archive */
+      *stdp = 0;
+      st->st_uid = from_oct (8, header->header.uid);
+      st->st_gid = from_oct (8, header->header.gid);
+      st->st_rdev = 0;
+      return;
+    }
+
+  /* Unix Standard tar archive */
+  *stdp = 1;
+  if (wantug)
+    {
+#ifdef NONAMES
+      st->st_uid = from_oct (8, header->header.uid);
+      st->st_gid = from_oct (8, header->header.gid);
+#else
+      /* A non-empty user or group name takes precedence over the number.  */
+      if (*header->header.uname)
+        st->st_uid = finduid (header->header.uname);
+      else
+        st->st_uid = from_oct (8, header->header.uid);
+
+      if (*header->header.gname)
+        st->st_gid = findgid (header->header.gname);
+      else
+        st->st_gid = from_oct (8, header->header.gid);
+#endif
+    }
+#if defined(S_IFBLK) || defined(S_IFCHR)
+  if (header->header.linkflag == LF_BLK
+      || header->header.linkflag == LF_CHR)
+    st->st_rdev = makedev (from_oct (8, header->header.devmajor),
+                           from_oct (8, header->header.devminor));
+#endif
+}
+
+
+/*
+ * Quick and dirty octal conversion of a header field.
+ *
+ * Converts at most DIGS characters starting at WHERE: leading white
+ * space is skipped, then octal digits are accumulated.
+ *
+ * Result is -1 if the field is invalid (all blank, or the digits are
+ * followed by something other than white space or a NUL).
+ */
+long
+from_oct (digs, where)
+     register int digs;
+     register char *where;
+{
+  register long value = 0;
+
+  /* Skip spaces; running out of field here means it was all blank.  */
+  for (; isspace (*where); where++)
+    if (--digs <= 0)
+      return -1;
+
+  /* Scan til nonoctal */
+  for (; digs > 0 && *where >= '0' && *where <= '7'; --digs)
+    value = (value << 3) | (*where++ - '0');
+
+  /* Ended on non-space/nul */
+  if (digs > 0 && *where != '\0' && !isspace (*where))
+    return -1;
+
+  return value;
+}
+
+
+/*
+ * Actually print it.
+ *
+ * Plain and fancy file header block logging.
+ * Non-verbose just prints the name, e.g. for "tar t" or "tar x".
+ * This should just contain file names, so it can be fed back into tar
+ * with xargs or the "-T" option. The verbose option can give a bunch
+ * of info, one line per file. I doubt anybody tries to parse its
+ * format, or if they do, they shouldn't. Unix tar is pretty random here
+ * anyway.
+ *
+ * Note that print_header uses the globals <head>, <hstat>, and
+ * <head_standard>, which must be set up in advance. This is not very clean
+ * and should be cleaned up. FIXME.
+ */
+#define UGSWIDTH 18 /* min width of User, group, size */
+/* UGSWIDTH of 18 means that with user and group names <= 8 chars the columns
+ never shift during the listing. */
+#define DATEWIDTH 19 /* Last mod date */
+static int ugswidth = UGSWIDTH; /* Max width encountered so far */
+
+/* Print one listing line for the current member to msg_file, using the
+   globals head, hstat, head_standard, current_file_name and
+   current_link_name.  With f_verbose <= 1 only the (quoted) name is
+   printed; otherwise type and modes, user/group, size or device
+   numbers, date, name and any link target.  With f_sayblock the line is
+   preceded by the record number.  */
+void
+print_header ()
+{
+  char modes[11];
+  char *timestamp;
+  char uform[11], gform[11];    /* These hold formatted ints */
+  char *user, *group;
+  char size[24];                /* Holds a formatted long or maj, min */
+  time_t longie;                /* To make ctime() call portable */
+  int pad;
+  char *name;
+  extern long baserec;
+
+  /* The record number is a long; cast so it matches %ld everywhere.  */
+  if (f_sayblock)
+    fprintf (msg_file, "rec %10ld: ",
+             (long) (baserec + (ar_record - ar_block)));
+
+  if (f_verbose <= 1)
+    {
+      /* Just the name, so the output can be fed back into tar.  */
+      name = quote_copy_string (current_file_name);
+      if (name == 0)
+        name = current_file_name;
+      fprintf (msg_file, "%s\n", name);
+      if (name != current_file_name)
+        free (name);
+    }
+  else
+    {
+      /* File type and modes */
+      modes[0] = '?';
+      switch (head->header.linkflag)
+        {
+        case LF_VOLHDR:
+          modes[0] = 'V';
+          break;
+
+        case LF_MULTIVOL:
+          modes[0] = 'M';
+          break;
+
+        case LF_NAMES:
+          modes[0] = 'N';
+          break;
+
+        case LF_LONGNAME:
+        case LF_LONGLINK:
+          msg ("Visible longname error\n");
+          break;
+
+        case LF_SPARSE:
+        case LF_NORMAL:
+        case LF_OLDNORMAL:
+        case LF_LINK:
+          modes[0] = '-';
+          /* A trailing slash marks a directory.  */
+          if ('/' == current_file_name[strlen (current_file_name) - 1])
+            modes[0] = 'd';
+          break;
+        case LF_DUMPDIR:
+          modes[0] = 'd';
+          break;
+        case LF_DIR:
+          modes[0] = 'd';
+          break;
+        case LF_SYMLINK:
+          modes[0] = 'l';
+          break;
+        case LF_BLK:
+          modes[0] = 'b';
+          break;
+        case LF_CHR:
+          modes[0] = 'c';
+          break;
+        case LF_FIFO:
+          modes[0] = 'p';
+          break;
+        case LF_CONTIG:
+          modes[0] = 'C';
+          break;
+        }
+
+      demode ((unsigned) hstat.st_mode, modes + 1);
+
+      /* Timestamp: cut the ctime string after the minutes and before
+         the newline, so "Mmm dd hh:mm" and "yyyy" print separately.  */
+      longie = hstat.st_mtime;
+      timestamp = ctime (&longie);
+      timestamp[16] = '\0';
+      timestamp[24] = '\0';
+
+      /* User and group names; from_oct returns long, hence %ld.  */
+      if (*head->header.uname && head_standard)
+        {
+          user = head->header.uname;
+        }
+      else
+        {
+          user = uform;
+          (void) sprintf (uform, "%ld",
+                          from_oct (8, head->header.uid));
+        }
+      if (*head->header.gname && head_standard)
+        {
+          group = head->header.gname;
+        }
+      else
+        {
+          group = gform;
+          (void) sprintf (gform, "%ld",
+                          from_oct (8, head->header.gid));
+        }
+
+      /* Format the file size or major/minor device numbers */
+      switch (head->header.linkflag)
+        {
+#if defined(S_IFBLK) || defined(S_IFCHR)
+        case LF_CHR:
+        case LF_BLK:
+          (void) sprintf (size, "%d,%d",
+                          (int) major (hstat.st_rdev),
+                          (int) minor (hstat.st_rdev));
+          break;
+#endif
+        case LF_SPARSE:
+          (void) sprintf (size, "%ld",
+                          from_oct (1 + 12, head->header.realsize));
+          break;
+        default:
+          (void) sprintf (size, "%ld", (long) hstat.st_size);
+        }
+
+      /* Figure out padding and print the whole line. */
+      pad = strlen (user) + strlen (group) + strlen (size) + 1;
+      if (pad > ugswidth)
+        ugswidth = pad;
+
+      name = quote_copy_string (current_file_name);
+      if (!name)
+        name = current_file_name;
+      fprintf (msg_file, "%s %s/%s %*s%s %s %s %s",
+               modes,
+               user,
+               group,
+               ugswidth - pad,
+               "",
+               size,
+               timestamp + 4, timestamp + 20,
+               name);
+
+      if (name != current_file_name)
+        free (name);
+      switch (head->header.linkflag)
+        {
+        case LF_SYMLINK:
+          name = quote_copy_string (current_link_name);
+          if (!name)
+            name = current_link_name;
+          fprintf (msg_file, " -> %s\n", name);
+          if (name != current_link_name)
+            free (name);
+          break;
+
+        case LF_LINK:
+          name = quote_copy_string (current_link_name);
+          if (!name)
+            name = current_link_name;
+          /* Print the quoted copy, as the symlink case does.  */
+          fprintf (msg_file, " link to %s\n", name);
+          if (name != current_link_name)
+            free (name);
+          break;
+
+        case LF_OLDNORMAL:
+        case LF_NORMAL:
+        case LF_SPARSE:
+        case LF_CHR:
+        case LF_BLK:
+        case LF_DIR:
+        case LF_FIFO:
+        case LF_CONTIG:
+        case LF_DUMPDIR:
+          putc ('\n', msg_file);
+          break;
+
+        case LF_VOLHDR:
+          fprintf (msg_file, "--Volume Header--\n");
+          break;
+
+        case LF_MULTIVOL:
+          fprintf (msg_file, "--Continued at byte %ld--\n", from_oct (1 + 12, head->header.offset));
+          break;
+
+        case LF_NAMES:
+          fprintf (msg_file, "--Mangled file names--\n");
+          break;
+
+        default:
+          fprintf (msg_file, " unknown file type '%c'\n",
+                   head->header.linkflag);
+          break;
+        }
+    }
+  fflush (msg_file);
+}
+
+/*
+ * Print a similar line when we make a directory automatically.
+ * Only done when f_verbose > 1.  The first LENGTH characters of
+ * PATHNAME are printed; MODE supplies the permission bits shown.
+ */
+void
+pr_mkdir (pathname, length, mode)
+     char *pathname;
+     int length;
+     int mode;
+{
+  char modes[11];
+  char *name;
+  extern long baserec;
+
+  if (f_verbose > 1)
+    {
+      /* File type and modes */
+      modes[0] = 'd';
+      demode ((unsigned) mode, modes + 1);
+
+      /* The record number is a long; cast so it matches %ld.  */
+      if (f_sayblock)
+        fprintf (msg_file, "rec %10ld: ",
+                 (long) (baserec + (ar_record - ar_block)));
+
+      /* NOTE(review): the quoted copy is built and freed but the line
+         below prints PATHNAME itself, truncated to LENGTH.  LENGTH
+         counts unquoted characters, so switching to the quoted copy
+         needs care -- confirm the intent before changing it.  */
+      name = quote_copy_string (pathname);
+      if (!name)
+        name = pathname;
+      fprintf (msg_file, "%s %*s %.*s\n",
+               modes,
+               ugswidth + DATEWIDTH,
+               "Creating directory:",
+               length,
+               pathname);
+      if (name != pathname)
+        free (name);
+    }
+}
+
+
+/*
+ * Skip over <size> bytes of data in records in the archive, one
+ * RECORDSIZE record at a time.  With f_multivol set, save_totsize and
+ * save_sizeleft are kept up to date while skipping.  Running out of
+ * archive is fatal.
+ */
+void
+skip_file (size)
+     register long size;
+{
+  union record *rec;
+  extern long save_totsize;
+  extern long save_sizeleft;
+
+  if (f_multivol)
+    {
+      save_totsize = size;
+      save_sizeleft = size;
+    }
+
+  for (; size > 0; size -= RECORDSIZE)
+    {
+      rec = findrec ();
+      if (rec == NULL)
+        {
+          /* Check it... */
+          msg ("Unexpected EOF on archive file");
+          exit (EX_BADARCH);
+        }
+      userec (rec);
+      if (f_multivol)
+        save_sizeleft -= RECORDSIZE;
+    }
+}
+
+/* Consume extended header records from the archive, up to and including
+   the first one whose isextended flag is clear.  Running out of archive
+   is fatal, as it is in skip_file.  */
+void
+skip_extended_headers ()
+{
+  register union record *exhdr;
+  int more;
+
+  do
+    {
+      exhdr = findrec ();
+      if (exhdr == NULL)
+        {
+          msg ("Unexpected EOF on archive file");
+          exit (EX_BADARCH);
+        }
+      /* Read the flag before the record is handed back with userec.  */
+      more = exhdr->ext_hdr.isextended;
+      userec (exhdr);
+    }
+  while (more);
+}
+
+/*
+ * Decode the mode string from a stat entry into a 9-char string and a null.
+ * Each of the nine permission bits gives its letter from "rwxrwxrwx" or
+ * '-'; set-uid, set-gid and sticky then replace the matching execute
+ * position with 's'/'s'/'t' when execute is on, or 'S'/'S'/'T' when it
+ * is off.
+ */
+void
+demode (mode, string)
+     register unsigned mode;
+     register char *string;
+{
+  static char perms[] = "rwxrwxrwx";
+  register int i;
+
+  for (i = 0; i < 9; i++)
+    string[i] = (mode & (0400 >> i)) ? perms[i] : '-';
+
+  /* Positions 2, 5 and 8 are the owner, group and other execute slots.  */
+  if (mode & S_ISUID)
+    string[2] = (string[2] == 'x') ? 's' : 'S';
+  if (mode & S_ISGID)
+    string[5] = (string[5] == 'x') ? 's' : 'S';
+  if (mode & S_ISVTX)
+    string[8] = (string[8] == 'x') ? 't' : 'T';
+
+  string[9] = '\0';
+}
diff --git a/gnu/usr.bin/tar/mangle.c b/gnu/usr.bin/tar/mangle.c
new file mode 100644
index 000000000000..628168473a1b
--- /dev/null
+++ b/gnu/usr.bin/tar/mangle.c
@@ -0,0 +1,270 @@
+/* mangle.c -- encode long filenames
+ Copyright (C) 1988, 1992 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <time.h>
+time_t time ();
+
+#include "tar.h"
+#include "port.h"
+
+void add_buffer ();
+extern PTR ck_malloc ();
+void finish_header ();
+extern PTR init_buffer ();
+extern char *quote_copy_string ();
+extern char *get_buffer ();
+char *un_quote_string ();
+
+extern union record *start_header ();
+
+extern struct stat hstat; /* Stat struct corresponding */
+
+struct mangled /* One long file name that was replaced by a generated short
+ name.  Entries form a singly linked list headed by first_mangle. */
+ {
+ struct mangled *next; /* next entry, or 0 at the end of the list */
+ int type; /* 1 for a symlink entry, 0 for a plain rename */
+ char mangled[NAMSIZ]; /* the generated "@@MaNgLeD.<n>" name */
+ char *linked_to; /* symlink entries only: link text, stored just past normal's string */
+ char normal[1]; /* original name; each entry is allocated with room for all of it */
+ };
+
+
+/* Should use a hash table, etc. */
+struct mangled *first_mangle; /* head of the list */
+int mangled_num = 0; /* number that goes into the next generated name */
+
+#if 0 /* Deleted because there is now a better way to do all this */
+
+/* Return the generated short name recorded for NAME, or 0 if NAME is not
+   on the list of mangled names. */
+char *
+find_mangled (name)
+     char *name;
+{
+  struct mangled *entry = first_mangle;
+
+  while (entry)
+    {
+      if (strcmp (name, entry->normal) == 0)
+        return entry->mangled;
+      entry = entry->next;
+    }
+  return 0;
+}
+
+
+#ifdef S_ISLNK
+/* Record SYMLINK, whose link text is LINKTO, under a newly generated short
+   name, append the record to the list of mangled names, and copy the short
+   name into BUFFER (at most NAMSIZ bytes, as strncpy does). */
+void
+add_symlink_mangle (symlink, linkto, buffer)
+     char *symlink;
+     char *linkto;
+     char *buffer;
+{
+  struct mangled *entry;
+  struct mangled **tail;
+
+  /* Both strings, each with its terminating NUL, live behind the struct. */
+  entry = (struct mangled *) ck_malloc (sizeof (struct mangled) + strlen (symlink) + strlen (linkto) + 2);
+
+  /* Find the null link that ends the list and hang the new entry on it. */
+  for (tail = &first_mangle; *tail; tail = &(*tail)->next)
+    ;
+  *tail = entry;
+
+  entry->next = 0;
+  entry->type = 1;
+  strcpy (entry->normal, symlink);
+  entry->linked_to = entry->normal + strlen (symlink) + 1;
+  strcpy (entry->linked_to, linkto);
+  sprintf (entry->mangled, "@@MaNgLeD.%d", mangled_num++);
+  strncpy (buffer, entry->mangled, NAMSIZ);
+}
+
+#endif
+
+/* Record NAME under a newly generated short name, append the record to the
+   list of mangled names, and copy the short name into BUFFER (at most
+   NAMSIZ bytes, as strncpy does). */
+void
+add_mangle (name, buffer)
+     char *name;
+     char *buffer;
+{
+  struct mangled *entry;
+  struct mangled **tail;
+
+  /* normal[1] already supplies the byte for the terminating NUL. */
+  entry = (struct mangled *) ck_malloc (sizeof (struct mangled) + strlen (name));
+
+  /* Find the null link that ends the list and hang the new entry on it. */
+  for (tail = &first_mangle; *tail; tail = &(*tail)->next)
+    ;
+  *tail = entry;
+
+  entry->type = 0;
+  entry->next = 0;
+  strcpy (entry->normal, name);
+  sprintf (entry->mangled, "@@MaNgLeD.%d", mangled_num++);
+  strncpy (buffer, entry->mangled, NAMSIZ);
+}
+
+/*
+ * Write the list of mangled names into the archive as a member named
+ * "././@MaNgLeD_NaMeS" with link flag LF_NAMES.  The member's data is
+ * one text line per list entry, either
+ *     Rename <generated-name> to <original-name>
+ * or
+ *     Symlink <original-name> to <link-text>
+ * Original names and link text go through quote_copy_string() first.
+ * Does nothing when the list is empty.
+ */
+void
+write_mangled ()
+{
+  struct mangled *munge;
+  struct stat hstat;            /* local; hides the file-scope hstat */
+  union record *header;
+  char *ptr1, *ptr2;
+  PTR the_buffer;
+  int size;
+  int bufsize;
+
+  if (!first_mangle)
+    return;
+  the_buffer = init_buffer ();
+  for (munge = first_mangle; munge; munge = munge->next)
+    {
+      /* quote_copy_string() returns 0 when nothing needed quoting. */
+      ptr1 = quote_copy_string (munge->normal);
+      if (!ptr1)
+        ptr1 = munge->normal;
+      if (munge->type)
+        {
+          add_buffer (the_buffer, "Symlink ", 8);
+          add_buffer (the_buffer, ptr1, strlen (ptr1));
+          add_buffer (the_buffer, " to ", 4);
+
+          if ((ptr2 = quote_copy_string (munge->linked_to)) != 0)
+            {
+              add_buffer (the_buffer, ptr2, strlen (ptr2));
+              free (ptr2);
+            }
+          else
+            add_buffer (the_buffer, munge->linked_to, strlen (munge->linked_to));
+        }
+      else
+        {
+          add_buffer (the_buffer, "Rename ", 7);
+          add_buffer (the_buffer, munge->mangled, strlen (munge->mangled));
+          add_buffer (the_buffer, " to ", 4);
+          add_buffer (the_buffer, ptr1, strlen (ptr1));
+        }
+      add_buffer (the_buffer, "\n", 1);
+      if (ptr1 != munge->normal)
+        free (ptr1);
+    }
+
+  /* The text is measured with strlen() below, but add_buffer() only
+     copies the bytes it is handed, so terminate it explicitly. */
+  add_buffer (the_buffer, "", 1);
+
+  bzero (&hstat, sizeof (struct stat));
+  hstat.st_atime = hstat.st_mtime = hstat.st_ctime = time (0);
+  ptr1 = get_buffer (the_buffer);
+  hstat.st_size = strlen (ptr1);
+
+  header = start_header ("././@MaNgLeD_NaMeS", &hstat);
+  header->header.linkflag = LF_NAMES;
+  finish_header (header);
+  size = hstat.st_size;
+  header = findrec ();
+  bufsize = endofrecs ()->charptr - header->charptr;
+
+  /* Fill whole output blocks for as long as the text exceeds the space
+     left in the current one. */
+  while (bufsize < size)
+    {
+      bcopy (ptr1, header->charptr, bufsize);
+      ptr1 += bufsize;
+      size -= bufsize;
+      userec (header + (bufsize - 1) / RECORDSIZE);
+      header = findrec ();
+      bufsize = endofrecs ()->charptr - header->charptr;
+    }
+
+  /* Final piece: copy what is left and zero the rest of the block. */
+  bcopy (ptr1, header->charptr, size);
+  bzero (header->charptr + size, bufsize - size);
+  userec (header + (size - 1) / RECORDSIZE);
+  /* NOTE(review): the_buffer is never released here. */
+}
+
+#endif
+
+/* Return a pointer to the first " to " separator in S, or 0 if S does
+   not contain one. */
+static char *
+find_to_separator (s)
+     char *s;
+{
+  char *sp;
+
+  for (sp = index (s, ' '); sp; sp = index (sp + 1, ' '))
+    if (!strncmp (sp, " to ", 4))
+      return sp;
+  return 0;
+}
+
+/*
+ * Read the data of a mangled-names member (hstat.st_size bytes) from
+ * the archive and carry out the commands in it, one per line:
+ *     Rename <from> to <to>      -- rename (from, to)
+ *     Symlink <name1> to <name2> -- symlink (name1, name2), retried once
+ *                                   after unlinking name2
+ * Lines that are neither, or that lack the " to " separator, are
+ * reported and skipped.  HEAD is not used.
+ */
+void
+extract_mangle (head)
+     union record *head;
+{
+  char *buf;
+  char *fromtape;
+  char *to;
+  char *ptr, *ptrend;
+  char *nam1, *nam1end;
+  char *last;                   /* last character of the current line */
+  union record *rec;
+  int size;
+  int copied;
+
+  size = hstat.st_size;
+  buf = to = ck_malloc (size + 1);
+  buf[size] = '\0';
+  while (size > 0)
+    {
+      /* Test the record pointer itself before reaching into it. */
+      rec = findrec ();
+      if (rec == 0)
+        {
+          msg ("Unexpected EOF in mangled names!");
+          free (buf);
+          return;
+        }
+      fromtape = rec->charptr;
+      copied = endofrecs ()->charptr - fromtape;
+      if (copied > size)
+        copied = size;
+      bcopy (fromtape, to, copied);
+      to += copied;
+      size -= copied;
+      userec ((union record *) (fromtape + copied - 1));
+    }
+  for (ptr = buf; *ptr; ptr = ptrend)
+    {
+      /* Cut the current line out of the buffer.  The last line may
+         arrive without its newline. */
+      ptrend = index (ptr, '\n');
+      if (ptrend)
+        {
+          last = ptrend - 1;
+          *ptrend++ = '\0';
+        }
+      else
+        {
+          ptrend = ptr + strlen (ptr);
+          last = ptrend - 1;
+        }
+
+      if (!strncmp (ptr, "Rename ", 7))
+        {
+          nam1 = ptr + 7;
+          nam1end = find_to_separator (nam1);
+          if (!nam1end)
+            {
+              msg ("Malformed demangling command %s", ptr);
+              continue;
+            }
+          *nam1end = '\0';
+          /* Drop one trailing slash from the destination name. */
+          if (*last == '/')
+            *last = '\0';
+          un_quote_string (nam1end + 4);
+          if (rename (nam1, nam1end + 4))
+            msg_perror ("Can't rename %s to %s", nam1, nam1end + 4);
+          else if (f_verbose)
+            msg ("Renamed %s to %s", nam1, nam1end + 4);
+        }
+#ifdef S_ISLNK
+      else if (!strncmp (ptr, "Symlink ", 8))
+        {
+          nam1 = ptr + 8;
+          nam1end = find_to_separator (nam1);
+          if (!nam1end)
+            {
+              msg ("Malformed demangling command %s", ptr);
+              continue;
+            }
+          *nam1end = '\0';
+          un_quote_string (nam1);
+          un_quote_string (nam1end + 4);
+          if (symlink (nam1, nam1end + 4) && (unlink (nam1end + 4) || symlink (nam1, nam1end + 4)))
+            msg_perror ("Can't symlink %s to %s", nam1, nam1end + 4);
+          else if (f_verbose)
+            msg ("Symlinkd %s to %s", nam1, nam1end + 4);
+        }
+#endif
+      else
+        msg ("Unknown demangling command %s", ptr);
+    }
+  free (buf);
+}
diff --git a/gnu/usr.bin/tar/msd_dir.h b/gnu/usr.bin/tar/msd_dir.h
new file mode 100644
index 000000000000..06c7a644b42d
--- /dev/null
+++ b/gnu/usr.bin/tar/msd_dir.h
@@ -0,0 +1,44 @@
+/*
+ * @(#)msd_dir.h 1.4 87/11/06 Public Domain.
+ *
+ * A public domain implementation of BSD directory routines for
+ * MS-DOS. Written by Michael Rendell ({uunet,utai}michael@garfield),
+ * August 1987
+ */
+
+#define rewinddir(dirp) seekdir(dirp, 0L) /* rewinding is a seek to position 0 */
+/* Longest name stored in struct dirent's d_name, not counting the NUL. */
+#define MAXNAMLEN 12
+/* Turbo C gets ino_t and dev_t defined here. */
+#ifdef __TURBOC__
+typedef int ino_t;
+typedef int dev_t;
+#endif
+
+struct dirent /* one directory entry, as handed back by readdir() */
+ {
+ ino_t d_ino; /* a bit of a farce */
+ int d_reclen; /* more farce */
+ int d_namlen; /* length of d_name */
+ char d_name[MAXNAMLEN + 1]; /* guarantee null termination */
+ };
+
+struct _dircontents /* node of the singly linked list that a DIR points into */
+ {
+ char *_d_entry; /* presumably the entry's name -- confirm against the implementation */
+ struct _dircontents *_d_next; /* next node in the list */
+ };
+
+typedef struct _dirdesc /* descriptor for one open directory */
+ {
+ int dd_id; /* uniquely identify each open directory */
+ long dd_loc; /* current position within the directory */
+ struct _dircontents *dd_contents; /* pointer to contents of dir */
+ struct _dircontents *dd_cp; /* pointer to current position */
+ } DIR;
+
+extern DIR *opendir ();
+extern struct dirent *readdir ();
+extern void seekdir ();
+extern long telldir ();
+extern void closedir ();
diff --git a/gnu/usr.bin/tar/names.c b/gnu/usr.bin/tar/names.c
new file mode 100644
index 000000000000..0de6a8898a30
--- /dev/null
+++ b/gnu/usr.bin/tar/names.c
@@ -0,0 +1,149 @@
+/* Look up user and/or group names.
+ Copyright (C) 1988, 1992 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * Look up user and/or group names.
+ *
+ * This file should be modified for non-unix systems to do something
+ * reasonable.
+ */
+
+#include <sys/types.h>
+#include "tar.h"
+#include "port.h"
+
+#ifndef NONAMES
+/* Whole module goes away if NONAMES defined. Otherwise... */
+#include <stdio.h>
+#include <pwd.h>
+#include <grp.h>
+
+static int saveuid = -993; /* uid held in the one-entry user cache */
+static char saveuname[TUNMLEN]; /* user name that goes with saveuid */
+static int my_uid = -993; /* our own uid; negative until myuid fetches it */
+
+static int savegid = -993; /* gid held in the one-entry group cache */
+static char savegname[TGNMLEN]; /* group name that goes with savegid */
+static int my_gid = -993; /* our own gid; negative until mygid fetches it */
+/* Our own uid/gid: getuid()/getgid() is called while the cached value is negative. */
+#define myuid ( my_uid < 0? (my_uid = getuid()): my_uid )
+#define mygid ( my_gid < 0? (my_gid = getgid()): my_gid )
+
+/*
+ * Copy the user name for UID into UNAME (TUNMLEN bytes, as strncpy
+ * does); the name is empty when getpwuid() does not know the uid.
+ * Results are remembered in a cache.
+ * FIXME, for now it's a one-entry cache.
+ * FIXME2, the "-993" initial value is to reduce the chance of a hit on
+ * the first lookup.
+ *
+ * The lookups are ifdef'd (see NONAMES) because on Suns they drag in
+ * about 38K of "yellow pages" code, roughly doubling the program size.
+ * Thanks guys.
+ */
+void
+finduname (uname, uid)
+     char uname[TUNMLEN];
+     int uid;
+{
+  struct passwd *entry;
+#ifndef HAVE_GETPWUID
+  extern struct passwd *getpwuid ();
+#endif
+
+  if (saveuid != uid)
+    {
+      /* Cache miss: replace the remembered pair. */
+      saveuid = uid;
+      entry = getpwuid (uid);
+      if (entry)
+        strncpy (saveuname, entry->pw_name, TUNMLEN);
+      else
+        saveuname[0] = '\0';
+    }
+  strncpy (uname, saveuname, TUNMLEN);
+}
+
+/*
+ * Return the uid for user name UNAME, using the one-entry cache that
+ * finduname() also fills.  A name getpwnam() does not know maps to our
+ * own uid.
+ */
+int
+finduid (uname)
+     char uname[TUNMLEN];
+{
+  struct passwd *entry;
+  extern struct passwd *getpwnam ();
+
+  /* Comparing first characters avoids the call for most misses. */
+  if (uname[0] == saveuname[0]
+      && strncmp (uname, saveuname, TUNMLEN) == 0)
+    return saveuid;
+
+  strncpy (saveuname, uname, TUNMLEN);
+  entry = getpwnam (uname);
+  if (entry)
+    saveuid = entry->pw_uid;
+  else
+    saveuid = myuid;
+  return saveuid;
+}
+
+
+/*
+ * Copy the group name for GID into GNAME (TGNMLEN bytes, as strncpy
+ * does); the name is empty when getgrgid() does not know the gid.
+ * The last answer is kept in a one-entry cache.
+ */
+void
+findgname (gname, gid)
+     char gname[TGNMLEN];
+     int gid;
+{
+  struct group *entry;
+#ifndef HAVE_GETGRGID
+  extern struct group *getgrgid ();
+#endif
+
+  if (savegid != gid)
+    {
+      /* Cache miss: replace the remembered pair. */
+      savegid = gid;
+      savegname[0] = '\0';
+      (void) setgrent ();
+      entry = getgrgid (gid);
+      if (entry != 0)
+        strncpy (savegname, entry->gr_name, TGNMLEN);
+    }
+  (void) strncpy (gname, savegname, TGNMLEN);
+}
+
+
+/*
+ * Return the gid for group name GNAME, using the one-entry cache that
+ * findgname() also fills.  A name getgrnam() does not know maps to our
+ * own gid.
+ *
+ * All lengths here are TGNMLEN, the declared size of savegname.
+ */
+int
+findgid (gname)
+     char gname[TGNMLEN];
+{
+  struct group *gr;
+  extern struct group *getgrnam ();
+
+  if (gname[0] != savegname[0]  /* Quick test w/o proc call */
+      || 0 != strncmp (gname, savegname, TGNMLEN))
+    {
+      strncpy (savegname, gname, TGNMLEN);
+      gr = getgrnam (gname);
+      if (gr)
+        {
+          savegid = gr->gr_gid;
+        }
+      else
+        {
+          savegid = mygid;
+        }
+    }
+  return savegid;
+}
+
+#endif
diff --git a/gnu/usr.bin/tar/open3.h b/gnu/usr.bin/tar/open3.h
new file mode 100644
index 000000000000..c1c0e59b6761
--- /dev/null
+++ b/gnu/usr.bin/tar/open3.h
@@ -0,0 +1,67 @@
+/* Defines for Sys V style 3-argument open call.
+ Copyright (C) 1988 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * open3.h -- #defines for the various flags for the Sys V style 3-argument
+ * open() call. On BSD or System 5, the system already has this in an
+ * include file. This file is needed for V7 and MINIX systems for the
+ * benefit of open3() in port.c, a routine that emulates the 3-argument
+ * call using system calls available on V7/MINIX.
+ *
+ * This file is needed by PD tar even if we aren't using the
+ * emulator, since the #defines for O_WRONLY, etc. are used in
+ * a couple of places besides the open() calls, (e.g. in the assignment
+ * to openflag in extract.c). We just #include this rather than
+ * #ifdef them out.
+ *
+ * Written 6/10/87 by rmtodd@uokmax (Richard Todd).
+ *
+ * The names have been changed by John Gilmore, 31 July 1987, since
+ * Richard called it "bsdopen", and really this change was introduced in
+ * AT&T Unix systems before BSD picked it up.
+ */
+
+/* Only one of the next three should be specified */
+#define O_RDONLY 0 /* only allow read */
+#define O_WRONLY 1 /* only allow write */
+#define O_RDWR 2 /* both are allowed */
+
+/* The rest of these can be OR-ed in to the above. */
+/*
+ * O_NDELAY isn't implemented by the emulator. It's only useful (to tar) on
+ * systems that have named pipes anyway; it keeps tar from hanging when it
+ * opens a named pipe. We #ifndef it because some systems already have
+ * it defined.
+ */
+#ifndef O_NDELAY
+#define O_NDELAY 4 /* don't block on opening devices that would
+ * block on open -- ignored by emulator. */
+#endif
+#define O_CREAT 8 /* create file if needed */
+#define O_EXCL 16 /* file cannot already exist */
+#define O_TRUNC 32 /* truncate file on open */
+#define O_APPEND 64 /* always write at end of file -- ignored by emul */
+
+#ifdef EMUL_OPEN3
+/*
+ * make emulation transparent to rest of file -- redirect all open() calls
+ * to our routine.  This applies to every open() that appears after this
+ * header is included, until the macro is #undef'd.
+ */
+#define open open3
+#endif
diff --git a/gnu/usr.bin/tar/pathmax.h b/gnu/usr.bin/tar/pathmax.h
new file mode 100644
index 000000000000..aeba9f7d2186
--- /dev/null
+++ b/gnu/usr.bin/tar/pathmax.h
@@ -0,0 +1,53 @@
+/* Define PATH_MAX somehow. Requires sys/types.h.
+ Copyright (C) 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef _PATHMAX_H
+#define _PATHMAX_H
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+/* Non-POSIX BSD systems might have gcc's limits.h, which doesn't define
+ PATH_MAX but might cause redefinition warnings when sys/param.h is
+ later included (as on MORE/BSD 4.3). */
+#if defined(_POSIX_VERSION) || (defined(HAVE_LIMITS_H) && defined(USG))
+#include <limits.h>
+#endif
+
+#ifndef _POSIX_PATH_MAX /* last-resort value, used at the bottom of this file */
+#define _POSIX_PATH_MAX 255
+#endif
+/* With pathconf() available, PATH_MAX becomes a run-time expression rather
+ than a constant; 1024 is used when pathconf() reports less than 1. */
+#if !defined(PATH_MAX) && defined(_PC_PATH_MAX)
+#define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 : pathconf ("/", _PC_PATH_MAX))
+#endif
+
+/* Don't include sys/param.h if it already has been. */
+#if !defined(PATH_MAX) && !defined(MAXPATHLEN) && !defined(__MSDOS__)
+#include <sys/param.h>
+#endif
+
+#if !defined(PATH_MAX) && defined(MAXPATHLEN) /* next choice: MAXPATHLEN */
+#define PATH_MAX MAXPATHLEN
+#endif
+
+#ifndef PATH_MAX /* nothing else supplied a value */
+#define PATH_MAX _POSIX_PATH_MAX
+#endif
+
+#endif /* _PATHMAX_H */
diff --git a/gnu/usr.bin/tar/port.c b/gnu/usr.bin/tar/port.c
new file mode 100644
index 000000000000..10ec32ed7288
--- /dev/null
+++ b/gnu/usr.bin/tar/port.c
@@ -0,0 +1,1256 @@
+/* Supporting routines which may sometimes be missing.
+ Copyright (C) 1988, 1992 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+#ifndef STDC_HEADERS
+extern int errno;
+#endif
+
+#ifdef BSD42
+#include <sys/file.h>
+#else
+#ifndef V7
+#include <fcntl.h>
+#endif
+#endif
+
+#include "tar.h"
+#include "port.h"
+
+extern long baserec;
+
+/* All machine-dependent #ifdefs should appear here, instead of
+ being scattered through the file. For UN*X systems, it is better to
+ figure out what is needed in the configure script, for most of the
+ features. */
+
+#ifdef __MSDOS__
+char TTY_NAME[] = "con"; /* terminal device name used on MS-DOS */
+#define HAVE_STRSTR /* these three keep the replacement routines in this file from being compiled */
+#define HAVE_RENAME
+#define HAVE_MKDIR
+#else
+char TTY_NAME[] = "/dev/tty"; /* terminal device name used everywhere else */
+#endif
+
+/* End of system-dependent #ifdefs */
+
+
+#ifndef HAVE_VALLOC
+/*
+ * valloc() does a malloc() on a page boundary. On some systems,
+ * this can make large block I/O more efficient.
+ *
+ * This replacement, compiled only when HAVE_VALLOC is not defined,
+ * just forwards SIZE to malloc() and makes no attempt at page alignment.
+ */
+char *
+valloc (size)
+ unsigned size;
+{
+ return (malloc (size));
+}
+
+#endif /* !HAVE_VALLOC */
+
+#ifndef HAVE_MKDIR
+/*
+ * Written by Robert Rother, Mariah Corporation, August 1985.
+ *
+ * If you want it, it's yours. All I ask in return is that if you
+ * figure out how to do this in a Bourne Shell script you send me
+ * a copy.
+ * sdcsvax!rmr or rmr@uscd
+ *
+ * Severely hacked over by John Gilmore to make a 4.2BSD compatible
+ * subroutine. 11Mar86; hoptoad!gnu
+ *
+ * Modified by rmtodd@uokmax 6-28-87 -- when making an already existing dir,
+ * subroutine didn't return EEXIST. It does now.
+ */
+
+/*
+ * Make a directory.
+ *
+ * Emulates mkdir() by running /bin/mkdir in a child process.  DPATH is
+ * the directory to create; DMODE holds the permission bits wanted for
+ * it, applied by adjusting the child's umask.  Returns 0 on success.
+ * On failure returns -1 with errno set: EEXIST if DPATH already exists,
+ * EIO if /bin/mkdir could not be run or reported failure, otherwise
+ * whatever stat(), fork() or wait() left behind.
+ */
+int
+mkdir (dpath, dmode)
+     char *dpath;
+     int dmode;
+{
+  int cpid, status, waited;
+  struct stat statbuf;
+
+  if (stat (dpath, &statbuf) == 0)
+    {
+      errno = EEXIST;           /* Stat worked, so it already exists */
+      return -1;
+    }
+
+  /* If stat fails for a reason other than non-existence, return error */
+  if (errno != ENOENT)
+    return -1;
+
+  switch (cpid = fork ())
+    {
+
+    case -1:                    /* Error in fork() */
+      return (-1);              /* Errno is set already */
+
+    case 0:                     /* Child process */
+      /*
+       * Cheap hack to set mode of new directory.  Since this
+       * child process is going away anyway, we zap its umask.
+       * FIXME, this won't suffice to set SUID, SGID, etc. on this
+       * directory.  Does anybody care?
+       */
+      status = umask (0);       /* Get current umask */
+      status = umask (status | (0777 & ~dmode));        /* Set for mkdir */
+      execl ("/bin/mkdir", "mkdir", dpath, (char *) 0);
+      _exit (-1);               /* Can't exec /bin/mkdir */
+
+    default:                    /* Parent process */
+      /* Wait for kid to finish.  Other children may be reaped on the
+         way; if wait() itself fails for any reason but an interrupted
+         call, give up instead of spinning forever. */
+      while ((waited = wait (&status)) != cpid)
+        if (waited == -1 && errno != EINTR)
+          return -1;
+    }
+
+  if (WIFSIGNALED (status) || WEXITSTATUS (status) != 0)
+    {
+      errno = EIO;              /* We don't know why, but */
+      return -1;                /* /bin/mkdir failed */
+    }
+
+  return 0;
+}
+
+/*
+ * Remove a directory.
+ *
+ * Emulates rmdir() by running /bin/rmdir on DPATH in a child process.
+ * Returns 0 on success.  On failure returns -1 with errno set: EIO if
+ * /bin/rmdir could not be run or reported failure, otherwise whatever
+ * stat(), fork() or wait() left behind.
+ */
+int
+rmdir (dpath)
+     char *dpath;
+{
+  int cpid, status, waited;
+  struct stat statbuf;
+
+  if (stat (dpath, &statbuf) != 0)
+    {
+      /* Stat just set errno.  We don't have to */
+      return -1;
+    }
+
+  switch (cpid = fork ())
+    {
+
+    case -1:                    /* Error in fork() */
+      return (-1);              /* Errno is set already */
+
+    case 0:                     /* Child process */
+      execl ("/bin/rmdir", "rmdir", dpath, (char *) 0);
+      _exit (-1);               /* Can't exec /bin/rmdir */
+
+    default:                    /* Parent process */
+      /* Wait for kid to finish.  Other children may be reaped on the
+         way; if wait() itself fails for any reason but an interrupted
+         call, give up instead of spinning forever. */
+      while ((waited = wait (&status)) != cpid)
+        if (waited == -1 && errno != EINTR)
+          return -1;
+    }
+
+  if (WIFSIGNALED (status) || WEXITSTATUS (status) != 0)
+    {
+      errno = EIO;              /* We don't know why, but */
+      return -1;                /* /bin/rmdir failed */
+    }
+
+  return 0;
+}
+
+#endif /* !HAVE_MKDIR */
+
+#ifndef HAVE_RENAME
+/* Rename file FROM to file TO.
+   Return 0 if successful, -1 if not.
+
+   Emulated with link() and unlink(), so it is not atomic: an existing
+   TO is removed first and is lost if the link() that follows fails.
+   If FROM and TO already name the same file, nothing is done and 0 is
+   returned, since removing TO first could delete the very file being
+   renamed. */
+
+int
+rename (from, to)
+     char *from;
+     char *to;
+{
+  struct stat from_stats;
+  struct stat to_stats;
+  int have_both;
+
+  if (stat (from, &from_stats))
+    return -1;
+
+  /* Where symbolic links exist, compare the directory entries
+     themselves, so that a link TO that points at FROM is still replaced. */
+#ifdef S_ISLNK
+  have_both = lstat (from, &from_stats) == 0 && lstat (to, &to_stats) == 0;
+#else
+  have_both = stat (to, &to_stats) == 0;
+#endif
+  if (have_both
+      && from_stats.st_dev == to_stats.st_dev
+      && from_stats.st_ino == to_stats.st_ino)
+    return 0;
+
+  if (unlink (to) && errno != ENOENT)
+    return -1;
+
+  if (link (from, to))
+    return -1;
+
+  if (unlink (from) && errno != ENOENT)
+    {
+      /* Couldn't remove the old name: take the new one back. */
+      unlink (to);
+      return -1;
+    }
+
+  return 0;
+}
+
+#endif /* !HAVE_RENAME */
+
+#ifdef minix
+/* Minix has bcopy but not bzero, and no memset.  Thanks, Andy.
+   Store N zero bytes starting at S1. */
+void
+bzero (s1, n)
+     register char *s1;
+     register int n;
+{
+  for (; n != 0; n--)
+    {
+      *s1 = '\0';
+      s1++;
+    }
+}
+
+/* It also has no bcmp().
+   Compare N bytes at S1 and S2.  Returns 0 if they are all equal,
+   otherwise the difference of the first pair of bytes that differ. */
+int
+bcmp (s1, s2, n)
+     register char *s1, *s2;
+     register int n;
+{
+  while (n-- != 0)
+    {
+      if (*s1 != *s2)
+        return *s1 - *s2;
+      s1++;
+      s2++;
+    }
+  return 0;
+}
+
+/*
+ * Groan, Minix doesn't have execlp either!
+ *
+ * execlp(file,arg0,arg1...argn,(char *)NULL)
+ * exec a program, automatically searching for the program through
+ * all the directories on the PATH.
+ *
+ * This version is naive about variable argument lists, it assumes
+ * a straightforward C calling sequence.  If your system has odd stacks
+ * *and* doesn't have execlp, YOU get to fix it.
+ *
+ * Only returns on failure: -1 with errno set (ENOENT when no PATH
+ * directory held a usable file, ENOMEM when the name buffer could not
+ * be allocated).  With PATH unset the result of a plain execve() of
+ * FILENAME is returned.
+ */
+int
+execlp (filename, arg0)
+     char *filename, *arg0;
+{
+  register char *p, *path;
+  register char *fnbuffer;
+  char **argstart = &arg0;
+  struct stat statbuf;
+  extern char **environ;
+
+  if ((p = getenv ("PATH")) == NULL)
+    {
+      /* couldn't find path variable -- try to exec given filename */
+      return execve (filename, argstart, environ);
+    }
+
+  /*
+   * make a place to build the filename.  The longest thing built is a
+   * PATH consisting of a single directory, plus "/", plus the filename,
+   * plus the terminating NUL -- hence the 2.
+   */
+  fnbuffer = malloc (strlen (p) + strlen (filename) + 2);
+  if (fnbuffer == NULL)
+    {
+      errno = ENOMEM;
+      return -1;
+    }
+
+  /*
+   * try each component of the path to see if the file's there
+   * and executable.
+   */
+  for (path = p; path; path = p)
+    {
+      /* construct full path name to try */
+      if ((p = index (path, ':')) == NULL)
+        {
+          strcpy (fnbuffer, path);
+        }
+      else
+        {
+          strncpy (fnbuffer, path, p - path);
+          fnbuffer[p - path] = '\0';
+          p++;                  /* Skip : for next time */
+        }
+      /* An empty component means the current directory: no slash. */
+      if (strlen (fnbuffer) != 0)
+        strcat (fnbuffer, "/");
+      strcat (fnbuffer, filename);
+
+      /* check to see if file is there and is a normal file */
+      if (stat (fnbuffer, &statbuf) < 0)
+        {
+          if (errno == ENOENT)
+            continue;           /* file not there,keep on looking */
+          else
+            goto fail;          /* failed for some reason, return */
+        }
+      if (!S_ISREG (statbuf.st_mode))
+        continue;
+
+      if (execve (fnbuffer, argstart, environ) < 0
+          && errno != ENOENT
+          && errno != ENOEXEC)
+        {
+          /* failed, for some other reason besides "file
+           * not found" or "not a.out format"
+           */
+          goto fail;
+        }
+
+      /*
+       * If we got error ENOEXEC, the file is executable but is
+       * not an object file.  Try to execute it as a shell script,
+       * returning error if we can't execute /bin/sh.
+       *
+       * FIXME, this code is broken in several ways.  Shell
+       * scripts should not in general be executed by the user's
+       * SHELL variable program.  On more mature systems, the
+       * script can specify with #!/bin/whatever.  Also, this
+       * code clobbers argstart[-1] if the exec of the shell
+       * fails.
+       */
+      if (errno == ENOEXEC)
+        {
+          char *shell;
+
+          /* Try to execute command "sh arg0 arg1 ..." */
+          if ((shell = getenv ("SHELL")) == NULL)
+            shell = "/bin/sh";
+          argstart[-1] = shell;
+          argstart[0] = fnbuffer;
+          execve (shell, &argstart[-1], environ);
+          goto fail;            /* Exec didn't work */
+        }
+
+      /*
+       * If we succeeded, the execve() doesn't return, so we
+       * can only be here is if the file hasn't been found yet.
+       * Try the next place on the path.
+       */
+    }
+
+  /* all attempts failed to locate the file.  Give up. */
+  errno = ENOENT;
+
+fail:
+  free (fnbuffer);
+  return -1;
+}
+
+#endif /* minix */
+
+
+#ifdef EMUL_OPEN3
+#include "open3.h"
+/*
+ * open3 -- routine to emulate the 3-argument open system
+ * call that is present in most modern Unix systems.
+ * This version attempts to support all the flag bits except for O_NDELAY
+ * and O_APPEND, which are silently ignored. The emulation is not as efficient
+ * as the real thing (at worst, 4 system calls instead of one), but there's
+ * not much I can do about that.
+ *
+ * Written 6/10/87 by rmtodd@uokmax
+ *
+ * open3(path, flag, mode)
+ * Attempts to open the file specified by
+ * the given pathname. The following flag bits (#defined in open3.h)
+ * specify options to the routine:
+ * O_RDONLY file open for read only
+ * O_WRONLY file open for write only
+ * O_RDWR file open for both read & write
+ * (Needless to say, you should only specify one of the above).
+ * O_CREAT file is created with specified mode if it needs to be.
+ * O_TRUNC if file exists, it is truncated to 0 bytes
+ * O_EXCL used with O_CREAT--routine returns error if file exists
+ * Function returns file descriptor if successful, -1 and errno if not.
+ */
+
+/*
+ * array to give arguments to access for various modes
+ * FIXME, this table depends on the specific integer values of O_XXX,
+ * and also contains integers (args to 'access') that should be #define's.
+ *
+ * open3() indexes it with (flags & 3); each value is the permission
+ * mask handed to access().
+ */
+static int modes[] =
+{
+ 04, /* O_RDONLY */
+ 02, /* O_WRONLY */
+ 06, /* O_RDWR */
+ 06, /* invalid but we'd better cope -- O_WRONLY+O_RDWR */
+};
+
+/* Shut off the automatic emulation of open(), we'll need it. */
+#undef open
+
+int
+open3 (path, flags, mode)
+ char *path;
+ int flags, mode;
+{
+ int exists = 1; /* cleared when access() reports ENOENT */
+ int call_creat = 0;
+ int fd;
+ /*
+ * We actually do the work by calling the open() or creat() system
+ * call, depending on the flags. Call_creat is true if we will use
+ * creat(), false if we will use open().
+ *
+ * MODE is only used for that creat() call.
+ */
+
+ /*
+ * See if the file exists and is accessible in the requested mode.
+ *
+ * Strictly speaking we shouldn't be using access, since access checks
+ * against real uid, and the open call should check against euid.
+ * Most cases real uid == euid, so it won't matter. FIXME.
+ * FIXME, the construction "flags & 3" and the modes table depends
+ * on the specific integer values of the O_XXX #define's. Foo!
+ */
+ if (access (path, modes[flags & 3]) < 0)
+ {
+ if (errno == ENOENT)
+ {
+ /* the file does not exist */
+ exists = 0;
+ }
+ else
+ {
+ /* probably permission violation */
+ if (flags & O_EXCL)
+ {
+ /* Oops, the file exists, we didn't want it. */
+ /* No matter what the error, claim EEXIST. */
+ errno = EEXIST;
+ }
+ return -1;
+ }
+ }
+
+ /* if we have the O_CREAT bit set, check for O_EXCL */
+ if (flags & O_CREAT)
+ {
+ if ((flags & O_EXCL) && exists)
+ {
+ /* Oops, the file exists and we didn't want it to. */
+ errno = EEXIST;
+ return -1;
+ }
+ /*
+ * If the file doesn't exist, be sure to call creat() so that
+ * it will be created with the proper mode.
+ */
+ if (!exists)
+ call_creat = 1;
+ }
+ else
+ {
+ /* If O_CREAT isn't set and the file doesn't exist, error. */
+ if (!exists)
+ {
+ errno = ENOENT;
+ return -1;
+ }
+ }
+
+ /*
+ * If the O_TRUNC flag is set and the file exists, we want to call
+ * creat() anyway, since creat() guarantees that the file will be
+ * truncated and open()-for-writing doesn't.
+ * (If the file doesn't exist, we're calling creat() anyway and the
+ * file will be created with zero length.)
+ */
+ if ((flags & O_TRUNC) && exists)
+ call_creat = 1;
+ /* actually do the call */
+ if (call_creat)
+ {
+ /*
+ * call creat. May have to close and reopen the file if we
+ * want O_RDONLY or O_RDWR access -- creat() only gives
+ * O_WRONLY.
+ */
+ fd = creat (path, mode);
+ if (fd < 0 || (flags & O_WRONLY)) /* failed, or write-only is all that was asked for */
+ return fd;
+ if (close (fd) < 0)
+ return -1;
+ /* Fall out to reopen the file we've created */
+ }
+
+ /*
+ * calling old open, we strip most of the new flags just in case.
+ */
+ return open (path, flags & (O_RDONLY | O_WRONLY | O_RDWR | O_BINARY));
+}
+
+#endif /* EMUL_OPEN3 */
+
+#ifndef HAVE_MKNOD
+#ifdef __MSDOS__
+typedef int dev_t;
+#endif
+/* Fake mknod by complaining: nothing is created, errno is set to ENXIO
+   and -1 is returned whatever PATH, MODE and DEV are. */
+int
+mknod (path, mode, dev)
+     char *path;
+     unsigned short mode;
+     dev_t dev;
+{
+  errno = ENXIO;                /* No such device or address */
+  return -1;                    /* Just give an error */
+}
+
+/* Fake links by copying: PATH2 is created as a separate copy of PATH1's
+   contents, and a note saying so goes to stderr.  Returns 0 on success.
+   On failure returns -1; a partly written PATH2 is removed. */
+int
+link (path1, path2)
+     char *path1;
+     char *path2;
+{
+  char buf[256];
+  int ifd, ofd;
+  int nrbytes;
+  int nwbytes;
+
+  fprintf (stderr, "%s: %s: cannot link to %s, copying instead\n",
+           tar, path1, path2);
+  if ((ifd = open (path1, O_RDONLY | O_BINARY)) < 0)
+    return -1;
+  if ((ofd = creat (path2, 0666)) < 0)
+    {
+      /* Don't leave the source open; keep creat()'s errno for the caller. */
+      int creat_errno = errno;
+
+      close (ifd);
+      errno = creat_errno;
+      return -1;
+    }
+  setmode (ofd, O_BINARY);
+  while ((nrbytes = read (ifd, buf, sizeof (buf))) > 0)
+    {
+      if ((nwbytes = write (ofd, buf, nrbytes)) != nrbytes)
+        {
+          nrbytes = -1;         /* short write counts as failure */
+          break;
+        }
+    }
+  /* Note use of "|" rather than "||" below: we want to close
+   * the files even if an error occurs.
+   */
+  if ((nrbytes < 0) | (0 != close (ifd)) | (0 != close (ofd)))
+    {
+      unlink (path2);
+      return -1;
+    }
+  return 0;
+}
+
+/* everyone owns everything on MS-DOS (or is it no one owns anything?)
+ Stub: PATH, UID and GID are ignored and success (0) is always reported. */
+int
+chown (path, uid, gid)
+ char *path;
+ int uid;
+ int gid;
+{
+ return 0;
+}
+
+int
+geteuid ()
+{
+ return 0; /* stub: the effective user id is always reported as 0 */
+}
+
+#endif /* !HAVE_MKNOD */
+
+#ifdef __TURBOC__
+#include <time.h>
+#include <fcntl.h>
+#include <io.h>
+
+struct utimbuf
+{
+  time_t actime;                /* Access time. */
+  time_t modtime;               /* Modification time. */
+};
+
+/* Set the time stamp of FILENAME to UTB->modtime, or to the current
+   time when UTB is null.  The access time in UTB is not used.  Returns
+   -1 if the file cannot be opened, otherwise the result of setftime(). */
+int
+utime (char *filename, struct utimbuf *utb)
+{
+  struct ftime stamp;
+  struct tm *parts;
+  time_t when = (utb == 0) ? time (0) : utb->modtime;
+  int handle;
+  int result;
+
+  handle = _open (filename, O_RDWR);
+  if (handle == -1)
+    return -1;
+
+  parts = localtime (&when);
+
+  /* ft_year counts from 1980; earlier years are clamped to 0. */
+  stamp.ft_year = (parts->tm_year < 80) ? 0 : parts->tm_year - 80;
+  stamp.ft_month = parts->tm_mon + 1;
+  stamp.ft_day = parts->tm_mday;
+  stamp.ft_hour = (parts->tm_hour < 0) ? 0 : parts->tm_hour;
+  stamp.ft_min = parts->tm_min;
+  stamp.ft_tsec = parts->tm_sec / 2;    /* stored in two-second units */
+
+  result = setftime (handle, &stamp);
+  _close (handle);
+  return result;
+}
+
+#endif /* __TURBOC__ */
+
+/* Stash argv[0] here so panic will know what the program is called */
+char *myname = 0;
+
+/* Print the message S on stderr, preceded by "<myname>:" when myname has
+   been set and followed by a newline, then exit with status 12.  S is
+   printed literally; it is never interpreted as a format string. */
+void
+panic (s)
+     char *s;
+{
+  if (myname)
+    fprintf (stderr, "%s:", myname);
+  fputs (s, stderr);
+  putc ('\n', stderr);
+  exit (12);
+}
+
+
+/* malloc() that cannot fail: returns SIZE bytes of storage (a request
+   for 0 bytes is treated as a request for 1) or ends the program through
+   panic(). */
+PTR
+ck_malloc (size)
+     size_t size;
+{
+  PTR block;
+
+  if (size == 0)
+    size = 1;
+  block = malloc (size);
+  if (!block)
+    panic ("Couldn't allocate memory");
+  return block;
+}
+
+/* Used by alloca.c and bison.simple.
+ Same as ck_malloc(), with the result typed as char *. */
+char *
+xmalloc (size)
+ size_t size;
+{
+ return (char *) ck_malloc (size);
+}
+
+/* realloc() that cannot fail: returns PTR resized to SIZE bytes, or ends
+   the program through panic().  A null PTR is handled by allocating a
+   fresh block.  As in ck_malloc(), a request for 0 bytes is treated as a
+   request for 1, so that a null return from realloc() always means the
+   system is out of memory. */
+PTR
+ck_realloc (ptr, size)
+     PTR ptr;
+     size_t size;
+{
+  PTR ret;
+
+  if (!size)
+    size++;
+  if (!ptr)
+    ret = ck_malloc (size);
+  else
+    ret = realloc (ptr, size);
+  if (ret == 0)
+    panic ("Couldn't re-allocate memory");
+  return ret;
+}
+
+/* Implement a variable sized buffer of 'stuff'. We don't know what it is,
+ nor do we care, as long as it doesn't mind being aligned on a char boundary.
+ */
+
+struct buffer
+ {
+ int allocated; /* bytes of storage currently behind b */
+ int length; /* bytes of b in use */
+ char *b; /* the storage itself */
+ };
+/* Size of a new buffer, and the slack added each time one has to grow. */
+#define MIN_ALLOCATE 50
+
+/* Create an empty variable sized buffer with MIN_ALLOCATE bytes of
+   storage and return its handle, for use with add_buffer(), get_buffer()
+   and flush_buffer(). */
+char *
+init_buffer ()
+{
+  struct buffer *fresh = (struct buffer *) ck_malloc (sizeof (struct buffer));
+
+  fresh->length = 0;
+  fresh->allocated = MIN_ALLOCATE;
+  fresh->b = (char *) ck_malloc (MIN_ALLOCATE);
+  return (char *) fresh;
+}
+
+/* Release the buffer whose handle is BB, storage and descriptor both.
+   The handle must not be used afterwards. */
+void
+flush_buffer (bb)
+     char *bb;
+{
+  struct buffer *doomed = (struct buffer *) bb;
+
+  free (doomed->b);
+  doomed->length = 0;
+  doomed->allocated = 0;
+  doomed->b = 0;
+  free ((void *) doomed);
+}
+
+/* Append the N bytes at P to the buffer whose handle is BB, enlarging
+   its storage first if they do not fit.  No terminator is added. */
+void
+add_buffer (bb, p, n)
+     char *bb;
+     char *p;
+     int n;
+{
+  struct buffer *buf = (struct buffer *) bb;
+  int needed = buf->length + n;
+
+  if (needed > buf->allocated)
+    {
+      /* Grow past what is needed so small appends don't each realloc. */
+      buf->allocated = needed + MIN_ALLOCATE;
+      buf->b = (char *) ck_realloc (buf->b, buf->allocated);
+    }
+  bcopy (p, buf->b + buf->length, n);
+  buf->length = needed;
+}
+
+/* Return the start of the data accumulated in buffer BB.  The pointer
+   is the buffer's own storage, not a copy.  */
+char *
+get_buffer (bb)
+     char *bb;
+{
+  return ((struct buffer *) bb)->b;
+}
+
+/* Sort a singly linked list with a merge sort and return its new head.
+
+   LIST is the first element, N the number of elements to sort, and OFF
+   the byte offset, inside each element, of the `char *' link to the
+   next element.  CMP is called with two element pointers and returns
+   an integer less than, equal to, or greater than zero.
+
+   The links are rewritten in place.  The link of the Nth element must
+   be null on entry: lists of one or two elements that are already in
+   order are returned without their links being touched.  An empty list
+   (N == 0) is returned as is.  */
+char *
+merge_sort (list, n, off, cmp)
+     char *list;
+     int (*cmp) ();
+     unsigned n;
+     int off;
+{
+  char *ret;
+
+  char *alist, *blist;
+  unsigned alength, blength;
+
+  char *tptr;
+  int tmp;
+  char **prev;
+#define NEXTOF(ptr) (* ((char **)(((char *)(ptr))+off) ) )
+  /* Nothing to reorder.  The zero case must be caught here: N is
+     unsigned, so (n - 1) / 2 below would wrap around to a huge count
+     and the split loop would run off the list.  */
+  if (n <= 1)
+    return list;
+  if (n == 2)
+    {
+      if ((*cmp) (list, NEXTOF (list)) > 0)
+        {
+          /* Out of order: swap the pair.  */
+          ret = NEXTOF (list);
+          NEXTOF (ret) = list;
+          NEXTOF (list) = 0;
+          return ret;
+        }
+      return list;
+    }
+
+  /* Cut the list in two; the first half gets the extra element when N
+     is odd.  TPTR ends up on the last element of the first half.  */
+  alist = list;
+  alength = (n + 1) / 2;
+  blength = n / 2;
+  for (tptr = list, tmp = (n - 1) / 2; tmp; tptr = NEXTOF (tptr), tmp--)
+    ;
+  blist = NEXTOF (tptr);
+  NEXTOF (tptr) = 0;
+
+  alist = merge_sort (alist, alength, off, cmp);
+  blist = merge_sort (blist, blength, off, cmp);
+
+  /* Merge.  PREV always points at the link that must receive the next
+     element chosen, starting with RET itself.  */
+  prev = &ret;
+  while (alist && blist)
+    {
+      if ((*cmp) (alist, blist) < 0)
+        {
+          tptr = NEXTOF (alist);
+          *prev = alist;
+          prev = &(NEXTOF (alist));
+          alist = tptr;
+        }
+      else
+        {
+          tptr = NEXTOF (blist);
+          *prev = blist;
+          prev = &(NEXTOF (blist));
+          blist = tptr;
+        }
+    }
+  /* One half is exhausted; hang the remainder of the other on the end.  */
+  if (alist)
+    *prev = alist;
+  else
+    *prev = blist;
+
+  return ret;
+}
+
+/* Close descriptor FD.  If close fails, report it through msg_perror
+   and exit with status EX_SYSTEM.  */
+void
+ck_close (fd)
+     int fd;
+{
+  if (close (fd) >= 0)
+    return;
+
+  msg_perror ("can't close a file #%d", fd);
+  exit (EX_SYSTEM);
+}
+
+#include <ctype.h>
+
+/* Quote_copy_string is like quote_string, but instead of modifying the
+   string in place, it malloc-s a copy of the string, and returns that.
+   If the string does not have to be quoted, it returns a null pointer;
+   a null pointer is also returned when malloc fails.
+   The allocated copy can, of course, be freed with free() after the
+   caller is done with it.
+
+   A backslash is doubled.  Any other character for which isprint is
+   false is written as a backslash followed by `n', `t', `f', `b', `r',
+   `?' (for octal 177) or three octal digits.  Each character is read
+   as an unsigned char, so that bytes above 0177 are valid isprint
+   arguments and produce the right octal digits even where plain char
+   is signed.  */
+char *
+quote_copy_string (string)
+     char *string;
+{
+  char *from_here;
+  char *to_there = 0;
+  char *copy_buf = 0;
+  int c;
+
+  from_here = string;
+  while (*from_here)
+    {
+      c = (unsigned char) *from_here++;
+      if (c != '\\' && isprint (c))
+        {
+          /* Ordinary character: only copied once quoting has begun.  */
+          if (copy_buf)
+            *to_there++ = c;
+          continue;
+        }
+
+      if (!copy_buf)
+        {
+          /* First character that needs quoting.  Start the copy with
+             the N characters seen before it.  From here on the worst
+             case is 4 output bytes per input byte (this one included),
+             plus the terminating null.  */
+          int n;
+
+          n = (from_here - string) - 1;
+          copy_buf = (char *) malloc (n + 5 + strlen (from_here) * 4);
+          if (!copy_buf)
+            return 0;
+          bcopy (string, copy_buf, n);
+          to_there = copy_buf + n;
+        }
+
+      *to_there++ = '\\';
+      switch (c)
+        {
+        case '\\':
+          *to_there++ = '\\';
+          break;
+        case '\n':
+          *to_there++ = 'n';
+          break;
+        case '\t':
+          *to_there++ = 't';
+          break;
+        case '\f':
+          *to_there++ = 'f';
+          break;
+        case '\b':
+          *to_there++ = 'b';
+          break;
+        case '\r':
+          *to_there++ = 'r';
+          break;
+        case '\177':
+          *to_there++ = '?';
+          break;
+        default:
+          /* C is in 0..255 here, so each digit is in 0..7.  */
+          to_there[0] = (c >> 6) + '0';
+          to_there[1] = ((c >> 3) & 07) + '0';
+          to_there[2] = (c & 07) + '0';
+          to_there += 3;
+          break;
+        }
+    }
+  if (copy_buf)
+    {
+      *to_there = '\0';
+      return copy_buf;
+    }
+  return (char *) 0;
+}
+
+
+/* Un_quote_string takes a quoted c-string (like those produced by
+   quote_string or quote_copy_string) and turns it back into the
+   un-quoted original.  This is done in place.
+
+   Returns STRING, or a null pointer if an unrecognized escape (or a
+   lone backslash at the end) was seen.  Such escapes are left in the
+   string as they are and conversion carries on.  A backslash may be
+   followed by one to three octal digits.  */
+
+/* There is no un-quote-copy-string.  Write it yourself */
+
+char *
+un_quote_string (string)
+     char *string;
+{
+  char *ret;
+  char *from_here;
+  char *to_there;
+  int tmp;
+
+  ret = string;
+  to_there = string;
+  from_here = string;
+  while (*from_here)
+    {
+      if (*from_here != '\\')
+        {
+          /* Plain character.  Nothing is stored while the read and
+             write positions coincide, so a string without escapes is
+             never written to.  */
+          if (from_here != to_there)
+            *to_there++ = *from_here++;
+          else
+            from_here++, to_there++;
+          continue;
+        }
+      switch (*++from_here)
+        {
+        case '\0':
+          /* Lone backslash at the very end.  Keep it, and stay on the
+             terminator so the loop stops there instead of stepping
+             past the end of the string.  */
+          ret = 0;
+          *to_there++ = '\\';
+          break;
+        case '\\':
+          *to_there++ = *from_here++;
+          break;
+        case 'n':
+          *to_there++ = '\n';
+          from_here++;
+          break;
+        case 't':
+          *to_there++ = '\t';
+          from_here++;
+          break;
+        case 'f':
+          *to_there++ = '\f';
+          from_here++;
+          break;
+        case 'b':
+          *to_there++ = '\b';
+          from_here++;
+          break;
+        case 'r':
+          *to_there++ = '\r';
+          from_here++;
+          break;
+        case '?':
+          *to_there++ = 0177;
+          from_here++;
+          break;
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+          /* One, two or three octal digits.  */
+          tmp = *from_here - '0';
+          from_here++;
+          if (*from_here < '0' || *from_here > '7')
+            {
+              *to_there++ = tmp;
+              break;
+            }
+          tmp = tmp * 8 + *from_here - '0';
+          from_here++;
+          if (*from_here < '0' || *from_here > '7')
+            {
+              *to_there++ = tmp;
+              break;
+            }
+          tmp = tmp * 8 + *from_here - '0';
+          from_here++;
+          /* Advance past the stored byte, as the shorter forms do;
+             otherwise the next character would overwrite it.  */
+          *to_there++ = tmp;
+          break;
+        default:
+          ret = 0;
+          *to_there++ = '\\';
+          *to_there++ = *from_here++;
+          break;
+        }
+    }
+  /* Terminate the (possibly shorter) result; skipped when nothing
+     moved, so an unchanged string is not written to.  */
+  if (*to_there)
+    *to_there++ = '\0';
+  return ret;
+}
+
+#ifndef __MSDOS__
+/* Create a pipe in PIPES with pipe().  If that fails, report it through
+   msg_perror and exit with status EX_SYSTEM.  */
+void
+ck_pipe (pipes)
+     int *pipes;
+{
+  if (pipe (pipes) >= 0)
+    return;
+
+  msg_perror ("can't open a pipe");
+  exit (EX_SYSTEM);
+}
+#endif /* !__MSDOS__ */
+
+#ifndef HAVE_STRSTR
+/*
+ * strstr - find first occurrence of wanted in s
+ *
+ * Returns a pointer to the first match inside s, or a null pointer if
+ * wanted does not occur.  An empty wanted matches at the very start,
+ * so s itself is returned, as ISO C specifies for strstr.
+ */
+
+char * /* found string, or NULL if none */
+strstr (s, wanted)
+     char *s;
+     char *wanted;
+{
+  register char *scan;
+  register size_t len;
+  register char firstc;
+
+  if (*wanted == '\0')
+    return s;
+  /*
+   * We inline the first char for speed.
+   * The ++ on scan has been moved down for optimization.
+   * The end-of-string test comes after the match test; wanted is
+   * non-empty here, so no match can start at the terminating null.
+   */
+  firstc = *wanted;
+  len = strlen (wanted);
+  for (scan = s; *scan != firstc || strncmp (scan, wanted, len) != 0;)
+    if (*scan++ == '\0')
+      return (char *) 0;
+  return scan;
+}
+
+#endif /* !HAVE_STRSTR */
+
+#ifndef HAVE_FTRUNCATE
+
+#ifdef F_CHSIZE
+/* ftruncate replacement built on the F_CHSIZE fcntl request: FD and
+ LENGTH are passed straight through and fcntl's result is returned. */
+int
+ftruncate (fd, length)
+ int fd;
+ off_t length;
+{
+ return fcntl (fd, F_CHSIZE, length);
+}
+
+#else /* !F_CHSIZE */
+#ifdef F_FREESP
+/* code courtesy of William Kucharski, kucharsk@Solbourne.com */
+
+/* ftruncate replacement built on the F_FREESP fcntl request.  Returns
+   0 on success and -1 if fcntl fails.  */
+int
+ftruncate (fd, length)
+     int fd; /* file descriptor */
+     off_t length; /* length to set file to */
+{
+  struct flock region;
+
+  /*
+   * This relies on the UNDOCUMENTED F_FREESP argument to
+   * fcntl(2), which truncates the file so that it ends at the
+   * position indicated by l_start.
+   *
+   * Will minor miracles never cease?
+   */
+  region.l_type = F_WRLCK; /* write lock on file space */
+  region.l_whence = 0;
+  region.l_start = length;
+  region.l_len = 0;
+
+  return fcntl (fd, F_FREESP, &region) < 0 ? -1 : 0;
+}
+
+#else /* !F_FREESP */
+
+/* Stub used when neither F_CHSIZE nor F_FREESP is defined: FD and
+ LENGTH are ignored, errno is set to EIO and -1 is returned. */
+int
+ftruncate (fd, length)
+ int fd;
+ off_t length;
+{
+ errno = EIO;
+ return -1;
+}
+
+#endif /* !F_FREESP */
+#endif /* !F_CHSIZE */
+#endif /* !HAVE_FTRUNCATE */
+
+
+extern FILE *msg_file;
+
+#if defined (HAVE_VPRINTF) && __STDC__
+#include <stdarg.h>
+
+/* Print a diagnostic line on stderr (<stdarg.h> version): the string
+ `tar', a "rec N: " tag when f_sayblock is set, the printf-style
+ message STR with its arguments, and a newline. msg_file is flushed
+ before anything is written and stderr afterwards. */
+void
+msg (char *str,...)
+{
+ va_list args;
+
+ va_start (args, str);
+ fflush (msg_file);
+ fprintf (stderr, "%s: ", tar);
+ if (f_sayblock)
+ fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block));
+ vfprintf (stderr, str, args);
+ va_end (args);
+ putc ('\n', stderr);
+ fflush (stderr);
+}
+
+/* Like msg (<stdarg.h> version), but the line is finished by
+ perror (" ") instead of a newline. errno is saved on entry and
+ restored just before perror, so the stdio calls in between cannot
+ change which error gets described. */
+void
+msg_perror (char *str,...)
+{
+ va_list args;
+ int save_e;
+
+ save_e = errno;
+ fflush (msg_file);
+ fprintf (stderr, "%s: ", tar);
+ if (f_sayblock)
+ fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block));
+ va_start (args, str);
+ vfprintf (stderr, str, args);
+ va_end (args);
+ errno = save_e;
+ perror (" ");
+ fflush (stderr);
+}
+
+#endif /* HAVE_VPRINTF and __STDC__ */
+
+#if defined(HAVE_VPRINTF) && !__STDC__
+#include <varargs.h>
+/* Print a diagnostic line on stderr (<varargs.h> version): the string
+ `tar', a "rec N: " tag when f_sayblock is set, the printf-style
+ message STR with its arguments, and a newline. msg_file is flushed
+ before anything is written and stderr afterwards. */
+void
+msg (str, va_alist)
+ char *str;
+ va_dcl
+{
+ va_list args;
+
+ fflush (msg_file);
+ fprintf (stderr, "%s: ", tar);
+ if (f_sayblock)
+ fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block));
+ va_start (args);
+ vfprintf (stderr, str, args);
+ va_end (args);
+ putc ('\n', stderr);
+ fflush (stderr);
+}
+
+/* Like msg (<varargs.h> version), but the line is finished by
+ perror (" ") instead of a newline. errno is saved on entry and
+ restored just before perror, so the stdio calls in between cannot
+ change which error gets described. */
+void
+msg_perror (str, va_alist)
+ char *str;
+ va_dcl
+{
+ va_list args;
+ int save_e;
+
+ save_e = errno;
+ fflush (msg_file);
+ fprintf (stderr, "%s: ", tar);
+ if (f_sayblock)
+ fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block));
+ va_start (args);
+ vfprintf (stderr, str, args);
+ va_end (args);
+ errno = save_e;
+ perror (" ");
+ fflush (stderr);
+}
+
+#endif /* HAVE_VPRINTF and not __STDC__ */
+
+#if !defined(HAVE_VPRINTF) && defined(HAVE_DOPRNT)
+/* Print a diagnostic line on stderr (_doprnt version): the string
+ `tar', a "rec N: " tag when f_sayblock is set, the message STR
+ formatted by _doprnt, and a newline. msg_file is flushed first and
+ stderr last.
+ NOTE(review): _doprnt is handed the address of ARGS, which presumably
+ relies on the caller's remaining arguments following it in memory --
+ confirm for the target ABI. */
+void
+msg (str, args)
+ char *str;
+ int args;
+{
+ fflush (msg_file);
+ fprintf (stderr, "%s: ", tar);
+ if (f_sayblock)
+ fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block));
+ _doprnt (str, &args, stderr);
+ putc ('\n', stderr);
+ fflush (stderr);
+}
+
+/* Like msg (_doprnt version), but the line is finished by perror (" ")
+ instead of a newline. errno is saved on entry and restored just
+ before perror, so the stdio calls in between cannot change which
+ error gets described. */
+void
+msg_perror (str, args)
+ char *str;
+ int args;
+{
+ int save_e;
+
+ save_e = errno;
+ fflush (msg_file);
+ fprintf (stderr, "%s: ", tar);
+ if (f_sayblock)
+ fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block));
+ _doprnt (str, &args, stderr);
+ errno = save_e;
+ perror (" ");
+ fflush (stderr);
+}
+
+#endif /* !HAVE_VPRINTF and HAVE_DOPRNT */
+
+#if !defined(HAVE_VPRINTF) && !defined(HAVE_DOPRNT)
+/* Print a diagnostic line on stderr (version for systems with neither
+ vprintf nor _doprnt): the string `tar', a "rec N: " tag when
+ f_sayblock is set, the message STR formatted by fprintf, and a
+ newline. A1 through A6 are undeclared (hence int) parameters that
+ are simply forwarded to fprintf, so at most six argument words reach
+ the format. */
+void
+msg (str, a1, a2, a3, a4, a5, a6)
+ char *str;
+{
+ fflush (msg_file);
+ fprintf (stderr, "%s: ", tar);
+ if (f_sayblock)
+ fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block));
+ fprintf (stderr, str, a1, a2, a3, a4, a5, a6);
+ putc ('\n', stderr);
+ fflush (stderr);
+}
+
+/* Like msg, but finish the line with perror's description of the errno
+   value that was current on entry.  This is the version for systems
+   with neither vprintf nor _doprnt: A1 through A6 are undeclared
+   (hence int) parameters that are simply forwarded to fprintf.
+
+   The output layout is the same as in the other msg_perror variants:
+   the message is followed directly by perror (" "), with no extra
+   ": " in between, and stderr is flushed afterwards.  */
+void
+msg_perror (str, a1, a2, a3, a4, a5, a6)
+     char *str;
+{
+  int save_e;
+
+  save_e = errno;
+  fflush (msg_file);
+  fprintf (stderr, "%s: ", tar);
+  if (f_sayblock)
+    fprintf (stderr, "rec %d: ", baserec + (ar_record - ar_block));
+  fprintf (stderr, str, a1, a2, a3, a4, a5, a6);
+  /* Restore errno: the stdio calls above may have changed it.  */
+  errno = save_e;
+  perror (" ");
+  fflush (stderr);
+}
+
+#endif /* !HAVE_VPRINTF and !HAVE_DOPRNT */
diff --git a/gnu/usr.bin/tar/port.h b/gnu/usr.bin/tar/port.h
new file mode 100644
index 000000000000..4e65a9ace886
--- /dev/null
+++ b/gnu/usr.bin/tar/port.h
@@ -0,0 +1,215 @@
+/* Portability declarations. Requires sys/types.h.
+ Copyright (C) 1988, 1992 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not HAVE_ALLOCA_H */
+#ifdef _AIX
+ #pragma alloca
+#else /* not _AIX */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#include "pathmax.h"
+
+#ifdef _POSIX_VERSION
+#include <sys/wait.h>
+#else /* !_POSIX_VERSION */
+/* Fallback decoders for a wait status W, used when <sys/wait.h> is not
+ included. As written, a low byte of 0x7f means a stopped child, a
+ low byte of 0 a normal exit, and any other low byte death by signal. */
+#define WIFSTOPPED(w) (((w) & 0xff) == 0x7f)
+#define WIFSIGNALED(w) (((w) & 0xff) != 0x7f && ((w) & 0xff) != 0)
+#define WIFEXITED(w) (((w) & 0xff) == 0)
+
+/* The stop signal and the exit status are read from the second byte,
+ the terminating signal from the low seven bits. */
+#define WSTOPSIG(w) (((w) >> 8) & 0xff)
+#define WTERMSIG(w) ((w) & 0x7f)
+#define WEXITSTATUS(w) (((w) >> 8) & 0xff)
+#endif /* _POSIX_VERSION */
+
+/* nonstandard */
+#ifndef WIFCOREDUMPED
+#define WIFCOREDUMPED(w) (((w) & 0x80) != 0)
+#endif
+
+#ifdef __MSDOS__
+/* missing things from sys/stat.h */
+#define S_ISUID 0
+#define S_ISGID 0
+#define S_ISVTX 0
+
+/* device stuff.  The arguments of makedev are parenthesized so that an
+   expression argument such as `a | b' is shifted as a whole.  */
+#define makedev(ma, mi) (((ma) << 8) | (mi))
+#define major(dev) (dev)
+#define minor(dev) (dev)
+typedef long off_t;
+#endif /* __MSDOS__ */
+
+#if defined(__STDC__) || defined(__TURBOC__)
+#define PTR void *
+#else
+#define PTR char *
+#define const
+#endif
+
+/* Since major is a function on SVR4, we can't just use `ifndef major'. */
+#ifdef major /* Might be defined in sys/types.h. */
+#define HAVE_MAJOR
+#endif
+
+#if !defined(HAVE_MAJOR) && defined(MAJOR_IN_MKDEV)
+#include <sys/mkdev.h>
+#define HAVE_MAJOR
+#endif
+
+#if !defined(HAVE_MAJOR) && defined(MAJOR_IN_SYSMACROS)
+#include <sys/sysmacros.h>
+#define HAVE_MAJOR
+#endif
+
+#ifndef HAVE_MAJOR
+#define major(dev) (((dev) >> 8) & 0xff)
+#define minor(dev) ((dev) & 0xff)
+#define makedev(maj, min) (((maj) << 8) | (min))
+#endif
+#undef HAVE_MAJOR
+
+#if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
+#include <string.h>
+#if !defined(__MSDOS__) && !defined(STDC_HEADERS)
+#include <memory.h>
+#endif
+#ifdef index
+#undef index
+#endif
+#ifdef rindex
+#undef rindex
+#endif
+#define index strchr
+#define rindex strrchr
+#define bcopy(s, d, n) memcpy(d, s, n)
+#define bzero(s, n) memset(s, 0, n)
+#define bcmp memcmp
+#else
+#include <strings.h>
+#endif
+
+#if defined(STDC_HEADERS)
+#include <stdlib.h>
+#else
+char *malloc (), *realloc ();
+char *getenv ();
+#endif
+
+#ifndef _POSIX_VERSION
+#ifdef __MSDOS__
+#include <io.h>
+#else /* !__MSDOS__ */
+off_t lseek ();
+#endif /* !__MSDOS__ */
+char *getcwd ();
+#endif /* !_POSIX_VERSION */
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+#ifndef O_CREAT
+#define O_CREAT 0
+#endif
+#ifndef O_NDELAY
+#define O_NDELAY 0
+#endif
+#ifndef O_RDONLY
+#define O_RDONLY 0
+#endif
+#ifndef O_RDWR
+#define O_RDWR 2
+#endif
+
+#include <sys/stat.h>
+#ifndef S_ISREG /* Doesn't have POSIX.1 stat stuff. */
+#define mode_t unsigned short
+#endif
+#if !defined(S_ISBLK) && defined(S_IFBLK)
+#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
+#endif
+#if !defined(S_ISCHR) && defined(S_IFCHR)
+#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
+#endif
+#if !defined(S_ISDIR) && defined(S_IFDIR)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#endif
+#if !defined(S_ISREG) && defined(S_IFREG)
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#endif
+#if !defined(S_ISFIFO) && defined(S_IFIFO)
+#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
+#define mkfifo(path, mode) (mknod ((path), (mode) | S_IFIFO, 0))
+#endif
+#if !defined(S_ISLNK) && defined(S_IFLNK)
+#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
+#endif
+#if !defined(S_ISSOCK) && defined(S_IFSOCK)
+#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
+#endif
+#if !defined(S_ISMPB) && defined(S_IFMPB) /* V7 */
+#define S_ISMPB(m) (((m) & S_IFMT) == S_IFMPB)
+#define S_ISMPC(m) (((m) & S_IFMT) == S_IFMPC)
+#endif
+#if !defined(S_ISNWK) && defined(S_IFNWK) /* HP/UX */
+#define S_ISNWK(m) (((m) & S_IFMT) == S_IFNWK)
+#endif
+#if !defined(S_ISCTG) && defined(S_IFCTG) /* contiguous file */
+#define S_ISCTG(m) (((m) & S_IFMT) == S_IFCTG)
+#endif
+#if !defined(S_ISVTX)
+#define S_ISVTX 0001000
+#endif
+
+#ifdef __MSDOS__
+#include "msd_dir.h"
+#define NLENGTH(direct) ((direct)->d_namlen)
+
+#else /* not __MSDOS__ */
+
+#if defined(DIRENT) || defined(_POSIX_VERSION)
+#include <dirent.h>
+#define NLENGTH(direct) (strlen((direct)->d_name))
+#else /* not (DIRENT or _POSIX_VERSION) */
+#define dirent direct
+#define NLENGTH(direct) ((direct)->d_namlen)
+#ifdef SYSNDIR
+#include <sys/ndir.h>
+#endif /* SYSNDIR */
+#ifdef SYSDIR
+#include <sys/dir.h>
+#endif /* SYSDIR */
+#ifdef NDIR
+#include <ndir.h>
+#endif /* NDIR */
+#endif /* DIRENT or _POSIX_VERSION */
+
+#endif /* not __MSDOS__ */
diff --git a/gnu/usr.bin/tar/regex.c b/gnu/usr.bin/tar/regex.c
new file mode 100644
index 000000000000..cb94d597c6f7
--- /dev/null
+++ b/gnu/usr.bin/tar/regex.c
@@ -0,0 +1,4932 @@
+/* Extended regular expression matching and search library,
+ version 0.11.
+ (Implements POSIX draft P10003.2/D11.2, except for
+ internationalization features.)
+
+ Copyright (C) 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+ #pragma alloca
+#endif
+
+#define _GNU_SOURCE
+
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
+
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+
+/* Emacs uses `NULL' as a predicate. */
+#undef NULL
+
+#else /* not emacs */
+
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+ `BSTRING', as far as I know, and neither of them use this code. */
+#if HAVE_STRING_H || STDC_HEADERS
+#include <string.h>
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set. */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+/* Fill in re_syntax_table on the first call: the entries for `a'-`z',
+   `A'-`Z', `0'-`9' and `_' are set to Sword, every other entry is
+   zero.  Later calls return immediately.  */
+static void
+init_syntax_once ()
+{
+  static int done = 0;
+  register int c;
+
+  if (done)
+    return;
+
+  bzero (re_syntax_table, sizeof re_syntax_table);
+
+  for (c = 0; c < CHAR_SET_SIZE; c++)
+    if ((c >= 'a' && c <= 'z')
+        || (c >= 'A' && c <= 'Z')
+        || (c >= '0' && c <= '9')
+        || c == '_')
+      re_syntax_table[c] = Sword;
+
+  done = 1;
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c]
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits. */
+#include "regex.h"
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+#ifndef isascii
+#define isascii(c) 1
+#endif
+
+#ifdef isblank
+#define ISBLANK(c) (isascii (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#endif
+
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#define ISALNUM(c) (isascii (c) && isalnum (c))
+#define ISALPHA(c) (isascii (c) && isalpha (c))
+#define ISCNTRL(c) (isascii (c) && iscntrl (c))
+#define ISLOWER(c) (isascii (c) && islower (c))
+#define ISPUNCT(c) (isascii (c) && ispunct (c))
+#define ISSPACE(c) (isascii (c) && isspace (c))
+#define ISUPPER(c) (isascii (c) && isupper (c))
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ bcopy (source, destination, osize), \
+ destination)
+
+#endif /* not REGEX_MALLOC */
+
+
+/* True if `size1' is nonzero and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression.
+
+ The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+ So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+ `exactn' we use here must also be 1. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn = 1,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two-byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Changed to pop_failure_jump if we know we won't have to
+ backtrack to match; otherwise changed to jump. This is used
+ to jump back to the beginning of a repeat. If what follows
+ this jump clearly won't match what the repeat does, such that
+ we can be sure that there is no use backtracking out of
+ repetitions already matched, then we change it to a
+ pop_failure_jump. Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+
+#define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += 2; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+
+#define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER: store in *DEST the number held in
+   the two bytes at SOURCE.  The first byte is the low-order one; the
+   second is sign-extended and supplies the high-order part.  */
+static void
+extract_number (dest, source)
+     int *dest;
+     unsigned char *source;
+{
+  int high = SIGN_EXTEND_CHAR (source[1]);
+  int low = source[0] & 0377;
+
+  *dest = low + (high << 8);
+}
+
+#ifndef EXTRACT_MACROS /* To debug the macros. */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += 2; \
+ } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER_AND_INCR: read the two-byte number
+   at *SOURCE into *DESTINATION, then move *SOURCE past those two
+   bytes.  */
+static void
+extract_number_and_incr (destination, source)
+     int *destination;
+     unsigned char **source;
+{
+  unsigned char *at = *source;
+
+  extract_number (destination, at);
+  *source = at + 2;
+}
+
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+#include <assert.h>
+
+static int debug = 0;
+
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+extern void printchar ();
+
+/* Print the fastmap in human-readable form.  Every character whose
+   FASTMAP entry is nonzero is printed with printchar; a run of
+   consecutive such characters is printed as its first character, a
+   `-', and its last character.  A newline ends the listing.  */
+
+void
+print_fastmap (fastmap)
+     char *fastmap;
+{
+  unsigned limit = 1 << BYTEWIDTH;
+  unsigned i = 0;
+
+  while (i < limit)
+    {
+      unsigned first;
+
+      if (!fastmap[i])
+        {
+          i++;
+          continue;
+        }
+
+      first = i++;
+      printchar (first);
+
+      /* Swallow the rest of the run.  */
+      while (i < limit && fastmap[i])
+        i++;
+
+      if (i - 1 != first)
+        {
+          printf ("-");
+          printchar (i - 1);
+        }
+    }
+  putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+   the START pointer into it and ending just before the pointer END.
+
+   Each command is written as `/name' followed by its operands, each
+   introduced by `/'; an unrecognized opcode byte N is written as `?N'.
+   A null START prints `(null)'.  Output goes to stdout.  */
+
+void
+print_partial_compiled_pattern (start, end)
+     unsigned char *start;
+     unsigned char *end;
+{
+  int mcnt, mcnt2;
+  unsigned char *p = start;
+  unsigned char *pend = end;
+
+  if (start == NULL)
+    {
+      printf ("(null)\n");
+      return;
+    }
+
+  /* Loop over pattern commands.  */
+  while (p < pend)
+    {
+      switch ((re_opcode_t) *p++)
+        {
+        case no_op:
+          printf ("/no_op");
+          break;
+
+        case exactn:
+          /* Count byte, then that many literal bytes.  */
+          mcnt = *p++;
+          printf ("/exactn/%d", mcnt);
+          do
+            {
+              putchar ('/');
+              printchar (*p++);
+            }
+          while (--mcnt);
+          break;
+
+        case start_memory:
+          mcnt = *p++;
+          printf ("/start_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case stop_memory:
+          mcnt = *p++;
+          printf ("/stop_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case duplicate:
+          printf ("/duplicate/%d", *p++);
+          break;
+
+        case anychar:
+          printf ("/anychar");
+          break;
+
+        case charset:
+        case charset_not:
+          {
+            register int c;
+
+            printf ("/charset%s",
+                    (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+
+            assert (p + *p < pend);
+
+            /* *p is the number of bitmap bytes that follow it.  */
+            for (c = 0; c < *p; c++)
+              {
+                unsigned bit;
+                unsigned char map_byte = p[1 + c];
+
+                putchar ('/');
+
+                for (bit = 0; bit < BYTEWIDTH; bit++)
+                  if (map_byte & (1 << bit))
+                    printchar (c * BYTEWIDTH + bit);
+              }
+            p += 1 + *p;
+            break;
+          }
+
+        case begline:
+          printf ("/begline");
+          break;
+
+        case endline:
+          printf ("/endline");
+          break;
+
+        case on_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_jump/0/%d", mcnt);
+          break;
+
+        case on_failure_keep_string_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_keep_string_jump/0/%d", mcnt);
+          break;
+
+        case dummy_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/dummy_failure_jump/0/%d", mcnt);
+          break;
+
+        case push_dummy_failure:
+          printf ("/push_dummy_failure");
+          break;
+
+        case maybe_pop_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/maybe_pop_jump/0/%d", mcnt);
+          break;
+
+        case pop_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/pop_failure_jump/0/%d", mcnt);
+          break;
+
+        case jump_past_alt:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump_past_alt/0/%d", mcnt);
+          break;
+
+        case jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump/0/%d", mcnt);
+          break;
+
+        case succeed_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case jump_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case set_number_at:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case wordbound:
+          printf ("/wordbound");
+          break;
+
+        case notwordbound:
+          printf ("/notwordbound");
+          break;
+
+        case wordbeg:
+          printf ("/wordbeg");
+          break;
+
+        case wordend:
+          printf ("/wordend");
+          /* Without this break the next case's name was printed as
+             well.  */
+          break;
+
+#ifdef emacs
+        case before_dot:
+          printf ("/before_dot");
+          break;
+
+        case at_dot:
+          printf ("/at_dot");
+          break;
+
+        case after_dot:
+          printf ("/after_dot");
+          break;
+
+        case syntaxspec:
+          printf ("/syntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+
+        case notsyntaxspec:
+          printf ("/notsyntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+#endif /* emacs */
+
+        case wordchar:
+          printf ("/wordchar");
+          break;
+
+        case notwordchar:
+          printf ("/notwordchar");
+          break;
+
+        case begbuf:
+          printf ("/begbuf");
+          break;
+
+        case endbuf:
+          printf ("/endbuf");
+          break;
+
+        default:
+          printf ("?%d", *(p-1));
+          break;
+        }
+    }
+  printf ("/\n");
+}
+
+
+/* Print the compiled pattern held in BUFP in human-readable form,
+   followed by the buffer usage, the fastmap (only when BUFP has one
+   and it is marked accurate) and the remaining fields of BUFP.  */
+
+void
+print_compiled_pattern (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  unsigned char *buffer = bufp->buffer;
+
+  print_partial_compiled_pattern (buffer, buffer + bufp->used);
+
+  /* `used', `allocated' and `re_nsub' are declared in regex.h, not
+     here; the casts make each argument agree with its conversion
+     specification whatever integer type the field has.  */
+  printf ("%lu bytes used/%lu bytes allocated.\n",
+          (unsigned long) bufp->used, (unsigned long) bufp->allocated);
+
+  if (bufp->fastmap_accurate && bufp->fastmap)
+    {
+      printf ("fastmap: ");
+      print_fastmap (bufp->fastmap);
+    }
+
+  printf ("re_nsub: %lu\t", (unsigned long) bufp->re_nsub);
+  printf ("regs_alloc: %d\t", bufp->regs_allocated);
+  printf ("can_be_null: %d\t", bufp->can_be_null);
+  printf ("newline_anchor: %d\n", bufp->newline_anchor);
+  printf ("no_sub: %d\t", bufp->no_sub);
+  printf ("not_bol: %d\t", bufp->not_bol);
+  printf ("not_eol: %d\t", bufp->not_eol);
+  printf ("syntax: %d\n", bufp->syntax);
+  /* Perhaps we should print the translate table?  */
+}
+
+
+/* Print the text from WHERE to the end of the two-part string made of
+   STRING1 (SIZE1 characters) followed by STRING2 (SIZE2 characters).
+   If WHERE lies in the first string, the rest of STRING1 is printed
+   and then all of STRING2; otherwise only the tail of STRING2 from
+   WHERE on.  A null WHERE prints `(null)'.  */
+
+void
+print_double_string (where, string1, size1, string2, size2)
+     const char *where;
+     const char *string1;
+     int size1;
+     const char *string2;
+     int size2;
+{
+  unsigned idx;
+
+  if (where == NULL)
+    {
+      printf ("(null)");
+      return;
+    }
+
+  if (FIRST_STRING_P (where))
+    {
+      /* Finish the first string, then restart at the head of the second.  */
+      idx = where - string1;
+      while (idx < size1)
+        {
+          printchar (string1[idx]);
+          idx++;
+        }
+
+      where = string2;
+    }
+
+  idx = where - string2;
+  while (idx < size2)
+    {
+      printchar (string2[idx]);
+      idx++;
+    }
+}
+
+#else /* not DEBUG */
+
+/* Without DEBUG, `assert' and every DEBUG_* helper below expand to
+   nothing, so the debugging statements scattered through the code
+   disappear from the compiled result.  */
+
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Select the regexp syntax to recognize in later compilations.  This
+   is what lets utilities that historically have different,
+   incompatible syntaxes share this code.
+
+   SYNTAX is a bit mask made up of the syntax bits defined in regex.h.
+   The value returned is the syntax that was in effect before the
+   call.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+{
+  reg_syntax_t previous = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return previous;
+}
+
+/* This table gives an error message for each of the error codes listed
+   in regex.h.  The entries have to stay in the same order as the codes
+   there; the comment on each entry names the code it belongs to.
+   REG_NOERROR has no message, so its entry is NULL.  */
+
+static const char *re_error_msg[] =
+ { NULL, /* REG_NOERROR */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+ };
+
+/* Subroutine declarations and macros for regex_compile. */
+
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+   if necessary.  Also cast from a signed character in the constant
+   string passed to us by the user to an unsigned char that we can use
+   as an array index (in, e.g., `translate').
+
+   Uses `p', `pend' and `translate' from the enclosing scope.  When the
+   pattern is exhausted (p == pend) this makes the ENCLOSING FUNCTION
+   return REG_EEND, so it can only be used where that is acceptable.  */
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = translate[c]; \
+ } while (0)
+
+/* Fetch the next character in the uncompiled pattern, with no
+   translation.  Like PATFETCH, this returns REG_EEND from the
+   enclosing function when p == pend.  */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern.  No bounds check is made
+   against the start of the pattern.  */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+   cast the subscript to translate because some data is declared as
+   `char *', to avoid warnings when a string constant is passed. But
+   when we use a character as a subscript we must make it unsigned.
+   D is evaluated exactly once, in whichever arm is taken.  */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'.  They all
+   use `b' (the append position) and `bufp' from the enclosing scope.  */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer.  Grows
+   the buffer with EXTEND_BUFFER until the request fits; since
+   EXTEND_BUFFER contains `return' statements, this can make the
+   enclosing function return an error code.  */
+#define GET_BUFFER_SPACE(n) \
+ while (b - bufp->buffer + (n) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+   relative address offset by the three bytes the jump itself occupies.
+   LOC is evaluated twice: once as the store address and once in the
+   offset computation.  */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+   into the pattern are two bytes long. So if 2^16 bytes turns out to
+   be too small, many things would have to change.  The constant has
+   type long.  */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Extend the buffer to twice its current size via realloc and
+   reset the pointers that pointed into the old block to point to the
+   correct places in the new one.  A doubled size above MAX_BUF_SIZE is
+   clamped to MAX_BUF_SIZE; if the buffer is already MAX_BUF_SIZE bytes
+   long, the enclosing function is made to return REG_ESIZE.
+
+   If realloc fails, the enclosing function is made to return
+   REG_ESPACE.  In that case `bufp->buffer' and `bufp->allocated' are
+   left exactly as they were, so the old block stays reachable through
+   BUFP instead of being lost, and the recorded size still describes
+   the block that BUFP points at.
+
+   Uses `bufp', `b', `begalt', `fixup_alt_jump', `laststart' and
+   `pending_exact' from the enclosing scope.  */
+#define EXTEND_BUFFER()                                                 \
+  do {                                                                  \
+    unsigned char *old_buffer = bufp->buffer;                           \
+    unsigned char *new_buffer;                                          \
+    unsigned long new_allocated;                                        \
+    if (bufp->allocated == MAX_BUF_SIZE)                                \
+      return REG_ESIZE;                                                 \
+    new_allocated = (unsigned long) bufp->allocated << 1;               \
+    if (new_allocated > MAX_BUF_SIZE)                                   \
+      new_allocated = MAX_BUF_SIZE;                                     \
+    new_buffer = (unsigned char *) realloc (old_buffer, new_allocated); \
+    if (new_buffer == NULL)                                             \
+      return REG_ESPACE;                                                \
+    /* Commit only now that the allocation is known to have worked.  */ \
+    bufp->buffer = new_buffer;                                          \
+    bufp->allocated = new_allocated;                                    \
+    /* If the buffer moved, move all the pointers into it.  */          \
+    if (old_buffer != new_buffer)                                       \
+      {                                                                 \
+        b = (b - old_buffer) + new_buffer;                              \
+        begalt = (begalt - old_buffer) + new_buffer;                    \
+        if (fixup_alt_jump)                                             \
+          fixup_alt_jump = (fixup_alt_jump - old_buffer) + new_buffer;  \
+        if (laststart)                                                  \
+          laststart = (laststart - old_buffer) + new_buffer;            \
+        if (pending_exact)                                              \
+          pending_exact = (pending_exact - old_buffer) + new_buffer;    \
+      }                                                                 \
+  } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+   {start,stop}_memory, the maximum number of groups we can report
+   things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+   ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+typedef int pattern_offset_t;
+
+/* One saved state per open group.  The positions are kept as offsets
+   from the start of the pattern buffer rather than as pointers, so
+   they stay valid if the buffer is moved by realloc.  */
+typedef struct
+{
+ /* Offset of `begalt' when the group was opened.  */
+ pattern_offset_t begalt_offset;
+ /* Offset of `fixup_alt_jump' plus one, or 0 if there was none.  */
+ pattern_offset_t fixup_alt_jump;
+ /* Offset of the third byte of this group's `start_memory'; only set
+    for groups numbered up to MAX_REGNUM.  */
+ pattern_offset_t inner_group_offset;
+ /* Offset at which the group starts in the compiled pattern.  */
+ pattern_offset_t laststart_offset;
+ /* Register number assigned to the group.  */
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+/* These three use a variable named `compile_stack' from the enclosing
+   scope.  */
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element, i.e. the slot at index `avail'; it is
+   not the most recently pushed element.  */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list.  The list is the bitmap that
+   starts at `b' in the enclosing scope, BYTEWIDTH bits to a byte.  C
+   is converted to unsigned char before it is used as an index, and is
+   evaluated twice, so it must not have side effects.  */
+#define SET_LIST_BIT(c)                               \
+  (b[((unsigned char) (c)) / BYTEWIDTH]               \
+   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern.
+
+   Reads decimal digits with PATFETCH and accumulates them into NUM,
+   which is left untouched (the callers start it at -1) when no digit
+   is found.  On exit `c' holds the first character after the digits,
+   unless the pattern ended first.
+
+   Once the value has gone past RE_DUP_MAX no further digits are added
+   to it: the remaining digits are still consumed, but NUM simply stays
+   above RE_DUP_MAX.  That keeps an over-long digit string in the
+   pattern from overflowing NUM, while the result still reads as "too
+   large" to any check against RE_DUP_MAX.
+
+   Uses `c', `p' and `pend' from the enclosing scope; because it uses
+   PATFETCH it can make the enclosing function return REG_EEND.  */
+#define GET_UNSIGNED_NUMBER(num)                                  \
+  { if (p != pend)                                                \
+      {                                                           \
+        PATFETCH (c);                                             \
+        while (ISDIGIT (c))                                       \
+          {                                                       \
+            if ((num) < 0)                                        \
+              (num) = 0;                                          \
+            if ((num) <= RE_DUP_MAX)                              \
+              (num) = (num) * 10 + c - '0';                       \
+            if (p == pend)                                        \
+              break;                                              \
+            PATFETCH (c);                                         \
+          }                                                       \
+      }                                                           \
+  }
+
+/* Length of the longest character class name accepted below.  */
+#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+/* Nonzero if STRING is exactly one of the twelve character class
+   names listed here.  STRING is evaluated once per comparison, so it
+   should be free of side effects.  */
+#define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+ const char *pattern;
+ int size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A temporary position in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ char *translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ printchar (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer) return REG_ESPACE;
+
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend) return REG_EBRACK;
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) return REG_EBRACK;
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ return REG_ERANGE;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) return REG_EBRACK;
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If this is a word bracketed by `[:' and `:]', set the bits
+ for that class. If it isn't, undo the ending character and
+ the letters, and leave the leading `:' and `[' (but set
+ bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) return REG_EBRACK;
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch))
+ || (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch))
+ || (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) return REG_EESCAPE;
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL)
+ {
+ RETALLOC (compile_stack.stack, compile_stack.size << 1,
+ compile_stack_elt_t);
+ if (compile_stack.stack == NULL) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM)
+ {
+ COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+ BUF_PUSH_3 (start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ return REG_ERPAREN;
+
+ handle_close:
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ return REG_ERPAREN;
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert (compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump
+ = COMPILE_STACK_TOP.fixup_alt_jump
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ : 0;
+ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM)
+ {
+ unsigned char *inner_group_loc
+ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);
+ pending_exact = 0;
+ b += 3;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by next alternative or
+ when know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE (3);
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_EBRACE;
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') return REG_EBRACE;
+
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+
+ if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + 5,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert (beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ laststart = b;
+ BUF_PUSH (wordchar);
+ break;
+
+
+ case 'W':
+ laststart = b;
+ BUF_PUSH (notwordchar);
+ break;
+
+
+ case '<':
+ BUF_PUSH (wordbeg);
+ break;
+
+ case '>':
+ BUF_PUSH (wordend);
+ break;
+
+ case 'b':
+ BUF_PUSH (wordbound);
+ break;
+
+ case 'B':
+ BUF_PUSH (notwordbound);
+ break;
+
+ case '`':
+ BUF_PUSH (begbuf);
+ break;
+
+ case '\'':
+ BUF_PUSH (endbuf);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ return REG_ESUBREG;
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2 (duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+
+ /* If followed by a repetition operator. */
+ || *p == '*' || *p == '^'
+ || ((syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : (*p == '+' || *p == '?'))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
+ {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+ BUF_PUSH_2 (exactn, 0);
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH (c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ return REG_EPAREN;
+
+ free (compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+ bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: ");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
+ return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'. */
+
+/* Write opcode OP into the byte at LOC, then ARG as a two-byte integer
+   into the two bytes that follow it.  */
+
+static void
+store_op1 (op, loc, arg)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+{
+  unsigned char *operand = loc + 1;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (operand, arg);
+}
+
+
+/* Two-operand variant of `store_op1': write OP at LOC, then ARG1 and
+   ARG2 as consecutive two-byte integers right after it.  */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+{
+  unsigned char *first_operand = loc + 1;
+  unsigned char *second_operand = loc + 3;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (first_operand, arg1);
+  STORE_NUMBER (second_operand, arg2);
+}
+
+
+/* Make room for a three-byte instruction at LOC by sliding the bytes in
+   [LOC, END) up by three, then store OP and its two-byte integer
+   parameter ARG in the gap.  The caller must have ensured the buffer
+   has three spare bytes at END.  */
+
+static void
+insert_op1 (op, loc, arg, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+     unsigned char *end;
+{
+  register unsigned char *src;
+
+  /* Source and destination overlap, so copy from the high end downward:
+     each byte is moved before anything can overwrite it.  */
+  for (src = end; src != loc; src--)
+    src[2] = src[-1];
+
+  store_op1 (op, loc, arg);
+}
+
+
+/* Make room for a five-byte instruction at LOC by sliding the bytes in
+   [LOC, END) up by five, then store OP and its two two-byte integer
+   parameters ARG1 and ARG2 in the gap.  The caller must have ensured
+   the buffer has five spare bytes at END.  */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+     unsigned char *end;
+{
+  register unsigned char *src;
+
+  /* Source and destination overlap, so copy from the high end downward:
+     each byte is moved before anything can overwrite it.  */
+  for (src = end; src != loc; src--)
+    src[4] = src[-1];
+
+  store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* P points just past a ^ in PATTERN.  Answer whether that ^ directly
+   follows an open-group or an alternation operator, taking into account
+   whether SYNTAX spells those operators with or without a backslash.
+   The caller guarantees at least one character precedes the ^.  */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+     const char *pattern, *p;
+     reg_syntax_t syntax;
+{
+  /* The character immediately before the ^ ...  */
+  const char *before = p - 2;
+  /* ... and whether that character is itself preceded by a backslash.  */
+  boolean escaped = before > pattern && before[-1] == '\\';
+
+  switch (*before)
+    {
+    case '(':
+      /* After a subexpression?  */
+      return (syntax & RE_NO_BK_PARENS) || escaped;
+
+    case '|':
+      /* After an alternative?  */
+      return (syntax & RE_NO_BK_VBAR) || escaped;
+
+    default:
+      return false;
+    }
+}
+
+
+/* Counterpart of at_begline_loc_p for $.  P points just past the $ and
+   the caller guarantees `P < PEND'.  Answer whether the $ is directly
+   followed by a close-group or an alternation operator, spelled with or
+   without a backslash according to SYNTAX.  */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+     const char *p, *pend;
+     int syntax;
+{
+  /* True when P starts a backslash escape whose second character is
+     still inside the pattern.  */
+  boolean has_escape = *p == '\\' && p + 1 < pend;
+  boolean closes_group;
+
+  /* Before a subexpression?  */
+  if (syntax & RE_NO_BK_PARENS)
+    closes_group = *p == ')';
+  else
+    closes_group = has_escape && p[1] == ')';
+
+  if (!closes_group)
+    {
+      /* Before an alternative?  */
+      if (syntax & RE_NO_BK_VBAR)
+        return *p == '|';
+
+      return has_escape && p[1] == '|';
+    }
+
+  return closes_group;
+}
+
+
+/* Search COMPILE_STACK, from its most recent entry down to the oldest,
+   for an element whose register number is REGNUM.  Returns true when
+   one is found and false otherwise.  */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+     compile_stack_type compile_stack;
+     regnum_t regnum;
+{
+  int idx = compile_stack.avail;
+
+  while (--idx >= 0)
+    {
+      if (compile_stack.stack[idx].regnum == regnum)
+        return true;
+    }
+
+  return false;
+}
+
+
+/* Finish compiling a range inside a bracket expression.  *P_PTR points
+   at the range's ending character in the uncompiled pattern, which
+   stops at PEND; the starting character is assumed to be at `P[-2]'
+   and the `-' at `P[-1]'.  For every character from the start through
+   the end (inclusive) the bit of its translation is set in the
+   compiled pattern B, and *P_PTR is advanced past the ending character.
+
+   Returns REG_ERANGE if the pattern stops before the ending character,
+   or if the range is empty and SYNTAX has RE_NO_EMPTY_RANGES set;
+   otherwise REG_NOERROR.
+
+   The names `p', `b' and `translate' are kept short on purpose so the
+   same macros as in `regex_compile' can be used here.  */
+
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+     const char **p_ptr, *pend;
+     char *translate;
+     reg_syntax_t syntax;
+     unsigned char *b;
+{
+  const char *p = *p_ptr;
+  int first, last;
+  /* Wider than `unsigned char' so the loop below can step past a `last'
+     of 0xff and terminate instead of wrapping around forever.  */
+  unsigned ch;
+
+  if (p == pend)
+    return REG_ERANGE;
+
+  /* Fetch both endpoints as unsigned bytes, so a pattern character with
+     its high bit set does not become a negative endpoint, and fetch
+     them untranslated: translation happens per character below.  */
+  first = ((unsigned char *) p)[-2];
+  last = ((unsigned char *) p)[0];
+
+  /* Leave the caller just past the ending character.  */
+  *p_ptr = p + 1;
+
+  /* A start that lies after the end denotes an empty range.  */
+  if (first > last)
+    {
+      if (syntax & RE_NO_EMPTY_RANGES)
+        return REG_ERANGE;
+
+      return REG_NOERROR;
+    }
+
+  ch = first;
+  while (ch <= last)
+    {
+      SET_LIST_BIT (TRANSLATE (ch));
+      ch++;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+   re_match_2 use a failure stack.  These have to be macros because of
+   REGEX_ALLOCATE.  Most of them refer by name to a variable called
+   `fail_stack' (or `fail_stack_ptr') that must be in scope wherever
+   they are expanded.  */
+
+
+/* Number of failure points for which to initially allocate space
+   when matching.  If this number is exceeded, we allocate more
+   space, so it is not a hard limit.  INIT_FAIL_STACK uses it as a
+   count of stack items (it allocates this many `fail_stack_elt_t's).
+   It may be overridden by defining it before this point.  */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack.  Would be
+   exactly that if always used MAX_FAILURE_SPACE each time we failed.
+   DOUBLE_FAIL_STACK refuses to grow the stack once its size exceeds
+   `re_max_failures * MAX_FAILURE_ITEMS'.
+   This is a variable only so users of regex can assign to it; we never
+   change it ourselves.  */
+int re_max_failures = 2000;
+
+typedef const unsigned char *fail_stack_elt_t; /* Type of one stack item. */
+
+typedef struct
+{
+ fail_stack_elt_t *stack; /* Array of items; allocated by INIT_FAIL_STACK. */
+ unsigned size; /* Number of items `stack' has room for. */
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) /* NOTE(review): indexes the next open slot, not the item pushed last -- confirm against its users. */
+
+
+/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.
+
+   Room for INIT_FAILURE_ALLOC items is obtained with REGEX_ALLOCATE and
+   the stack is marked empty.  The `return' is executed by whichever
+   function expands this macro, so that function must be able to return
+   -2.  */
+
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+   Return 1 if succeeds, and 0 if either ran out of memory
+   allocating space for it or it was already too large.
+
+   The limit is tested against the size before doubling, which is why
+   it is only approximate.  This is an expression, not a statement, and
+   it evaluates its argument several times.  Note that the `stack'
+   member is overwritten with the result of REGEX_REALLOCATE before that
+   result is tested, so after an allocation failure it is NULL.
+
+   REGEX_REALLOCATE requires `destination' be declared.  */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push PATTERN_OP on FAIL_STACK.
+
+   Return 1 if was able to do so and 0 if ran out of memory allocating
+   space to do so.
+
+   The stack is doubled first when it is full.  The fullness test goes
+   through FAIL_STACK_FULL, which looks at the variable literally named
+   `fail_stack' rather than at the FAIL_STACK argument, so the two must
+   denote the same object.  */
+#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (fail_stack)) \
+ ? 0 \
+ : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
+ 1))
+
+/* This pushes an item onto the failure stack.  ITEM is converted to
+   `fail_stack_elt_t', so it must be a value that survives that cast.
+   Assumes the variable `fail_stack', and does no bounds checking: the
+   caller must already have made room.  Probably should only be called
+   from within `PUSH_FAILURE_POINT'.
+
+   ITEM and the whole expansion are parenthesized so that the cast
+   applies to the entire argument expression and the macro can be used
+   safely inside a larger expression.  */
+#define PUSH_FAILURE_ITEM(item) \
+  (fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (item))
+
+/* The complement operation.  Assumes `fail_stack' is nonempty.  */
+#define POP_FAILURE_ITEM() (fail_stack.stack[--fail_stack.avail])
+
+/* Used to omit pushing failure point id's when we're not debugging.
+   Without DEBUG both macros expand to nothing, so their arguments are
+   not evaluated at all.  */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_ITEM
+#define DEBUG_POP(item_addr) (*(item_addr) = POP_FAILURE_ITEM ())
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+   if we ever fail back to it.
+
+   Requires variables fail_stack, regstart, regend, reg_info,
+   lowest_active_reg, highest_active_reg and num_regs be declared.
+   DOUBLE_FAIL_STACK requires `destination' be declared.
+
+   Items are pushed in this order: for each register from
+   lowest_active_reg through highest_active_reg its start, end and info
+   word; then lowest_active_reg, highest_active_reg, PATTERN_PLACE and
+   STRING_PLACE; and, when DEBUG is defined, the failure id.
+   POP_FAILURE_POINT pops them in the reverse order.
+
+   Does `return FAILURE_CODE' if runs out of memory.  */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ int this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ PUSH_FAILURE_ITEM (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ PUSH_FAILURE_ITEM (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ PUSH_FAILURE_ITEM (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ PUSH_FAILURE_ITEM (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_ITEM (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_ITEM (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+   for each register: its start, its end and its info word.  */
+#define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers: the lowest and highest
+   active register numbers, the pattern position and the string
+   position, plus the failure point id when debugging.  */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack.  Uses the variable
+   `num_regs' of the function in which it is expanded.  */
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items.  Uses the variables
+   `lowest_active_reg' and `highest_active_reg' of the function in
+   which it is expanded.  */
+#define NUM_FAILURE_ITEMS \
+ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it. */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+   We restore into the parameters, all of which should be lvalues:
+   STR -- the saved data position.  Left unchanged when the saved
+          position is NULL.
+   PAT -- the saved pattern position.
+   LOW_REG, HIGH_REG -- the lowest and highest active registers.
+   REGSTART, REGEND -- arrays of string positions.
+   REG_INFO -- array of information about each subexpression.
+
+   Items come off in the reverse of the order in which they were
+   pushed, so the registers are restored from HIGH_REG down to LOW_REG.
+
+   Also assumes the variables `fail_stack' and (if debugging), `bufp',
+   `pend', `string1', `size1', `string2', and `size2'.
+
+   The expansion is a bare brace block rather than a `do ... while (0)'
+   statement.  */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
+ int this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_ITEM (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ \
+ low_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+ \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
+   characters can start a string that matches the pattern.  This fastmap
+   is used by re_search to skip quickly over impossible starting points.
+
+   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+   area as BUFP->fastmap.
+
+   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+   the pattern buffer.
+
+   The compiled pattern is walked one path at a time.  Each opcode that
+   has an alternative continuation pushes that continuation on a local
+   failure stack; a path ends at the first opcode that must consume a
+   character (its possible characters are entered in the fastmap) or at
+   the end of the pattern (the path can match the empty string).
+
+   Returns 0 if we succeed, -2 if an internal error.  */
+
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  int j, k;
+  fail_stack_type fail_stack;
+#ifndef REGEX_MALLOC
+  char *destination;
+#endif
+  /* We don't push any register information onto the failure stack.  */
+  unsigned num_regs = 0;
+
+  register char *fastmap = bufp->fastmap;
+  unsigned char *pattern = bufp->buffer;
+  unsigned long size = bufp->used;
+  const unsigned char *p = pattern;
+  register unsigned char *pend = pattern + size;
+
+  /* Assume that each path through the pattern can be null until
+     proven otherwise.  We set this false at the bottom of switch
+     statement, to which we get only if a particular path doesn't
+     match the empty string.  */
+  boolean path_can_be_null = true;
+
+  /* We aren't doing a `succeed_n' to begin with.  */
+  boolean succeed_n_p = false;
+
+  assert (fastmap != NULL && p != NULL);
+
+  INIT_FAIL_STACK ();
+  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
+  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
+  bufp->can_be_null = 0;
+
+  while (p != pend || !FAIL_STACK_EMPTY ())
+    {
+      if (p == pend)
+        {
+          bufp->can_be_null |= path_can_be_null;
+
+          /* Reset for next path.  */
+          path_can_be_null = true;
+
+          p = fail_stack.stack[--fail_stack.avail];
+        }
+
+      /* We should never be about to go beyond the end of the pattern.  */
+      assert (p < pend);
+
+#ifdef SWITCH_ENUM_BUG
+      switch ((int) ((re_opcode_t) *p++))
+#else
+      switch ((re_opcode_t) *p++)
+#endif
+        {
+
+        /* I guess the idea here is to simply not bother with a fastmap
+           if a backreference is used, since it's too hard to figure out
+           the fastmap for the corresponding group.  Setting
+           `can_be_null' stops `re_search_2' from using the fastmap, so
+           that is all we do.  */
+        case duplicate:
+          bufp->can_be_null = 1;
+          return 0;
+
+
+        /* Following are the cases which match a character.  These end
+           with `break'.  */
+
+        case exactn:
+          /* P now points at the count byte; the first literal
+             character follows it.  */
+          fastmap[p[1]] = 1;
+          break;
+
+
+        case charset:
+          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+              fastmap[j] = 1;
+          break;
+
+
+        case charset_not:
+          /* Chars beyond end of map must be allowed.  */
+          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+            fastmap[j] = 1;
+
+          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+              fastmap[j] = 1;
+          break;
+
+
+        case wordchar:
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) == Sword)
+              fastmap[j] = 1;
+          break;
+
+
+        case notwordchar:
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) != Sword)
+              fastmap[j] = 1;
+          break;
+
+
+        case anychar:
+          {
+            /* Remember whether some earlier path already allowed a
+               newline: `.' must not take that away again.  Storing 0
+               unconditionally would make the fastmap wrongly exclude
+               newline for a pattern such as `\n|.'.  */
+            int fastmap_newline = fastmap['\n'];
+
+            /* `.' matches anything ...  */
+            for (j = 0; j < (1 << BYTEWIDTH); j++)
+              fastmap[j] = 1;
+
+            /* ... except perhaps newline.  */
+            if (!(bufp->syntax & RE_DOT_NEWLINE))
+              fastmap['\n'] = fastmap_newline;
+
+            /* Return if we have already set `can_be_null'; if we have,
+               then the fastmap is irrelevant.  Something's wrong here.  */
+            else if (bufp->can_be_null)
+              return 0;
+
+            /* Otherwise, have to check alternative paths.  */
+            break;
+          }
+
+
+#ifdef emacs
+        case syntaxspec:
+          k = *p++;
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) == (enum syntaxcode) k)
+              fastmap[j] = 1;
+          break;
+
+
+        case notsyntaxspec:
+          k = *p++;
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) != (enum syntaxcode) k)
+              fastmap[j] = 1;
+          break;
+
+
+        /* All cases after this match the empty string.  These end with
+           `continue'.  */
+
+
+        case before_dot:
+        case at_dot:
+        case after_dot:
+          continue;
+#endif /* emacs */
+
+
+        case no_op:
+        case begline:
+        case endline:
+        case begbuf:
+        case endbuf:
+        case wordbound:
+        case notwordbound:
+        case wordbeg:
+        case wordend:
+        case push_dummy_failure:
+          continue;
+
+
+        case jump_n:
+        case pop_failure_jump:
+        case maybe_pop_jump:
+        case jump:
+        case jump_past_alt:
+        case dummy_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;
+          if (j > 0)
+            continue;
+
+          /* Jump backward implies we just went through the body of a
+             loop and matched nothing.  Opcode jumped to should be
+             `on_failure_jump' or `succeed_n'.  Just treat it like an
+             ordinary jump.  For a * loop, it has pushed its failure
+             point already; if so, discard that as redundant.  */
+          if ((re_opcode_t) *p != on_failure_jump
+              && (re_opcode_t) *p != succeed_n)
+            continue;
+
+          p++;
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;
+
+          /* If what's on the stack is where we are now, pop it.  */
+          if (!FAIL_STACK_EMPTY ()
+              && fail_stack.stack[fail_stack.avail - 1] == p)
+            fail_stack.avail--;
+
+          continue;
+
+
+        case on_failure_jump:
+        case on_failure_keep_string_jump:
+        handle_on_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+
+          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+             end of the pattern.  We don't want to push such a point,
+             since when we restore it above, entering the switch will
+             increment `p' past the end of the pattern.  We don't need
+             to push such a point since we obviously won't find any more
+             fastmap entries beyond `pend'.  Such a pattern can match
+             the null string, though.  */
+          if (p + j < pend)
+            {
+              if (!PUSH_PATTERN_OP (p + j, fail_stack))
+                return -2;
+            }
+          else
+            bufp->can_be_null = 1;
+
+          if (succeed_n_p)
+            {
+              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
+              succeed_n_p = false;
+            }
+
+          continue;
+
+
+        case succeed_n:
+          /* Get to the number of times to succeed.  */
+          p += 2;
+
+          /* Increment p past the n for when k != 0.  */
+          EXTRACT_NUMBER_AND_INCR (k, p);
+          if (k == 0)
+            {
+              p -= 4;
+              succeed_n_p = true;  /* Spaghetti code alert.  */
+              goto handle_on_failure_jump;
+            }
+          continue;
+
+
+        case set_number_at:
+          p += 4;
+          continue;
+
+
+        case start_memory:
+        case stop_memory:
+          p += 2;
+          continue;
+
+
+        default:
+          abort (); /* We have listed all the cases.  */
+        } /* switch *p++ */
+
+      /* Getting here means we have found the possible starting
+         characters for one path of the pattern -- and that the empty
+         string does not match.  We need not follow this path further.
+         Instead, look at the next alternative (remembered on the
+         stack), or quit if no more.  The test at the top of the loop
+         does these things.  */
+      path_can_be_null = false;
+      p = pend;
+    } /* while p */
+
+  /* Set `can_be_null' for the last path (also the first path, if the
+     pattern is empty).  */
+  bufp->can_be_null |= path_can_be_null;
+  return 0;
+} /* re_compile_fastmap */
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data: the buffer is marked REGS_UNALLOCATED and both arrays
+   in REGS are reset to null pointers (STARTS and ENDS are ignored).
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+{
+  if (num_regs)
+    {
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      /* These members are `regoff_t *', so the null value is spelled as
+         a pointer rather than as a `regoff_t' integer.  */
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+
+/* Searching routines. */
+
+/* Single-string front end to re_search_2: STRING, of length SIZE, is
+   searched on its own and there is no way to say where matching must
+   stop.  See re_search_2 for STARTPOS, RANGE, REGS and the return
+   value.  */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, startpos, range;
+     struct re_registers *regs;
+{
+  /* The text is passed as the second string with an empty first string,
+     and matching is allowed to run to the very end of it.  */
+  int stop = size;
+  int result;
+
+  result = re_search_2 (bufp, NULL, 0, string, size, startpos, range,
+                        regs, stop);
+  return result;
+}
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on.
+
+   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+   RANGE is how far to scan while trying to match.  RANGE = 0 means try
+   only at STARTPOS; in general, the last start tried is STARTPOS +
+   RANGE.  A negative RANGE searches backwards.  RANGE is clamped so
+   that no start position below 0 or above SIZE1 + SIZE2 is ever tried.
+
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire BUFP->buffer and its contained
+   subexpressions.
+
+   Do not consider matching one past the index STOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   We return either the position in the strings at which the match was
+   found, -1 if no match (or if STARTPOS is out of range), or -2 if
+   error (such as failure stack overflow).  */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+     int stop;
+{
+  int val;
+  register char *fastmap = bufp->fastmap;
+  register char *translate = bufp->translate;
+  int total_size = size1 + size2;
+
+  /* Check for out-of-range STARTPOS.  */
+  if (startpos < 0 || startpos > total_size)
+    return -1;
+
+  /* Fix up RANGE if it might eventually take us outside
+     the virtual concatenation of STRING1 and STRING2.  STARTPOS is
+     known to lie in [0, total_size] here, so both bounds are computed
+     without forming STARTPOS + RANGE, which could overflow.  The lower
+     bound keeps a backward search from reaching position -1, where the
+     fastmap test would read the byte before the string.  */
+  if (range < -startpos)
+    range = -startpos;
+  else if (range > total_size - startpos)
+    range = total_size - startpos;
+
+  /* If the search isn't to be a backwards one, don't waste time in a
+     search for a pattern that must be anchored.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+    {
+      if (startpos > 0)
+        return -1;
+      else
+        range = 1;
+    }
+
+  /* Update the fastmap now if not correct already.  */
+  if (fastmap && !bufp->fastmap_accurate)
+    if (re_compile_fastmap (bufp) == -2)
+      return -2;
+
+  /* Loop through the string, looking for a place to start matching.  */
+  for (;;)
+    {
+      /* If a fastmap is supplied, skip quickly over characters that
+         cannot be the start of a match.  If the pattern can match the
+         null string, however, we don't need to skip characters; we want
+         the first null string.  */
+      if (fastmap && startpos < total_size && !bufp->can_be_null)
+        {
+          if (range > 0)	/* Searching forwards.  */
+            {
+              register const char *d;
+              register int lim = 0;
+              int irange = range;
+
+              /* When the scan would cross from STRING1 into STRING2,
+                 stop it at the boundary; LIM is what is left of RANGE
+                 at that point.  */
+              if (startpos < size1 && startpos + range >= size1)
+                lim = range - (size1 - startpos);
+
+              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+              /* Written out as an if-else to avoid testing `translate'
+                 inside the loop.  */
+              if (translate)
+                while (range > lim
+                       && !fastmap[(unsigned char)
+                                   translate[(unsigned char) *d++]])
+                  range--;
+              else
+                while (range > lim && !fastmap[(unsigned char) *d++])
+                  range--;
+
+              startpos += irange - range;
+            }
+          else			/* Searching backwards.  */
+            {
+              register char c = (size1 == 0 || startpos >= size1
+                                 ? string2[startpos - size1]
+                                 : string1[startpos]);
+
+              if (!fastmap[(unsigned char) TRANSLATE (c)])
+                goto advance;
+            }
+        }
+
+      /* If can't match the null string, and that's all we have left, fail.  */
+      if (range >= 0 && startpos == total_size && fastmap
+          && !bufp->can_be_null)
+        return -1;
+
+      val = re_match_2 (bufp, string1, size1, string2, size2,
+                        startpos, regs, stop);
+      if (val >= 0)
+        return startpos;
+
+      if (val == -2)
+        return -2;
+
+    advance:
+      if (!range)
+        break;
+      else if (range > 0)
+        {
+          range--;
+          startpos++;
+        }
+      else
+        {
+          range++;
+          startpos--;
+        }
+    }
+  return -1;
+} /* re_search_2 */
+
+/* Declarations and macros for re_match_2.  The functions named here
+   are only declared at this point; their definitions are not in this
+   part of the file.  */
+
+static int bcmp_translate ();
+static boolean alt_match_null_string_p (),
+ common_op_match_null_string_p (),
+ group_match_null_string_p ();
+
+/* Structure for per-register (a.k.a. per-group) information.
+   This must not be longer than one word, because we push this value
+   onto the failure stack.  Other register information, such as the
+   starting and ending positions (which are addresses), and the list of
+   inner groups (which is a bits list) are maintained in separate
+   variables.
+
+   We are making a (strictly speaking) nonportable assumption here: that
+   the compiler will pack our bit fields into something that fits into
+   the type of `word', i.e., is something that fits into one item on the
+   failure stack.  */
+typedef union
+{
+ fail_stack_elt_t word; /* The view saved and restored by PUSH_FAILURE_POINT and POP_FAILURE_POINT. */
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ unsigned is_active : 1;
+ unsigned matched_something : 1; /* Set by SET_REGS_MATCHED. */
+ unsigned ever_matched_something : 1; /* Set by SET_REGS_MATCHED. */
+ } bits;
+} register_info_type;
+
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) /* These four accessors expand to lvalues. */
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* Call this when we have matched a real character; it sets the
+ `matched_something' flag for the subexpressions which we are
+ currently inside, and also records (in `ever_matched_something')
+ that those subexprs have matched.
+
+ Every register from `lowest_active_reg' through `highest_active_reg'
+ is marked, whatever its `is_active' bit says. The macro uses those
+ two variables and `reg_info' from the enclosing function. When no
+ register is active the bounds are NO_LOWEST_ACTIVE_REG and
+ NO_HIGHEST_ACTIVE_REG, the lower bound is the larger of the two, and
+ the loop body does not run. */
+#define SET_REGS_MATCHED() \
+ do \
+ { \
+ unsigned r; \
+ for (r = lowest_active_reg; r <= highest_active_reg; r++) \
+ { \
+ MATCHED_SOMETHING (reg_info[r]) \
+ = EVER_MATCHED_SOMETHING (reg_info[r]) \
+ = 1; \
+ } \
+ } \
+ while (0)
+
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2', into an offset within the virtual concatenation of the
+ two: a pointer into `string1' yields its distance from `string1', and
+ a pointer into `string2' yields its distance from `string2' plus
+ `size1'. FIRST_STRING_P (defined elsewhere in the file) decides
+ which string PTR belongs to. PTR is expanded more than once, so it
+ should have no side effects. */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
+
+/* Registers are set to a sentinel when they haven't yet matched. The
+ sentinel is the integer -1 converted to `char *'. re_match_2 stores
+ it in the start/end vectors of every group register before matching,
+ and reports a register that still holds it as offset -1 to the
+ caller. REG_UNSET (e) is true when E is that sentinel. */
+#define REG_UNSET_VALUE ((char *) -1)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+
+/* Macros for dealing with the split strings in re_match_2.
+
+ MATCHING_IN_FIRST_STRING is true while `dend', the end of the segment
+ that `d' is scanning, is still the stopping point inside string1. */
+
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary.
+
+ It works on the variables `d', `dend', `string2' and `end_match_2' of
+ the enclosing function and jumps to that function's `fail' label when
+ there is no data left. It is a loop rather than a single test so
+ that, after switching to string2, an empty remainder of string2
+ (d == end_match_2 straight away) is caught on the next iteration.
+ The expansion is a bare `while' statement, so use it only as a
+ complete statement. */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ of `string1' and `string2'. If only one string, it's `string2'.
+
+ AT_STRINGS_BEG compares D with `string1' when `size1' is nonzero and
+ with `string2' otherwise; it is also true, whatever D is, when
+ `size2' is zero. NOTE(review): that last clause presumably relies on
+ re_match_2 having moved a lone string1 into string2 beforehand, so
+ that a zero `size2' means there is no data at all -- confirm.
+ AT_STRINGS_END compares D with `end2', the end of string2. */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if at the end of string1 (D equals
+ `end1'), look at the first character in string2; and if just before
+ the beginning of string2 (D equals `string2 - 1'), look at the last
+ character in string1. Otherwise the character at D itself is used.
+
+ The character is classified with SYNTAX and compared against Sword,
+ both of which are defined elsewhere in the file. D is expanded more
+ than once, so it should have no side effects. */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+
+/* Test if the character before D and the one at D differ with respect
+ to being word-constituent. The very beginning and the very end of
+ the data always count as a boundary; the neighbouring characters are
+ only examined when D is at neither end.
+ NOTE(review): `d' appears unparenthesized in `d - 1', so pass a
+ simple expression. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+
+
+/* Free everything we malloc.
+
+ With REGEX_MALLOC defined, FREE_VAR (var) frees VAR if it is non-null
+ and then sets it to NULL, and FREE_VARIABLES applies that to the
+ failure stack and to each of the register vectors that re_match_2
+ allocates.
+ NOTE(review): FREE_VAR expands to two separate statements, so it is
+ only safe as a complete statement inside braces, which is how
+ FREE_VARIABLES uses it; do not put it directly under an `if' or
+ `else'. */
+#ifdef REGEX_MALLOC
+#define FREE_VAR(var) if (var) free (var); var = NULL
+#define FREE_VARIABLES() \
+ do { \
+ FREE_VAR (fail_stack.stack); \
+ FREE_VAR (regstart); \
+ FREE_VAR (regend); \
+ FREE_VAR (old_regstart); \
+ FREE_VAR (old_regend); \
+ FREE_VAR (best_regstart); \
+ FREE_VAR (best_regend); \
+ FREE_VAR (reg_info); \
+ FREE_VAR (reg_dummy); \
+ FREE_VAR (reg_info_dummy); \
+ } while (0)
+#else /* not REGEX_MALLOC */
+/* Some MIPS systems (at least) want this to free alloca'd storage.
+ Without REGEX_MALLOC nothing is freed explicitly; FREE_VARIABLES is
+ just this one alloca call. */
+#define FREE_VARIABLES() alloca (0)
+#endif /* not REGEX_MALLOC */
+
+
+/* These values must meet several constraints. They must not be valid
+ register values; since we have a limit of 255 registers (because
+ we use only one byte in the pattern for the register number), we can
+ use numbers larger than 255. They must differ by 1, because of
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
+ be larger than the value for the highest register, so we do not try
+ to actually save any registers when none are active.
+
+ NO_HIGHEST_ACTIVE_REG is 1 << BYTEWIDTH, one more than the largest
+ number that fits in BYTEWIDTH bits, and NO_LOWEST_ACTIVE_REG is one
+ more than that. re_match_2 starts out with `lowest_active_reg' and
+ `highest_active_reg' set to these values and puts them back whenever
+ the last active group is closed. */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines. */
+
+#ifndef emacs /* Emacs never uses this. */
+/* re_match is like re_match_2 except it takes only a single string.
+
+ BUFP is the compiled pattern, STRING and SIZE give the data, POS is
+ the offset in STRING at which the match attempt starts, and REGS is
+ handed on unchanged for the register results. The data is passed
+ to re_match_2 as its second string, with a null first string of
+ length zero, and SIZE is used as the stop position, so matching may
+ run to the end of STRING. The value returned is whatever re_match_2
+ returns. */
+
+int
+re_match (bufp, string, size, pos, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int size, pos;
+ struct re_registers *regs;
+ {
+ return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
+}
+#endif /* not emacs */
+
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+ and SIZE2, respectively). We start matching at POS, and stop
+ matching at STOP.
+
+ If REGS is non-null and the `no_sub' field of BUFP is zero, we
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
+
+ We return -1 if no match, -2 if an internal error (such as the
+ failure stack overflowing). Otherwise, we return the length of the
+ matched substring. */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ /* General temporaries. */
+ int mcnt;
+ unsigned char *p1;
+
+ /* Just past the end of the corresponding string. */
+ const char *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const char *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const char *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+ unsigned char *p = bufp->buffer;
+ register unsigned char *pend = p + bufp->used;
+
+ /* We use this to map every character in the string. */
+ char *translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ restart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next one is tried. */
+ fail_stack_type fail_stack;
+#ifdef DEBUG
+ static unsigned failure_id = 0;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ unsigned num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the input we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+ const char **regstart, **regend;
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+ const char **old_regstart, **old_regend;
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+ register_info_type *reg_info;
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+ const char **best_regstart, **best_regend;
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
+
+ /* Used when we pop values we don't care about. */
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK ();
+
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
+ {
+ regstart = REGEX_TALLOC (num_regs, const char *);
+ regend = REGEX_TALLOC (num_regs, const char *);
+ old_regstart = REGEX_TALLOC (num_regs, const char *);
+ old_regend = REGEX_TALLOC (num_regs, const char *);
+ best_regstart = REGEX_TALLOC (num_regs, const char *);
+ best_regend = REGEX_TALLOC (num_regs, const char *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+#ifdef REGEX_MALLOC
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* REGEX_MALLOC */
+
+ /* The starting position is bogus. */
+ if (pos < 0 || pos > size1 + size2)
+ {
+ FREE_VARIABLES ();
+ return -1;
+ }
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (stop <= size1)
+ {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+ if (size1 > 0 && pos <= size1)
+ {
+ d = string1 + pos;
+ dend = end_match_1;
+ }
+ else
+ {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+
+ DEBUG_PRINT1 ("The compiled pattern is: ");
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+ DEBUG_PRINT1 ("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+ DEBUG_PRINT1 ("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;)
+ {
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+
+ if (p == pend)
+ { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1 ("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
+ if (d != end_match_2)
+ {
+ DEBUG_PRINT1 ("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY ())
+ { /* More failure points to try. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set
+ || (same_str_p && d > match_end)
+ || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+ {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. */
+ else if (best_regs_set)
+ {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1 ("Restoring best registers.\n");
+
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ } /* d != end_match_2 */
+
+ DEBUG_PRINT1 ("Accepting match.\n");
+
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ }
+ }
+ else
+ assert (bufp->regs_allocated == REGS_FIXED);
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
+ : d - string2 + size1);
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+ {
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ } /* regs && !bufp->no_sub */
+
+ FREE_VARIABLES ();
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
+ : string2 - size1);
+
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+#ifdef SWITCH_ENUM_BUG
+ switch ((int) ((re_opcode_t) *p++))
+#else
+ switch ((re_opcode_t) *p++)
+#endif
+ {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");
+ break;
+
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+ mcnt = *p++;
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH ();
+ if (translate[(unsigned char) *d++] != (char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH ();
+ if (*d++ != (char) *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED ();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+ PREFETCH ();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ goto fail;
+
+ SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register unsigned char c;
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+ PREFETCH ();
+ c = TRANSLATE (*d); /* The character to match. */
+
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2 (" old_regstart: %d\n",
+ POINTER_TO_OFFSET (old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2 (" old_regend: %d\n",
+ POINTER_TO_OFFSET (old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ unsigned char r = *p - 1;
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop'', and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING (reg_info[*p])
+ || (re_opcode_t) p[-3] == start_memory)
+ && (p + 2) < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) *p1++)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (is_a_jump_n)
+ p1 += 2;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+ {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < *p + *(p + 1); r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if ((int) old_regend[r] >= (int) regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const char *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH ();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
+ : bcmp (d, d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG (d))
+ {
+ if (!bufp->not_bol) break;
+ }
+ else if (d[-1] == '\n' && bufp->newline_anchor)
+ {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END (d))
+ {
+ if (!bufp->not_eol) break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor)
+ {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG (d))
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END (d))
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we need the preceding group,
+ and in \(\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+ {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1 (":\n");
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register unsigned char *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands. */
+ while (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3; /* Skip over args, too. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend)
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) *p2 == exactn
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+ p1 = p + mcnt;
+
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_pop_jump' of this case. Examine what
+ follows. */
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not)
+ {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump)
+ {
+ p[-1] = (unsigned char) jump;
+ DEBUG_PRINT1 (" Match => jump.\n");
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ unsigned dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
+
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ unconditional_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert (mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
+ }
+ else if (mcnt == 0)
+ {
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
+ p[2] = (unsigned char) no_op;
+ p[3] = (unsigned char) no_op;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER (p + 2, mcnt);
+ goto unconditional_jump;
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+ case wordbound:
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ goto fail;
+ break;
+
+ case wordbeg:
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+ break;
+ goto fail;
+
+#ifdef emacs
+#ifdef emacs19
+ case before_dot:
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+ goto fail;
+ break;
+#else /* not emacs19 */
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
+ goto fail;
+ break;
+#endif /* not emacs19 */
+
+ case syntaxspec:
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH ();
+ if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax:
+ PREFETCH ();
+ if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH ();
+ if (!WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH ();
+ if (WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+#endif /* not emacs */
+
+ default:
+ abort ();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY ())
+ { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1 ("\nFAIL:\n");
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert (p <= pend);
+ if (p < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) *p)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES ();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2. */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise.
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ Otherwise *P is left as it was: the only store through P is in the
+ stop_memory case below.
+
+ END bounds the scan; running into END before the matching stop_memory
+ is seen yields false. REG_INFO is not examined here, it is only handed
+ on to alt_match_null_string_p and common_op_match_null_string_p.
+
+ We don't handle duplicates properly (yet). */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ /* Point to after the args to the start_memory. */
+ unsigned char *p1 = *p + 2;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. (For a backwards jump nothing more is done: p1 is
+ already past the opcode and its offset, and the scan simply
+ continues with the next opcode.) */
+
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. */
+
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ reg_info))
+ return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt >= 0 */
+ break;
+
+
+ /* End of the group: everything inside it could match nothing.
+ The assert checks that this stop_memory carries the same
+ register number that *P pointed at on entry. */
+ case stop_memory:
+ assert (p1[1] == **p);
+ *p = p1 + 2;
+ return true;
+
+
+ /* Any other opcode is judged by the shared helper, which also
+ advances p1 past the opcode when it returns true. */
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ /* Reached END without seeing the matching stop_memory. */
+ return false;
+} /* group_match_null_string_p */
+
+
+/* Companion to group_match_null_string_p for a single alternative.
+
+   P is the first byte of the alternative and END is one byte past its
+   last; the alternative may itself contain groups, but no further
+   alternation is looked for at this level.
+
+   Return true if every opcode from P up to END can match the empty
+   string, false as soon as one is found that cannot.  */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+    unsigned char *p, *end;
+    register_info_type *reg_info;
+{
+  unsigned char *scan;
+  int jump_len;
+
+  for (scan = p; scan < end; )
+    {
+      if ((re_opcode_t) *scan == on_failure_jump)
+        {
+          /* It's a loop: step over the opcode, read its offset, and
+             continue from wherever the offset leads.  */
+          scan++;
+          EXTRACT_NUMBER_AND_INCR (jump_len, scan);
+          scan += jump_len;
+          continue;
+        }
+
+      /* Everything else is judged by the shared helper, which moves
+         `scan' past the opcode when it answers true.  */
+      if (!common_op_match_null_string_p (&scan, end, reg_info))
+        return false;
+    }
+
+  return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ *P points at the opcode to examine; END is passed on to
+ group_match_null_string_p when a nested group is met; REG_INFO is the
+ per-register table whose match-null-string flag is read (for
+ `duplicate') and set (for `start_memory').
+
+ Returns true if the opcode can match the empty string, and then sets
+ P to one after the op and its arguments, if any. Returns false
+ otherwise, leaving *P untouched (the store to *P is only reached on
+ the true path). */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ /* p1 is advanced past the opcode byte as part of the dispatch. */
+ switch ((re_opcode_t) *p1++)
+ {
+ /* These are accepted as able to match nothing; p1 is not moved
+ any further for them. */
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ /* A nested group: p1 now points at its register number, which is
+ what group_match_null_string_p expects; on a true result it
+ leaves p1 just past the group's stop_memory. */
+ case start_memory:
+ reg_no = *p1;
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p (&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump.
+ Only forward (non-negative) jumps are followed; a backwards jump
+ is answered with false. */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false;
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ if (mcnt == 0)
+ {
+ /* Zero required repetitions: step back over both numbers to
+ the jump offset that follows the opcode, re-read it, and
+ follow it. */
+ p1 -= 4;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return false;
+ break;
+
+ /* A back reference can match nothing only if its group was recorded
+ as able to. NOTE(review): p1 is not advanced past the register
+ number byte here, so on the true path *P ends up pointing at that
+ byte -- confirm whether this is intended. */
+ case duplicate:
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return false;
+ break;
+
+ /* NOTE(review): there is no `break' here, so control falls into
+ `default' and returns false; the adjustment to p1 is never stored
+ back through P. Confirm whether the fall-through is intended. */
+ case set_number_at:
+ p1 += 4;
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ *p = p1;
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Compare LEN bytes of S1 and S2 after mapping each byte through
+   TRANSLATE.  Return zero when every translated pair is equal, and 1
+   at the first pair that differs.  */
+
+static int
+bcmp_translate (s1, s2, len, translate)
+    unsigned char *s1, *s2;
+    register int len;
+    char *translate;
+{
+  register unsigned char *left = s1;
+  register unsigned char *right = s2;
+
+  for (; len; len--)
+    {
+      if (translate[*left++] != translate[*right++])
+        return 1;
+    }
+
+  return 0;
+}
+
+/* Entry points for GNU code. */
+
+/* GNU entry point for compiling a regular expression.
+
+   PATTERN, LENGTH bytes long, is compiled into BUFP using the global
+   `re_syntax_options'.  The result is the `re_error_msg' entry for the
+   status regex_compile reports: 0 if the pattern was valid, otherwise
+   an error string.
+
+   On entry BUFP must already have its `allocated' (and perhaps
+   `buffer') and `translate' fields set.  The actual work is done by
+   regex_compile.  */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+    const char *pattern;
+    int length;
+    struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* Anchors match at newlines.  */
+  bufp->newline_anchor = 1;
+
+  /* GNU callers ask for register information by passing a non-null
+     REGS argument to re_match and friends, never through no_sub.  */
+  bufp->no_sub = 0;
+
+  /* GNU callers expect at least RE_NREGS registers to be set (and at
+     least one extra to be -1).  */
+  bufp->regs_allocated = REGS_UNALLOCATED;
+
+  status = regex_compile (pattern, length, re_syntax_options, bufp);
+  return re_error_msg[(int) status];
+}
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them if this is an Emacs or POSIX compilation. */
+
+#if !defined (emacs) && !defined (_POSIX_SOURCE)
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+/* 4.2 BSD entry point: compile S into the single static pattern buffer
+   `re_comp_buf'.
+
+   A null S asks whether a pattern has already been compiled: the answer
+   is 0 if so, otherwise a message string.  For a non-null S the result
+   is the `re_error_msg' entry for regex_compile's status, or
+   "Memory exhausted" if the first-time allocations fail.  */
+char *
+re_comp (s)
+    const char *s;
+{
+  reg_errcode_t status;
+
+  if (s == NULL)
+    {
+      if (re_comp_buf.buffer)
+        return 0;
+      return "No previous regular expression";
+    }
+
+  /* First use: give the static buffer some initial storage.  */
+  if (re_comp_buf.buffer == NULL)
+    {
+      re_comp_buf.buffer = (unsigned char *) malloc (200);
+      if (!re_comp_buf.buffer)
+        return "Memory exhausted";
+      re_comp_buf.allocated = 200;
+
+      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (!re_comp_buf.fastmap)
+        return "Memory exhausted";
+    }
+
+  /* `re_exec' always passes a null `regs' argument, so the pattern
+     buffer fields that only matter for registers are left alone.  */
+
+  /* Anchors match at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  status = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+
+  /* The message table is const; the BSD interface is not, hence the
+     cast.  */
+  return (char *) re_error_msg[(int) status];
+}
+
+
+/* 4.2 BSD entry point: search the null-terminated string S with the
+   pattern held in `re_comp_buf'.  Returns 1 if re_search reports a
+   non-negative position, 0 otherwise.  No register information is
+   requested.  */
+int
+re_exec (s)
+    const char *s;
+{
+  const int length = strlen (s);
+  int where = re_search (&re_comp_buf, s, length, 0, length,
+                         (struct re_registers *) 0);
+
+  return where >= 0;
+}
+#endif /* not emacs and not _POSIX_SOURCE */
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' and `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+    regex_t *preg;
+    const char *pattern;
+    int cflags;
+{
+  reg_errcode_t status;
+  unsigned syntax;
+
+  /* Choose the base POSIX syntax from REG_EXTENDED.  */
+  if (cflags & REG_EXTENDED)
+    syntax = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syntax = RE_SYNTAX_POSIX_BASIC;
+
+  /* Leave allocation of the compiled pattern to regex_compile.  */
+  preg->buffer = 0;
+  preg->allocated = 0;
+
+  /* No fastmap: with REG_NEWLINE we would have to add every character
+     that can follow a newline, so searching just tries each position.  */
+  preg->fastmap = 0;
+
+  if (!(cflags & REG_ICASE))
+    preg->translate = NULL;
+  else
+    {
+      unsigned ch;
+
+      preg->translate = (char *) malloc (CHAR_SET_SIZE);
+      if (preg->translate == NULL)
+        return (int) REG_ESPACE;
+
+      /* Fold each uppercase character onto its lowercase form; every
+         other character maps to itself.  */
+      for (ch = 0; ch < CHAR_SET_SIZE; ch++)
+        {
+          if (ISUPPER (ch))
+            preg->translate[ch] = tolower (ch);
+          else
+            preg->translate[ch] = ch;
+        }
+    }
+
+  if (cflags & REG_NEWLINE)
+    {
+      /* Neither . nor [^...] may match a newline, and anchors match
+         at newlines.  */
+      syntax = (syntax & ~RE_DOT_NEWLINE) | RE_HAT_LISTS_NOT_NEWLINE;
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+
+  preg->no_sub = (cflags & REG_NOSUB) != 0;
+
+  /* A null character ends a POSIX pattern, so strlen gives its
+     length.  */
+  status = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+  /* POSIX has a single code, REG_EPAREN, for both unmatched open-group
+     and unmatched close-group.  */
+  if (status == REG_ERPAREN)
+    status = REG_EPAREN;
+
+  return (int) status;
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  REG_NOMATCH
+   is also returned when the temporary register arrays cannot be
+   allocated; nothing is leaked in that case.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+    const regex_t *preg;
+    const char *string;
+    size_t nmatch;
+    regmatch_t pmatch[];
+    int eflags;
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  /* Work on a copy so the caller's (const) pattern buffer is never
+     modified by the per-call flags below.  */
+  private_preg = *preg;
+
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+  /* The user has told us exactly how many registers to return
+     information about, via `nmatch'.  We have to pass that on to the
+     matching routines.  */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      regs.num_regs = nmatch;
+      regs.start = TALLOC (nmatch, regoff_t);
+      regs.end = TALLOC (nmatch, regoff_t);
+      if (regs.start == NULL || regs.end == NULL)
+        {
+          /* One of the two allocations may have succeeded; release it
+             rather than leaking it on this early return.  */
+          if (regs.start != NULL)
+            free (regs.start);
+          if (regs.end != NULL)
+            free (regs.end);
+          return (int) REG_NOMATCH;
+        }
+    }
+
+  /* Perform the searching operation.  */
+  ret = re_search (&private_preg, string, len,
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
+
+  /* Copy the register information to the POSIX structure.  */
+  if (want_reg_info)
+    {
+      if (ret >= 0)
+        {
+          unsigned r;
+
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
+
+      /* If we needed the temporary register info, free the space now.  */
+      free (regs.start);
+      free (regs.end);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+   from either regcomp or regexec.  We don't use PREG here.
+
+   If ERRBUF_SIZE is nonzero, the message is copied into ERRBUF,
+   truncated if necessary so that at most ERRBUF_SIZE bytes are written,
+   and always null-terminated.  If ERRBUF_SIZE is zero, ERRBUF is not
+   touched.
+
+   The return value is the size needed to hold the complete message,
+   including the terminating null, so a caller can detect truncation by
+   comparing it with ERRBUF_SIZE.
+
+   An ERRCODE outside the bounds of `re_error_msg' aborts the program.  */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+    int errcode;
+    const regex_t *preg;
+    char *errbuf;
+    size_t errbuf_size;
+{
+  const char *msg;
+  size_t msg_size;
+
+  if (errcode < 0
+      || (size_t) errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
+    /* Only error codes returned by the rest of the code should be passed
+       to this routine.  If we are given anything else, or if other regex
+       code generates an invalid error code, then the program has a bug.
+       Dump core so we can fix it.  */
+    abort ();
+
+  /* Look the message up.  This assignment was missing, so `msg' was
+     read uninitialized by the strlen below.  */
+  msg = re_error_msg[errcode];
+
+  /* The table entry for success is a null pointer (re_compile_pattern
+     returns it as "no error"), so substitute a real string rather than
+     handing a null pointer to strlen.  */
+  if (!msg)
+    msg = "Success";
+
+  msg_size = strlen (msg) + 1; /* Includes the null.  */
+
+  if (errbuf_size != 0)
+    {
+      if (msg_size > errbuf_size)
+        {
+          /* Too long: copy what fits and terminate by hand, since
+             strncpy does not add a null when it truncates.  */
+          strncpy (errbuf, msg, errbuf_size - 1);
+          errbuf[errbuf_size - 1] = 0;
+        }
+      else
+        strcpy (errbuf, msg);
+    }
+
+  return msg_size;
+}
+
+
+/* Release the storage PREG owns -- the compiled pattern, the fastmap
+   and the translate table -- and reset the related fields so that PREG
+   no longer refers to freed memory.  PREG itself is not freed.  */
+
+void
+regfree (preg)
+    regex_t *preg;
+{
+  /* Free whatever is present.  */
+  if (preg->buffer)
+    free (preg->buffer);
+  if (preg->fastmap)
+    free (preg->fastmap);
+  if (preg->translate)
+    free (preg->translate);
+
+  /* Clear the pointers just released.  */
+  preg->buffer = NULL;
+  preg->fastmap = NULL;
+  preg->translate = NULL;
+
+  /* Reset the bookkeeping that described them.  */
+  preg->allocated = 0;
+  preg->used = 0;
+  preg->fastmap_accurate = 0;
+}
+
+#endif /* not emacs */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/usr.bin/tar/regex.h b/gnu/usr.bin/tar/regex.h
new file mode 100644
index 000000000000..0840861da369
--- /dev/null
+++ b/gnu/usr.bin/tar/regex.h
@@ -0,0 +1,490 @@
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.11.
+
+ Copyright (C) 1985, 89, 90, 91, 92 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#ifdef VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+#include <stddef.h>
+#endif
+
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating lower
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when the ending range point collates lower than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ char *translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+
+/* search.c (search_buffer) in Emacs needs this one opcode value. It is
+ defined both in `regex.c' and here. */
+#define RE_EXACTN_VALUE 1
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match.
+ It is a structure of arrays: register I is described by START[I] and
+ END[I], both byte offsets within the string (type regoff_t). */
+struct re_registers
+{
+ unsigned num_regs; /* Number of registers START and END can hold. */
+ regoff_t *start; /* START[I]: offset where register I's match begins. */
+ regoff_t *end; /* END[I]: offset where register I's match ends. */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, int length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+extern size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/usr.bin/tar/rmt.h b/gnu/usr.bin/tar/rmt.h
new file mode 100644
index 000000000000..2155223954c3
--- /dev/null
+++ b/gnu/usr.bin/tar/rmt.h
@@ -0,0 +1,98 @@
+/* Definitions for communicating with a remote tape drive.
+ Copyright (C) 1988, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if !defined(_POSIX_VERSION)
+#ifdef __MSDOS__
+#include <io.h>
+#else /* !__MSDOS__ */
+extern off_t lseek ();
+#endif /* __MSDOS__ */
+#endif /* _POSIX_VERSION */
+
+#ifdef NO_REMOTE
+#define _isrmt(f) 0
+#define rmtopen open
+#define rmtaccess access
+#define rmtstat stat
+#define rmtcreat creat
+#define rmtlstat lstat
+#define rmtread read
+#define rmtwrite write
+#define rmtlseek lseek
+#define rmtclose close
+#define rmtioctl ioctl
+#define rmtdup dup
+#define rmtfstat fstat
+#define rmtfcntl fcntl
+#define rmtisatty isatty
+
+#else /* !NO_REMOTE */
+
+#define __REM_BIAS 128
+#define RMTIOCTL
+
+#ifndef O_CREAT
+#define O_CREAT 01000
+#endif
+
+extern char *__rmt_path;
+
+#if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
+#include <string.h>
+#ifndef index
+#define index strchr
+#endif
+#else
+extern char *index ();
+#endif
+
+/* Each rmtXXX macro routes a call either to the remote tape library or
+   to the ordinary system call of the same name.  A path is treated as
+   remote when f_force_local is clear and the path contains a ':'; a
+   descriptor is treated as remote when it is at least __REM_BIAS.
+   Operations that have no remote implementation here set errno to
+   EOPNOTSUPP and yield -1 (rmtaccess and rmtisatty simply yield 0).
+
+   Descriptor arguments are parenthesized before __REM_BIAS is
+   subtracted, so any expression may be passed.  The PATH and FD
+   arguments are still evaluated more than once: do not pass
+   expressions with side effects.  */
+
+/* When f_force_local is clear, store the result of searching PATH for
+   ':' in __rmt_path and yield nonzero if one was found.  */
+#define _remdev(path) (!f_force_local && (__rmt_path = index ((path), ':')))
+#define _isrmt(fd) ((fd) >= __REM_BIAS)
+
+#define rmtopen(path,oflag,mode) \
+  (_remdev (path) ? __rmt_open ((path), (oflag), (mode), __REM_BIAS) \
+   : open ((path), (oflag), (mode)))
+#define rmtaccess(path, amode) \
+  (_remdev (path) ? 0 : access ((path), (amode)))
+#define rmtstat(path, buf) \
+  (_remdev (path) ? ((errno = EOPNOTSUPP), -1) : stat ((path), (buf)))
+#define rmtcreat(path, mode) \
+  (_remdev (path) ? __rmt_open ((path), 1 | O_CREAT, (mode), __REM_BIAS) \
+   : creat ((path), (mode)))
+#define rmtlstat(path,buf) \
+  (_remdev (path) ? ((errno = EOPNOTSUPP), -1) : lstat ((path), (buf)))
+
+#define rmtread(fd, buf, n) \
+  (_isrmt (fd) ? __rmt_read ((fd) - __REM_BIAS, (buf), (n)) \
+   : read ((fd), (buf), (n)))
+#define rmtwrite(fd, buf, n) \
+  (_isrmt (fd) ? __rmt_write ((fd) - __REM_BIAS, (buf), (n)) \
+   : write ((fd), (buf), (n)))
+#define rmtlseek(fd, off, wh) \
+  (_isrmt (fd) ? __rmt_lseek ((fd) - __REM_BIAS, (off), (wh)) \
+   : lseek ((fd), (off), (wh)))
+#define rmtclose(fd) \
+  (_isrmt (fd) ? __rmt_close ((fd) - __REM_BIAS) : close (fd))
+#ifdef RMTIOCTL
+#define rmtioctl(fd,req,arg) \
+  (_isrmt (fd) ? __rmt_ioctl ((fd) - __REM_BIAS, (req), (arg)) \
+   : ioctl ((fd), (req), (arg)))
+#else
+#define rmtioctl(fd,req,arg) \
+  (_isrmt (fd) ? ((errno = EOPNOTSUPP), -1) : ioctl ((fd), (req), (arg)))
+#endif
+#define rmtdup(fd) \
+  (_isrmt (fd) ? ((errno = EOPNOTSUPP), -1) : dup (fd))
+#define rmtfstat(fd, buf) \
+  (_isrmt (fd) ? ((errno = EOPNOTSUPP), -1) : fstat ((fd), (buf)))
+#define rmtfcntl(fd,cmd,arg) \
+  (_isrmt (fd) ? ((errno = EOPNOTSUPP), -1) : fcntl ((fd), (cmd), (arg)))
+#define rmtisatty(fd) (_isrmt (fd) ? 0 : isatty (fd))
+
+#undef RMTIOCTL
+
+int __rmt_open ();
+int __rmt_close ();
+int __rmt_read ();
+int __rmt_write ();
+long __rmt_lseek ();
+int __rmt_ioctl ();
+#endif /* !NO_REMOTE */
diff --git a/gnu/usr.bin/tar/rtapelib.c b/gnu/usr.bin/tar/rtapelib.c
new file mode 100644
index 000000000000..eece76ffcd0f
--- /dev/null
+++ b/gnu/usr.bin/tar/rtapelib.c
@@ -0,0 +1,582 @@
+/* Functions for communicating with a remote tape drive.
+ Copyright (C) 1988, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* The man page rmt(8) for /etc/rmt documents the remote mag tape
+ protocol which rdump and rrestore use. Unfortunately, the man
+ page is *WRONG*. The author of the routines I'm including originally
+ wrote his code just based on the man page, and it didn't work, so he
+ went to the rdump source to figure out why. The only thing he had to
+ change was to check for the 'F' return code in addition to the 'E',
+ and to separate the various arguments with \n instead of a space. I
+ personally don't think that this is much of a problem, but I wanted to
+ point it out. -- Arnold Robbins
+
+ Originally written by Jeff Lee, modified some by Arnold Robbins.
+ Redone as a library that can replace open, read, write, etc., by
+ Fred Fish, with some additional work by Arnold Robbins.
+ Modified to make all rmtXXX calls into macros for speed by Jay Fenlason.
+ Use -DHAVE_NETDB_H for rexec code, courtesy of Dan Kegel, srs!dan. */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#ifdef HAVE_SYS_MTIO_H
+#include <sys/ioctl.h>
+#include <sys/mtio.h>
+#endif
+
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+
+#include <errno.h>
+#include <setjmp.h>
+#include <sys/stat.h>
+
+#ifndef errno
+extern int errno;
+#endif
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef STDC_HEADERS
+#include <string.h>
+#include <stdlib.h>
+#endif
+
+/* Maximum size of a fully qualified host name. */
+#define MAXHOSTLEN 257
+
+/* Size of buffers for reading and writing commands to rmt.
+ (An arbitrary limit.) */
+#define CMDBUFSIZE 64
+
+#ifndef RETSIGTYPE
+#define RETSIGTYPE void
+#endif
+
+/* Maximum number of simultaneous remote tape connections.
+ (Another arbitrary limit.) */
+#define MAXUNIT 4
+
+/* Return the parent's read side of remote tape connection FILDES. */
+#define READ(fildes) (from_rmt[fildes][0])
+
+/* Return the parent's write side of remote tape connection FILDES. */
+#define WRITE(fildes) (to_rmt[fildes][1])
+
+/* The pipes for receiving data from remote tape drives. */
+static int from_rmt[MAXUNIT][2] =
+{-1, -1, -1, -1, -1, -1, -1, -1};
+
+/* The pipes for sending data to remote tape drives. */
+static int to_rmt[MAXUNIT][2] =
+{-1, -1, -1, -1, -1, -1, -1, -1};
+
+/* Temporary variable used by macros in rmt.h. */
+char *__rmt_path;
+
+/* Tear down remote tape connection FILDES: close both of the parent's
+   descriptors and mark each slot with -1, the value __rmt_open looks
+   for when it searches for an unused connection.  */
+
+static void
+_rmt_shutdown (fildes)
+     int fildes;
+{
+  /* Side we read replies from.  */
+  close (READ (fildes));
+  READ (fildes) = -1;
+
+  /* Side we send commands on.  */
+  close (WRITE (fildes));
+  WRITE (fildes) = -1;
+}
+
+/* Send the remote tape command held in the string BUF over connection
+   FILDES.  SIGPIPE is ignored for the duration of the write and the
+   previous handler is reinstated afterwards.
+   Return 0 if the whole command was written.  Otherwise shut the
+   connection down, set errno to EIO and return -1.  */
+
+static int
+command (fildes, buf)
+     int fildes;
+     char *buf;
+{
+  RETSIGTYPE (*saved_handler) ();
+  int length;
+  long written;
+
+  /* A vanished peer must show up as a failed write, not kill us.  */
+  saved_handler = signal (SIGPIPE, SIG_IGN);
+  length = strlen (buf);
+  written = write (WRITE (fildes), buf, length);
+  signal (SIGPIPE, saved_handler);
+
+  if (written != length)
+    {
+      /* Something went wrong.  Close down and go home.  */
+      _rmt_shutdown (fildes);
+      errno = EIO;
+      return -1;
+    }
+
+  return 0;
+}
+
+/* Read and return the status from remote tape connection FILDES.
+
+   The reply is one line of fewer than CMDBUFSIZE bytes.  After optional
+   leading spaces, `A' introduces the number to return; `E' and `F'
+   introduce an error number and are followed by a message line, which
+   is read and discarded.  `F' additionally shuts the connection down.
+   Anything else -- a failed read, an overlong line, an unknown reply
+   letter -- shuts the connection down and reports EIO.
+
+   If an error occurred, return -1 and set errno.  For `E' and `F'
+   replies errno is the number sent by the remote side.  */
+
+static int
+status (fildes)
+     int fildes;
+{
+  int i;
+  int remote_errno;
+  char c, *cp;
+  char buffer[CMDBUFSIZE];
+
+  /* Read the reply command line, one byte at a time so that nothing
+     beyond the newline is consumed.  */
+
+  for (i = 0, cp = buffer; i < CMDBUFSIZE; i++, cp++)
+    {
+      if (read (READ (fildes), cp, 1) != 1)
+        {
+          _rmt_shutdown (fildes);
+          errno = EIO;
+          return -1;
+        }
+      if (*cp == '\n')
+        {
+          *cp = '\0';
+          break;
+        }
+    }
+
+  /* The buffer filled up without a newline being seen.  */
+  if (i == CMDBUFSIZE)
+    {
+      _rmt_shutdown (fildes);
+      errno = EIO;
+      return -1;
+    }
+
+  /* Check the return status.  */
+
+  for (cp = buffer; *cp; cp++)
+    if (*cp != ' ')
+      break;
+
+  if (*cp == 'E' || *cp == 'F')
+    {
+      /* Remember the remote error number now, and store it in errno
+         only after the calls below, which may themselves set errno.  */
+      remote_errno = atoi (cp + 1);
+
+      /* Skip the error message line.  */
+      while (read (READ (fildes), &c, 1) == 1)
+        if (c == '\n')
+          break;
+
+      if (*cp == 'F')
+        _rmt_shutdown (fildes);
+
+      errno = remote_errno;
+      return -1;
+    }
+
+  /* Check for mis-synced pipes.  */
+
+  if (*cp != 'A')
+    {
+      _rmt_shutdown (fildes);
+      errno = EIO;
+      return -1;
+    }
+
+  /* Got an `A' (success) response.  */
+  return atoi (cp + 1);
+}
+
+#ifdef HAVE_NETDB_H
+/* Execute /etc/rmt as user USER on remote system HOST using rexec.
+   Return a file descriptor of a bidirectional socket for stdin and stdout.
+   If USER is NULL, or an empty string, use the current username.
+   Exit with status 1 if the exec/tcp service cannot be looked up.
+
+   While rexec runs, stdin and stdout are reopened on /dev/tty (or on
+   /dev/null if that fails); afterwards the original files are put back
+   underneath the same two streams.
+
+   By default, this code is not used, since it requires that
+   the user have a .netrc file in his/her home directory, or that the
+   application designer be willing to have rexec prompt for login and
+   password info.  This may be unacceptable, and .rhosts files for use
+   with rsh are much more common on BSD systems.  */
+
+static int
+_rmt_rexec (host, user)
+     char *host;
+     char *user;
+{
+  struct servent *rexecserv;
+  int save_stdin = dup (fileno (stdin));
+  int save_stdout = dup (fileno (stdout));
+  int tape_fd;			/* Return value.  */
+
+  /* When using cpio -o < filename, stdin is no longer the tty.
+     But the rexec subroutine reads the login and the passwd on stdin,
+     to allow remote execution of the command.
+     So, reopen stdin and stdout on /dev/tty before the rexec and
+     give them back their original value after.  */
+  if (freopen ("/dev/tty", "r", stdin) == NULL)
+    freopen ("/dev/null", "r", stdin);
+  if (freopen ("/dev/tty", "w", stdout) == NULL)
+    freopen ("/dev/null", "w", stdout);
+
+  rexecserv = getservbyname ("exec", "tcp");
+  if (NULL == rexecserv)
+    {
+      fprintf (stderr, "exec/tcp: service not available");
+      exit (1);
+    }
+  if (user != NULL && *user == '\0')
+    user = NULL;
+  tape_fd = rexec (&host, rexecserv->s_port, user, NULL,
+                   "/etc/rmt", (int *) NULL);
+
+  /* Give stdin and stdout back their original files.  The streams are
+     kept open and the saved descriptors are duplicated onto the ones
+     the streams use: closing the streams and calling fdopen on the
+     saved descriptors would leave `stdin' and `stdout' themselves
+     closed and leak the saved descriptors.  */
+  fflush (stdout);
+  if (save_stdin >= 0)
+    {
+      dup2 (save_stdin, fileno (stdin));
+      close (save_stdin);
+    }
+  if (save_stdout >= 0)
+    {
+      dup2 (save_stdout, fileno (stdout));
+      close (save_stdout);
+    }
+
+  return tape_fd;
+}
+
+#endif /* HAVE_NETDB_H */
+
+/* Copy characters from *FROM into TO, which has room for SIZE bytes,
+   stopping at the end of the string or before the first character that
+   also appears in STOPS.  On success null-terminate TO, leave *FROM
+   pointing at the character that ended the copy, and return that
+   character (0 at the end of the string).  If the text does not fit,
+   return -1; *FROM is then unchanged and TO's contents are
+   unspecified.  */
+
+static int
+_rmt_copy_field (to, size, from, stops)
+     char *to;
+     int size;
+     char **from;
+     char *stops;
+{
+  char *p;
+  char *s;
+  int n = 0;
+
+  for (p = *from; *p; p++)
+    {
+      for (s = stops; *s && *s != *p; s++)
+        ;
+      if (*s)
+        break;			/* *P is one of the stop characters.  */
+      if (n >= size - 1)
+        return -1;		/* No room left for the terminator.  */
+      to[n++] = *p;
+    }
+
+  to[n] = '\0';
+  *from = p;
+  return *p;
+}
+
+/* Open a magtape device on the system specified in PATH, as the given user.
+   PATH has the form `[user@]system:/dev/????'.
+   If COMPAT is defined, it can also have the form `system[.user]:/dev/????'.
+
+   OFLAG is O_RDONLY, O_WRONLY, etc.
+   MODE is ignored; 0666 is always used.
+
+   If successful, return the remote tape pipe number plus BIAS.
+   On error, return -1.  In particular errno is EMFILE when all MAXUNIT
+   connections are in use, EINVAL when PATH has no `:', and
+   ENAMETOOLONG when the system, user or device part (or the resulting
+   open command) does not fit the fixed-size buffers used here.  A
+   connection whose remote open fails is shut down again, so its slot
+   can be reused.  */
+
+int
+__rmt_open (path, oflag, mode, bias)
+     char *path;
+     int oflag;
+     int mode;
+     int bias;
+{
+  int i, rc, stop, saved_errno;
+  char buffer[CMDBUFSIZE];	/* Command buffer.  */
+  char system[MAXHOSTLEN];	/* The remote host name.  */
+  char device[CMDBUFSIZE];	/* The remote device name.  */
+  char login[CMDBUFSIZE];	/* The remote user name.  */
+  char flags[32];		/* OFLAG in decimal.  */
+
+  /* Find an unused pair of file descriptors.  */
+
+  for (i = 0; i < MAXUNIT; i++)
+    if (READ (i) == -1 && WRITE (i) == -1)
+      break;
+
+  if (i == MAXUNIT)
+    {
+      errno = EMFILE;
+      return -1;
+    }
+
+  /* Pull apart the system and device, and optional user.
+     Don't munge the original string.  Every copy is bounded by the
+     size of its destination.  */
+
+#ifdef COMPAT
+  stop = _rmt_copy_field (system, MAXHOSTLEN, &path, "@.:");
+#else
+  stop = _rmt_copy_field (system, MAXHOSTLEN, &path, "@:");
+#endif
+
+  login[0] = '\0';
+  if (stop == '@')
+    {
+      /* Saw user part of user@host.  Start over.  */
+      if (strlen (system) >= CMDBUFSIZE)
+        stop = -1;
+      else
+        {
+          strcpy (login, system);
+          path++;
+          stop = _rmt_copy_field (system, MAXHOSTLEN, &path, ":");
+        }
+    }
+#ifdef COMPAT
+  else if (stop == '.')
+    {
+      path++;
+      stop = _rmt_copy_field (login, CMDBUFSIZE, &path, ":");
+    }
+#endif
+
+  if (stop == ':')
+    {
+      /* Everything after the colon is the device name.  */
+      path++;
+      if (_rmt_copy_field (device, CMDBUFSIZE, &path, "") == -1)
+        stop = -1;
+    }
+
+  if (stop == -1)
+    {
+      errno = ENAMETOOLONG;
+      return -1;
+    }
+  if (stop != ':')
+    {
+      /* Ran off the end of PATH without finding the colon.  */
+      errno = EINVAL;
+      return -1;
+    }
+
+  /* Make sure the open command will fit in BUFFER before starting
+     anything: `O', the device, a newline, the flags, a newline and
+     the terminating null.  */
+
+  sprintf (flags, "%d", oflag);
+  if (strlen (device) + strlen (flags) + 4 > CMDBUFSIZE)
+    {
+      errno = ENAMETOOLONG;
+      return -1;
+    }
+
+#ifdef HAVE_NETDB_H
+  /* Execute the remote command using rexec.  */
+  READ (i) = WRITE (i) = _rmt_rexec (system, login);
+  if (READ (i) < 0)
+    {
+      /* Keep the slot recognizable as unused.  */
+      READ (i) = WRITE (i) = -1;
+      return -1;
+    }
+#else /* !HAVE_NETDB_H */
+  /* Set up the pipes for the `rsh' command, and fork.  */
+
+  if (pipe (to_rmt[i]) == -1)
+    return -1;
+
+  if (pipe (from_rmt[i]) == -1)
+    {
+      /* Don't leak the first pipe or leave the slot looking busy.  */
+      saved_errno = errno;
+      close (to_rmt[i][0]);
+      close (to_rmt[i][1]);
+      to_rmt[i][0] = to_rmt[i][1] = -1;
+      errno = saved_errno;
+      return -1;
+    }
+
+  rc = fork ();
+  if (rc == -1)
+    {
+      saved_errno = errno;
+      close (to_rmt[i][0]);
+      close (to_rmt[i][1]);
+      close (from_rmt[i][0]);
+      close (from_rmt[i][1]);
+      to_rmt[i][0] = to_rmt[i][1] = -1;
+      from_rmt[i][0] = from_rmt[i][1] = -1;
+      errno = saved_errno;
+      return -1;
+    }
+
+  if (rc == 0)
+    {
+      /* Child.  Standard input comes from the parent's command pipe.  */
+      close (0);
+      dup (to_rmt[i][0]);
+      close (to_rmt[i][0]);
+      close (to_rmt[i][1]);
+
+      /* Standard output goes to the parent's reply pipe.  */
+      close (1);
+      dup (from_rmt[i][1]);
+      close (from_rmt[i][0]);
+      close (from_rmt[i][1]);
+
+      /* Give up any set-id privileges.  The group goes first: once
+         the user id has been given up we may no longer be allowed
+         to change the group id.  */
+      setgid (getgid ());
+      setuid (getuid ());
+
+      /* Try each known location of the remote shell in turn; execl
+         only returns if it failed.  */
+      if (*login)
+        {
+          execl ("/usr/ucb/rsh", "rsh", system, "-l", login,
+                 "/etc/rmt", (char *) 0);
+          execl ("/usr/bin/remsh", "remsh", system, "-l", login,
+                 "/etc/rmt", (char *) 0);
+          execl ("/usr/bin/rsh", "rsh", system, "-l", login,
+                 "/etc/rmt", (char *) 0);
+          execl ("/usr/bsd/rsh", "rsh", system, "-l", login,
+                 "/etc/rmt", (char *) 0);
+          execl ("/usr/bin/nsh", "nsh", system, "-l", login,
+                 "/etc/rmt", (char *) 0);
+        }
+      else
+        {
+          execl ("/usr/ucb/rsh", "rsh", system,
+                 "/etc/rmt", (char *) 0);
+          execl ("/usr/bin/remsh", "remsh", system,
+                 "/etc/rmt", (char *) 0);
+          execl ("/usr/bin/rsh", "rsh", system,
+                 "/etc/rmt", (char *) 0);
+          execl ("/usr/bsd/rsh", "rsh", system,
+                 "/etc/rmt", (char *) 0);
+          execl ("/usr/bin/nsh", "nsh", system,
+                 "/etc/rmt", (char *) 0);
+        }
+
+      /* Bad problems if we get here.  */
+
+      perror ("cannot execute remote shell");
+      _exit (1);
+    }
+
+  /* Parent.  */
+  close (to_rmt[i][0]);
+  close (from_rmt[i][1]);
+#endif /* !HAVE_NETDB_H */
+
+  /* Attempt to open the tape device.  */
+
+  sprintf (buffer, "O%s\n%s\n", device, flags);
+  if (command (i, buffer) == -1 || status (i) == -1)
+    {
+      /* Release the connection so the slot can be used again, while
+         keeping the errno that describes the failure.  */
+      saved_errno = errno;
+      _rmt_shutdown (i);
+      errno = saved_errno;
+      return -1;
+    }
+
+  return i + bias;
+}
+
+/* Send the close command (`C') on remote tape connection FILDES, read
+   the reply, and then shut the connection down.
+   Return the status reported for the close, or -1 if the command could
+   not be sent.  */
+
+int
+__rmt_close (fildes)
+     int fildes;
+{
+  int result = -1;
+
+  if (command (fildes, "C\n") != -1)
+    {
+      result = status (fildes);
+      _rmt_shutdown (fildes);
+    }
+
+  return result;
+}
+
+/* Read up to NBYTE bytes into BUF from remote tape connection FILDES.
+   Return the number of bytes read on success, -1 on error.
+
+   The remote side first reports how many bytes follow; that many bytes
+   are then read from the connection, in as many pieces as it takes.
+   A reported count that is negative or larger than NBYTE, or a failed
+   or empty read of the data, shuts the connection down and sets errno
+   to EIO.  */
+
+int
+__rmt_read (fildes, buf, nbyte)
+     int fildes;
+     char *buf;
+     unsigned int nbyte;
+{
+  int rc, i;
+  int got;			/* Signed, so that -1 from read is seen.  */
+  char buffer[CMDBUFSIZE];
+
+  sprintf (buffer, "R%u\n", nbyte);
+  if (command (fildes, buffer) == -1 || (rc = status (fildes)) == -1)
+    return -1;
+
+  /* Never store more than the caller made room for, whatever the
+     remote side claims to be sending.  */
+  if (rc < 0 || (unsigned int) rc > nbyte)
+    {
+      _rmt_shutdown (fildes);
+      errno = EIO;
+      return -1;
+    }
+
+  for (i = 0; i < rc; i += got, buf += got)
+    {
+      got = read (READ (fildes), buf, rc - i);
+      if (got <= 0)
+        {
+          _rmt_shutdown (fildes);
+          errno = EIO;
+          return -1;
+        }
+    }
+
+  return rc;
+}
+
+/* Write NBYTE bytes from BUF to remote tape connection FILDES.
+   Return the number of bytes written on success, -1 on error.
+
+   The write command announcing NBYTE is sent first, then the data;
+   the value returned is the status the remote side reports afterwards.
+   SIGPIPE is ignored while the data is written.  If the data cannot be
+   written in full the connection is shut down and errno is EIO.  */
+
+int
+__rmt_write (fildes, buf, nbyte)
+     int fildes;
+     char *buf;
+     unsigned int nbyte;
+{
+  char buffer[CMDBUFSIZE];
+  long written;			/* Signed, so that -1 from write is seen.  */
+  RETSIGTYPE (*pipe_handler) ();
+
+  /* NBYTE is unsigned, so print it as such.  */
+  sprintf (buffer, "W%u\n", nbyte);
+  if (command (fildes, buffer) == -1)
+    return -1;
+
+  pipe_handler = signal (SIGPIPE, SIG_IGN);
+  written = write (WRITE (fildes), buf, nbyte);
+  signal (SIGPIPE, pipe_handler);
+
+  if (written >= 0 && (unsigned long) written == nbyte)
+    return status (fildes);
+
+  /* Write error.  */
+  _rmt_shutdown (fildes);
+  errno = EIO;
+  return -1;
+}
+
+/* Imitate lseek on remote tape connection FILDES by sending a seek
+   command carrying OFFSET and WHENCE.
+   Return the status reported by the remote side (the new file offset)
+   if successful, -1 on error.  */
+
+long
+__rmt_lseek (fildes, offset, whence)
+     int fildes;
+     long offset;
+     int whence;
+{
+  char request[CMDBUFSIZE];
+
+  sprintf (request, "L%ld\n%d\n", offset, whence);
+
+  return command (fildes, request) == -1 ? -1 : status (fildes);
+}
+
+/* Perform a raw tape operation on remote tape connection FILDES.
+ Return the results of the ioctl, or -1 on error. */
+
+#ifdef MTIOCTOP
+int
+__rmt_ioctl (fildes, op, arg)
+     int fildes, op;
+     char *arg;
+{
+  /* Only MTIOCTOP and MTIOCGET are supported; any other OP fails with
+     EINVAL.  For MTIOCTOP, ARG points to a struct mtop and the count
+     reported by the remote side is returned.  For MTIOCGET, ARG points
+     to a struct mtget that is filled in from the reply, and 0 is
+     returned.  */
+  char c;
+  int rc, cnt;
+  int total;			/* Number of status bytes received.  */
+  char *p;			/* Where the next piece is stored.  */
+  char buffer[CMDBUFSIZE];
+
+  switch (op)
+    {
+    default:
+      errno = EINVAL;
+      return -1;
+
+    case MTIOCTOP:
+      /* MTIOCTOP is the easy one.  Nothing is transferred in binary.  */
+      sprintf (buffer, "I%d\n%d\n", ((struct mtop *) arg)->mt_op,
+               ((struct mtop *) arg)->mt_count);
+      if (command (fildes, buffer) == -1)
+        return -1;
+      return status (fildes);	/* Return the count.  */
+
+    case MTIOCGET:
+      /* Grab the status and read it directly into the structure.
+         This assumes that the status buffer is not padded
+         and that 2 shorts fit in a long without any word
+         alignment problems; i.e., the whole struct is contiguous.
+         NOTE - this is probably NOT a good assumption.  */
+
+      if (command (fildes, "S") == -1 || (rc = status (fildes)) == -1)
+        return -1;
+
+      /* Refuse a reply that would not fit in the caller's structure.  */
+      if (rc < 0 || rc > (int) sizeof (struct mtget))
+        {
+          _rmt_shutdown (fildes);
+          errno = EIO;
+          return -1;
+        }
+
+      /* Read through a separate pointer and remember the length: ARG
+         must still point at the start of the structure, and the byte
+         count must still be known, for the checks below.  */
+      total = rc;
+      for (p = arg; rc > 0; rc -= cnt, p += cnt)
+        {
+          cnt = read (READ (fildes), p, rc);
+          if (cnt <= 0)
+            {
+              _rmt_shutdown (fildes);
+              errno = EIO;
+              return -1;
+            }
+        }
+
+      /* Check for byte position.  mt_type is a small integer field
+         (normally) so we will check its magnitude.  If it is larger than
+         256, we will assume that the bytes are swapped and go through
+         and reverse all the bytes.  */
+
+      if (((struct mtget *) arg)->mt_type < 256)
+        return 0;
+
+      /* Swap each complete pair of bytes that was received.  */
+      for (cnt = 0; cnt + 1 < total; cnt += 2)
+        {
+          c = arg[cnt];
+          arg[cnt] = arg[cnt + 1];
+          arg[cnt + 1] = c;
+        }
+
+      return 0;
+    }
+}
+
+#endif
diff --git a/gnu/usr.bin/tar/tar.c b/gnu/usr.bin/tar/tar.c
new file mode 100644
index 000000000000..938258233980
--- /dev/null
+++ b/gnu/usr.bin/tar/tar.c
@@ -0,0 +1,1504 @@
+/* Tar -- a tape archiver.
+ Copyright (C) 1988, 1992, 1993 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+ * A tar (tape archiver) program.
+ *
+ * Written by John Gilmore, ihnp4!hoptoad!gnu, starting 25 Aug 85.
+ */
+
+#include <stdio.h>
+#include <sys/types.h> /* Needed for typedefs in tar.h */
+#include "getopt.h"
+
+/*
+ * The following causes "tar.h" to produce definitions of all the
+ * global variables, rather than just "extern" declarations of them.
+ */
+#define TAR_EXTERN /**/
+#include "tar.h"
+
+#include "port.h"
+#include "regex.h"
+#include "fnmatch.h"
+
+/*
+ * We should use a conversion routine that does reasonable error
+ * checking -- atoi doesn't. For now, punt. FIXME.
+ */
+#define intconv atoi
+PTR ck_malloc ();
+PTR ck_realloc ();
+extern int getoldopt ();
+extern void read_and ();
+extern void list_archive ();
+extern void extract_archive ();
+extern void diff_archive ();
+extern void create_archive ();
+extern void update_archive ();
+extern void junk_archive ();
+extern void init_volume_number ();
+extern void closeout_volume_number ();
+
+/* JF */
+extern time_t get_date ();
+
+time_t new_time;
+
+static FILE *namef; /* File to read names from */
+static char **n_argv; /* Argv used by name routines */
+static int n_argc; /* Argc used by name routines */
+static char **n_ind; /* Store an array of names */
+static int n_indalloc; /* How big is the array? */
+static int n_indused; /* How many entries does it have? */
+static int n_indscan; /* How many of the entries have we scanned? */
+
+
+extern FILE *msg_file;
+
+int check_exclude ();
+void add_exclude ();
+void add_exclude_file ();
+void addname ();
+void describe ();
+void diff_init ();
+void extr_init ();
+int is_regex ();
+void name_add ();
+void name_init ();
+void options ();
+char *un_quote_string ();
+
+#ifndef S_ISLNK
+#define lstat stat
+#endif
+
+#ifndef DEFBLOCKING
+#define DEFBLOCKING 20
+#endif
+
+#ifndef DEF_AR_FILE
+#define DEF_AR_FILE "tar.out"
+#endif
+
+/* For long options that unconditionally set a single flag, we have getopt
+ do it. For the others, we share the code for the equivalent short
+ named option, the name of which is stored in the otherwise-unused `val'
+ field of the `struct option'; for long options that have no equivalent
+ short option, we use nongraphic characters as pseudo short option
+ characters, starting (for no particular reason) with character 10. */
+
+/* Table of long options handed to getoldopt by options ().
+   Entries with a non-null flag pointer store 1 into that variable;
+   the others return the value in the last column, which is either the
+   equivalent short option letter or one of the pseudo option codes
+   10..18 that options () switches on.  */
+struct option long_options[] =
+{
+  {"create", 0, 0, 'c'},
+  {"append", 0, 0, 'r'},
+  {"extract", 0, 0, 'x'},
+  {"get", 0, 0, 'x'},
+  {"list", 0, 0, 't'},
+  {"update", 0, 0, 'u'},
+  {"catenate", 0, 0, 'A'},
+  {"concatenate", 0, 0, 'A'},
+  {"compare", 0, 0, 'd'},
+  {"diff", 0, 0, 'd'},
+  {"delete", 0, 0, 14},		/* pseudo code 14 */
+  {"help", 0, 0, 12},		/* pseudo code 12 */
+
+  {"null", 0, 0, 16},		/* pseudo code 16 */
+  {"directory", 1, 0, 'C'},
+  {"record-number", 0, &f_sayblock, 1},
+  {"files-from", 1, 0, 'T'},
+  {"label", 1, 0, 'V'},
+  {"exclude-from", 1, 0, 'X'},
+  {"exclude", 1, 0, 15},	/* pseudo code 15 */
+  {"file", 1, 0, 'f'},
+  {"block-size", 1, 0, 'b'},
+  {"version", 0, 0, 11},	/* pseudo code 11 */
+  {"verbose", 0, 0, 'v'},
+  {"totals", 0, &f_totals, 1},
+
+  {"read-full-blocks", 0, &f_reblock, 1},
+  {"starting-file", 1, 0, 'K'},
+  {"to-stdout", 0, &f_exstdout, 1},
+  {"ignore-zeros", 0, &f_ignorez, 1},
+  {"keep-old-files", 0, 0, 'k'},
+  {"same-permissions", 0, &f_use_protection, 1},
+  {"preserve-permissions", 0, &f_use_protection, 1},
+  {"modification-time", 0, &f_modified, 1},
+  {"preserve", 0, 0, 10},	/* pseudo code 10 */
+  {"same-order", 0, &f_sorted_names, 1},
+  {"same-owner", 0, &f_do_chown, 1},
+  {"preserve-order", 0, &f_sorted_names, 1},
+
+  {"newer", 1, 0, 'N'},
+  {"after-date", 1, 0, 'N'},
+  {"newer-mtime", 1, 0, 13},	/* pseudo code 13 */
+  {"incremental", 0, 0, 'G'},
+  {"listed-incremental", 1, 0, 'g'},
+  {"multi-volume", 0, &f_multivol, 1},
+  {"info-script", 1, 0, 'F'},
+  {"new-volume-script", 1, 0, 'F'},
+  {"absolute-paths", 0, &f_absolute_paths, 1},
+  {"interactive", 0, &f_confirm, 1},
+  {"confirmation", 0, &f_confirm, 1},
+
+  {"verify", 0, &f_verify, 1},
+  {"dereference", 0, &f_follow_links, 1},
+  {"one-file-system", 0, &f_local_filesys, 1},
+  {"old-archive", 0, 0, 'o'},
+  {"portability", 0, 0, 'o'},
+  {"compress", 0, 0, 'Z'},
+  {"uncompress", 0, 0, 'Z'},
+  {"block-compress", 0, &f_compress_block, 1},
+  {"gzip", 0, 0, 'z'},
+  {"ungzip", 0, 0, 'z'},
+  {"use-compress-program", 1, 0, 18},	/* pseudo code 18 */
+
+  /* "same-permissions" used to be repeated here; the identical entry
+     above already covers it, and a second copy can make abbreviations
+     of the option look ambiguous to the option parser.  */
+  {"sparse", 0, &f_sparse_files, 1},
+  {"tape-length", 1, 0, 'L'},
+  {"remove-files", 0, &f_remove_files, 1},
+  {"ignore-failed-read", 0, &f_ignore_failed_read, 1},
+  {"checkpoint", 0, &f_checkpoint, 1},
+  {"show-omitted-dirs", 0, &f_show_omitted_dirs, 1},
+  {"volno-file", 1, 0, 17},	/* pseudo code 17 */
+  {"force-local", 0, &f_force_local, 1},
+  {"atime-preserve", 0, &f_atime_preserve, 1},
+
+  {0, 0, 0, 0}			/* End-of-table marker.  */
+};
+
+/* Compile the volume label pattern held in f_volhdr into a freshly
+   allocated label_pattern.  Returns zero on success.  On failure the
+   regex library's message is printed, `errors' is incremented, and a
+   nonzero value is returned.  */
+static int
+compile_label_pattern ()
+{
+  const char *failure;
+
+  label_pattern = (struct re_pattern_buffer *)
+    ck_malloc (sizeof *label_pattern);
+  failure = re_compile_pattern (f_volhdr, strlen (f_volhdr),
+				label_pattern);
+  if (!failure)
+    return 0;
+
+  fprintf (stderr, "Bad regular expression: %s\n",
+	   failure);
+  errors++;
+  return 1;
+}
+
+/*
+ * Main routine for tar: parse the options, run the one operation
+ * selected by cmd_mode, and exit with the accumulated error count.
+ */
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  extern char version_string[];
+
+  tar = argv[0];		/* JF: was "tar" Set program name */
+  filename_terminator = '\n';
+  errors = 0;
+
+  options (argc, argv);
+
+  /* Set up the name source unless n_argv has already been filled in.  */
+  if (!n_argv)
+    name_init (argc, argv);
+
+  if (f_volno_file)
+    init_volume_number ();
+
+  switch (cmd_mode)
+    {
+    case CMD_CAT:
+    case CMD_UPDATE:
+    case CMD_APPEND:
+      update_archive ();
+      break;
+
+    case CMD_DELETE:
+      junk_archive ();
+      break;
+
+    case CMD_CREATE:
+      create_archive ();
+      if (f_totals)
+	fprintf (stderr, "Total bytes written: %d\n", tot_written);
+      break;
+
+    case CMD_EXTRACT:
+      /* A bad label pattern abandons the extraction.  */
+      if (f_volhdr && compile_label_pattern ())
+	break;
+      extr_init ();
+      read_and (extract_archive);
+      break;
+
+    case CMD_LIST:
+      /* A bad label pattern abandons the listing.  */
+      if (f_volhdr && compile_label_pattern ())
+	break;
+      read_and (list_archive);
+#if 0
+      if (!errors)
+	errors = different;
+#endif
+      break;
+
+    case CMD_DIFF:
+      diff_init ();
+      read_and (diff_archive);
+      break;
+
+    case CMD_VERSION:
+      fprintf (stderr, "%s\n", version_string);
+      break;
+
+    case CMD_NONE:
+      msg ("you must specify exactly one of the r, c, t, x, or d options\n");
+      fprintf (stderr, "For more information, type ``%s --help''.\n", tar);
+      exit (EX_ARGSBAD);
+    }
+
+  if (f_volno_file)
+    closeout_volume_number ();
+  exit (errors);
+  /* NOTREACHED */
+}
+
+
+/* Append NAME to the ar_files array, doubling the array first when it
+   is full.  The array is grown with ck_realloc so that the names
+   already stored in it are kept.  */
+static void
+add_archive_name (name)
+     char *name;
+{
+  if (n_ar_files == ar_files_len)
+    {
+      ar_files_len *= 2;
+      ar_files = (char **) ck_realloc (ar_files,
+				       sizeof (char *) * ar_files_len);
+    }
+  ar_files[n_ar_files++] = name;
+}
+
+/*
+ * Parse the options for tar.
+ *
+ * Sets the default blocking factor and an empty archive-file list,
+ * then walks the command line with getoldopt, recording each option
+ * in the corresponding global.  Exits with EX_ARGSBAD on an unknown
+ * or conflicting option.  After parsing, computes blocksize and fills
+ * in a default archive name (from $TAPE, else DEF_AR_FILE) when none
+ * was given.
+ */
+void
+options (argc, argv)
+     int argc;
+     char **argv;
+{
+  register int c;		/* Option letter */
+  int ind = -1;
+
+  /* Set default option values */
+  blocking = DEFBLOCKING;	/* From Makefile */
+  ar_files = (char **) ck_malloc (sizeof (char *) * 10);
+  ar_files_len = 10;
+  n_ar_files = 0;
+  cur_ar_file = 0;
+
+  /* Parse options */
+  while ((c = getoldopt (argc, argv,
+	       "-01234567Ab:BcC:df:F:g:GhikK:lL:mMN:oOpPrRsStT:uvV:wWxX:zZ",
+			 long_options, &ind)) != EOF)
+    {
+      switch (c)
+	{
+	case 0:		/* long options that set a single flag */
+	  break;
+	case 1:
+	  /* File name or non-parsed option */
+	  name_add (optarg);
+	  break;
+	case 'C':
+	  /* Queue "-C DIR" as two names; it is acted on later, when the
+	     names are consumed.  */
+	  name_add ("-C");
+	  name_add (optarg);
+	  break;
+	case 10:		/* preserve */
+	  f_use_protection = f_sorted_names = 1;
+	  break;
+	case 11:		/* version */
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_VERSION;
+	  break;
+	case 12:		/* help */
+	  printf ("This is GNU tar, the tape archiving program.\n");
+	  describe ();
+	  exit (1);
+	case 13:		/* newer-mtime */
+	  /* Incremented here and again at get_newer, so this option
+	     leaves f_new_files one higher than -N does.  */
+	  f_new_files++;
+	  goto get_newer;
+
+	case 14:		/* Delete in the archive */
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_DELETE;
+	  break;
+
+	case 15:		/* exclude */
+	  f_exclude++;
+	  add_exclude (optarg);
+	  break;
+
+	case 16:		/* -T reads null terminated filenames. */
+	  filename_terminator = '\0';
+	  break;
+
+	case 17:		/* volno-file */
+	  f_volno_file = optarg;
+	  break;
+
+	case 18:		/* use-compress-program */
+	  if (f_compressprog)
+	    {
+	      msg ("Only one compression option permitted\n");
+	      exit (EX_ARGSBAD);
+	    }
+	  f_compressprog = optarg;
+	  break;
+
+	case 'g':		/* We are making a GNU dump; save
+				   directories at the beginning of
+				   the archive, and include in each
+				   directory its contents */
+	  if (f_oldarch)
+	    goto badopt;
+	  f_gnudump++;
+	  gnu_dumpfile = optarg;
+	  break;
+
+
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	  {
+	    /* JF this'll have to be modified for other
+	       systems, of course! */
+	    int d, add;
+	    /* NOTE(review): every drive option stores a pointer to this
+	       one static buffer, so giving several of them leaves all
+	       entries naming the last drive.  */
+	    static char buf[50];
+
+	    d = getoldopt (argc, argv, "lmh");
+#ifdef MAYBEDEF
+	    sprintf (buf, "/dev/rmt/%d%c", c, d);
+#else
+#ifndef LOW_NUM
+#define LOW_NUM 0
+#define MID_NUM 8
+#define HGH_NUM 16
+#endif
+	    if (d == 'l')
+	      add = LOW_NUM;
+	    else if (d == 'm')
+	      add = MID_NUM;
+	    else if (d == 'h')
+	      add = HGH_NUM;
+	    else
+	      goto badopt;
+
+	    sprintf (buf, "/dev/rmt%d", add + c - '0');
+#endif
+	    add_archive_name (buf);
+	  }
+	  break;
+
+	case 'A':		/* Arguments are tar files,
+				   just cat them onto the end
+				   of the archive. */
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_CAT;
+	  break;
+
+	case 'b':		/* Set blocking factor */
+	  blocking = intconv (optarg);
+	  break;
+
+	case 'B':		/* Try to reblock input */
+	  f_reblock++;		/* For reading 4.2BSD pipes */
+	  break;
+
+	case 'c':		/* Create an archive */
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_CREATE;
+	  break;
+
+	case 'd':		/* Find difference tape/disk */
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_DIFF;
+	  break;
+
+	case 'f':		/* Use ar_file for the archive */
+	  add_archive_name (optarg);
+	  break;
+
+	case 'F':
+	  /* Since -F is only useful with -M , make it implied */
+	  f_run_script_at_end++;	/* run this script at the end */
+	  info_script = optarg;	/* of each tape */
+	  f_multivol++;
+	  break;
+
+	case 'G':		/* We are making a GNU dump; save
+				   directories at the beginning of
+				   the archive, and include in each
+				   directory its contents */
+	  if (f_oldarch)
+	    goto badopt;
+	  f_gnudump++;
+	  gnu_dumpfile = 0;
+	  break;
+
+	case 'h':
+	  f_follow_links++;	/* follow symbolic links */
+	  break;
+
+	case 'i':
+	  f_ignorez++;		/* Ignore zero records (eofs) */
+	  /*
+	   * This can't be the default, because Unix tar
+	   * writes two records of zeros, then pads out the
+	   * block with garbage.
+	   */
+	  break;
+
+	case 'k':		/* Don't overwrite files */
+#ifdef NO_OPEN3
+	  msg ("can't keep old files on this system");
+	  exit (EX_ARGSBAD);
+#else
+	  f_keep++;
+#endif
+	  break;
+
+	case 'K':
+	  f_startfile++;
+	  addname (optarg);
+	  break;
+
+	case 'l':		/* When dumping directories, don't
+				   dump files/subdirectories that are
+				   on other filesystems. */
+	  f_local_filesys++;
+	  break;
+
+	case 'L':
+	  tape_length = intconv (optarg);
+	  f_multivol++;
+	  break;
+	case 'm':
+	  f_modified++;
+	  break;
+
+	case 'M':		/* Make Multivolume archive:
+				   When we can't write any more
+				   into the archive, re-open it,
+				   and continue writing */
+	  f_multivol++;
+	  break;
+
+	case 'N':		/* Only write files newer than X */
+	get_newer:
+	  f_new_files++;
+	  new_time = get_date (optarg, (PTR) 0);
+	  if (new_time == (time_t) - 1)
+	    {
+	      msg ("invalid date format `%s'", optarg);
+	      exit (EX_ARGSBAD);
+	    }
+	  break;
+
+	case 'o':		/* Generate old archive */
+	  if (f_gnudump /* || f_dironly */ )
+	    goto badopt;
+	  f_oldarch++;
+	  break;
+
+	case 'O':
+	  f_exstdout++;
+	  break;
+
+	case 'p':
+	  f_use_protection++;
+	  break;
+
+	case 'P':
+	  f_absolute_paths++;
+	  break;
+
+	case 'r':		/* Append files to the archive */
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_APPEND;
+	  break;
+
+	case 'R':
+	  f_sayblock++;		/* Print block #s for debug */
+	  break;		/* of bad tar archives */
+
+	case 's':
+	  f_sorted_names++;	/* Names to extr are sorted */
+	  break;
+
+	case 'S':		/* deal with sparse files */
+	  f_sparse_files++;
+	  break;
+	case 't':
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_LIST;
+	  f_verbose++;		/* "t" output == "cv" or "xv" */
+	  break;
+
+	case 'T':
+	  name_file = optarg;
+	  f_namefile++;
+	  break;
+
+	case 'u':		/* Append files to the archive that
+				   aren't there, or are newer than the
+				   copy in the archive */
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_UPDATE;
+	  break;
+
+	case 'v':
+	  f_verbose++;
+	  break;
+
+	case 'V':
+	  f_volhdr = optarg;
+	  break;
+
+	case 'w':
+	  f_confirm++;
+	  break;
+
+	case 'W':
+	  f_verify++;
+	  break;
+
+	case 'x':		/* Extract files from the archive */
+	  if (cmd_mode != CMD_NONE)
+	    goto badopt;
+	  cmd_mode = CMD_EXTRACT;
+	  break;
+
+	case 'X':
+	  f_exclude++;
+	  add_exclude_file (optarg);
+	  break;
+
+	case 'z':
+	  if (f_compressprog)
+	    {
+	      msg ("Only one compression option permitted\n");
+	      exit (EX_ARGSBAD);
+	    }
+	  f_compressprog = "gzip";
+	  break;
+
+	case 'Z':
+	  if (f_compressprog)
+	    {
+	      msg ("Only one compression option permitted\n");
+	      exit (EX_ARGSBAD);
+	    }
+	  f_compressprog = "compress";
+	  break;
+
+	case '?':
+	badopt:
+	  msg ("Unknown option.  Use '%s --help' for a complete list of options.", tar);
+	  exit (EX_ARGSBAD);
+
+	}
+    }
+
+  blocksize = blocking * RECORDSIZE;
+  if (n_ar_files == 0)
+    {
+      n_ar_files = 1;
+      ar_files[0] = getenv ("TAPE");	/* From environment, or */
+      if (ar_files[0] == 0)
+	ar_files[0] = DEF_AR_FILE;	/* From Makefile */
+    }
+  if (n_ar_files > 1 && !f_multivol)
+    {
+      msg ("Multiple archive files requires --multi-volume\n");
+      exit (EX_ARGSBAD);
+    }
+  if (f_compress_block && !f_compressprog)
+    {
+      msg ("You must use a compression option (--gzip, --compress\n\
+or --use-compress-program) with --block-compress.\n");
+      exit (EX_ARGSBAD);
+    }
+}
+
+
+/*
+ * Print as much help as the user's gonna get.
+ *
+ * We have to sprinkle in the KLUDGE lines because too many compilers
+ * cannot handle character strings longer than about 512 bytes. Yuk!
+ * In particular, MS-DOS and Xenix MSC and PDP-11 V7 Unix have this
+ * problem.
+ */
+void
+describe ()
+{
+ /* Write the option summary to standard output.  The text is emitted
+    in several separate calls so that no single string literal gets
+    too long; DEFBLOCKING and DEF_AR_FILE are substituted into the
+    two fprintf calls.  */
+ puts ("choose one of the following:");
+ fputs ("\
+-A, --catenate,\n\
+ --concatenate append tar files to an archive\n\
+-c, --create create a new archive\n\
+-d, --diff,\n\
+ --compare find differences between archive and file system\n\
+--delete delete from the archive (not for use on mag tapes!)\n\
+-r, --append append files to the end of an archive\n\
+-t, --list list the contents of an archive\n\
+-u, --update only append files that are newer than copy in archive\n\
+-x, --extract,\n\
+ --get extract files from an archive\n", stdout);
+
+ fprintf (stdout, "\
+Other options:\n\
+--atime-preserve don't change access times on dumped files\n\
+-b, --block-size N block size of Nx512 bytes (default N=%d)\n", DEFBLOCKING);
+ fputs ("\
+-B, --read-full-blocks reblock as we read (for reading 4.2BSD pipes)\n\
+-C, --directory DIR change to directory DIR\n\
+--checkpoint print directory names while reading the archive\n\
+", stdout); /* KLUDGE */
+ fprintf (stdout, "\
+-f, --file [HOSTNAME:]F use archive file or device F (default %s)\n",
+ DEF_AR_FILE);
+ fputs ("\
+--force-local archive file is local even if has a colon\n\
+-F, --info-script F\n\
+ --new-volume-script F run script at end of each tape (implies -M)\n\
+-G, --incremental create/list/extract old GNU-format incremental backup\n\
+-g, --listed-incremental F create/list/extract new GNU-format incremental backup\n\
+-h, --dereference don't dump symlinks; dump the files they point to\n\
+-i, --ignore-zeros ignore blocks of zeros in archive (normally mean EOF)\n\
+--ignore-failed-read don't exit with non-zero status on unreadable files\n\
+-k, --keep-old-files keep existing files; don't overwrite them from archive\n\
+-K, --starting-file F begin at file F in the archive\n\
+-l, --one-file-system stay in local file system when creating an archive\n\
+-L, --tape-length N change tapes after writing N*1024 bytes\n\
+", stdout); /* KLUDGE */
+ fputs ("\
+-m, --modification-time don't extract file modified time\n\
+-M, --multi-volume create/list/extract multi-volume archive\n\
+-N, --after-date DATE,\n\
+ --newer DATE only store files newer than DATE\n\
+-o, --old-archive,\n\
+ --portability write a V7 format archive, rather than ANSI format\n\
+-O, --to-stdout extract files to standard output\n\
+-p, --same-permissions,\n\
+ --preserve-permissions extract all protection information\n\
+-P, --absolute-paths don't strip leading `/'s from file names\n\
+--preserve like -p -s\n\
+", stdout); /* KLUDGE */
+ fputs ("\
+-R, --record-number show record number within archive with each message\n\
+--remove-files remove files after adding them to the archive\n\
+-s, --same-order,\n\
+ --preserve-order list of names to extract is sorted to match archive\n\
+--same-owner create extracted files with the same ownership \n\
+-S, --sparse handle sparse files efficiently\n\
+-T, --files-from F get names to extract or create from file F\n\
+--null -T reads null-terminated names, disable -C\n\
+--totals print total bytes written with --create\n\
+-v, --verbose verbosely list files processed\n\
+-V, --label NAME create archive with volume name NAME\n\
+--version print tar program version number\n\
+-w, --interactive,\n\
+ --confirmation ask for confirmation for every action\n\
+", stdout); /* KLUDGE */
+ fputs ("\
+-W, --verify attempt to verify the archive after writing it\n\
+--exclude FILE exclude file FILE\n\
+-X, --exclude-from FILE exclude files listed in FILE\n\
+-Z, --compress,\n\
+ --uncompress filter the archive through compress\n\
+-z, --gzip,\n\
+ --ungzip filter the archive through gzip\n\
+--use-compress-program PROG\n\
+ filter the archive through PROG (which must accept -d)\n\
+--block-compress block the output of compression program for tapes\n\
+-[0-7][lmh] specify drive and density\n\
+", stdout);
+}
+
+/* Remember NAME (the pointer itself, not a copy) at the end of the
+   n_ind array, enlarging the array by ten slots whenever it is full.  */
+void
+name_add (name)
+     char *name;
+{
+  if (n_indused == n_indalloc)
+    {
+      n_indalloc += 10;
+      /* The first allocation has nothing to resize yet.  */
+      if (n_indused)
+	n_ind = (char **) ck_realloc (n_ind, n_indalloc * sizeof (char *));
+      else
+	n_ind = (char **) ck_malloc (n_indalloc * sizeof (char *));
+    }
+
+  n_ind[n_indused] = name;
+  n_indused++;
+}
+
+/*
+ * Set up to gather file names for tar.
+ *
+ * They can either come from stdin or from argv.
+ */
+void
+name_init (argc, argv)
+     int argc;
+     char **argv;
+{
+  if (!f_namefile)
+    {
+      /* Get file names from argv, after options. */
+      n_argc = argc;
+      n_argv = argv;
+      return;
+    }
+
+  /* With -T, no further names may follow on the command line.  */
+  if (optind < argc)
+    {
+      msg ("too many args with -T option");
+      exit (EX_ARGSBAD);
+    }
+
+  /* A name file of "-" means standard input.  */
+  if (strcmp (name_file, "-") == 0)
+    {
+      namef = stdin;
+      return;
+    }
+
+  namef = fopen (name_file, "r");
+  if (namef == NULL)
+    {
+      msg_perror ("can't open file %s", name_file);
+      exit (EX_BADFILE);
+    }
+}
+
+/* Read the next filename from STREAM and null-terminate it.
+ Put it into BUFFER, reallocating and adjusting *PBUFFER_SIZE if necessary.
+ Return the new value for BUFFER, or NULL at end of file. */
+
+char *
+read_name_from_file (buffer, pbuffer_size, stream)
+     char *buffer;
+     size_t *pbuffer_size;
+     FILE *stream;
+{
+  register int ch;
+  register int used = 0;	/* Characters stored so far.  */
+  register size_t capacity = *pbuffer_size;
+
+  for (;;)
+    {
+      ch = getc (stream);
+      if (ch == EOF || ch == filename_terminator)
+	break;
+
+      /* Out of room: grow by NAMSIZ (two spare bytes are always
+	 allocated beyond the recorded capacity).  */
+      if (used == capacity)
+	{
+	  capacity += NAMSIZ;
+	  buffer = ck_realloc (buffer, capacity + 2);
+	}
+      buffer[used] = ch;
+      used++;
+    }
+
+  /* End of file with nothing read: there are no more names.  */
+  if (used == 0 && ch == EOF)
+    return NULL;
+
+  if (used == capacity)
+    {
+      capacity += NAMSIZ;
+      buffer = ck_realloc (buffer, capacity + 2);
+    }
+  buffer[used] = '\0';
+  *pbuffer_size = capacity;
+  return buffer;
+}
+
+/*
+ * Get the next name from argv or the name file.
+ *
+ * Result is in static storage and can't be relied upon across two calls.
+ *
+ * If CHANGE_DIRS is non-zero, treat a filename of the form "-C" as
+ * meaning that the next filename is the name of a directory to change to.
+ * If `filename_terminator' is '\0', CHANGE_DIRS is effectively always 0.
+ */
+
+char *
+name_next (change_dirs)
+     int change_dirs;
+{
+  static char *buffer;		/* Holding pattern */
+  /* Must be a size_t: its address is handed to read_name_from_file,
+     which takes a `size_t *'.  */
+  static size_t buffer_siz;
+  register char *p;
+  register char *end;
+  register int next_name_is_dir = 0;	/* Previous name was "-C".  */
+
+  if (buffer_siz == 0)
+    {
+      buffer = ck_malloc (NAMSIZ + 2);
+      buffer_siz = NAMSIZ;
+    }
+  if (filename_terminator == '\0')
+    change_dirs = 0;
+
+  if (namef == NULL)
+    {
+      /* Names come first from the list built by name_add, then from
+	 argv after the options.  */
+      for (;;)
+	{
+	  if (n_indscan < n_indused)
+	    p = n_ind[n_indscan++];
+	  else if (optind < n_argc)
+	    /* Names come from argv, after options */
+	    p = n_argv[optind++];
+	  else
+	    {
+	      if (next_name_is_dir)
+		msg ("Missing filename after -C");
+	      return NULL;
+	    }
+
+	  /* JF trivial support for -C option.  I don't know if
+	     chdir'ing at this point is dangerous or not.
+	     It seems to work, which is all I ask. */
+	  if (next_name_is_dir)
+	    {
+	      if (chdir (p) < 0)
+		msg_perror ("Can't chdir to %s", p);
+	      next_name_is_dir = 0;
+	      continue;
+	    }
+	  if (change_dirs && p[0] == '-' && p[1] == 'C' && p[2] == '\0')
+	    {
+	      next_name_is_dir = 1;
+	      continue;
+	    }
+	  /* End of JF quick -C hack */
+
+	  return un_quote_string (p);
+	}
+    }
+
+  /* Names come from the name file.  */
+  while ((p = read_name_from_file (buffer, &buffer_siz, namef)) != NULL)
+    {
+      buffer = p;		/* The buffer may have been reallocated.  */
+      if (*p == '\0')
+	continue;		/* Ignore empty lines. */
+
+      end = p + strlen (p) - 1;
+      while (end > p && *end == '/')	/* Zap trailing "/"s. */
+	*end-- = '\0';
+
+      if (next_name_is_dir)
+	{
+	  if (chdir (p) < 0)
+	    msg_perror ("Can't change to directory %s", p);
+	  next_name_is_dir = 0;
+	  continue;
+	}
+      if (change_dirs && p[0] == '-' && p[1] == 'C' && p[2] == '\0')
+	{
+	  next_name_is_dir = 1;
+	  continue;
+	}
+      return un_quote_string (p);
+    }
+  return NULL;
+}
+
+
+/*
+ * Close the name file, if any.
+ */
+void
+name_close ()
+{
+  /* Nothing to close when no name file was opened, and standard
+     input is deliberately left open.  */
+  if (namef == NULL || namef == stdin)
+    return;
+
+  fclose (namef);
+}
+
+
+/*
+ * Gather names in a list for scanning.
+ * Could hash them later if we really care.
+ *
+ * If the names are already sorted to match the archive, we just
+ * read them one by one. name_gather reads the first one, and it
+ * is called by name_match as appropriate to read the next ones.
+ * At EOF, the last name read is just left in the buffer.
+ * This option lets users of small machines extract an arbitrary
+ * number of files by doing "tar t" and editing down the list of files.
+ */
+void
+name_gather ()
+{
+  register char *p;
+  static struct name *namebuf;	/* One-name buffer */
+  static int namelen;		/* Name capacity of namebuf; 0 = none yet */
+  static char *chdir_name;
+
+  if (!f_sorted_names)
+    {
+      /* Non sorted names -- read them all in */
+      while ((p = name_next (0)) != 0)
+	addname (p);
+      return;
+    }
+
+  /* Sorted names: keep exactly one name, in namebuf.  */
+  if (!namelen)
+    {
+      namelen = NAMSIZ;
+      namebuf = (struct name *) ck_malloc (sizeof (struct name) + NAMSIZ);
+      /* name_match examines these fields of every entry, so give them
+	 defined values (nothing here shows ck_malloc clearing memory).  */
+      namebuf->fake = 0;
+      namebuf->regexp = 0;
+      namebuf->firstch = 0;
+      namebuf->change_dir = 0;
+      namebuf->dir_contents = 0;
+    }
+
+  p = name_next (0);
+  if (!p)
+    return;
+
+  if (*p == '-' && p[1] == 'C' && p[2] == '\0')
+    {
+      char *dir = name_next (0);
+
+      if (!dir)
+	{
+	  msg ("Missing file name after -C");
+	  exit (EX_ARGSBAD);
+	}
+      /* name_next's result lives in static storage that the next call
+	 may overwrite, so keep a private copy of the directory before
+	 asking for the file name.  The copy is never freed because
+	 namebuf->change_dir keeps pointing at it.  */
+      chdir_name = (char *) ck_malloc (strlen (dir) + 1);
+      strcpy (chdir_name, dir);
+
+      p = name_next (0);
+      if (!p)
+	{
+	  msg ("Missing file name after -C");
+	  exit (EX_ARGSBAD);
+	}
+      namebuf->change_dir = chdir_name;
+    }
+
+  namebuf->length = strlen (p);
+  if (namebuf->length >= namelen)
+    {
+      namebuf = (struct name *) ck_realloc (namebuf, sizeof (struct name) + namebuf->length);
+      namelen = namebuf->length;
+    }
+  strncpy (namebuf->name, p, namebuf->length);
+  namebuf->name[namebuf->length] = 0;
+  namebuf->next = (struct name *) NULL;
+  namebuf->found = 0;
+  namelist = namebuf;
+  namelast = namelist;
+}
+
+/*
+ * Add a name to the namelist.
+ */
+void
+addname (name)
+     char *name;		/* pointer to name */
+{
+  register int i;		/* Length of string */
+  register struct name *p;	/* Current struct pointer */
+  /* Directory named by the most recent -C; it is attached to this and
+     to every later entry until another -C replaces it.  */
+  static char *chdir_name;
+  char *new_name ();
+
+  if (name[0] == '-' && name[1] == 'C' && name[2] == '\0')
+    {
+      char *dir = name_next (0);
+
+      if (!dir)
+	{
+	  msg ("Missing file name after -C");
+	  exit (EX_ARGSBAD);
+	}
+
+      /* name_next's result lives in static storage that the next call
+	 may overwrite, so the directory is turned into a private,
+	 heap-allocated string before the file name is fetched.  */
+      if (dir[0] != '/')
+	{
+	  /* Relative directory: make it absolute using the current
+	     working directory.  */
+	  char *path = ck_malloc (PATH_MAX);
+#if defined(__MSDOS__) || defined(HAVE_GETCWD) || defined(_POSIX_VERSION)
+	  if (!getcwd (path, PATH_MAX))
+	    {
+	      msg ("Couldn't get current directory.");
+	      exit (EX_SYSTEM);
+	    }
+#else
+	  char *getwd ();
+
+	  if (!getwd (path))
+	    {
+	      msg ("Couldn't get current directory: %s", path);
+	      exit (EX_SYSTEM);
+	    }
+#endif
+	  chdir_name = new_name (path, dir);
+	  free (path);
+	}
+      else
+	{
+	  chdir_name = (char *) ck_malloc (strlen (dir) + 1);
+	  strcpy (chdir_name, dir);
+	}
+
+      /* May be NULL, in which case a nameless "fake" entry that only
+	 carries the directory change is added below.  */
+      name = name_next (0);
+    }
+
+  if (name)
+    {
+      i = strlen (name);
+      /*NOSTRICT*/
+      p = (struct name *) malloc ((unsigned) (sizeof (struct name) + i));
+    }
+  else
+    p = (struct name *) malloc ((unsigned) (sizeof (struct name)));
+  if (!p)
+    {
+      if (name)
+	msg ("cannot allocate mem for name '%s'.", name);
+      else
+	msg ("cannot allocate mem for chdir record.");
+      exit (EX_SYSTEM);
+    }
+  p->next = (struct name *) NULL;
+  if (name)
+    {
+      p->fake = 0;
+      p->length = i;
+      strncpy (p->name, name, i);
+      p->name[i] = '\0';	/* Null term */
+    }
+  else
+    p->fake = 1;
+  p->found = 0;
+  p->regexp = 0;		/* Assume not a regular expression */
+  p->firstch = 1;		/* Assume first char is literal */
+  p->change_dir = chdir_name;
+  p->dir_contents = 0;		/* JF */
+  if (name)
+    {
+      if (index (name, '*') || index (name, '[') || index (name, '?'))
+	{
+	  p->regexp = 1;	/* No, it's a regexp */
+	  if (name[0] == '*' || name[0] == '[' || name[0] == '?')
+	    p->firstch = 0;	/* Not even 1st char literal */
+	}
+    }
+
+  /* Link the new entry at the tail of the list.  */
+  if (namelast)
+    namelast->next = p;
+  namelast = p;
+  if (!namelist)
+    namelist = p;
+}
+
+/* Record that the namelist entry NLP matched: mark it found, drop the
+   name list when f_startfile is set, and change to the entry's
+   directory if it has one.  */
+static void
+note_name_matched (nlp)
+     struct name *nlp;
+{
+  /* Fetch the directory first: NLP may be the head of the list, which
+     is freed just below.  */
+  char *dir = nlp->change_dir;
+
+  nlp->found = 1;		/* Remember it matched */
+  if (f_startfile)
+    {
+      free ((void *) namelist);
+      namelist = 0;
+    }
+  if (dir && chdir (dir))
+    msg_perror ("Can't change to directory %s", dir);
+}
+
+/*
+ * Return nonzero if name P (from an archive) matches any name from
+ * the namelist, zero if not.
+ *
+ * An empty namelist matches everything.  A match may change the
+ * current directory (the entry's change_dir) as a side effect.
+ */
+int
+name_match (p)
+     register char *p;
+{
+  register struct name *nlp;
+  register int len;
+
+again:
+  if (0 == (nlp = namelist))	/* Empty namelist is easy */
+    return 1;
+  if (nlp->fake)
+    {
+      /* A leading nameless entry only carries a directory change;
+	 apply it, then treat the list as empty.  */
+      if (nlp->change_dir && chdir (nlp->change_dir))
+	msg_perror ("Can't change to directory %s", nlp->change_dir);
+      namelist = 0;
+      return 1;
+    }
+  len = strlen (p);
+  for (; nlp != 0; nlp = nlp->next)
+    {
+      /* If first chars don't match, quick skip */
+      if (nlp->firstch && nlp->name[0] != p[0])
+	continue;
+
+      /* Regular expressions (shell globbing, actually). */
+      if (nlp->regexp)
+	{
+	  if (fnmatch (nlp->name, p, FNM_LEADING_DIR) == 0)
+	    {
+	      note_name_matched (nlp);
+	      return 1;		/* We got a match */
+	    }
+	  continue;
+	}
+
+      /* Plain Old Strings */
+      if (nlp->length <= len	/* Archive len >= specified */
+	  && (p[nlp->length] == '\0' || p[nlp->length] == '/')
+	  /* Full match on file/dirname */
+	  && strncmp (p, nlp->name, nlp->length) == 0)	/* Name compare */
+	{
+	  note_name_matched (nlp);
+	  return 1;		/* We got a match */
+	}
+    }
+
+  /*
+   * Filename from archive not found in namelist.
+   * If we have the whole namelist here, just return 0.
+   * Otherwise, read the next name in and compare it.
+   * If this was the last name, namelist->found will remain on.
+   * If not, we loop to compare the newly read name.
+   */
+  if (f_sorted_names && namelist->found)
+    {
+      name_gather ();		/* Read one more */
+      if (!namelist->found)
+	goto again;
+    }
+  return 0;
+}
+
+
+/*
+ * Print the names of things in the namelist that were not matched.
+ */
+void
+names_notfound ()
+{
+  register struct name *nlp, *next;
+  register char *p;
+
+  nlp = namelist;
+  while (nlp != 0)
+    {
+      /* Grab the successor first; the entry may be freed below.  */
+      next = nlp->next;
+      if (nlp->found == 0)
+	msg ("%s not found in archive", nlp->name);
+
+      /*
+       * We could free() the list, but the process is about
+       * to die anyway, so save some CPU time.  Amigas and
+       * other similarly broken software will need to waste
+       * the time, though.
+       */
+#ifdef amiga
+      if (!f_sorted_names)
+	free (nlp);
+#endif
+      nlp = next;
+    }
+  namelist = (struct name *) NULL;
+  namelast = (struct name *) NULL;
+
+  if (!f_sorted_names)
+    return;
+
+  /* In sorted mode, names that have not been read yet were never
+     matched either; report each of them.  */
+  for (p = name_next (1); p != 0; p = name_next (1))
+    msg ("%s not found in archive", p);
+}
+
+/* These next routines were created by JF */
+
+void
+name_expand ()
+{
+ /* No-op: the body is a single empty statement.  */
+ ;
+}
+
+/* This is like name_match(), except that it returns a pointer to the name
+ it matched, and doesn't set ->found. The caller will have to do that
+ if it wants to. Oh, and if the namelist is empty, it returns 0, unlike
+ name_match(), which returns TRUE. */
+
+struct name *
+name_scan (p)
+     register char *p;
+{
+  register struct name *nlp;
+  register int len;
+
+  for (;;)
+    {
+      nlp = namelist;
+      if (nlp == 0)		/* Empty namelist is easy */
+	return 0;
+
+      len = strlen (p);
+      while (nlp != 0)
+	{
+	  /* The quick first-character test only applies to entries
+	     whose first character is literal.  */
+	  if (!nlp->firstch || nlp->name[0] == p[0])
+	    {
+	      if (nlp->regexp)
+		{
+		  /* Regular expressions */
+		  if (fnmatch (nlp->name, p, FNM_LEADING_DIR) == 0)
+		    return nlp;	/* We got a match */
+		}
+	      else if (nlp->length <= len	/* Archive len >= specified */
+		       && (p[nlp->length] == '\0' || p[nlp->length] == '/')
+		       /* Full match on file/dirname */
+		       && strncmp (p, nlp->name, nlp->length) == 0)
+		return nlp;	/* We got a match */
+	    }
+	  nlp = nlp->next;
+	}
+
+      /*
+       * Filename from archive not found in namelist.
+       * If we have the whole namelist here, just return 0.
+       * Otherwise, read the next name in and compare it.
+       * If this was the last name, namelist->found will remain on.
+       * If not, we loop to compare the newly read name.
+       */
+      if (!f_sorted_names || !namelist->found)
+	return (struct name *) 0;
+
+      name_gather ();		/* Read one more */
+      if (namelist->found)
+	return (struct name *) 0;
+    }
+}
+
+/* This returns a name from the namelist which doesn't have ->found set.
+ It sets ->found before returning, so successive calls will find and return
+ all the non-found names in the namelist */
+
+struct name *gnu_list_name;
+
+char *
+name_from_list ()
+{
+  /* Resume where the previous call stopped, or start at the head.  */
+  struct name *cursor = gnu_list_name ? gnu_list_name : namelist;
+
+  /* Skip entries that are already marked found.  */
+  while (cursor && cursor->found)
+    cursor = cursor->next;
+  gnu_list_name = cursor;
+
+  if (cursor == 0)
+    return (char *) 0;
+
+  cursor->found++;
+  if (cursor->change_dir && chdir (cursor->change_dir) < 0)
+    msg_perror ("can't chdir to %s", cursor->change_dir);
+  return cursor->name;
+}
+
+/* Clear the found mark on every namelist entry and reset the
+   name_from_list cursor (gnu_list_name).  */
+void
+blank_name_list ()
+{
+  struct name *entry = namelist;
+
+  gnu_list_name = 0;
+  while (entry)
+    {
+      entry->found = 0;
+      entry = entry->next;
+    }
+}
+
+/* Return a newly malloc'ed string holding PATH, a '/', and NAME.
+   Exits with EX_SYSTEM if the memory cannot be allocated.  The caller
+   owns the returned string.  */
+char *
+new_name (path, name)
+     char *path, *name;
+{
+  char *path_buf;
+
+  /* Two extra bytes: one for the '/', one for the terminating NUL.  */
+  path_buf = (char *) malloc (strlen (path) + strlen (name) + 2);
+  if (path_buf == 0)
+    {
+      msg ("Can't allocate memory for name '%s/%s'", path, name);
+      exit (EX_SYSTEM);
+    }
+  (void) sprintf (path_buf, "%s/%s", path, name);
+  return path_buf;
+}
+
+/* returns non-zero if the luser typed 'y' or 'Y', zero otherwise. */
+
+int
+confirm (action, file)
+     char *action, *file;
+{
+  int c, nl;
+  static FILE *confirm_file = 0;	/* Opened on first use, then reused.  */
+  extern FILE *msg_file;
+  extern char TTY_NAME[];
+
+  /* Print the question, e.g. "ACTION FILE?", and make sure it shows.  */
+  fprintf (msg_file, "%s %s?", action, file);
+  fflush (msg_file);
+
+  if (confirm_file == 0)
+    {
+      /* Answers are read from TTY_NAME when `archive' is 0, otherwise
+	 from standard input.  */
+      if (archive == 0)
+	confirm_file = fopen (TTY_NAME, "r");
+      else
+	confirm_file = stdin;
+
+      if (confirm_file == 0)
+	{
+	  msg ("Can't read confirmation from user");
+	  exit (EX_SYSTEM);
+	}
+    }
+
+  /* Only the first character counts; discard the rest of the line.  */
+  c = getc (confirm_file);
+  nl = c;
+  while (nl != '\n' && nl != EOF)
+    nl = getc (confirm_file);
+
+  if (c == 'y' || c == 'Y')
+    return 1;
+  return 0;
+}
+
+char *x_buffer = 0;
+int size_x_buffer;
+int free_x_buffer;
+
+char **exclude = 0;
+int size_exclude = 0;
+int free_exclude = 0;
+
+char **re_exclude = 0;
+int size_re_exclude = 0;
+int free_re_exclude = 0;
+
+/* Add NAME (after un-quoting it in place) to the exclusion lists.
+   The text is copied into the shared x_buffer; a pointer to the copy
+   is appended to re_exclude when NAME contains a wildcard character
+   and to exclude otherwise.  */
+void
+add_exclude (name)
+     char *name;
+{
+  int size_buf;
+
+  un_quote_string (name);
+  size_buf = strlen (name);
+
+  if (x_buffer == 0)
+    {
+      x_buffer = (char *) ck_malloc (size_buf + 1024);
+      free_x_buffer = size_buf + 1024;
+    }
+  else if (free_x_buffer <= size_buf)
+    {
+      char *old_x_buffer;
+      char **tmp_ptr;
+
+      /* Grow by the length of the new name plus 1K, so that a name of
+	 1024 characters or more still fits together with its NUL.  */
+      old_x_buffer = x_buffer;
+      x_buffer = (char *) ck_realloc (x_buffer,
+				      size_x_buffer + size_buf + 1024);
+      free_x_buffer = size_buf + 1024;
+
+      /* The buffer may have moved; re-point every stored entry at the
+	 same offset inside the new buffer.  */
+      for (tmp_ptr = exclude; tmp_ptr < exclude + size_exclude; tmp_ptr++)
+	*tmp_ptr = x_buffer + ((*tmp_ptr) - old_x_buffer);
+      for (tmp_ptr = re_exclude; tmp_ptr < re_exclude + size_re_exclude; tmp_ptr++)
+	*tmp_ptr = x_buffer + ((*tmp_ptr) - old_x_buffer);
+    }
+
+  if (is_regex (name))
+    {
+      /* The pointer tables grow 32 slots at a time.  */
+      if (free_re_exclude == 0)
+	{
+	  re_exclude = (char **) (re_exclude ? ck_realloc (re_exclude, (size_re_exclude + 32) * sizeof (char *)): ck_malloc (sizeof (char *) * 32));
+	  free_re_exclude += 32;
+	}
+      re_exclude[size_re_exclude] = x_buffer + size_x_buffer;
+      size_re_exclude++;
+      free_re_exclude--;
+    }
+  else
+    {
+      if (free_exclude == 0)
+	{
+	  exclude = (char **) (exclude ? ck_realloc (exclude, (size_exclude + 32) * sizeof (char *)): ck_malloc (sizeof (char *) * 32));
+	  free_exclude += 32;
+	}
+      exclude[size_exclude] = x_buffer + size_x_buffer;
+      size_exclude++;
+      free_exclude--;
+    }
+
+  /* Copy the name, with its NUL, after the text already stored.  */
+  strcpy (x_buffer + size_x_buffer, name);
+  size_x_buffer += size_buf + 1;
+  free_x_buffer -= size_buf + 1;
+}
+
+/* Read exclusion names from FILE, one per line, handing each line
+   (without its newline) to add_exclude.  A FILE of "-" means standard
+   input.  Exits with status 2 if FILE cannot be opened.  */
+void
+add_exclude_file (file)
+     char *file;
+{
+  FILE *fp;
+  char buf[1024];
+
+  if (strcmp (file, "-") == 0)
+    /* Let's hope the person knows what they're doing. */
+    /* Using -X - -T - -f - will get you *REALLY* strange
+       results. . . */
+    fp = stdin;
+  else
+    fp = fopen (file, "r");
+
+  if (fp == 0)
+    {
+      msg_perror ("can't open %s", file);
+      exit (2);
+    }
+
+  while (fgets (buf, 1024, fp) != 0)
+    {
+      /* Cut the line at its last newline, if it has one.  */
+      char *newline = rindex (buf, '\n');
+
+      if (newline != 0)
+	*newline = '\0';
+      add_exclude (buf);
+    }
+  fclose (fp);
+}
+
+/* Return 1 if STR contains any of the characters '*', '[' or '?',
+   0 otherwise.  */
+int
+is_regex (str)
+     char *str;
+{
+  if (index (str, '*'))
+    return 1;
+  if (index (str, '['))
+    return 1;
+  if (index (str, '?'))
+    return 1;
+  return 0;
+}
+
+/* Returns non-zero if the file 'name' should not be added/extracted */
+int
+check_exclude (name)
+     char *name;
+{
+  int n;
+  size_t name_len = strlen (name);
+
+  /* Wildcard patterns are matched with fnmatch.  */
+  for (n = 0; n < size_re_exclude; n++)
+    {
+      if (fnmatch (re_exclude[n], name, FNM_LEADING_DIR) == 0)
+	return 1;
+    }
+
+  /* A plain entry excludes NAME when it equals the whole name or its
+     trailing part starting right after a '/'.  The tail is compared
+     directly; searching with strstr would only ever look at the first
+     occurrence and so miss a name such as "a/foo/foo" for "foo".  */
+  for (n = 0; n < size_exclude; n++)
+    {
+      size_t excl_len = strlen (exclude[n]);
+      char *tail;
+
+      if (excl_len == 0)
+	{
+	  /* An empty entry only matches an empty name.  */
+	  if (name_len == 0)
+	    return 1;
+	  continue;
+	}
+      if (excl_len > name_len)
+	continue;
+
+      tail = name + (name_len - excl_len);
+      if ((tail == name || tail[-1] == '/')
+	  && strcmp (tail, exclude[n]) == 0)
+	return 1;
+    }
+  return 0;
+}
diff --git a/gnu/usr.bin/tar/tar.h b/gnu/usr.bin/tar/tar.h
new file mode 100644
index 000000000000..c3fec78743bb
--- /dev/null
+++ b/gnu/usr.bin/tar/tar.h
@@ -0,0 +1,291 @@
+/* Declarations for tar archives.
+ Copyright (C) 1988, 1992, 1993 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* major() and minor() macros (among other things) defined here for hpux */
+#ifdef hpux
+#include <sys/mknod.h>
+#endif
+
+/*
+ * Kludge for handling systems that can't cope with multiple
+ * external definitions of a variable. In ONE routine (tar.c),
+ * we #define TAR_EXTERN to null; here, we set it to "extern" if
+ * it is not already set.
+ */
+#ifndef TAR_EXTERN
+#define TAR_EXTERN extern
+#endif
+
+/*
+ * Header block on tape.
+ *
+ * I'm going to use traditional DP naming conventions here.
+ * A "block" is a big chunk of stuff that we do I/O on.
+ * A "record" is a piece of info that we care about.
+ * Typically many "record"s fit into a "block".
+ */
+/* Size in bytes of the charptr member of union record. */
+#define RECORDSIZE 512
+/* Size of the arch_name and arch_linkname header fields. */
+#define NAMSIZ 100
+/* Sizes of the uname and gname header fields. */
+#define TUNMLEN 32
+#define TGNMLEN 32
+/* Same value as the number of sparse entries in struct extended_header. */
+#define SPARSE_EXT_HDR 21
+/* Number of sparse entries held directly in struct header. */
+#define SPARSE_IN_HDR 4
+
+/* One sparse entry as it appears inside a header record: two
+   fixed-width character fields.
+   NOTE(review): presumably numeric text like the other header
+   fields -- confirm against the code that fills them in. */
+struct sparse
+ {
+ char offset[12];
+ char numbytes[12];
+ };
+
+/* Same two field names as struct sparse, but as ints; the sparsearray
+   global points to elements of this type. */
+struct sp_array
+ {
+ int offset;
+ int numbytes;
+ };
+
+/* One archive record.  The same storage can be viewed as raw bytes
+   (charptr), as a file header (header), or as an extended header that
+   holds additional sparse entries (ext_hdr).  */
+union record
+  {
+    char charptr[RECORDSIZE];
+    struct header
+      {
+        char arch_name[NAMSIZ];
+        char mode[8];
+        char uid[8];
+        char gid[8];
+        char size[12];
+        char mtime[12];
+        char chksum[8];
+        char linkflag;          /* one of the LF_ values */
+        char arch_linkname[NAMSIZ];
+        char magic[8];
+        char uname[TUNMLEN];
+        char gname[TGNMLEN];
+        char devmajor[8];
+        char devminor[8];
+        /* these following fields were added by JF for gnu */
+        /* and are NOT standard */
+        char atime[12];
+        char ctime[12];
+        char offset[12];
+        char longnames[4];
+#ifdef NEEDPAD
+        char pad;
+#endif
+        struct sparse sp[SPARSE_IN_HDR];
+        char isextended;        /* non-zero: extended headers follow */
+        char realsize[12];      /* true size of the sparse file */
+        /* char ending_blanks[12];*//* number of nulls at the
+           end of the file, if any */
+      }
+    header;
+    struct extended_header
+      {
+        /* Sized by the named constant rather than a bare 21 so the
+           two cannot drift apart.  */
+        struct sparse sp[SPARSE_EXT_HDR];
+        char isextended;
+      }
+    ext_hdr;
+  };
+
+/* The checksum field is filled with this while the checksum is computed.
+   Exactly eight blanks, matching the 8-byte chksum header field. */
+#define CHKBLANKS "        " /* 8 blanks, no null */
+
+/* The magic field is filled with this if uname and gname are valid.
+   "ustar" followed by two blanks: with the terminating null this
+   fills the 8-byte magic header field. */
+#define TMAGIC "ustar  " /* 7 chars and a null */
+
+/* The linkflag defines the type of file */
+#define LF_OLDNORMAL '\0' /* Normal disk file, Unix compat */
+#define LF_NORMAL '0' /* Normal disk file */
+#define LF_LINK '1' /* Link to previously dumped file */
+#define LF_SYMLINK '2' /* Symbolic link */
+#define LF_CHR '3' /* Character special file */
+#define LF_BLK '4' /* Block special file */
+#define LF_DIR '5' /* Directory */
+#define LF_FIFO '6' /* FIFO special file */
+#define LF_CONTIG '7' /* Contiguous file */
+/* Further link types may be defined later. */
+
+/* Note that the standards committee allows only capital A through
+ capital Z for user-defined expansion. This means that defining something
+ as, say '8' is a *bad* idea. */
+#define LF_DUMPDIR 'D' /* This is a dir entry that contains
+ the names of files that were in
+ the dir at the time the dump
+ was made */
+#define LF_LONGLINK 'K' /* Identifies the NEXT file on the tape
+ as having a long linkname */
+#define LF_LONGNAME 'L' /* Identifies the NEXT file on the tape
+ as having a long name. */
+#define LF_MULTIVOL 'M' /* This is the continuation
+ of a file that began on another
+ volume */
+#define LF_NAMES 'N' /* For storing filenames that didn't
+ fit in 100 characters */
+#define LF_SPARSE 'S' /* This is for sparse files */
+#define LF_VOLHDR 'V' /* This file is a tape/volume header */
+/* Ignore it on extraction */
+
+/*
+ * Exit codes from the "tar" program
+ */
+#define EX_SUCCESS 0 /* success! */
+#define EX_ARGSBAD 1 /* invalid args */
+#define EX_BADFILE 2 /* invalid filename */
+#define EX_BADARCH 3 /* bad archive */
+#define EX_SYSTEM 4 /* system gave unexpected error */
+#define EX_BADVOL 5 /* Special error code means
+ Tape volume doesn't match the one
+ specified on the command line */
+
+/*
+ * Global variables
+ */
+TAR_EXTERN union record *ar_block; /* Start of block of archive */
+TAR_EXTERN union record *ar_record; /* Current record of archive */
+TAR_EXTERN union record *ar_last; /* Last+1 record of archive block */
+TAR_EXTERN char ar_reading; /* 0 writing, !0 reading archive */
+TAR_EXTERN int blocking; /* Size of each block, in records */
+TAR_EXTERN int blocksize; /* Size of each block, in bytes */
+TAR_EXTERN char *info_script; /* Script to run at end of each tape change */
+TAR_EXTERN char *name_file; /* File containing names to work on */
+TAR_EXTERN char filename_terminator; /* \n or \0. */
+TAR_EXTERN char *tar; /* Name of this program */
+TAR_EXTERN struct sp_array *sparsearray; /* Pointer to the start of the scratch space */
+TAR_EXTERN int sp_array_size; /* Initial size of the sparsearray */
+TAR_EXTERN int tot_written; /* Total written to output */
+TAR_EXTERN struct re_pattern_buffer
+ *label_pattern; /* compiled regex for extract label */
+TAR_EXTERN char **ar_files; /* list of tape drive names */
+TAR_EXTERN int n_ar_files; /* number of tape drive names */
+TAR_EXTERN int cur_ar_file; /* tape drive currently being used */
+TAR_EXTERN int ar_files_len; /* malloced size of ar_files */
+TAR_EXTERN char *current_file_name, *current_link_name;
+
+/*
+ * Flags from the command line
+ */
+TAR_EXTERN int cmd_mode;
+#define CMD_NONE 0
+#define CMD_CAT 1 /* -A */
+#define CMD_CREATE 2 /* -c */
+#define CMD_DIFF 3 /* -d */
+#define CMD_APPEND 4 /* -r */
+#define CMD_LIST 5 /* -t */
+#define CMD_UPDATE 6 /* -u */
+#define CMD_EXTRACT 7 /* -x */
+#define CMD_DELETE 8 /* -D */
+#define CMD_VERSION 9 /* --version */
+
+
+TAR_EXTERN int f_reblock; /* -B */
+#if 0
+TAR_EXTERN char f_dironly; /* -D */
+#endif
+TAR_EXTERN int f_run_script_at_end; /* -F */
+TAR_EXTERN int f_gnudump; /* -G */
+TAR_EXTERN int f_follow_links; /* -h */
+TAR_EXTERN int f_ignorez; /* -i */
+TAR_EXTERN int f_keep; /* -k */
+TAR_EXTERN int f_startfile; /* -K */
+TAR_EXTERN int f_local_filesys; /* -l */
+TAR_EXTERN int tape_length; /* -L */
+TAR_EXTERN int f_modified; /* -m */
+TAR_EXTERN int f_multivol; /* -M */
+TAR_EXTERN int f_new_files; /* -N */
+TAR_EXTERN int f_oldarch; /* -o */
+TAR_EXTERN int f_exstdout; /* -O */
+TAR_EXTERN int f_use_protection;/* -p */
+TAR_EXTERN int f_absolute_paths;/* -P */
+TAR_EXTERN int f_sayblock; /* -R */
+TAR_EXTERN int f_sorted_names; /* -s */
+TAR_EXTERN int f_sparse_files; /* -S ... JK */
+TAR_EXTERN int f_namefile; /* -T */
+TAR_EXTERN int f_verbose; /* -v */
+TAR_EXTERN char *f_volhdr; /* -V */
+TAR_EXTERN int f_confirm; /* -w */
+TAR_EXTERN int f_verify; /* -W */
+TAR_EXTERN int f_exclude; /* -X */
+TAR_EXTERN char *f_compressprog; /* -z and -Z */
+TAR_EXTERN int f_do_chown; /* --do-chown */
+TAR_EXTERN int f_totals; /* --totals */
+TAR_EXTERN int f_remove_files; /* --remove-files */
+TAR_EXTERN int f_ignore_failed_read; /* --ignore-failed-read */
+TAR_EXTERN int f_checkpoint; /* --checkpoint */
+TAR_EXTERN int f_show_omitted_dirs; /* --show-omitted-dirs */
+TAR_EXTERN char *f_volno_file; /* --volno-file */
+TAR_EXTERN int f_force_local; /* --force-local */
+TAR_EXTERN int f_atime_preserve;/* --atime-preserve */
+TAR_EXTERN int f_compress_block; /* --compress-block */
+
+/*
+ * We default to Unix Standard format rather than 4.2BSD tar format.
+ * The code can actually produce all three:
+ * f_standard ANSI standard
+ * f_oldarch V7
+ * neither 4.2BSD
+ * but we don't bother, since 4.2BSD can read ANSI standard format anyway.
+ * The only advantage to the "neither" option is that we can cmp our
+ * output to the output of 4.2BSD tar, for debugging.
+ */
+#define f_standard (!f_oldarch)
+
+/*
+ * Structure for keeping track of filenames and lists thereof.
+ */
+/* One entry in the singly linked list of file names; namelist and
+   namelast point at the two ends of that list. */
+struct name
+ {
+ struct name *next;
+ short length; /* cached strlen(name) */
+ char found; /* A matching file has been found */
+ char firstch; /* First char is literally matched */
+ char regexp; /* This name is a regexp, not literal */
+ char *change_dir; /* JF set with the -C option */
+ char *dir_contents; /* JF for f_gnudump */
+ char fake; /* dummy entry */
+ /* NOTE(review): presumably the trailing-array idiom, with the
+    struct over-allocated to hold the whole name -- confirm at the
+    allocation site. */
+ char name[1];
+ };
+
+TAR_EXTERN struct name *namelist; /* Points to first name in list */
+TAR_EXTERN struct name *namelast; /* Points to last name in list */
+
+TAR_EXTERN int archive; /* File descriptor for archive file */
+TAR_EXTERN int errors; /* # of files in error */
+
+TAR_EXTERN char *gnu_dumpfile;
+
+/*
+ * Error recovery stuff
+ */
+TAR_EXTERN char read_error_flag;
+
+
+/*
+ * Declarations of functions available to the world.
+ */
+union record *findrec ();
+void userec ();
+union record *endofrecs ();
+void anno ();
+
+#if defined (HAVE_VPRINTF) && __STDC__
+void msg (char *,...);
+void msg_perror (char *,...);
+#else
+void msg ();
+void msg_perror ();
+#endif
diff --git a/gnu/usr.bin/tar/update.c b/gnu/usr.bin/tar/update.c
new file mode 100644
index 000000000000..a64317c666dc
--- /dev/null
+++ b/gnu/usr.bin/tar/update.c
@@ -0,0 +1,585 @@
+/* Update a tar archive.
+ Copyright (C) 1988, 1992 Free Software Foundation
+
+This file is part of GNU Tar.
+
+GNU Tar is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Tar is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Tar; see the file COPYING. If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* JF implement the 'r' 'u' and 'A' options for tar. */
+/* The 'A' option is my own invention: It means that the file-names are
+ tar files, and they should simply be appended to the end of the archive.
+ No attempt is made to block the reads from the args; if they're on raw
+ tape or something like that, it'll probably lose. . . */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <errno.h>
+#ifndef STDC_HEADERS
+extern int errno;
+#endif
+
+#ifdef HAVE_SYS_MTIO_H
+#include <sys/ioctl.h>
+#include <sys/mtio.h>
+#endif
+
+#ifdef BSD42
+#include <sys/file.h>
+#else
+#ifndef V7
+#include <fcntl.h>
+#endif
+#endif
+
+#ifndef __MSDOS__
+#include <pwd.h>
+#include <grp.h>
+#endif
+
+#define STDIN 0
+#define STDOUT 1
+
+#include "tar.h"
+#include "port.h"
+#include "rmt.h"
+
+int time_to_start_writing = 0; /* We've hit the end of the old stuff,
+ and it's time to start writing new stuff
+ to the tape. This involves seeking
+ back one block and re-writing the current
+ block (which has been changed). */
+
+char *output_start; /* Pointer to where we started to write in
+ the first block we write out. This is used
+ if we can't backspace the output and have
+ to null out the first part of the block */
+
+extern void skip_file ();
+extern void skip_extended_headers ();
+
+extern union record *head;
+extern struct stat hstat;
+
+void append_file ();
+void close_archive ();
+int confirm ();
+void decode_header ();
+void fl_read ();
+void fl_write ();
+void flush_archive ();
+int move_arch ();
+struct name *name_scan ();
+char *name_from_list ();
+void name_expand ();
+void name_gather ();
+void names_notfound ();
+void open_archive ();
+int read_header ();
+void reset_eof ();
+void write_block ();
+void write_eot ();
+
+/* Implement the 'r' (add files to end of archive), and 'u' (add files to
+ end of archive if they aren't there, or are more up to date than the
+ version in the archive.) commands. When cmd_mode is CMD_CAT the named
+ files are appended with append_file instead of being dumped.
+
+ The first loop reads headers until the end of the existing archive is
+ found; the second loop then adds each name returned by
+ name_from_list. */
+void
+update_archive ()
+{
+ int found_end = 0;
+ /* 3 is used only as the starting value, so that a bad first record
+    is reported as "doesn't look like a tar archive". */
+ int status = 3;
+ int prev_status;
+ char *p;
+ struct name *name;
+ extern void dump_file ();
+
+ name_gather ();
+ if (cmd_mode == CMD_UPDATE)
+ name_expand ();
+ open_archive (2); /* Open for updating */
+
+ do
+ {
+ prev_status = status;
+ status = read_header ();
+ switch (status)
+ {
+ case EOF:
+ found_end = 1;
+ break;
+
+ case 0: /* A bad record */
+ userec (head);
+ /* Complain once per run of bad records: nothing is printed when
+    the previous record was bad too. */
+ switch (prev_status)
+ {
+ case 3:
+ msg ("This doesn't look like a tar archive.");
+ /* FALL THROUGH */
+ case 2:
+ case 1:
+ msg ("Skipping to next header");
+ /* FALL THROUGH */
+ case 0:
+ break;
+ }
+ break;
+
+ /* A good record */
+ case 1:
+ /* printf("File %s\n",head->header.name); */
+ /* head->header.name[NAMSIZ-1]='\0'; */
+ if (cmd_mode == CMD_UPDATE && (name = name_scan (current_file_name)))
+ {
+
+ /* struct stat hstat; */
+ struct stat nstat;
+ int head_standard;
+
+ decode_header (head, &hstat, &head_standard, 0);
+ if (stat (current_file_name, &nstat) < 0)
+ {
+ msg_perror ("can't stat %s:", current_file_name);
+ }
+ else
+ {
+ /* The archived copy is at least as new as the file on disk,
+    so mark the name as already found. */
+ if (hstat.st_mtime >= nstat.st_mtime)
+ name->found++;
+ }
+ }
+ userec (head);
+ if (head->header.isextended)
+ skip_extended_headers ();
+ /* NOTE(review): hstat is used here even when decode_header was not
+    called above; presumably read_header fills it in -- confirm. */
+ skip_file ((long) hstat.st_size);
+ break;
+
+ case 2:
+ /* Stop here and make this record the current one, so that
+    writing starts at this position. */
+ ar_record = head;
+ found_end = 1;
+ break;
+ }
+ }
+ while (!found_end);
+
+ reset_eof ();
+ time_to_start_writing = 1;
+ output_start = ar_record->charptr;
+
+ while (p = name_from_list ())
+ {
+ if (f_confirm && !confirm ("add", p))
+ continue;
+ if (cmd_mode == CMD_CAT)
+ append_file (p);
+ else
+ dump_file (p, -1, 1);
+ }
+
+ write_eot ();
+ close_archive ();
+ names_notfound ();
+}
+
+/* Catenate file p to the archive without creating a header for it. It had
+   better be a tar file or the archive is screwed.
+
+   The file is copied in chunks straight into the record buffer
+   returned by findrec.  If p cannot be stat'ed or opened, the error is
+   reported, errors is incremented and the function returns.  A read
+   error exits the program; a short read aborts it.  */
+
+void
+append_file (p)
+     char *p;
+{
+  int fd;
+  struct stat statbuf;
+  long bytes_left;
+  union record *start;
+  long bufsiz, count;
+
+  if (0 != stat (p, &statbuf) || (fd = open (p, O_RDONLY | O_BINARY)) < 0)
+    {
+      msg_perror ("can't open file %s", p);
+      errors++;
+      return;
+    }
+
+  bytes_left = statbuf.st_size;
+
+  while (bytes_left > 0)
+    {
+      start = findrec ();
+      bufsiz = endofrecs ()->charptr - start->charptr;
+      if (bytes_left < bufsiz)
+        {
+          /* Final chunk: zero the unused tail of its last record.  */
+          bufsiz = bytes_left;
+          count = bufsiz % RECORDSIZE;
+          if (count)
+            bzero (start->charptr + bytes_left, (int) (RECORDSIZE - count));
+        }
+      count = read (fd, start->charptr, bufsiz);
+      if (count < 0)
+        {
+          /* Both numbers are passed as long to match %ld.  */
+          msg_perror ("read error at byte %ld reading %ld bytes in file %s", (long) (statbuf.st_size - bytes_left), bufsiz, p);
+          exit (EX_ARGSBAD);    /* FOO */
+        }
+      bytes_left -= count;
+      userec (start + (count - 1) / RECORDSIZE);
+      if (count != bufsiz)
+        {
+          /* bytes_left is a long, so it needs %ld as well.  */
+          msg ("%s: file shrunk by %ld bytes, yark!", p, bytes_left);
+          abort ();
+        }
+    }
+  (void) close (fd);
+}
+
+#ifdef DONTDEF
+/* Debugging aid, compiled only when DONTDEF is defined.  Writes NUM
+   bytes of BUF to FP between single quotes, followed by a newline.
+   Printable characters are written as they are, a backslash is
+   doubled, newline, carriage return and backspace are written as \n,
+   \r and \b, null bytes are dropped, and any other byte is written as
+   a backslash followed by octal digits.  Nothing at all is written
+   when NUM is 0 or -1.  */
+bprint (fp, buf, num)
+     FILE *fp;
+     char *buf;
+{
+  int ch;
+
+  if (num == 0 || num == -1)
+    return;
+
+  fputs (" '", fp);
+  for (; num != 0; num--, buf++)
+    {
+      ch = *buf;
+      if (ch == '\\')
+        {
+          fputs ("\\\\", fp);
+          continue;
+        }
+      if (ch >= ' ' && ch <= '~')
+        {
+          putc (ch, fp);
+          continue;
+        }
+      if (ch == '\n')
+        fputs ("\\n", fp);
+      else if (ch == '\r')
+        fputs ("\\r", fp);
+      else if (ch == '\b')
+        fputs ("\\b", fp);
+      else if (ch != '\0')
+        fprintf (fp, "\\%03o", ch);
+      /* A null byte produces no output. */
+    }
+  fputs ("'\n", fp);
+}
+
+#endif
+
+int number_of_blocks_read = 0;
+
+int number_of_new_records = 0;
+int number_of_records_needed = 0;
+
+union record *new_block = 0;
+union record *save_block = 0;
+
+/* Remove from the archive every member whose name is matched by
+   name_scan.
+
+   The archive is scanned up to the first matching member. From there
+   on, the records that are to be kept are copied into new_block, which
+   write_block writes out each time it fills up, while the records of
+   matching members are stepped over. If no member matches, the
+   archive is only finished off with write_eot and closed. */
+void
+junk_archive ()
+{
+ /* 0: keep scanning, 1: reached the end, 2: found a member to delete */
+ int found_stuff = 0;
+ /* 3 is used only as the starting value, before any header is read */
+ int status = 3;
+ int prev_status;
+ struct name *name;
+
+ /* int dummy_head; */
+ int number_of_records_to_skip = 0;
+ int number_of_records_to_keep = 0;
+ int number_of_kept_records_in_block;
+ int sub_status;
+ extern int write_archive_to_stdout;
+
+ /* fprintf(stderr,"Junk files\n"); */
+ name_gather ();
+ open_archive (2);
+
+ /* Phase 1: skip members until the first one that is to be deleted. */
+ while (!found_stuff)
+ {
+ prev_status = status;
+ status = read_header ();
+ switch (status)
+ {
+ case EOF:
+ found_stuff = 1;
+ break;
+
+ case 0:
+ userec (head);
+ switch (prev_status)
+ {
+ case 3:
+ msg ("This doesn't look like a tar archive.");
+ /* FALL THROUGH */
+ case 2:
+ case 1:
+ msg ("Skipping to next header");
+ /* FALL THROUGH */
+ case 0:
+ break;
+ }
+ break;
+
+ case 1:
+ /* head->header.name[NAMSIZ-1] = '\0'; */
+ /* fprintf(stderr,"file %s\n",head->header.name); */
+ if ((name = name_scan (current_file_name)) == (struct name *) 0)
+ {
+ /* Not a member to delete: step over its header and data. */
+ userec (head);
+ /* fprintf(stderr,"Skip %ld\n",(long)(hstat.st_size)); */
+ if (head->header.isextended)
+ skip_extended_headers ();
+ skip_file ((long) (hstat.st_size));
+ break;
+ }
+ name->found = 1;
+ found_stuff = 2;
+ break;
+
+ case 2:
+ found_stuff = 1;
+ break;
+ }
+ }
+ /* fprintf(stderr,"Out of first loop\n"); */
+
+ /* Nothing to delete. */
+ if (found_stuff != 2)
+ {
+ write_eot ();
+ close_archive ();
+ names_notfound ();
+ return;
+ }
+
+ if (write_archive_to_stdout)
+ write_archive_to_stdout = 0;
+ new_block = (union record *) malloc (blocksize);
+ if (new_block == 0)
+ {
+ msg ("Can't allocate secondary block of %d bytes", blocksize);
+ exit (EX_SYSTEM);
+ }
+
+ /* Save away records before this one in this block */
+ number_of_new_records = ar_record - ar_block;
+ number_of_records_needed = blocking - number_of_new_records;
+ if (number_of_new_records)
+ bcopy ((void *) ar_block, (void *) new_block, (number_of_new_records) * RECORDSIZE);
+
+ /* fprintf(stderr,"Saved %d recs, need %d more\n",number_of_new_records,number_of_records_needed); */
+ /* Step over the first member to delete; it is not copied to new_block. */
+ userec (head);
+ if (head->header.isextended)
+ skip_extended_headers ();
+ skip_file ((long) (hstat.st_size));
+ found_stuff = 0;
+ /* goto flush_file; */
+
+ /* Phase 2: handle every remaining member, one header per iteration. */
+ for (;;)
+ {
+ /* Fill in a block */
+ /* another_file: */
+ if (ar_record == ar_last)
+ {
+ /* fprintf(stderr,"New block\n"); */
+ flush_archive ();
+ number_of_blocks_read++;
+ }
+ sub_status = read_header ();
+ /* fprintf(stderr,"Header type %d\n",sub_status); */
+
+ /* With f_ignorez set, a status-2 record is consumed and scanning
+    goes on instead of stopping. */
+ if (sub_status == 2 && f_ignorez)
+ {
+ userec (head);
+ continue;
+ }
+ if (sub_status == EOF || sub_status == 2)
+ {
+ /* End of input: zero the unused rest of new_block and write it. */
+ found_stuff = 1;
+ bzero (new_block[number_of_new_records].charptr, RECORDSIZE * number_of_records_needed);
+ number_of_new_records += number_of_records_needed;
+ number_of_records_needed = 0;
+ write_block (0);
+ break;
+ }
+
+ if (sub_status == 0)
+ {
+ msg ("Deleting non-header from archive.");
+ userec (head);
+ continue;
+ }
+
+ /* Found another header. Yipee! */
+ /* head->header.name[NAMSIZ-1] = '\0'; */
+ /* fprintf(stderr,"File %s ",head->header.name); */
+ if (name = name_scan (current_file_name))
+ {
+ /* Another member to delete: step over its data records without
+    copying them, moving on to new blocks as needed. */
+ name->found = 1;
+ /* fprintf(stderr,"Flush it\n"); */
+ /* flush_file: */
+ /* decode_header(head,&hstat,&dummy_head,0); */
+ userec (head);
+ /* Size in records, rounded up.
+    NOTE(review): relies on hstat describing this member; presumably
+    read_header fills it in -- confirm. */
+ number_of_records_to_skip = (hstat.st_size + RECORDSIZE - 1) / RECORDSIZE;
+ /* fprintf(stderr,"Flushing %d recs from %s\n",number_of_records_to_skip,head->header.name); */
+
+ while (ar_last - ar_record <= number_of_records_to_skip)
+ {
+
+ /* fprintf(stderr,"Block: %d <= %d ",ar_last-ar_record,number_of_records_to_skip); */
+ number_of_records_to_skip -= (ar_last - ar_record);
+ flush_archive ();
+ number_of_blocks_read++;
+ /* fprintf(stderr,"Block %d left\n",number_of_records_to_skip); */
+ }
+ ar_record += number_of_records_to_skip;
+ /* fprintf(stderr,"Final %d\n",number_of_records_to_skip); */
+ number_of_records_to_skip = 0;
+ continue;
+ }
+
+ /* A member to keep: copy its header record into new_block first. */
+ /* copy_header: */
+ new_block[number_of_new_records] = *head;
+ number_of_new_records++;
+ number_of_records_needed--;
+ number_of_records_to_keep = (hstat.st_size + RECORDSIZE - 1) / RECORDSIZE;
+ userec (head);
+ if (number_of_records_needed == 0)
+ write_block (1);
+ /* copy_data: */
+ /* Records of this member available in the current input block. */
+ number_of_kept_records_in_block = ar_last - ar_record;
+ if (number_of_kept_records_in_block > number_of_records_to_keep)
+ number_of_kept_records_in_block = number_of_records_to_keep;
+
+ /* fprintf(stderr,"Need %d kept_in %d keep %d\n",blocking,number_of_kept_records_in_block,number_of_records_to_keep); */
+
+ /* Copy the member's data records, limited each time around by what
+    the input block still holds and by the room left in new_block. */
+ while (number_of_records_to_keep)
+ {
+ int n;
+
+ if (ar_record == ar_last)
+ {
+ /* Input block used up: read the next one and start at its
+    first record. */
+ /* fprintf(stderr,"Flush. . .\n"); */
+ fl_read ();
+ number_of_blocks_read++;
+ ar_record = ar_block;
+ number_of_kept_records_in_block = blocking;
+ if (number_of_kept_records_in_block > number_of_records_to_keep)
+ number_of_kept_records_in_block = number_of_records_to_keep;
+ }
+ n = number_of_kept_records_in_block;
+ if (n > number_of_records_needed)
+ n = number_of_records_needed;
+
+ /* fprintf(stderr,"Copying %d\n",n); */
+ bcopy ((void *) ar_record, (void *) (new_block + number_of_new_records), n * RECORDSIZE);
+ number_of_new_records += n;
+ number_of_records_needed -= n;
+ ar_record += n;
+ number_of_records_to_keep -= n;
+ number_of_kept_records_in_block -= n;
+ /* fprintf(stderr,"Now new %d need %d keep %d keep_in %d rec %d/%d\n",
+ number_of_new_records,number_of_records_needed,number_of_records_to_keep,
+ number_of_kept_records_in_block,ar_record-ar_block,ar_last-ar_block); */
+
+ if (number_of_records_needed == 0)
+ {
+ write_block (1);
+ }
+ }
+ }
+
+ write_eot ();
+ close_archive ();
+ names_notfound ();
+}
+
+/* Write new_block to the archive through fl_write, then reset
+   number_of_records_needed to blocking and number_of_new_records to 0
+   so that new_block can be filled again.
+
+   Unless the archive is STDIN, the archive is first moved back by
+   number_of_blocks_read + 1 blocks. If F is non-zero the archive is
+   afterwards moved forward again by number_of_blocks_read blocks
+   (again unless it is STDIN) and number_of_blocks_read is decremented. */
+void
+write_block (f)
+ int f;
+{
+ /* fprintf(stderr,"Write block\n"); */
+ /* We've filled out a block. Write it out. */
+
+ /* Backspace back to where we started. . . */
+ if (archive != STDIN)
+ (void) move_arch (-(number_of_blocks_read + 1));
+
+ /* Make ar_block point at new_block while fl_write runs; it is
+    restored from save_block afterwards. */
+ save_block = ar_block;
+ ar_block = new_block;
+
+ /* While writing, an archive on STDIN is switched to STDOUT. */
+ if (archive == STDIN)
+ archive = STDOUT;
+ fl_write ();
+
+ /* NOTE(review): this also turns an archive that was STDOUT before
+    the call into STDIN -- confirm that this is intended. */
+ if (archive == STDOUT)
+ archive = STDIN;
+ ar_block = save_block;
+
+ if (f)
+ {
+ /* Move the tape head back to where we were */
+ if (archive != STDIN)
+ (void) move_arch (number_of_blocks_read);
+ number_of_blocks_read--;
+ }
+
+ number_of_records_needed = blocking;
+ number_of_new_records = 0;
+}
+
+/* Move archive descriptor by n blocks worth. If n is positive we move
+ forward, else we move backward. If it's a tape, MTIOCTOP had better
+ work. If it's something else, we try to seek on it. If we can't
+ seek, we lose!
+
+ Returns 1 if the MTIOCTOP ioctl succeeded and 3 if the seek succeeded.
+ If neither works, a message is printed and the program exits with
+ EX_BADARCH. */
+int
+move_arch (n)
+ int n;
+{
+ long cur;
+
+#ifdef MTIOCTOP
+ struct mtop t;
+ int er;
+
+ /* Forward-space or back-space by |n| records, depending on the sign. */
+ if (n > 0)
+ {
+ t.mt_op = MTFSR;
+ t.mt_count = n;
+ }
+ else
+ {
+ t.mt_op = MTBSR;
+ t.mt_count = -n;
+ }
+ if ((er = rmtioctl (archive, MTIOCTOP, &t)) >= 0)
+ return 1;
+ /* An EIO failure gets one retry before falling back to seeking. */
+ if (errno == EIO && (er = rmtioctl (archive, MTIOCTOP, &t)) >= 0)
+ return 1;
+#endif
+
+ /* Take the current offset (whence 1) and seek to it plus n blocks'
+    worth of bytes, measured from the start (whence 0). */
+ cur = rmtlseek (archive, 0L, 1);
+ cur += blocksize * n;
+
+ /* fprintf(stderr,"Fore to %x\n",cur); */
+ if (rmtlseek (archive, cur, 0) != cur)
+ {
+ /* Lseek failed. There is no other method to fall back on, so give up. */
+ msg ("Couldn't re-position archive file.");
+ exit (EX_BADARCH);
+ }
+ return 3;
+}
diff --git a/gnu/usr.bin/tar/version.c b/gnu/usr.bin/tar/version.c
new file mode 100644
index 000000000000..4454f62c8e98
--- /dev/null
+++ b/gnu/usr.bin/tar/version.c
@@ -0,0 +1 @@
+char version_string[] = "GNU tar version 1.11.2";
diff --git a/gnu/usr.bin/tar/y.tab.h b/gnu/usr.bin/tar/y.tab.h
new file mode 100644
index 000000000000..4a541d2c97f7
--- /dev/null
+++ b/gnu/usr.bin/tar/y.tab.h
@@ -0,0 +1,18 @@
+#define tAGO 257
+#define tDAY 258
+#define tDAYZONE 259
+#define tID 260
+#define tMERIDIAN 261
+#define tMINUTE_UNIT 262
+#define tMONTH 263
+#define tMONTH_UNIT 264
+#define tSEC_UNIT 265
+#define tSNUMBER 266
+#define tUNUMBER 267
+#define tZONE 268
+#define tDST 269
+typedef union {
+ time_t Number;
+ enum _MERIDIAN Meridian;
+} YYSTYPE;
+extern YYSTYPE yylval;