aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog245
-rw-r--r--FIXES109
-rw-r--r--LICENSE23
-rwxr-xr-xREGRESS35
-rw-r--r--awk.1160
-rw-r--r--awk.h10
-rw-r--r--awkgram.y4
-rw-r--r--b.c291
-rw-r--r--bugs-fixed/README57
-rw-r--r--bugs-fixed/a-format.awk3
-rw-r--r--bugs-fixed/a-format.bad3
-rw-r--r--bugs-fixed/a-format.ok1
-rw-r--r--bugs-fixed/concat-assign-same.awk4
-rw-r--r--bugs-fixed/concat-assign-same.bad2
-rw-r--r--bugs-fixed/concat-assign-same.ok2
-rw-r--r--bugs-fixed/decr-NF.awk11
-rw-r--r--bugs-fixed/decr-NF.bad5
-rw-r--r--bugs-fixed/decr-NF.ok5
-rw-r--r--bugs-fixed/fmt-overflow.awk1
-rw-r--r--bugs-fixed/fmt-overflow.ok1
-rw-r--r--bugs-fixed/fs-overflow.awk13
-rw-r--r--bugs-fixed/getline-numeric.awk6
-rw-r--r--bugs-fixed/getline-numeric.bad3
-rw-r--r--bugs-fixed/getline-numeric.in1
-rw-r--r--bugs-fixed/getline-numeric.ok3
-rw-r--r--bugs-fixed/missing-precision.awk1
-rw-r--r--bugs-fixed/missing-precision.ok2
-rw-r--r--bugs-fixed/negative-nf.awk1
-rw-r--r--bugs-fixed/negative-nf.ok2
-rw-r--r--bugs-fixed/nf-self-assign.awk6
-rw-r--r--bugs-fixed/nf-self-assign.bad1
-rw-r--r--bugs-fixed/nf-self-assign.ok1
-rw-r--r--bugs-fixed/numeric-fs.awk5
-rw-r--r--bugs-fixed/numeric-fs.ok3
-rw-r--r--bugs-fixed/numeric-output-seps.awk8
-rw-r--r--bugs-fixed/numeric-output-seps.bad2
-rw-r--r--bugs-fixed/numeric-output-seps.ok1
-rw-r--r--bugs-fixed/numeric-rs.awk6
-rw-r--r--bugs-fixed/numeric-rs.bad1
-rw-r--r--bugs-fixed/numeric-rs.ok4
-rw-r--r--bugs-fixed/numeric-subsep.awk5
-rw-r--r--bugs-fixed/numeric-subsep.bad1
-rw-r--r--bugs-fixed/numeric-subsep.ok1
-rw-r--r--bugs-fixed/ofs-rebuild.awk17
-rw-r--r--bugs-fixed/ofs-rebuild.bad1
-rw-r--r--bugs-fixed/ofs-rebuild.ok1
-rw-r--r--bugs-fixed/space.awk22
-rw-r--r--bugs-fixed/space.bad16
-rw-r--r--bugs-fixed/space.ok16
-rw-r--r--bugs-fixed/split-fs-from-array.awk5
-rw-r--r--bugs-fixed/split-fs-from-array.ok1
-rw-r--r--bugs-fixed/string-conv.awk13
-rw-r--r--bugs-fixed/string-conv.bad4
-rw-r--r--bugs-fixed/string-conv.ok4
-rw-r--r--bugs-fixed/subsep-overflow.awk24
-rw-r--r--bugs-fixed/subsep-overflow.ok5
-rw-r--r--bugs-fixed/system-status.awk19
-rw-r--r--bugs-fixed/system-status.bad3
-rw-r--r--bugs-fixed/system-status.ok3
-rw-r--r--bugs-fixed/unary-plus.awk4
-rw-r--r--bugs-fixed/unary-plus.bad2
-rw-r--r--bugs-fixed/unary-plus.ok2
-rw-r--r--lex.c32
-rw-r--r--lib.c36
-rw-r--r--main.c12
-rw-r--r--makefile53
-rw-r--r--maketab.c13
-rw-r--r--parse.c2
-rw-r--r--proctab.c209
-rw-r--r--proto.h3
-rw-r--r--run.c187
-rw-r--r--tran.c183
72 files changed, 1723 insertions, 218 deletions
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 000000000000..fd03b2bbca0b
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,245 @@
+2019-05-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * lib.c (isclvar): Remove check for additional '=' after
+ first one. No longer needed.
+
+2019-01-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (version): Updated.
+
+2019-01-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * run.c (awkgetline): Check for numeric value in all getline
+ variants. See the getline-numeric.* files in bugs-fixed directory.
+
+2018-08-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * REGRESS: Check for existence of a.out. If not there, run
+ make. Enable core dumps for T.arnold system status test
+ to work on MacOS X.
+
+2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awktest.tar (testdir/T.expr): Fix test for unary plus.
+
+2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * REGRESS: Extract tests if necessary, set PATH to include '.'.
+ * regdir/beebe.tar (Makefile): Fix longwrds test to prefix
+ sort with LC_ALL=C.
+ * awktest.tar: Updated from fixed test suite, directory
+ it extracts is now called 'testdir' to match what's in top-level
+ REGRESS script.
+ * regdir: Removed, as Brian wants to keep the test suite in
+ the tar file.
+
+2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * FIXES, lib.c, run.c, makefile, main.c: Merge from Brian's tree.
+ * REGRESS: New file, from Brian.
+ * awktest.tar: Restored from Brian's tree.
+
+2018-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (UPLUS): New token. In the grammar, call op1()
+ with it.
+ * maketab.c (proc): Add entry for UPLUS.
+ * run.c (arith): Handle UPLUS.
+ * main.c (version): Updated.
+ * bugs-fixed/unary-plus.awk, bugs-fixed/unary-plus.bad,
+ bugs-fixed/unary-plus.ok: New files.
+
+2018-08-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * TODO: Updated.
+ * awk.1: Improve use of macros, add some additional explanation
+ in a few places, alphabetize list of variables.
+
+2018-08-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.h (Cell): Add new field `fmt' to track xFMT value used
+ for a string conversion.
+ [CONVC, CONVO]: New flag macros.
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/string-conv.awk, bugs-fixed/string-conv.bad,
+ bugs-fixed/string-conv.ok: New files.
+ * main.c (version): Updated.
+ * proto.h (flags2str): Add declaration.
+ * tran.c (setfval): Clear CONVC and CONVO flags and set vp->fmt
+ to NULL.
+ (setsval): Ditto. Add large comment and new code to manage
+ correct conversion of number to string based on various flags
+ and the value of vp->fmt. The idea is to not convert again
+ if xFMT is the same as before and we're doing the same conversion.
+ Otherwise, clear the old flags, set the new, and reconvert.
+ (flags2str): New function. For debug prints and for use from a debugger.
+
+2018-08-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ Fix filename conflicts in regdir where the only difference was
+ in letter case. This caused problems on Windows systems.
+
+ * regdir/Compare.T1: Renamed from regdir/Compare.T.
+ * regdir/t.delete0: Renamed from regdir/t.delete.
+ * regdir/t.getline1: Renamed from regdir/t.getline.
+ * regdir/t.redir1: Renamed from regdir/t.redir.
+ * regdir/t.split1: Renamed from regdir/t.split.
+ * regdir/t.sub0: Renamed from regdir/t.sub.
+ * regdir/REGRESS: Adjusted.
+
+2018-08-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ With scalpel, tweezers, magnifying glass and bated breath,
+ borrow code from the NetBSD version of nawk to fix the years-old
+ bug whereby decrementing the value of NF did not change the
+ record.
+
+ * lib.c (fldbld): Set donerec to 1 when done.
+ (setlastfld): New function.
+ * proto.h (setlastfld): Add declaration.
+ * run.c (copycell): Make code smarter about flags (from NetBSD code).
+ * tran.c (setfree): New function.
+ * tran.c (setfval): Normalize negative zero to positive zero.
+ If setting NF, clear donerec and call setlastfld().
+ (setsval): Remove call to save_old_OFS(). If setting OFS, call
+ recbld(). If setting NF, clear donerec and call setlastfld().
+
+ As part of the process, revert OFS-related changes of 2018-05-22:
+
+ * awk.h (saveOFS, saveOFSlen, save_old_OFS): Remove declarations.
+ * lib.c (recbld): Use *OFS instead of saveOFS.
+ * run.c (saveOFS, saveOFSlen, save_old_OFS): Remove.
+ * tran.c (syminit): Remove initialization of saveOFS and saveOFSlen.
+
+ General stuff that goes along with all this:
+
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/decr-NF.awk, bugs-fixed/decr-NF.bad,
+ bugs-fixed/decr-NF.ok: New files.
+ * main.c (version): Updated.
+ * regdir/README.TESTS: Fix awk book title.
+ * regdir/T.misc: Revise test to match fixed code.
+ * run.c (format): Increase size of buffer used for %a test. (Unrelated
+ to NF or OFS, but fixes a compiler complaint.)
+
+2018-06-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regdir/beebe.tar: Fix longwrds.ok so that the test will pass.
+ The file was incorrectly sorted.
+
+2018-06-06 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regdir/T.lilly: Fix the bug again in the second instance
+ of the code. Thanks to BWK for pointing this out.
+
+2018-05-31 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regdir/T.lilly: Fix a syntax error and ordering bug
+ in creating the 'foo' file.
+
+2018-05-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.1: Remove standalone 'awk' at the top of file, it messed up
+ the formatting. Arrange built-in variable list in alphabetical
+ order.
+
+2018-05-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (version): Add my email address and a date so that
+ users can tell this isn't straight BWK awk.
+ * README.md: Minor updates.
+ * TODO: Updated.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ Add POSIX-required formats %a and %A.
+
+ * run.c (format): Check for %a support in C library. If there,
+ allow %a and %A as valid formats.
+ * TODO: Updated.
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/a-format.awk, bugs-fixed/a-format.bad,
+ bugs-fixed/a-format.ok: New files.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * FIXES: Restored a line from a much earlier version that
+ apparently got lost when the dates were reordered.
+ * TODO: Updated.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * README.md: New file.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regdir/echo.c, regdir/time.c: Minor fixes to compile without
+ warning on current GCC / Linux.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * TODO: New file.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * makefile (gitadd, gitpush): Remove these targets. They
+ should not be automated and were incorrect for things that
+ would be done regularly.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ Fix nawk so that [[:blank:]] only matches space and tab instead
+ of any whitespace character, originally made May 10, 2018.
+ See bugs-fixed/space.awk.
+
+ This appears to have been a thinko on Brian's part.
+
+ * b.c (charclasses): Use xisblank() function for [[:blank:]].
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/space.awk, bugs-fixed/space.bad,
+ bugs-fixed/space.ok: New files.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * .gitignore: New file.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ Fix nawk to provide reasonable exit status for system(),
+ a la gawk, originally made March 12, 2016. See
+ bugs-fixed/system-status.awk.
+
+ * run.c (bltin): For FSYSTEM, use the macros defined for wait(2)
+ to produce a reasonable exit value, instead of doing a floating-point
+ division by 256.
+ * awk.1: Document the return status values.
+ * bugs-fixed/README: Updated.
+ * bugs-fixed/system-status.awk, bugs-fixed/system-status.bad,
+ bugs-fixed/system-status.ok: New files.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ Bug fix with respect to rebuilding a record, originally
+ made August 19, 2014. See bugs-fixed/ofs-rebuild.awk.
+
+ * awk.h (saveOFS, saveOFSlen): Declare new variables.
+ * lib.c (recbld): Use them when rebuilding the record.
+ * run.c (saveOFS, saveOFSlen): Define new variables.
+ (save_old_OFS): New function to save OFS aside.
+ * tran.c (syminit): Initialize saveOFS and saveOFSlen.
+ (setsval): If setting a field, call save_old_OFS().
+ * bugs-fixed/README, bugs-fixed/ofs-rebuild.awk,
+ bugs-fixed/ofs-rebuild.bad, bugs-fixed/ofs-rebuild.ok: New files.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * makefile (YACC): Use bison.
+
+2018-05-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * ChangeLog: Created.
+ * regdir: Created. Based on contents of awktest.a.
+ * .gitattributes: Created, to preserve CR LF in regdir/t.crlf.
+ * awktest.a: Removed.
+ * regdir/T.gawk, regdir/T.latin1: Updated from awktest.tar.
+ * awktest.tar: Removed.
diff --git a/FIXES b/FIXES
index c78aabc511f3..183eaedee47d 100644
--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,113 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
+May 29, 2019:
+ Fix check for command line arguments to no longer require that
+ first character after '=' not be another '='. Reverts change of
+ August 11, 1989. Thanks to GitHub user Jamie Landeg Jones for
+ pointing out the issue; from Issue #38.
+
+Apr 7, 2019:
+ Update awktest.tar(p.50) to use modern options to sort. Needed
+ for Android development. Thanks to GitHub user mohd-akram (Mohamed
+ Akram). From Comment #33.
+
+Mar 12, 2019:
+ Added very simplistic support for cross-compiling in the
+ makefile. We are NOT going to go in the direction of the
+ autotools, though. Thanks to GitHub user nee-san for
+ the basic change. (Merged from PR #34.)
+
+Mar 5, 2019:
+ Added support for POSIX-standard interval expressions (a.k.a.
+ bounds, a.k.a. repetition expressions) in regular expressions,
+ backported (via NetBSD) from Apple awk-24 (20070501).
+ Thanks to Martijn Dekker <martijn@inlv.org> for the port.
+ (Merged from PR #30.)
+
+Mar 3, 2019:
+ Merge PRs as follows:
+ #12: Avoid undefined behaviour when using ctype(3) functions in
+ relex(). Thanks to GitHub user iamleot.
+ #31: Make getline handle numeric strings, and update FIXES. Thanks
+ to GitHub user arnoldrobbins
+ #32: maketab: support build systems with read-only source. Thanks
+ to GitHub user enh.
+
+Jan 25, 2019:
+ Make getline handle numeric strings properly in all cases.
+ (Thanks, Arnold.)
+
+Jan 21, 2019:
+ Merged a number of small fixes from GitHub pull requests.
+ Thanks to GitHub users Arnold Robbins (arnoldrobbins),
+ Cody Mello (melloc) and Christoph Junghans (junghans).
+ PR numbers: 13-21, 23, 24, 27.
+
+Oct 25, 2018:
+ Added test in maketab.c to prevent generating a proctab entry
+ for YYSTYPE_IS_DEFINED. It was harmless but some gcc settings
+ generated a warning message. Thanks to Nan Xiao for report.
+
+Aug 27, 2018:
+ Disallow '$' in printf formats; arguments evaluated in order
+ and printed in order.
+
+ Added some casts to silence warnings on debugging printfs.
+ (Thanks, Arnold.)
+
+Aug 23, 2018:
+ A long list of fixes courtesy of Arnold Robbins,
+ to whom profound thanks.
+
+ 1. ofs-rebuild: OFS value used to rebuild the record was incorrect.
+ Fixed August 19, 2014. Revised fix August 2018.
+
+ 2. system-status: Instead of a floating-point division by 256, use
+ the wait(2) macros to create a reasonable exit status.
+ Fixed March 12, 2016.
+
+ 3. space: Use provided xisblank() function instead of isspace() for
+ matching [[:blank:]].
+
+ 4. a-format: Add POSIX standard %a and %A to supported formats. Check
+ at runtime that this format is available.
+
+ 5. decr-NF: Decrementing NF did not change $0. This is a decades-old
+ bug. There are interactions with the old and new value of OFS as well.
+ Most of the fix came from the NetBSD awk.
+
+ 6. string-conv: String conversions of scalars were sticky. Once a
+ conversion to string happened, even with OFMT, that value was used until
+ a new numeric value was assigned, even if OFMT differed from CONVFMT,
+ and also if CONVFMT changed.
+
+ 7. unary-plus: Unary plus on a string constant returned the string.
+ Instead, it should convert the value to numeric and give that value.
+
+ Also added Arnold's tests for these to awktest.tar as T.arnold.
+
+Aug 15, 2018:
+ fixed mangled awktest.tar (thanks, Arnold), posted all
+ current (very minor) fixes to github / onetrueawk
+
+Jun 7, 2018:
+ (yes, a long layoff)
+ Updated some broken tests (beebe.tar, T.lilly)
+ [thanks to Arnold Robbins]
+
+Mar 26, 2015:
+ buffer overflow in error reporting; thanks to tobias ulmer
+ and john-mark gurney for spotting it and the fix.
+
+Feb 4, 2013:
+ cleaned up a handful of tests that didn't seem to actually
+ test for correct behavior: T.latin1, T.gawk.
+
+Jan 5, 2013:
+ added ,NULL initializer to static Cells in run.c; not really
+ needed but cleaner. Thanks to Michael Bombardieri.
+
Dec 20, 2012:
fiddled makefile to get correct yacc and bison flags. pick yacc
(linux) or bison (mac) as necessary.
@@ -493,6 +600,8 @@ May 12, 1998:
Mar 12, 1998:
added -V to print version number and die.
+[notify dave kerns, dkerns@dacsoup.ih.lucent.com]
+
Feb 11, 1998:
subtle silent bug in lex.c: if the program ended with a number
longer than 1 digit, part of the input would be pushed back and
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000000..07dfd7b73b11
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,23 @@
+/****************************************************************
+Copyright (C) Lucent Technologies 1997
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name Lucent Technologies or any of
+its entities not be used in advertising or publicity pertaining
+to distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+****************************************************************/
diff --git a/REGRESS b/REGRESS
new file mode 100755
index 000000000000..7d3ded69d536
--- /dev/null
+++ b/REGRESS
@@ -0,0 +1,35 @@
+#! /bin/sh
+# Build awk if needed, unpack the test suite if needed, then run its REGRESS.
+case `uname` in
+CYGWIN*) EXE=a.exe ;; # Cygwin's uname prints CYGWIN_NT-<version>, so match by prefix
+*) EXE=a.out ;;
+esac
+# Build the awk binary first if it is not already present.
+if [ ! -f $EXE ]
+then
+ make || exit 1
+fi
+# Make sure the test directory exists, extracting it from the tarball if necessary.
+if [ -d testdir ]
+then
+ true # do nothing
+elif [ -f awktest.tar ]
+then
+ echo extracting testdir
+ tar -xpf awktest.tar
+else
+ echo $0: No testdir directory and no awktest.tar to extract it from! >&2
+ exit 1
+fi
+# Run from inside testdir with '.' first on PATH so the bare REGRESS below is found there.
+cd testdir || exit 1
+pwd
+PATH=.:$PATH
+export PATH
+if (ulimit -c unlimited > /dev/null 2>&1)
+then
+ # Workaround broken default on MacOS X
+ ulimit -c unlimited
+fi
+
+REGRESS
diff --git a/awk.1 b/awk.1
index 6119613c1aae..18e99ad39496 100644
--- a/awk.1
+++ b/awk.1
@@ -7,7 +7,6 @@
.fi
.ft 1
..
-awk
.TH AWK 1
.CT 1 files prog_other
.SH NAME
@@ -36,7 +35,7 @@ awk \- pattern-directed scanning and processing language
scans each input
.I file
for lines that match any of a set of patterns specified literally in
-.IR prog
+.I prog
or in one or more files
specified as
.B \-f
@@ -53,7 +52,7 @@ The file name
.B \-
means the standard input.
Any
-.IR file
+.I file
of the form
.I var=value
is treated as an assignment, not a filename,
@@ -70,12 +69,12 @@ any number of
options may be present.
The
.B \-F
-.IR fs
+.I fs
option defines the input field separator to be the regular expression
-.IR fs.
+.IR fs .
.PP
An input line is normally made up of fields separated by white space,
-or by regular expression
+or by the regular expression
.BR FS .
The fields are denoted
.BR $1 ,
@@ -87,7 +86,7 @@ If
.BR FS
is null, the input line is split into one field per character.
.PP
-A pattern-action statement has the form
+A pattern-action statement has the form:
.IP
.IB pattern " { " action " }
.PP
@@ -101,7 +100,7 @@ An action is a sequence of statements.
A statement can be one of the following:
.PP
.EX
-.ta \w'\f(CWdelete array[expression]'u
+.ta \w'\f(CWdelete array[expression]\fR'u
.RS
.nf
.ft CW
@@ -145,7 +144,7 @@ The operators
are also available in expressions.
Variables may be scalars, array elements
(denoted
-.IB x [ i ] )
+.IB x [ i ] \fR)
or fields.
Variables are initialized to the null string.
Array subscripts may be any string,
@@ -161,11 +160,11 @@ The
.B print
statement prints its arguments on the standard output
(or on a file if
-.BI > file
+.BI > " file
or
-.BI >> file
+.BI >> " file
is present or on a pipe if
-.BI | cmd
+.BI | " cmd
is present), separated by the current output field separator,
and terminated by the output record separator.
.I file
@@ -176,9 +175,10 @@ identical string values in different statements denote
the same open file.
The
.B printf
-statement formats its expression list according to the format
+statement formats its expression list according to the
+.I format
(see
-.IR printf (3)) .
+.IR printf (3)).
The built-in function
.BI close( expr )
closes the file or pipe
@@ -189,13 +189,13 @@ flushes any buffered output for the file or pipe
.IR expr .
.PP
The mathematical functions
+.BR atan2 ,
+.BR cos ,
.BR exp ,
.BR log ,
-.BR sqrt ,
.BR sin ,
-.BR cos ,
and
-.BR atan2
+.B sqrt
are built in.
Other built-in functions:
.TF length
@@ -203,7 +203,8 @@ Other built-in functions:
.B length
the length of its argument
taken as a string,
-or of
+number of elements in an array for an array argument,
+or length of
.B $0
if no argument.
.TP
@@ -218,14 +219,18 @@ and returns the previous seed.
.B int
truncates to an integer value
.TP
-.BI substr( s , " m" , " n\fB)
+\fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR
the
.IR n -character
substring of
.I s
that begins at position
-.IR m
+.I m
counted from 1.
+If no
+.I n
+is given,
+use the rest of the string.
.TP
.BI index( s , " t" )
the position in
@@ -246,14 +251,14 @@ and
.B RLENGTH
are set to the position and length of the matched string.
.TP
-.BI split( s , " a" , " fs\fB)
+\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIfs\^\fR]\fB)\fR
splits the string
.I s
into array elements
-.IB a [1] ,
-.IB a [2] ,
+.IB a [1] \fR,
+.IB a [2] \fR,
\&...,
-.IB a [ n ] ,
+.IB a [ n ] \fR,
and returns
.IR n .
The separation is done with the regular expression
@@ -266,7 +271,7 @@ is not given.
An empty string as field separator splits the string
into one array element per character.
.TP
-.BI sub( r , " t" , " s\fB)
+\fBsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB)
substitutes
.I t
for the first occurrence of the regular expression
@@ -279,7 +284,7 @@ is not given,
.B $0
is used.
.TP
-.B gsub
+\fBgsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB)
same as
.B sub
except that all occurrences of the regular expression
@@ -289,18 +294,28 @@ and
.B gsub
return the number of replacements.
.TP
-.BI sprintf( fmt , " expr" , " ...\fB )
+.BI sprintf( fmt , " expr" , " ...\fB)
the string resulting from formatting
.I expr ...
according to the
.IR printf (3)
format
-.I fmt
+.IR fmt .
.TP
.BI system( cmd )
executes
.I cmd
-and returns its exit status
+and returns its exit status. This will be \-1 upon error,
+.IR cmd 's
+exit status upon a normal exit,
+256 +
+.I sig
+upon death-by-signal, where
+.I sig
+is the number of the murdering signal,
+or 512 +
+.I sig
+if there was a core dump.
.TP
.BI tolower( str )
returns a copy of
@@ -321,7 +336,7 @@ sets
.B $0
to the next input record from the current input file;
.B getline
-.BI < file
+.BI < " file
sets
.B $0
to the next record from
@@ -359,7 +374,7 @@ Isolated regular expressions
in a pattern apply to the entire line.
Regular expressions may also occur in
relational expressions, using the operators
-.BR ~
+.B ~
and
.BR !~ .
.BI / re /
@@ -383,8 +398,12 @@ A relational expression is one of the following:
.br
.BI ( expr , expr,... ") in " array-name
.PP
-where a relop is any of the six relational operators in C,
-and a matchop is either
+where a
+.I relop
+is any of the six relational operators in C,
+and a
+.I matchop
+is either
.B ~
(matches)
or
@@ -405,57 +424,68 @@ and after the last.
and
.B END
do not combine with other patterns.
+They may appear multiple times in a program and execute
+in the order they are read by
+.IR awk .
.PP
Variable names with special meanings:
.TF FILENAME
.TP
+.B ARGC
+argument count, assignable.
+.TP
+.B ARGV
+argument array, assignable;
+non-null members are taken as filenames.
+.TP
.B CONVFMT
conversion format used when converting numbers
(default
-.BR "%.6g" )
+.BR "%.6g" ).
+.TP
+.B ENVIRON
+array of environment variables; subscripts are names.
+.TP
+.B FILENAME
+the name of the current input file.
+.TP
+.B FNR
+ordinal number of the current record in the current file.
.TP
.B FS
regular expression used to separate fields; also settable
by option
-.BI \-F fs.
+.BI \-F fs\fR.
.TP
.BR NF
-number of fields in the current record
+number of fields in the current record.
.TP
.B NR
-ordinal number of the current record
-.TP
-.B FNR
-ordinal number of the current record in the current file
-.TP
-.B FILENAME
-the name of the current input file
+ordinal number of the current record.
.TP
-.B RS
-input record separator (default newline)
+.B OFMT
+output format for numbers (default
+.BR "%.6g" ).
.TP
.B OFS
-output field separator (default blank)
+output field separator (default space).
.TP
.B ORS
-output record separator (default newline)
+output record separator (default newline).
.TP
-.B OFMT
-output format for numbers (default
-.BR "%.6g" )
-.TP
-.B SUBSEP
-separates multiple subscripts (default 034)
+.B RLENGTH
+the length of a string matched by
+.BR match .
.TP
-.B ARGC
-argument count, assignable
+.B RS
+input record separator (default newline).
.TP
-.B ARGV
-argument array, assignable;
-non-null members are taken as filenames
+.B RSTART
+the start position of a string matched by
+.BR match .
.TP
-.B ENVIRON
-array of environment variables; subscripts are names.
+.B SUBSEP
+separates multiple subscripts (default 034).
.PD
.PP
Functions may be defined (at the position of a pattern-action statement) thus:
@@ -486,7 +516,7 @@ BEGIN { FS = ",[ \et]*|[ \et]+" }
.EE
.ns
.IP
-Same, with input fields separated by comma and/or blanks and tabs.
+Same, with input fields separated by comma and/or spaces and tabs.
.PP
.EX
.nf
@@ -512,13 +542,13 @@ BEGIN { # Simulate echo(1)
.fi
.EE
.SH SEE ALSO
+.IR grep (1),
.IR lex (1),
.IR sed (1)
.br
A. V. Aho, B. W. Kernighan, P. J. Weinberger,
-.I
-The AWK Programming Language,
-Addison-Wesley, 1988. ISBN 0-201-07981-X
+.IR "The AWK Programming Language" ,
+Addison-Wesley, 1988. ISBN 0-201-07981-X.
.SH BUGS
There are no explicit conversions between numbers and strings.
To force an expression to be treated as a number add 0 to it;
@@ -527,3 +557,5 @@ to force it to be treated as a string concatenate
.br
The scope rules for variables in functions are a botch;
the syntax is worse.
+.br
+Only eight-bit character sets are handled correctly.
diff --git a/awk.h b/awk.h
index a36cdb151e75..ddf246687969 100644
--- a/awk.h
+++ b/awk.h
@@ -81,7 +81,8 @@ typedef struct Cell {
char *nval; /* name, for variables only */
char *sval; /* string value */
Awkfloat fval; /* value as number */
- int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */
+ int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */
+ char *fmt; /* CONVFMT/OFMT value used to convert from number */
struct Cell *cnext; /* ptr to next if chained */
} Cell;
@@ -96,9 +97,14 @@ extern Array *symtab;
extern Cell *nrloc; /* NR */
extern Cell *fnrloc; /* FNR */
+extern Cell *fsloc; /* FS */
extern Cell *nfloc; /* NF */
+extern Cell *ofsloc; /* OFS */
+extern Cell *orsloc; /* ORS */
+extern Cell *rsloc; /* RS */
extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */
+extern Cell *subseploc; /* SUBSEP */
/* Cell.tval values: */
#define NUM 01 /* number value is valid */
@@ -109,6 +115,8 @@ extern Cell *rlengthloc; /* RLENGTH */
#define FCN 040 /* this is a function name */
#define FLD 0100 /* this is a field $1, $2, ... */
#define REC 0200 /* this is $0 */
+#define CONVC 0400 /* string was converted from number via CONVFMT */
+#define CONVO 01000 /* string was converted from number via OFMT */
/* function types */
diff --git a/awkgram.y b/awkgram.y
index 5b5c461b3eed..e4abeeddcb6a 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -86,7 +86,7 @@ Node *arglist = 0; /* list of args for current function */
%left CAT
%left '+' '-'
%left '*' '/' '%'
-%left NOT UMINUS
+%left NOT UMINUS UPLUS
%right POWER
%right DECR INCR
%left INDIRECT
@@ -357,7 +357,7 @@ term:
| term '%' term { $$ = op2(MOD, $1, $3); }
| term POWER term { $$ = op2(POWER, $1, $3); }
| '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
- | '+' term %prec UMINUS { $$ = $2; }
+ | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); }
| NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
| BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
| BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
diff --git a/b.c b/b.c
index 5ccb4b1e5d0f..37ea0a5bb2a7 100644
--- a/b.c
+++ b/b.c
@@ -27,6 +27,7 @@ THIS SOFTWARE.
#define DEBUG
#include <ctype.h>
+#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -65,6 +66,11 @@ int rlxval;
static uschar *rlxstr;
static uschar *prestr; /* current position in current re */
static uschar *lastre; /* origin of last re */
+static uschar *lastatom; /* origin of last Atom */
+static uschar *starttok;
+static uschar *basestr; /* starts with original, replaced during
+ repetition processing */
+static uschar *firstbasestr;
static int setcnt;
static int poscnt;
@@ -82,11 +88,11 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
fa *pfa;
static int now = 1;
- if (setvec == NULL) { /* first time through any RE */
+ if (setvec == 0) { /* first time through any RE */
maxsetvec = MAXLIN;
setvec = (int *) malloc(maxsetvec * sizeof(int));
tmpset = (int *) malloc(maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("out of space initializing makedfa");
}
@@ -124,6 +130,8 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
Node *p, *p1;
fa *f;
+ firstbasestr = (uschar *) s;
+ basestr = firstbasestr;
p = reparse(s);
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
/* put ALL STAR in front of reg. exp. */
@@ -137,7 +145,7 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
f->accept = poscnt-1; /* penter has computed number of positions in re */
cfoll(f, p1); /* set up follow sets */
freetr(p1);
- if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
+ if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
overflo("out of space in makedfa");
if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
overflo("out of space in makedfa");
@@ -145,6 +153,10 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
f->initstat = makeinit(f, anchor);
f->anchor = anchor;
f->restr = (uschar *) tostring(s);
+ if (firstbasestr != basestr) {
+ if (basestr)
+ xfree(basestr);
+ }
return f;
}
@@ -157,7 +169,7 @@ int makeinit(fa *f, int anchor)
f->reset = 0;
k = *(f->re[0].lfollow);
xfree(f->posns[2]);
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
overflo("out of space in makeinit");
for (i=0; i <= k; i++) {
(f->posns[2])[i] = (f->re[0].lfollow)[i];
@@ -290,11 +302,11 @@ char *cclenter(const char *argp) /* add a character class */
int i, c, c2;
uschar *p = (uschar *) argp;
uschar *op, *bp;
- static uschar *buf = NULL;
+ static uschar *buf = 0;
static int bufsz = 100;
op = p;
- if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
FATAL("out of space for character class [%.10s...] 1", p);
bp = buf;
for (i = 0; (c = *p++) != 0; ) {
@@ -350,14 +362,14 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo
maxsetvec *= 4;
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("out of space in cfoll()");
}
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
- if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
+ if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
overflo("out of space building follow set");
f->re[info(v)].lfollow = p;
*p = setcnt;
@@ -391,7 +403,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
maxsetvec *= 4;
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("out of space in first()");
}
if (type(p) == EMPTYRE) {
@@ -531,7 +543,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
overflo("out of space in pmatch");
for (i = 0; i <= k; i++)
(f->posns[2])[i] = (f->posns[0])[i];
@@ -588,7 +600,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
for (i = 2; i <= f->curstat; i++)
xfree(f->posns[i]);
k = *f->posns[0];
- if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+ if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
overflo("out of state space");
for (i = 0; i <= k; i++)
(f->posns[2])[i] = (f->posns[0])[i];
@@ -628,9 +640,11 @@ Node *regexp(void) /* top-level parse of reg expr */
Node *primary(void)
{
Node *np;
+ int savelastatom;
switch (rtok) {
case CHAR:
+ lastatom = starttok;
np = op2(CHAR, NIL, itonp(rlxval));
rtok = relex();
return (unary(np));
@@ -639,16 +653,19 @@ Node *primary(void)
return (unary(op2(ALL, NIL, NIL)));
case EMPTYRE:
rtok = relex();
- return (unary(op2(ALL, NIL, NIL)));
+ return (unary(op2(EMPTYRE, NIL, NIL)));
case DOT:
+ lastatom = starttok;
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
+ lastatom = starttok;
rtok = relex();
return (unary(np));
case NCCL:
np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
+ lastatom = starttok;
rtok = relex();
return (unary(np));
case '^':
@@ -658,6 +675,8 @@ Node *primary(void)
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
case '(':
+ lastatom = starttok;
+ savelastatom = starttok - basestr; /* Retain over recursion */
rtok = relex();
if (rtok == ')') { /* special pleading for () */
rtok = relex();
@@ -665,6 +684,7 @@ Node *primary(void)
}
np = regexp();
if (rtok == ')') {
+ lastatom = basestr + savelastatom; /* Restore */
rtok = relex();
return (unary(np));
}
@@ -679,8 +699,12 @@ Node *primary(void)
Node *concat(Node *np)
{
switch (rtok) {
- case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
+ case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
return (concat(op2(CAT, np, primary())));
+ case EMPTYRE:
+ rtok = relex();
+ return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
+ primary())));
}
return (np);
}
@@ -749,7 +773,7 @@ struct charclass {
{ "alnum", 5, isalnum },
{ "alpha", 5, isalpha },
#ifndef HAS_ISBLANK
- { "blank", 5, isspace }, /* was isblank */
+ { "blank", 5, xisblank },
#else
{ "blank", 5, isblank },
#endif
@@ -765,16 +789,132 @@ struct charclass {
{ NULL, 0, NULL },
};
+#define REPEAT_SIMPLE 0
+#define REPEAT_PLUS_APPENDED 1
+#define REPEAT_WITH_Q 2
+#define REPEAT_ZERO 3
+
+static int
+replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
+ int atomlen, int firstnum, int secondnum, int special_case)
+{
+ int i, j;
+ uschar *buf = 0;
+ int ret = 1;
+ int init_q = (firstnum==0); /* first added char will be ? */
+ int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
+ int prefix_length = reptok - basestr; /* prefix includes first rep */
+ int suffix_length = strlen((char *) reptok) - reptoklen; /* string after rep specifier */
+ int size = prefix_length + suffix_length;
+
+ if (firstnum > 1) { /* add room for reps 2 through firstnum */
+ size += atomlen*(firstnum-1);
+ }
+
+ /* Adjust size of buffer for special cases */
+ if (special_case == REPEAT_PLUS_APPENDED) {
+ size++; /* for the final + */
+ } else if (special_case == REPEAT_WITH_Q) {
+ size += init_q + (atomlen+1)* n_q_reps;
+ } else if (special_case == REPEAT_ZERO) {
+ size += 2; /* just a null ERE: () */
+ }
+ if ((buf = (uschar *) malloc(size+1)) == NULL)
+ FATAL("out of space in reg expr %.10s..", lastre);
+ memcpy(buf, basestr, prefix_length); /* copy prefix */
+ j = prefix_length;
+ if (special_case == REPEAT_ZERO) {
+ j -= atomlen;
+ buf[j++] = '(';
+ buf[j++] = ')';
+ }
+ for (i=1; i < firstnum; i++) { /* copy x reps */
+ memcpy(&buf[j], atom, atomlen);
+ j += atomlen;
+ }
+ if (special_case == REPEAT_PLUS_APPENDED) {
+ buf[j++] = '+';
+ } else if (special_case == REPEAT_WITH_Q) {
+ if (init_q) buf[j++] = '?';
+ for (i=0; i < n_q_reps; i++) { /* copy x? reps */
+ memcpy(&buf[j], atom, atomlen);
+ j += atomlen;
+ buf[j++] = '?';
+ }
+ }
+ memcpy(&buf[j], reptok+reptoklen, suffix_length);
+ if (special_case == REPEAT_ZERO) {
+ buf[j+suffix_length] = '\0';
+ } else {
+ buf[size] = '\0';
+ }
+ /* free old basestr */
+ if (firstbasestr != basestr) {
+ if (basestr)
+ xfree(basestr);
+ }
+ basestr = buf;
+ prestr = buf + prefix_length;
+ if (special_case == REPEAT_ZERO) {
+ prestr -= atomlen;
+ ret++;
+ }
+ return ret;
+}
+
+static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
+ int atomlen, int firstnum, int secondnum)
+{
+ /*
+ In general, the repetition specifier or "bound" is replaced here
+ by an equivalent ERE string, repeating the immediately previous atom
+ and appending ? and + as needed. Note that the first copy of the
+ atom is left in place, except in the special_case of a zero-repeat
+ (i.e., {0}).
+ */
+ if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
+ if (firstnum < 2) {
+ /* 0 or 1: should be handled before you get here */
+ FATAL("internal error");
+ } else {
+ return replace_repeat(reptok, reptoklen, atom, atomlen,
+ firstnum, secondnum, REPEAT_PLUS_APPENDED);
+ }
+ } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
+ if (firstnum == 0) { /* {0} or {0,0} */
+ /* This case is unusual because the resulting
+ replacement string might actually be SMALLER than
+ the original ERE */
+ return replace_repeat(reptok, reptoklen, atom, atomlen,
+ firstnum, secondnum, REPEAT_ZERO);
+ } else { /* (firstnum >= 1) */
+ return replace_repeat(reptok, reptoklen, atom, atomlen,
+ firstnum, secondnum, REPEAT_SIMPLE);
+ }
+ } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
+ /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
+ return replace_repeat(reptok, reptoklen, atom, atomlen,
+ firstnum, secondnum, REPEAT_WITH_Q);
+ } else { /* Error - shouldn't be here (n>m) */
+ FATAL("internal error");
+ }
+ return 0;
+}
int relex(void) /* lexical analyzer for reparse */
{
int c, n;
int cflag;
- static uschar *buf = NULL;
+ static uschar *buf = 0;
static int bufsz = 100;
uschar *bp;
struct charclass *cc;
int i;
+ int num, m, commafound, digitfound;
+ const uschar *startreptok;
+
+rescan:
+ starttok = prestr;
switch (c = *prestr++) {
case '|': return OR;
@@ -795,7 +935,7 @@ int relex(void) /* lexical analyzer for reparse */
rlxval = c;
return CHAR;
case '[':
- if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
FATAL("out of space in reg expr %.10s..", lastre);
bp = buf;
if (*prestr == '^') {
@@ -823,7 +963,15 @@ int relex(void) /* lexical analyzer for reparse */
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
prestr[2 + cc->cc_namelen] == ']') {
prestr += cc->cc_namelen + 3;
- for (i = 0; i < NCHARS; i++) {
+ /*
+ * BUG: We begin at 1, instead of 0, since we
+ * would otherwise prematurely terminate the
+ * string for classes like [[:cntrl:]]. This
+ * means that we can't match the NUL character,
+ * not without first adapting the entire
+ * program to track each string's length.
+ */
+ for (i = 1; i <= UCHAR_MAX; i++) {
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
FATAL("out of space for reg expr %.10s...", lastre);
if (cc->cc_func(i)) {
@@ -833,6 +981,40 @@ int relex(void) /* lexical analyzer for reparse */
}
} else
*bp++ = c;
+ } else if (c == '[' && *prestr == '.') {
+ char collate_char;
+ prestr++;
+ collate_char = *prestr++;
+ if (*prestr == '.' && prestr[1] == ']') {
+ prestr += 2;
+ /* Found it: map via locale TBD: for
+ now, simply return this char. This
+ is sufficient to pass conformance
+ test awk.ex 156
+ */
+ if (*prestr == ']') {
+ prestr++;
+ rlxval = collate_char;
+ return CHAR;
+ }
+ }
+ } else if (c == '[' && *prestr == '=') {
+ char equiv_char;
+ prestr++;
+ equiv_char = *prestr++;
+ if (*prestr == '=' && prestr[1] == ']') {
+ prestr += 2;
+ /* Found it: map via locale TBD: for now
+ simply return this char. This is
+ sufficient to pass conformance test
+ awk.ex 156
+ */
+ if (*prestr == ']') {
+ prestr++;
+ rlxval = equiv_char;
+ return CHAR;
+ }
+ }
} else if (c == '\0') {
FATAL("nonterminated character class %.20s", lastre);
} else if (bp == buf) { /* 1st char is special */
@@ -847,6 +1029,75 @@ int relex(void) /* lexical analyzer for reparse */
} else
*bp++ = c;
}
+ break;
+ case '{':
+ if (isdigit(*(prestr))) {
+ num = 0; /* Process as a repetition */
+ n = -1; m = -1;
+ commafound = 0;
+ digitfound = 0;
+ startreptok = prestr-1;
+ /* Remember start of previous atom here ? */
+ } else { /* just a { char, not a repetition */
+ rlxval = c;
+ return CHAR;
+ }
+ for (; ; ) {
+ if ((c = *prestr++) == '}') {
+ if (commafound) {
+ if (digitfound) { /* {n,m} */
+ m = num;
+ if (m<n)
+ FATAL("illegal repetition expression: class %.20s",
+ lastre);
+ if ((n==0) && (m==1)) {
+ return QUEST;
+ }
+ } else { /* {n,} */
+ if (n==0) return STAR;
+ if (n==1) return PLUS;
+ }
+ } else {
+ if (digitfound) { /* {n} same as {n,n} */
+ n = num;
+ m = num;
+ } else { /* {} */
+ FATAL("illegal repetition expression: class %.20s",
+ lastre);
+ }
+ }
+ if (repeat(starttok, prestr-starttok, lastatom,
+ startreptok - lastatom, n, m) > 0) {
+ if ((n==0) && (m==0)) {
+ return EMPTYRE;
+ }
+ /* must rescan input for next token */
+ goto rescan;
+ }
+ /* Failed to replace: eat up {...} characters
+ and treat like just PLUS */
+ return PLUS;
+ } else if (c == '\0') {
+ FATAL("nonterminated character class %.20s",
+ lastre);
+ } else if (isdigit(c)) {
+ num = 10 * num + c - '0';
+ digitfound = 1;
+ } else if (c == ',') {
+ if (commafound)
+ FATAL("illegal repetition expression: class %.20s",
+ lastre);
+ /* looking for {n,} or {n,m} */
+ commafound = 1;
+ n = num;
+ digitfound = 0; /* reset */
+ num = 0;
+ } else {
+ FATAL("illegal repetition expression: class %.20s",
+ lastre);
+ }
+ }
+ break;
}
}
@@ -860,7 +1111,7 @@ int cgoto(fa *f, int s, int c)
maxsetvec *= 4;
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("out of space in cgoto()");
}
for (i = 0; i <= f->accept; i++)
@@ -882,7 +1133,7 @@ int cgoto(fa *f, int s, int c)
maxsetvec *= 4;
setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- if (setvec == NULL || tmpset == NULL)
+ if (setvec == 0 || tmpset == 0)
overflo("cgoto overflow");
}
if (setvec[q[j]] == 0) {
@@ -925,7 +1176,7 @@ int cgoto(fa *f, int s, int c)
for (i = 0; i < NCHARS; i++)
f->gototab[f->curstat][i] = 0;
xfree(f->posns[f->curstat]);
- if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
+ if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
overflo("out of space in cgoto");
f->posns[f->curstat] = p;
diff --git a/bugs-fixed/README b/bugs-fixed/README
new file mode 100644
index 000000000000..2f27c1039873
--- /dev/null
+++ b/bugs-fixed/README
@@ -0,0 +1,57 @@
+List of bugs fixed.
+
+1. ofs-rebuild: OFS value used to rebuild the record was incorrect.
+Fixed August 19, 2014. Revised fix August 2018.
+
+2. system-status: Instead of a floating-point division by 256, use
+the wait(2) macros to create a reasonable exit status. Fixed March 12, 2016.
+
+3. space: Use provided xisblank() function instead of isspace() for
+matching [[:blank:]].
+
+4. a-format: Add POSIX standard %a and %A to supported formats. Check
+at runtime that this format is available.
+
+5. decr-NF: Decrementing NF did not change $0. This is a decades-old
+bug. There are interactions with the old and new value of OFS as well.
+Most of the fix came from the NetBSD awk.
+
+6. string-conv: String conversions of scalars were sticky. Once a
+conversion to string happened, even with OFMT, that value was used until
+a new numeric value was assigned, even if OFMT differed from CONVFMT,
+and also if CONVFMT changed.
+
+7. unary-plus: Unary plus on a string constant returned the string.
+Instead, it should convert the value to numeric and give that value.
+
+8. concat-assign-same: Concatenation previously evaluated both sides of the
+expression before doing its work, which, since assign() evaluates to the cell
+being assigned to, meant that expressions like "print (a = 1) (a = 2)" would
+print "22" rather than "12".
+
+9. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+10. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
+to with sprintf(), which meant that some conversions could write past the
+end.
+
+12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
+FS, RS, OFS, or ORS were set to a numeric value, then their string values
+wouldn't always be generated before being needed.
+
+13. subsep-overflow: The length of SUBSEP needs to be rechecked after
+calling execute(), in case SUBSEP itself has been changed.
+
+14. split-fs-from-array: If the third argument to split() comes from the
+array passed as the second argument, then split() would previously read
+from the freed memory and possibly produce incorrect results (depending
+on the system's malloc()/free() behaviour.)
+
+15. getline-numeric: The `getline xx < file' syntax did not check if
+values were numeric, in discordance with POSIX. Test case adapted from
+one posted by Ben Bacarisse <ben.usenet@bsb.me.uk> in comp.lang.awk,
+January 2019.
diff --git a/bugs-fixed/a-format.awk b/bugs-fixed/a-format.awk
new file mode 100644
index 000000000000..5b7929ee3eea
--- /dev/null
+++ b/bugs-fixed/a-format.awk
@@ -0,0 +1,3 @@
+BEGIN {
+ printf("%a\n", 42)
+}
diff --git a/bugs-fixed/a-format.bad b/bugs-fixed/a-format.bad
new file mode 100644
index 000000000000..1281825b1111
--- /dev/null
+++ b/bugs-fixed/a-format.bad
@@ -0,0 +1,3 @@
+nawk: weird printf conversion %a
+ source line number 2
+%a42
diff --git a/bugs-fixed/a-format.ok b/bugs-fixed/a-format.ok
new file mode 100644
index 000000000000..e421e2d01ba6
--- /dev/null
+++ b/bugs-fixed/a-format.ok
@@ -0,0 +1 @@
+0x1.5p+5
diff --git a/bugs-fixed/concat-assign-same.awk b/bugs-fixed/concat-assign-same.awk
new file mode 100644
index 000000000000..ed19f35ca835
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5);
+ print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5);
+}
diff --git a/bugs-fixed/concat-assign-same.bad b/bugs-fixed/concat-assign-same.bad
new file mode 100644
index 000000000000..294725b28a97
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.bad
@@ -0,0 +1,2 @@
+22345
+1 2 3 4 5
diff --git a/bugs-fixed/concat-assign-same.ok b/bugs-fixed/concat-assign-same.ok
new file mode 100644
index 000000000000..447505259d02
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.ok
@@ -0,0 +1,2 @@
+12345
+1 2 3 4 5
diff --git a/bugs-fixed/decr-NF.awk b/bugs-fixed/decr-NF.awk
new file mode 100644
index 000000000000..7474991d196e
--- /dev/null
+++ b/bugs-fixed/decr-NF.awk
@@ -0,0 +1,11 @@
+BEGIN {
+ $0 = "a b c d e f"
+ print NF
+ OFS = ":"
+ NF--
+ print $0
+ print NF
+ NF++
+ print $0
+ print NF
+}
diff --git a/bugs-fixed/decr-NF.bad b/bugs-fixed/decr-NF.bad
new file mode 100644
index 000000000000..b634e065954c
--- /dev/null
+++ b/bugs-fixed/decr-NF.bad
@@ -0,0 +1,5 @@
+6
+a b c d e f
+5
+a b c d e f
+6
diff --git a/bugs-fixed/decr-NF.ok b/bugs-fixed/decr-NF.ok
new file mode 100644
index 000000000000..3359cf2312d1
--- /dev/null
+++ b/bugs-fixed/decr-NF.ok
@@ -0,0 +1,5 @@
+6
+a:b:c:d:e
+5
+a:b:c:d:e:
+6
diff --git a/bugs-fixed/fmt-overflow.awk b/bugs-fixed/fmt-overflow.awk
new file mode 100644
index 000000000000..bf5877e4abac
--- /dev/null
+++ b/bugs-fixed/fmt-overflow.awk
@@ -0,0 +1 @@
+BEGIN { OFMT = "%.1000f"; print 1.25; }
diff --git a/bugs-fixed/fmt-overflow.ok b/bugs-fixed/fmt-overflow.ok
new file mode 100644
index 000000000000..5f7449e68073
--- /dev/null
+++ b/bugs-fixed/fmt-overflow.ok
@@ -0,0 +1 @@
+1.2500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
diff --git a/bugs-fixed/fs-overflow.awk b/bugs-fixed/fs-overflow.awk
new file mode 100644
index 000000000000..be10f5a46f0d
--- /dev/null
+++ b/bugs-fixed/fs-overflow.awk
@@ -0,0 +1,13 @@
+function foo() {
+ a = "";
+ for (i = 0; i < 10000; i++) {
+ a = a "c";
+ }
+ return a;
+}
+
+BEGIN {
+ FS = foo();
+ $0="foo";
+ print $1;
+}
diff --git a/bugs-fixed/getline-numeric.awk b/bugs-fixed/getline-numeric.awk
new file mode 100644
index 000000000000..5571a9589a3d
--- /dev/null
+++ b/bugs-fixed/getline-numeric.awk
@@ -0,0 +1,6 @@
+{
+ print $0, ($0 <= 50 ? "<=" : ">"), 50
+ getline dd < ARGV[1]
+ print dd, (dd <= 50 ? "<=" : ">"), 50
+ if (dd == $0) print "same"
+}
diff --git a/bugs-fixed/getline-numeric.bad b/bugs-fixed/getline-numeric.bad
new file mode 100644
index 000000000000..d911c774fa9a
--- /dev/null
+++ b/bugs-fixed/getline-numeric.bad
@@ -0,0 +1,3 @@
+120 > 50
+120 <= 50
+same
diff --git a/bugs-fixed/getline-numeric.in b/bugs-fixed/getline-numeric.in
new file mode 100644
index 000000000000..52bd8e43afb0
--- /dev/null
+++ b/bugs-fixed/getline-numeric.in
@@ -0,0 +1 @@
+120
diff --git a/bugs-fixed/getline-numeric.ok b/bugs-fixed/getline-numeric.ok
new file mode 100644
index 000000000000..f7efd3db506f
--- /dev/null
+++ b/bugs-fixed/getline-numeric.ok
@@ -0,0 +1,3 @@
+120 > 50
+120 > 50
+same
diff --git a/bugs-fixed/missing-precision.awk b/bugs-fixed/missing-precision.awk
new file mode 100644
index 000000000000..4e7a74b2c964
--- /dev/null
+++ b/bugs-fixed/missing-precision.awk
@@ -0,0 +1 @@
+BEGIN { printf("%*s"); }
diff --git a/bugs-fixed/missing-precision.ok b/bugs-fixed/missing-precision.ok
new file mode 100644
index 000000000000..608b4fa48666
--- /dev/null
+++ b/bugs-fixed/missing-precision.ok
@@ -0,0 +1,2 @@
+./a.out: not enough args in printf(%*s)
+ source line number 1
diff --git a/bugs-fixed/negative-nf.awk b/bugs-fixed/negative-nf.awk
new file mode 100644
index 000000000000..6caeee4602b5
--- /dev/null
+++ b/bugs-fixed/negative-nf.awk
@@ -0,0 +1 @@
+BEGIN { NF = -5; }
diff --git a/bugs-fixed/negative-nf.ok b/bugs-fixed/negative-nf.ok
new file mode 100644
index 000000000000..71c860468cc0
--- /dev/null
+++ b/bugs-fixed/negative-nf.ok
@@ -0,0 +1,2 @@
+./a.out: cannot set NF to a negative value
+ source line number 1
diff --git a/bugs-fixed/nf-self-assign.awk b/bugs-fixed/nf-self-assign.awk
new file mode 100644
index 000000000000..6ae29eef916d
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.awk
@@ -0,0 +1,6 @@
+BEGIN {
+ $0="a b c";
+ OFS=",";
+ NF = NF;
+ print;
+}
diff --git a/bugs-fixed/nf-self-assign.bad b/bugs-fixed/nf-self-assign.bad
new file mode 100644
index 000000000000..3774da60e546
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.bad
@@ -0,0 +1 @@
+a b c
diff --git a/bugs-fixed/nf-self-assign.ok b/bugs-fixed/nf-self-assign.ok
new file mode 100644
index 000000000000..b2ffb02521e6
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.ok
@@ -0,0 +1 @@
+a,b,c
diff --git a/bugs-fixed/numeric-fs.awk b/bugs-fixed/numeric-fs.awk
new file mode 100644
index 000000000000..01e438d4aa28
--- /dev/null
+++ b/bugs-fixed/numeric-fs.awk
@@ -0,0 +1,5 @@
+BEGIN {
+ FS = 0; split("20202", a); print a[1];
+ FS = 1; $0="31313"; print $1;
+ FS = 2; "echo 42424" | getline; print $1;
+}
diff --git a/bugs-fixed/numeric-fs.ok b/bugs-fixed/numeric-fs.ok
new file mode 100644
index 000000000000..dcf37cd5e262
--- /dev/null
+++ b/bugs-fixed/numeric-fs.ok
@@ -0,0 +1,3 @@
+2
+3
+4
diff --git a/bugs-fixed/numeric-output-seps.awk b/bugs-fixed/numeric-output-seps.awk
new file mode 100644
index 000000000000..daa0f72aa6ff
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.awk
@@ -0,0 +1,8 @@
+BEGIN {
+ $0 = "a b c";
+ OFS = 1;
+ ORS = 2;
+ NF = 2;
+ print;
+ print "d", "e";
+}
diff --git a/bugs-fixed/numeric-output-seps.bad b/bugs-fixed/numeric-output-seps.bad
new file mode 100644
index 000000000000..95310f78a7f3
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.bad
@@ -0,0 +1,2 @@
+a b
+d e
diff --git a/bugs-fixed/numeric-output-seps.ok b/bugs-fixed/numeric-output-seps.ok
new file mode 100644
index 000000000000..de6b2026e539
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.ok
@@ -0,0 +1 @@
+a1b2d1e2 \ No newline at end of file
diff --git a/bugs-fixed/numeric-rs.awk b/bugs-fixed/numeric-rs.awk
new file mode 100644
index 000000000000..cc7a0a0c08c2
--- /dev/null
+++ b/bugs-fixed/numeric-rs.awk
@@ -0,0 +1,6 @@
+BEGIN {
+ RS = 1;
+ while ("echo a1b1c1d" | getline > 0) {
+ print $1;
+ }
+}
diff --git a/bugs-fixed/numeric-rs.bad b/bugs-fixed/numeric-rs.bad
new file mode 100644
index 000000000000..2027bc6f27c9
--- /dev/null
+++ b/bugs-fixed/numeric-rs.bad
@@ -0,0 +1 @@
+a1b1c1d
diff --git a/bugs-fixed/numeric-rs.ok b/bugs-fixed/numeric-rs.ok
new file mode 100644
index 000000000000..d68dd4031d2a
--- /dev/null
+++ b/bugs-fixed/numeric-rs.ok
@@ -0,0 +1,4 @@
+a
+b
+c
+d
diff --git a/bugs-fixed/numeric-subsep.awk b/bugs-fixed/numeric-subsep.awk
new file mode 100644
index 000000000000..1252e4a99607
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.awk
@@ -0,0 +1,5 @@
+BEGIN {
+ SUBSEP = 123.456;
+ a["hello", "world"] = "foo";
+ print a["hello" SUBSEP "world"];
+}
diff --git a/bugs-fixed/numeric-subsep.bad b/bugs-fixed/numeric-subsep.bad
new file mode 100644
index 000000000000..8b137891791f
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.bad
@@ -0,0 +1 @@
+
diff --git a/bugs-fixed/numeric-subsep.ok b/bugs-fixed/numeric-subsep.ok
new file mode 100644
index 000000000000..257cc5642cb1
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.ok
@@ -0,0 +1 @@
+foo
diff --git a/bugs-fixed/ofs-rebuild.awk b/bugs-fixed/ofs-rebuild.awk
new file mode 100644
index 000000000000..dd2700031524
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.awk
@@ -0,0 +1,17 @@
+# The bug here is that nawk should use the value of OFS that
+# was current when $0 became invalid to rebuild the record.
+
+BEGIN {
+ OFS = ":"
+ $0 = "a b c d e f g"
+ $3 = "3333"
+ # Conceptually, $0 should now be "a:b:3333:d:e:f:g"
+
+ # Change OFS after (conceptually) rebuilding the record
+ OFS = "<>"
+
+ # Unmodified nawk prints "a<>b<>3333<>d<>e<>f<>g" because
+ # it delays rebuilding $0 until it's needed, and then it uses
+ # the current value of OFS. Oops.
+ print
+}
diff --git a/bugs-fixed/ofs-rebuild.bad b/bugs-fixed/ofs-rebuild.bad
new file mode 100644
index 000000000000..7570811e2c16
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.bad
@@ -0,0 +1 @@
+a<>b<>3333<>d<>e<>f<>g
diff --git a/bugs-fixed/ofs-rebuild.ok b/bugs-fixed/ofs-rebuild.ok
new file mode 100644
index 000000000000..26892181f91b
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.ok
@@ -0,0 +1 @@
+a:b:3333:d:e:f:g
diff --git a/bugs-fixed/space.awk b/bugs-fixed/space.awk
new file mode 100644
index 000000000000..6aa87d2e6259
--- /dev/null
+++ b/bugs-fixed/space.awk
@@ -0,0 +1,22 @@
+BEGIN {
+ c[" "] = "\" \""
+ c["\a"] = "\\a"
+ c["\b"] = "\\b"
+ c["\f"] = "\\f"
+ c["\n"] = "\\n"
+ c["\r"] = "\\r"
+ c["\t"] = "\\t"
+ c["\v"] = "\\v"
+
+ sort = "LC_ALL=C sort"
+
+ for (i in c)
+ printf("%s %s [[:space:]]\n", c[i],
+ i ~ /[[:space:]]/ ? "~" : "!~") | sort
+
+ for (i in c)
+ printf("%s %s [[:blank:]]\n", c[i],
+ i ~ /[[:blank:]]/ ? "~" : "!~") | sort
+
+ close(sort)
+}
diff --git a/bugs-fixed/space.bad b/bugs-fixed/space.bad
new file mode 100644
index 000000000000..f92055fd0c26
--- /dev/null
+++ b/bugs-fixed/space.bad
@@ -0,0 +1,16 @@
+" " ~ [[:blank:]]
+" " ~ [[:space:]]
+\a !~ [[:blank:]]
+\a !~ [[:space:]]
+\b !~ [[:blank:]]
+\b !~ [[:space:]]
+\f ~ [[:blank:]]
+\f ~ [[:space:]]
+\n ~ [[:blank:]]
+\n ~ [[:space:]]
+\r ~ [[:blank:]]
+\r ~ [[:space:]]
+\t ~ [[:blank:]]
+\t ~ [[:space:]]
+\v ~ [[:blank:]]
+\v ~ [[:space:]]
diff --git a/bugs-fixed/space.ok b/bugs-fixed/space.ok
new file mode 100644
index 000000000000..4278c5c9df3b
--- /dev/null
+++ b/bugs-fixed/space.ok
@@ -0,0 +1,16 @@
+" " ~ [[:blank:]]
+" " ~ [[:space:]]
+\a !~ [[:blank:]]
+\a !~ [[:space:]]
+\b !~ [[:blank:]]
+\b !~ [[:space:]]
+\f !~ [[:blank:]]
+\f ~ [[:space:]]
+\n !~ [[:blank:]]
+\n ~ [[:space:]]
+\r !~ [[:blank:]]
+\r ~ [[:space:]]
+\t ~ [[:blank:]]
+\t ~ [[:space:]]
+\v !~ [[:blank:]]
+\v ~ [[:space:]]
diff --git a/bugs-fixed/split-fs-from-array.awk b/bugs-fixed/split-fs-from-array.awk
new file mode 100644
index 000000000000..fce1607c2a97
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.awk
@@ -0,0 +1,5 @@
+BEGIN {
+ a[1] = "elephantie"
+ a[2] = "e"
+ print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2])
+}
diff --git a/bugs-fixed/split-fs-from-array.ok b/bugs-fixed/split-fs-from-array.ok
new file mode 100644
index 000000000000..9402b94f4fae
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.ok
@@ -0,0 +1 @@
+4 l phanti 2
diff --git a/bugs-fixed/string-conv.awk b/bugs-fixed/string-conv.awk
new file mode 100644
index 000000000000..a1f04aba354b
--- /dev/null
+++ b/bugs-fixed/string-conv.awk
@@ -0,0 +1,13 @@
+BEGIN {
+ OFMT = ">>%.6g<<"
+ a = 12.1234
+ print "a =", a
+ b = a ""
+ print "1 ->", b
+ CONVFMT = "%2.2f"
+ b = a ""
+ print "2 ->", b
+ CONVFMT = "%.12g"
+ b = a ""
+ print "3 ->", b
+}
diff --git a/bugs-fixed/string-conv.bad b/bugs-fixed/string-conv.bad
new file mode 100644
index 000000000000..2ab95e87d0a8
--- /dev/null
+++ b/bugs-fixed/string-conv.bad
@@ -0,0 +1,4 @@
+a = >>12.1234<<
+1 -> >>12.1234<<
+2 -> >>12.1234<<
+3 -> >>12.1234<<
diff --git a/bugs-fixed/string-conv.ok b/bugs-fixed/string-conv.ok
new file mode 100644
index 000000000000..7c097113207a
--- /dev/null
+++ b/bugs-fixed/string-conv.ok
@@ -0,0 +1,4 @@
+a = >>12.1234<<
+1 -> 12.1234
+2 -> 12.12
+3 -> 12.1234
diff --git a/bugs-fixed/subsep-overflow.awk b/bugs-fixed/subsep-overflow.awk
new file mode 100644
index 000000000000..66c7c24db0e6
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.awk
@@ -0,0 +1,24 @@
+function foo(c, n) {
+ s = "";
+ for (i = 0; i < n; i++) {
+ s = s c;
+ }
+ return s;
+}
+
+BEGIN {
+ str1 = foo("a", 4500);
+ str2 = foo("b", 9000);
+
+ a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1;
+
+ for (k in a) {
+ print length(k);
+ }
+
+ print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+ print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+ delete a[(SUBSEP = str1), (SUBSEP = str2), "c"];
+ print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+ print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+}
diff --git a/bugs-fixed/subsep-overflow.ok b/bugs-fixed/subsep-overflow.ok
new file mode 100644
index 000000000000..ddbbd78707ee
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.ok
@@ -0,0 +1,5 @@
+27001
+1
+1
+0
+0
diff --git a/bugs-fixed/system-status.awk b/bugs-fixed/system-status.awk
new file mode 100644
index 000000000000..8daf563e6f4f
--- /dev/null
+++ b/bugs-fixed/system-status.awk
@@ -0,0 +1,19 @@
+# Unmodified nawk prints the 16 bit exit status divided by 256, but
+# does so using floating point arithmetic, yielding strange results.
+#
+# The fix is to use the various macros defined for wait(2) and to
+# use the signal number + 256 for death by signal, or signal number + 512
+# for death by signal with core dump.
+
+BEGIN {
+ status = system("exit 42")
+ print "normal status", status
+
+ status = system("kill -HUP $$")
+ print "death by signal status", status
+
+ status = system("kill -ABRT $$")
+ print "death by signal with core dump status", status
+
+ system("rm -f core*")
+}
diff --git a/bugs-fixed/system-status.bad b/bugs-fixed/system-status.bad
new file mode 100644
index 000000000000..a1317dba54a8
--- /dev/null
+++ b/bugs-fixed/system-status.bad
@@ -0,0 +1,3 @@
+normal status 42
+death by signal status 0.00390625
+death by signal with core dump status 0.523438
diff --git a/bugs-fixed/system-status.ok b/bugs-fixed/system-status.ok
new file mode 100644
index 000000000000..737828f5ed7a
--- /dev/null
+++ b/bugs-fixed/system-status.ok
@@ -0,0 +1,3 @@
+normal status 42
+death by signal status 257
+death by signal with core dump status 518
diff --git a/bugs-fixed/unary-plus.awk b/bugs-fixed/unary-plus.awk
new file mode 100644
index 000000000000..ba6185b96704
--- /dev/null
+++ b/bugs-fixed/unary-plus.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ print +"q"
+ print +"43.12345678912345678"
+}
diff --git a/bugs-fixed/unary-plus.bad b/bugs-fixed/unary-plus.bad
new file mode 100644
index 000000000000..76f57d5d580c
--- /dev/null
+++ b/bugs-fixed/unary-plus.bad
@@ -0,0 +1,2 @@
+q
+43.12345678912345678
diff --git a/bugs-fixed/unary-plus.ok b/bugs-fixed/unary-plus.ok
new file mode 100644
index 000000000000..90f97afc5c44
--- /dev/null
+++ b/bugs-fixed/unary-plus.ok
@@ -0,0 +1,2 @@
+0
+43.1235
diff --git a/lex.c b/lex.c
index 0c65a9fe3292..ad8e878a247d 100644
--- a/lex.c
+++ b/lex.c
@@ -170,10 +170,10 @@ int reg = 0; /* 1 => return a REGEXPR now */
int yylex(void)
{
int c;
- static char *buf = NULL;
+ static char *buf = 0;
static int bufsize = 5; /* BUG: setting this small causes core dump! */
- if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
FATAL( "out of space in yylex" );
if (sc) {
sc = 0;
@@ -198,6 +198,7 @@ int yylex(void)
yylval.i = c;
switch (c) {
case '\n': /* {EOL} */
+ lineno++;
RET(NL);
case '\r': /* assume \n is coming */
case ' ': /* {WS}+ */
@@ -213,6 +214,7 @@ int yylex(void)
case '\\':
if (peek() == '\n') {
input();
+ lineno++;
} else if (peek() == '\r') {
input(); input(); /* \n */
lineno++;
@@ -358,10 +360,10 @@ int string(void)
{
int c, n;
char *s, *bp;
- static char *buf = NULL;
+ static char *buf = 0;
static int bufsz = 500;
- if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of space for strings");
for (bp = buf; (c = input()) != '"'; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
@@ -370,10 +372,11 @@ int string(void)
case '\n':
case '\r':
case 0:
+ *bp = '\0';
SYNTAX( "non-terminated string %.10s...", buf );
- lineno++;
if (c == 0) /* hopeless */
FATAL( "giving up" );
+ lineno++;
break;
case '\\':
c = input();
@@ -504,17 +507,18 @@ void startreg(void) /* next call to yylex will return a regular expression */
int regexpr(void)
{
int c;
- static char *buf = NULL;
+ static char *buf = 0;
static int bufsz = 500;
char *bp;
- if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
+ if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of space for rex expr");
bp = buf;
for ( ; (c = input()) != '/' && c != 0; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
FATAL("out of space for reg expr %.10s...", buf);
if (c == '\n') {
+ *bp = '\0';
SYNTAX( "newline in regular expression %.10s...", buf );
unput('\n');
break;
@@ -539,7 +543,7 @@ char ebuf[300];
char *ep = ebuf;
char yysbuf[100]; /* pushback buffer */
char *yysptr = yysbuf;
-FILE *yyin = NULL;
+FILE *yyin = 0;
int input(void) /* get next lexical input character */
{
@@ -553,19 +557,19 @@ int input(void) /* get next lexical input character */
lexprog++;
} else /* awk -f ... */
c = pgetc();
- if (c == '\n')
- lineno++;
- else if (c == EOF)
+ if (c == EOF)
c = 0;
if (ep >= ebuf + sizeof ebuf)
ep = ebuf;
- return *ep++ = c;
+ *ep = c;
+ if (c != 0) {
+ ep++;
+ }
+ return (c);
}
void unput(int c) /* put lexical character back on input */
{
- if (c == '\n')
- lineno--;
if (yysptr >= yysbuf + sizeof(yysbuf))
FATAL("pushed back too much: %.20s...", yysbuf);
*yysptr++ = c;
diff --git a/lib.c b/lib.c
index 5eeb53d4679d..a365245a0e1a 100644
--- a/lib.c
+++ b/lib.c
@@ -59,7 +59,7 @@ void recinit(unsigned int n)
{
if ( (record = (char *) malloc(n)) == NULL
|| (fields = (char *) malloc(n+1)) == NULL
- || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
+ || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
|| (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
FATAL("out of space for $0 and fields");
*fldtab[0] = dollar0;
@@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
int sep, c;
char *rr, *buf = *pbuf;
int bufsize = *pbufsize;
+ char *rs = getsval(rsloc);
- if (strlen(*FS) >= sizeof(inputFS))
+ if (strlen(getsval(fsloc)) >= sizeof (inputFS))
FATAL("field separator %.10s... is too long", *FS);
/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
strcpy(inputFS, *FS); /* for subsequent field splitting */
- if ((sep = **RS) == 0) {
+ if ((sep = *rs) == 0) {
sep = '\n';
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
;
@@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
FATAL("input record `%.30s...' too long", buf);
*rr++ = c;
}
- if (**RS == sep || c == EOF)
+ if (*rs == sep || c == EOF)
break;
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
break;
@@ -283,6 +284,8 @@ void fldbld(void) /* create fields from current record */
}
fr = fields;
i = 0; /* number of fields accumulated here */
+ if (strlen(getsval(fsloc)) >= sizeof (inputFS))
+ FATAL("field separator %.10s... is too long", *FS);
strcpy(inputFS, *FS);
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
@@ -356,6 +359,7 @@ void fldbld(void) /* create fields from current record */
}
}
setfval(nfloc, (Awkfloat) lastfld);
+ donerec = 1; /* restore */
if (dbg) {
for (j = 0; j <= lastfld; j++) {
p = fldtab[j];
@@ -387,6 +391,21 @@ void newfld(int n) /* add field n after end of existing lastfld */
setfval(nfloc, (Awkfloat) n);
}
+void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
+{
+ if (n < 0)
+ FATAL("cannot set NF to a negative value");
+ if (n > nfields)
+ growfldtab(n);
+
+ if (lastfld < n)
+ cleanfld(lastfld+1, n);
+ else
+ cleanfld(n+1, lastfld);
+
+ lastfld = n;
+}
+
Cell *fieldadr(int n) /* get nth field */
{
if (n < 0)
@@ -465,6 +484,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
{
int i;
char *r, *p;
+ char *sep = getsval(ofsloc);
if (donerec == 1)
return;
@@ -476,9 +496,9 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
while ((*r = *p++) != 0)
r++;
if (i < *NF) {
- if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
+ if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
FATAL("created $0 `%.30s...' too long", record);
- for (p = *OFS; (*r = *p++) != 0; )
+ for (p = sep; (*r = *p++) != 0; )
r++;
}
}
@@ -618,6 +638,8 @@ void eprint(void) /* try to print context around error */
if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
return;
+ if (ebuf == ep)
+ return;
p = ep - 1;
if (p > ebuf && *p == '\n')
p--;
@@ -681,7 +703,7 @@ int isclvar(const char *s) /* is s of form var=something ? */
for ( ; *s; s++)
if (!(isalnum((uschar) *s) || *s == '_'))
break;
- return *s == '=' && s > os && *(s+1) != '=';
+ return *s == '=' && s > os;
}
/* strtod is supposed to be a proper test of what's a valid number */
diff --git a/main.c b/main.c
index 4b659974b056..98661fcd7829 100644
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20121220";
+const char *version = "version 20190529";
#define DEBUG
#include <stdio.h>
@@ -54,6 +54,13 @@ int curpfile = 0; /* current filename */
int safe = 0; /* 1 => "safe" mode */
+/* Can this work with recursive calls? I don't think so.
+void segvcatch(int n)
+{
+ FATAL("segfault. Do you have an unbounded recursive call?", n);
+}
+*/
+
int main(int argc, char *argv[])
{
const char *fs = NULL;
@@ -68,6 +75,7 @@ int main(int argc, char *argv[])
exit(1);
}
signal(SIGFPE, fpecatch);
+ /*signal(SIGSEGV, segvcatch); experiment */
srand_seed = 1;
srand(srand_seed);
@@ -80,7 +88,7 @@ int main(int argc, char *argv[])
exit(0);
break;
}
- if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */
+ if (strcmp(argv[1], "--") == 0) { /* explicit end of args */
argc--;
argv++;
break;
diff --git a/makefile b/makefile
index 88f992421561..3c0b62e1df9e 100644
--- a/makefile
+++ b/makefile
@@ -23,18 +23,21 @@
# ****************************************************************/
CFLAGS = -g
-CFLAGS = -O2
CFLAGS =
+CFLAGS = -O2
-CC = gcc -Wall -g -Wwrite-strings
-CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
-CC = gcc -g -Wall -pedantic
-CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
+# compiler options
+#CC = gcc -Wall -g -Wwrite-strings
+#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
+#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
+HOSTCC = gcc -g -Wall -pedantic
+CC = $(HOSTCC) # change this if cross-compiling.
-YACC = bison -d -y
-YACC = yacc -d -S
+# yacc options. pick one; this varies a lot by system.
#YFLAGS = -d -S
- # -S uses sprintf in yacc parser instead of sprint
+YACC = bison -d -y
+#YACC = yacc -d
+# -S uses sprintf in yacc parser instead of sprint
OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o
@@ -44,7 +47,7 @@ SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \
LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
lib.c run.c tran.c
-SHIP = README FIXES $(SOURCE) ytab[ch].bak makefile \
+SHIP = README LICENSE FIXES $(SOURCE) ytab[ch].bak makefile \
awk.1
a.out: ytab.o $(OFILES)
@@ -52,17 +55,23 @@ a.out: ytab.o $(OFILES)
$(OFILES): awk.h ytab.h proto.h
-ytab.o: awk.h proto.h awkgram.y
+# Clear dependency for parallel builds (make -j):
+# yacc generates y.tab.c and y.tab.h at the same time, so
+# this needs to be a pattern rule; otherwise multiple targets
+# are mapped onto multiple executions of yacc, which overwrite
+# each other's outputs.
+y%.c y%.h: awk.h proto.h awkgram.y
$(YACC) $(YFLAGS) awkgram.y
- mv y.tab.c ytab.c
- mv y.tab.h ytab.h
- $(CC) $(CFLAGS) -c ytab.c
+ mv y.$*.c y$*.c
+ mv y.$*.h y$*.h
+
+ytab.h: ytab.c
proctab.c: maketab
- ./maketab >proctab.c
+ ./maketab ytab.h >proctab.c
maketab: ytab.h maketab.c
- $(CC) $(CFLAGS) maketab.c -o maketab
+ $(HOSTCC) $(CFLAGS) maketab.c -o maketab
bundle:
@cp ytab.h ytabh.bak
@@ -79,8 +88,22 @@ tar:
@zip awk.zip $(SHIP)
ls -l awk.zip
+gitadd:
+ git add README LICENSE FIXES \
+ awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
+ lib.c run.c tran.c \
+ makefile awk.1 awktest.tar
+
+gitpush:
+ # only do this once:
+ # git remote add origin https://github.com/onetrueawk/awk.git
+ git push -u origin master
+
names:
@echo $(LISTING)
clean:
rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda # proctab.c
+
+cleaner:
+ rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c ytab*
diff --git a/maketab.c b/maketab.c
index b59e81592fdb..dbe3d241fcc8 100644
--- a/maketab.c
+++ b/maketab.c
@@ -62,6 +62,7 @@ struct xx
{ DIVIDE, "arith", " / " },
{ MOD, "arith", " % " },
{ UMINUS, "arith", " -" },
+ { UPLUS, "arith", " +" },
{ POWER, "arith", " **" },
{ PREINCR, "incrdecr", "++" },
{ POSTINCR, "incrdecr", "++" },
@@ -124,8 +125,12 @@ int main(int argc, char *argv[])
for (i = SIZE; --i >= 0; )
names[i] = "";
- if ((fp = fopen("ytab.h", "r")) == NULL) {
- fprintf(stderr, "maketab can't open ytab.h!\n");
+ if (argc != 2) {
+ fprintf(stderr, "usage: maketab YTAB_H\n");
+ exit(1);
+ }
+ if ((fp = fopen(argv[1], "r")) == NULL) {
+ fprintf(stderr, "maketab can't open %s!\n", argv[1]);
exit(1);
}
printf("static char *printname[%d] = {\n", SIZE);
@@ -134,6 +139,8 @@ int main(int argc, char *argv[])
n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */
continue;
+ if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0)
+ continue;
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
/* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */
continue;
@@ -149,7 +156,7 @@ int main(int argc, char *argv[])
table[p->token-FIRSTTOKEN] = p->name;
printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
for (i=0; i<SIZE; i++)
- if (table[i]==NULL)
+ if (table[i]==0)
printf("\tnullproc,\t/* %s */\n", names[i]);
else
printf("\t%s,\t/* %s */\n", table[i], names[i]);
diff --git a/parse.c b/parse.c
index 753a50def1fb..8304ded837ba 100644
--- a/parse.c
+++ b/parse.c
@@ -259,7 +259,7 @@ int isarg(const char *s) /* is s in argument list for current function? */
Node *p = arglist;
int n;
- for (n = 0; p != NULL; p = p->nnext, n++)
+ for (n = 0; p != 0; p = p->nnext, n++)
if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0)
return n;
return -1;
diff --git a/proctab.c b/proctab.c
new file mode 100644
index 000000000000..ff212c416c3a
--- /dev/null
+++ b/proctab.c
@@ -0,0 +1,209 @@
+#include <stdio.h>
+#include "awk.h"
+#include "ytab.h"
+
+static char *printname[94] = {
+ (char *) "FIRSTTOKEN", /* 258 */
+ (char *) "PROGRAM", /* 259 */
+ (char *) "PASTAT", /* 260 */
+ (char *) "PASTAT2", /* 261 */
+ (char *) "XBEGIN", /* 262 */
+ (char *) "XEND", /* 263 */
+ (char *) "NL", /* 264 */
+ (char *) "ARRAY", /* 265 */
+ (char *) "MATCH", /* 266 */
+ (char *) "NOTMATCH", /* 267 */
+ (char *) "MATCHOP", /* 268 */
+ (char *) "FINAL", /* 269 */
+ (char *) "DOT", /* 270 */
+ (char *) "ALL", /* 271 */
+ (char *) "CCL", /* 272 */
+ (char *) "NCCL", /* 273 */
+ (char *) "CHAR", /* 274 */
+ (char *) "OR", /* 275 */
+ (char *) "STAR", /* 276 */
+ (char *) "QUEST", /* 277 */
+ (char *) "PLUS", /* 278 */
+ (char *) "EMPTYRE", /* 279 */
+ (char *) "AND", /* 280 */
+ (char *) "BOR", /* 281 */
+ (char *) "APPEND", /* 282 */
+ (char *) "EQ", /* 283 */
+ (char *) "GE", /* 284 */
+ (char *) "GT", /* 285 */
+ (char *) "LE", /* 286 */
+ (char *) "LT", /* 287 */
+ (char *) "NE", /* 288 */
+ (char *) "IN", /* 289 */
+ (char *) "ARG", /* 290 */
+ (char *) "BLTIN", /* 291 */
+ (char *) "BREAK", /* 292 */
+ (char *) "CLOSE", /* 293 */
+ (char *) "CONTINUE", /* 294 */
+ (char *) "DELETE", /* 295 */
+ (char *) "DO", /* 296 */
+ (char *) "EXIT", /* 297 */
+ (char *) "FOR", /* 298 */
+ (char *) "FUNC", /* 299 */
+ (char *) "SUB", /* 300 */
+ (char *) "GSUB", /* 301 */
+ (char *) "IF", /* 302 */
+ (char *) "INDEX", /* 303 */
+ (char *) "LSUBSTR", /* 304 */
+ (char *) "MATCHFCN", /* 305 */
+ (char *) "NEXT", /* 306 */
+ (char *) "NEXTFILE", /* 307 */
+ (char *) "ADD", /* 308 */
+ (char *) "MINUS", /* 309 */
+ (char *) "MULT", /* 310 */
+ (char *) "DIVIDE", /* 311 */
+ (char *) "MOD", /* 312 */
+ (char *) "ASSIGN", /* 313 */
+ (char *) "ASGNOP", /* 314 */
+ (char *) "ADDEQ", /* 315 */
+ (char *) "SUBEQ", /* 316 */
+ (char *) "MULTEQ", /* 317 */
+ (char *) "DIVEQ", /* 318 */
+ (char *) "MODEQ", /* 319 */
+ (char *) "POWEQ", /* 320 */
+ (char *) "PRINT", /* 321 */
+ (char *) "PRINTF", /* 322 */
+ (char *) "SPRINTF", /* 323 */
+ (char *) "ELSE", /* 324 */
+ (char *) "INTEST", /* 325 */
+ (char *) "CONDEXPR", /* 326 */
+ (char *) "POSTINCR", /* 327 */
+ (char *) "PREINCR", /* 328 */
+ (char *) "POSTDECR", /* 329 */
+ (char *) "PREDECR", /* 330 */
+ (char *) "VAR", /* 331 */
+ (char *) "IVAR", /* 332 */
+ (char *) "VARNF", /* 333 */
+ (char *) "CALL", /* 334 */
+ (char *) "NUMBER", /* 335 */
+ (char *) "STRING", /* 336 */
+ (char *) "REGEXPR", /* 337 */
+ (char *) "GETLINE", /* 338 */
+ (char *) "RETURN", /* 339 */
+ (char *) "SPLIT", /* 340 */
+ (char *) "SUBSTR", /* 341 */
+ (char *) "WHILE", /* 342 */
+ (char *) "CAT", /* 343 */
+ (char *) "NOT", /* 344 */
+ (char *) "UMINUS", /* 345 */
+ (char *) "UPLUS", /* 346 */
+ (char *) "POWER", /* 347 */
+ (char *) "DECR", /* 348 */
+ (char *) "INCR", /* 349 */
+ (char *) "INDIRECT", /* 350 */
+ (char *) "LASTTOKEN", /* 351 */
+};
+
+
+Cell *(*proctab[94])(Node **, int) = {
+ nullproc, /* FIRSTTOKEN */
+ program, /* PROGRAM */
+ pastat, /* PASTAT */
+ dopa2, /* PASTAT2 */
+ nullproc, /* XBEGIN */
+ nullproc, /* XEND */
+ nullproc, /* NL */
+ array, /* ARRAY */
+ matchop, /* MATCH */
+ matchop, /* NOTMATCH */
+ nullproc, /* MATCHOP */
+ nullproc, /* FINAL */
+ nullproc, /* DOT */
+ nullproc, /* ALL */
+ nullproc, /* CCL */
+ nullproc, /* NCCL */
+ nullproc, /* CHAR */
+ nullproc, /* OR */
+ nullproc, /* STAR */
+ nullproc, /* QUEST */
+ nullproc, /* PLUS */
+ nullproc, /* EMPTYRE */
+ boolop, /* AND */
+ boolop, /* BOR */
+ nullproc, /* APPEND */
+ relop, /* EQ */
+ relop, /* GE */
+ relop, /* GT */
+ relop, /* LE */
+ relop, /* LT */
+ relop, /* NE */
+ instat, /* IN */
+ arg, /* ARG */
+ bltin, /* BLTIN */
+ jump, /* BREAK */
+ closefile, /* CLOSE */
+ jump, /* CONTINUE */
+ awkdelete, /* DELETE */
+ dostat, /* DO */
+ jump, /* EXIT */
+ forstat, /* FOR */
+ nullproc, /* FUNC */
+ sub, /* SUB */
+ gsub, /* GSUB */
+ ifstat, /* IF */
+ sindex, /* INDEX */
+ nullproc, /* LSUBSTR */
+ matchop, /* MATCHFCN */
+ jump, /* NEXT */
+ jump, /* NEXTFILE */
+ arith, /* ADD */
+ arith, /* MINUS */
+ arith, /* MULT */
+ arith, /* DIVIDE */
+ arith, /* MOD */
+ assign, /* ASSIGN */
+ nullproc, /* ASGNOP */
+ assign, /* ADDEQ */
+ assign, /* SUBEQ */
+ assign, /* MULTEQ */
+ assign, /* DIVEQ */
+ assign, /* MODEQ */
+ assign, /* POWEQ */
+ printstat, /* PRINT */
+ awkprintf, /* PRINTF */
+ awksprintf, /* SPRINTF */
+ nullproc, /* ELSE */
+ intest, /* INTEST */
+ condexpr, /* CONDEXPR */
+ incrdecr, /* POSTINCR */
+ incrdecr, /* PREINCR */
+ incrdecr, /* POSTDECR */
+ incrdecr, /* PREDECR */
+ nullproc, /* VAR */
+ nullproc, /* IVAR */
+ getnf, /* VARNF */
+ call, /* CALL */
+ nullproc, /* NUMBER */
+ nullproc, /* STRING */
+ nullproc, /* REGEXPR */
+ awkgetline, /* GETLINE */
+ jump, /* RETURN */
+ split, /* SPLIT */
+ substr, /* SUBSTR */
+ whilestat, /* WHILE */
+ cat, /* CAT */
+ boolop, /* NOT */
+ arith, /* UMINUS */
+ arith, /* UPLUS */
+ arith, /* POWER */
+ nullproc, /* DECR */
+ nullproc, /* INCR */
+ indirect, /* INDIRECT */
+ nullproc, /* LASTTOKEN */
+};
+
+char *tokname(int n)
+{
+ static char buf[100];
+
+ if (n < FIRSTTOKEN || n > LASTTOKEN) {
+ sprintf(buf, "token %d", n);
+ return buf;
+ }
+ return printname[n-FIRSTTOKEN];
+}
diff --git a/proto.h b/proto.h
index 9a657ef73ec1..ad6f2e80a594 100644
--- a/proto.h
+++ b/proto.h
@@ -124,6 +124,7 @@ extern void setclvar(char *);
extern void fldbld(void);
extern void cleanfld(int, int);
extern void newfld(int);
+extern void setlastfld(int);
extern int refldbld(const char *, const char *);
extern void recbld(void);
extern Cell *fieldadr(int);
@@ -193,3 +194,5 @@ extern Cell *gsub(Node **, int);
extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);
+
+extern const char *flags2str(int flags);
diff --git a/run.c b/run.c
index 5342fe0d0f8b..2dfb3e6c383d 100644
--- a/run.c
+++ b/run.c
@@ -31,6 +31,8 @@ THIS SOFTWARE.
#include <string.h>
#include <stdlib.h>
#include <time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include "awk.h"
#include "ytab.h"
@@ -71,23 +73,23 @@ extern Awkfloat srand_seed;
Node *winner = NULL; /* root of parse tree */
Cell *tmps; /* free temporary cells for execution */
-static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
+static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL };
Cell *True = &truecell;
-static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM };
+static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL };
Cell *False = &falsecell;
-static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM };
+static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL };
Cell *jbreak = &breakcell;
-static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM };
+static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL };
Cell *jcont = &contcell;
-static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM };
+static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL };
Cell *jnext = &nextcell;
-static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM };
+static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL };
Cell *jnextfile = &nextfilecell;
-static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM };
+static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL };
Cell *jexit = &exitcell;
-static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM };
+static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL };
Cell *jret = &retcell;
-static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE };
+static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
Node *curnode = NULL; /* the node being executed, for debugging */
@@ -112,7 +114,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
if (rminlen)
minlen += quantum - rminlen;
tbuf = (char *) realloc(*pbuf, minlen);
- dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
+ dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void *) *pbuf, (void *) tbuf) );
if (tbuf == NULL) {
if (whatrtn)
FATAL("out of memory in %s", whatrtn);
@@ -221,7 +223,7 @@ struct Frame *fp = NULL; /* frame pointer. bottom level unused */
Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
{
- static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE };
+ static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
int i, ncall, ndef;
int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
Node *x;
@@ -323,14 +325,18 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
{
Cell *y;
+ /* copy is not constant or field */
+
y = gettemp();
+ y->tval = x->tval & ~(CON|FLD|REC);
y->csub = CCOPY; /* prevents freeing until call is over */
y->nval = x->nval; /* BUG? */
- if (isstr(x))
+ if (isstr(x) /* || x->ctype == OCELL */) {
y->sval = tostring(x->sval);
+ y->tval &= ~DONTFREE;
+ } else
+ y->tval |= DONTFREE;
y->fval = x->fval;
- y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */
- /* is DONTFREE right? */
return y;
}
@@ -419,6 +425,10 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
} else if (a[0] != NULL) { /* getline var <file */
x = execute(a[0]);
setsval(x, buf);
+ if (is_number(x->sval)) {
+ x->fval = atof(x->sval);
+ x->tval |= NUM;
+ }
tempfree(x);
} else { /* getline <file */
setsval(fldtab[0], buf);
@@ -434,6 +444,10 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
n = getrec(&buf, &bufsize, 0);
x = execute(a[0]);
setsval(x, buf);
+ if (is_number(x->sval)) {
+ x->fval = atof(x->sval);
+ x->tval |= NUM;
+ }
tempfree(x);
}
}
@@ -456,7 +470,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
Node *np;
char *buf;
int bufsz = recsize;
- int nsub = strlen(*SUBSEP);
+ int nsub;
if ((buf = (char *) malloc(bufsz)) == NULL)
FATAL("out of memory in array");
@@ -466,6 +480,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
for (np = a[1]; np; np = np->nnext) {
y = execute(np); /* subscript */
s = getsval(y);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
FATAL("out of memory for %s[%s...]", x->nval, buf);
strcat(buf, s);
@@ -494,12 +509,12 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
Cell *x, *y;
Node *np;
char *s;
- int nsub = strlen(*SUBSEP);
+ int nsub;
x = execute(a[0]); /* Cell* for symbol table */
if (!isarr(x))
return True;
- if (a[1] == NULL) { /* delete the elements, not the table */
+ if (a[1] == 0) { /* delete the elements, not the table */
freesymtab(x);
x->tval &= ~STR;
x->tval |= ARR;
@@ -513,9 +528,10 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts *
for (np = a[1]; np; np = np->nnext) {
y = execute(np); /* subscript */
s = getsval(y);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
- strcat(buf, s);
+ strcat(buf, s);
if (np->nnext)
strcat(buf, *SUBSEP);
tempfree(y);
@@ -534,7 +550,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
char *buf;
char *s;
int bufsz = recsize;
- int nsub = strlen(*SUBSEP);
+ int nsub;
ap = execute(a[1]); /* array name */
if (!isarr(ap)) {
@@ -552,6 +568,7 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
for (p = a[0]; p; p = p->nnext) {
x = execute(p); /* expr */
s = getsval(x);
+ nsub = strlen(getsval(subseploc));
if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
FATAL("out of memory deleting %s[%s...]", x->nval, buf);
strcat(buf, s);
@@ -583,7 +600,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */
}
x = execute(a[1]); /* a[1] = target text */
s = getsval(x);
- if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
+ if (a[0] == 0) /* a[0] == 0: a[2] is an already-compiled reg expr */
i = (*mf)((fa *) a[2], s);
else {
y = execute(a[2]); /* a[2] = regular expr */
@@ -699,7 +716,7 @@ Cell *gettemp(void) /* get a tempcell */
FATAL("out of space for temporaries");
for(i = 1; i < 100; i++)
tmps[i-1].cnext = &tmps[i];
- tmps[i-1].cnext = NULL;
+ tmps[i-1].cnext = 0;
}
x = tmps;
tmps = x->cnext;
@@ -734,18 +751,18 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
int k, m, n;
char *s;
int temp;
- Cell *x, *y, *z = NULL;
+ Cell *x, *y, *z = 0;
x = execute(a[0]);
y = execute(a[1]);
- if (a[2] != NULL)
+ if (a[2] != 0)
z = execute(a[2]);
s = getsval(x);
k = strlen(s) + 1;
if (k <= 1) {
tempfree(x);
tempfree(y);
- if (a[2] != NULL) {
+ if (a[2] != 0) {
tempfree(z);
}
x = gettemp();
@@ -758,7 +775,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
else if (m > k)
m = k;
tempfree(y);
- if (a[2] != NULL) {
+ if (a[2] != 0) {
n = (int) getfval(z);
tempfree(z);
} else
@@ -817,6 +834,17 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
char *buf = *pbuf;
int bufsize = *pbufsize;
+ static int first = 1;
+ static int have_a_format = 0;
+
+ if (first) {
+ char buf[100];
+
+ sprintf(buf, "%a", 42.0);
+ have_a_format = (strcmp(buf, "0x1.5p+5") == 0);
+ first = 0;
+ }
+
os = s;
p = buf;
if ((fmt = (char *) malloc(fmtsz)) == NULL)
@@ -842,7 +870,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
FATAL("format item %.30s... ran format() out of memory", os);
if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L')
break; /* the ansi panoply */
+ if (*s == '$') {
+ FATAL("'$' not permitted in awk formats");
+ }
if (*s == '*') {
+ if (a == NULL) {
+ FATAL("not enough args in printf(%s)", os);
+ }
x = execute(a);
a = a->nnext;
sprintf(t-1, "%d", fmtwd=(int) getfval(x));
@@ -857,8 +891,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
if (fmtwd < 0)
fmtwd = -fmtwd;
adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
-
switch (*s) {
+ case 'a': case 'A':
+ if (have_a_format)
+ flag = *s;
+ else
+ flag = 'f';
+ break;
case 'f': case 'e': case 'g': case 'E': case 'G':
flag = 'f';
break;
@@ -901,6 +940,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
p += strlen(p);
sprintf(p, "%s", t);
break;
+ case 'a':
+ case 'A':
case 'f': sprintf(p, fmt, getfval(x)); break;
case 'd': sprintf(p, fmt, (long) getfval(x)); break;
case 'u': sprintf(p, fmt, (int) getfval(x)); break;
@@ -1003,7 +1044,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
x = execute(a[0]);
i = getfval(x);
tempfree(x);
- if (n != UMINUS) {
+ if (n != UMINUS && n != UPLUS) {
y = execute(a[1]);
j = getfval(y);
tempfree(y);
@@ -1033,6 +1074,8 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
case UMINUS:
i = -i;
break;
+ case UPLUS: /* handled by getfval(), above */
+ break;
case POWER:
if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
i = ipow(i, (int) j);
@@ -1088,8 +1131,8 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
y = execute(a[1]);
x = execute(a[0]);
if (n == ASSIGN) { /* ordinary assignment */
- if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
- ; /* leave alone unless it's a field */
+ if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
+ ; /* self-assignment: leave alone unless it's a field or NF */
else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
setsval(x, getsval(y));
x->fval = getfval(y);
@@ -1146,25 +1189,26 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */
{
Cell *x, *y, *z;
int n1, n2;
- char *s;
+ char *s = NULL;
+ int ssz = 0;
x = execute(a[0]);
+ n1 = strlen(getsval(x));
+ adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
+ (void) strncpy(s, x->sval, ssz);
+
y = execute(a[1]);
- getsval(x);
- getsval(y);
- n1 = strlen(x->sval);
- n2 = strlen(y->sval);
- s = (char *) malloc(n1 + n2 + 1);
- if (s == NULL)
- FATAL("out of space concatenating %.15s... and %.15s...",
- x->sval, y->sval);
- strcpy(s, x->sval);
- strcpy(s+n1, y->sval);
+ n2 = strlen(getsval(y));
+ adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
+ (void) strncpy(s + n1, y->sval, ssz - n1);
+
tempfree(x);
tempfree(y);
+
z = gettemp();
z->sval = s;
z->tval = STR;
+
return(z);
}
@@ -1172,7 +1216,7 @@ Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
{
Cell *x;
- if (a[0] == NULL)
+ if (a[0] == 0)
x = execute(a[1]);
else {
x = execute(a[0]);
@@ -1209,20 +1253,22 @@ Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
{
- Cell *x = NULL, *y, *ap;
+ Cell *x = 0, *y, *ap;
char *s, *origs;
+ char *fs, *origfs = NULL;
int sep;
- char *t, temp, num[50], *fs = NULL;
+ char *t, temp, num[50];
int n, tempstat, arg3type;
y = execute(a[0]); /* source string */
origs = s = strdup(getsval(y));
arg3type = ptoi(a[3]);
- if (a[2] == NULL) /* fs string */
- fs = *FS;
+ if (a[2] == 0) /* fs string */
+ fs = getsval(fsloc);
else if (arg3type == STRING) { /* split(str,arr,"string") */
x = execute(a[2]);
- fs = getsval(x);
+ origfs = fs = strdup(getsval(x));
+ tempfree(x);
} else if (arg3type == REGEXPR)
fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
else
@@ -1337,9 +1383,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
tempfree(ap);
tempfree(y);
free(origs);
- if (a[2] != NULL && arg3type == STRING) {
- tempfree(x);
- }
+ free(origfs);
x = gettemp();
x->tval = NUM;
x->fval = n;
@@ -1369,7 +1413,7 @@ Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
if (istrue(x)) {
tempfree(x);
x = execute(a[1]);
- } else if (a[2] != NULL) {
+ } else if (a[2] != 0) {
tempfree(x);
x = execute(a[2]);
}
@@ -1421,7 +1465,7 @@ Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
x = execute(a[0]);
tempfree(x);
for (;;) {
- if (a[1]!=NULL) {
+ if (a[1]!=0) {
x = execute(a[1]);
if (!istrue(x)) return(x);
else tempfree(x);
@@ -1479,6 +1523,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
Node *nextarg;
FILE *fp;
void flush_all(void);
+ int status = 0;
t = ptoi(a[0]);
x = execute(a[1]);
@@ -1503,7 +1548,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
case FCOS:
u = cos(getfval(x)); break;
case FATAN:
- if (nextarg == NULL) {
+ if (nextarg == 0) {
WARNING("atan2 requires two arguments; returning 1.0");
u = 1.0;
} else {
@@ -1515,7 +1560,20 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
break;
case FSYSTEM:
fflush(stdout); /* in case something is buffered already */
- u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */
+ status = system(getsval(x));
+ u = status;
+ if (status != -1) {
+ if (WIFEXITED(status)) {
+ u = WEXITSTATUS(status);
+ } else if (WIFSIGNALED(status)) {
+ u = WTERMSIG(status) + 256;
+#ifdef WCOREDUMP
+ if (WCOREDUMP(status))
+ u += 256;
+#endif
+ } else /* something else?!? */
+ u = 0;
+ }
break;
case FRAND:
/* in principle, rand() returns something in 0..RAND_MAX */
@@ -1564,7 +1622,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
tempfree(x);
x = gettemp();
setfval(x, u);
- if (nextarg != NULL) {
+ if (nextarg != 0) {
WARNING("warning: function has too many arguments");
for ( ; nextarg; nextarg = nextarg->nnext)
execute(nextarg);
@@ -1578,7 +1636,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */
Cell *y;
FILE *fp;
- if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
+ if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
fp = stdout;
else
fp = redirect(ptoi(a[1]), a[2]);
@@ -1587,11 +1645,11 @@ Cell *printstat(Node **a, int n) /* print a[0] */
fputs(getpssval(y), fp);
tempfree(y);
if (x->nnext == NULL)
- fputs(*ORS, fp);
+ fputs(getsval(orsloc), fp);
else
- fputs(*OFS, fp);
+ fputs(getsval(ofsloc), fp);
}
- if (a[1] != NULL)
+ if (a[1] != 0)
fflush(fp);
if (ferror(fp))
FATAL("write error on %s", filename(fp));
@@ -1600,8 +1658,6 @@ Cell *printstat(Node **a, int n) /* print a[0] */
Cell *nullproc(Node **a, int n)
{
- n = n;
- a = a;
return 0;
}
@@ -1650,7 +1706,7 @@ FILE *openfile(int a, const char *us)
{
const char *s = us;
int i, m;
- FILE *fp = NULL;
+ FILE *fp = 0;
if (*s == '\0')
FATAL("null file name in print or getline");
@@ -1665,7 +1721,7 @@ FILE *openfile(int a, const char *us)
return NULL;
for (i=0; i < nfiles; i++)
- if (files[i].fp == NULL)
+ if (files[i].fp == 0)
break;
if (i >= nfiles) {
struct files *nf;
@@ -1715,7 +1771,6 @@ Cell *closefile(Node **a, int n)
Cell *x;
int i, stat;
- n = n;
x = execute(a[0]);
getsval(x);
stat = -1;
@@ -1782,7 +1837,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */
FATAL("out of memory in sub");
x = execute(a[3]); /* target string */
t = getsval(x);
- if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
+ if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
@@ -1822,7 +1877,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */
if (pb > buf + bufsz)
FATAL("sub result2 %.30s too big; can't happen", buf);
setsval(x, buf); /* BUG: should be able to avoid copy */
- result = True;
+ result = True;;
}
tempfree(x);
tempfree(y);
@@ -1845,7 +1900,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */
num = 0;
x = execute(a[3]); /* target string */
t = getsval(x);
- if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
+ if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *) a[1]; /* regular expression */
else {
y = execute(a[1]);
diff --git a/tran.c b/tran.c
index e364ebd81928..d1dfe2b2f176 100644
--- a/tran.c
+++ b/tran.c
@@ -55,10 +55,14 @@ Cell *fsloc; /* FS */
Cell *nrloc; /* NR */
Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */
+Cell *ofsloc; /* OFS */
+Cell *orsloc; /* ORS */
+Cell *rsloc; /* RS */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */
+Cell *subseploc; /* SUBSEP */
Cell *symtabloc; /* SYMTAB */
Cell *nullloc; /* a guaranteed empty cell */
@@ -67,6 +71,18 @@ Cell *literal0;
extern Cell **fldtab;
+static void
+setfree(Cell *vp)
+{
+ if (&vp->sval == FS || &vp->sval == RS ||
+ &vp->sval == OFS || &vp->sval == ORS ||
+ &vp->sval == OFMT || &vp->sval == CONVFMT ||
+ &vp->sval == FILENAME || &vp->sval == SUBSEP)
+ vp->tval |= DONTFREE;
+ else
+ vp->tval &= ~DONTFREE;
+}
+
void syminit(void) /* initialize symbol table with builtin vars */
{
literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
@@ -76,9 +92,12 @@ void syminit(void) /* initialize symbol table with builtin vars */
fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
FS = &fsloc->sval;
- RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
- OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
- ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
+ rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
+ RS = &rsloc->sval;
+ ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
+ OFS = &ofsloc->sval;
+ orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
+ ORS = &orsloc->sval;
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
@@ -88,7 +107,8 @@ void syminit(void) /* initialize symbol table with builtin vars */
NR = &nrloc->fval;
fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
FNR = &fnrloc->fval;
- SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
+ subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
+ SUBSEP = &subseploc->sval;
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
RSTART = &rstartloc->fval;
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
@@ -174,7 +194,7 @@ void freesymtab(Cell *ap) /* free a symbol table */
free(cp);
tp->nelem--;
}
- tp->tab[i] = NULL;
+ tp->tab[i] = 0;
}
if (tp->nelem != 0)
WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
@@ -282,6 +302,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
{
int fldno;
+ f += 0.0; /* normalise negative zero to positive zero */
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
if (isfld(vp)) {
@@ -290,13 +311,21 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
if (fldno > *NF)
newfld(fldno);
dprintf( ("setting field %d to %g\n", fldno, f) );
+ } else if (&vp->fval == NF) {
+ donerec = 0; /* mark $0 invalid */
+ setlastfld(f);
+ dprintf( ("setting NF to %g\n", f) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
+ } else if (vp == ofsloc) {
+ if (donerec == 0)
+ recbld();
}
if (freeable(vp))
xfree(vp->sval); /* free any previous string */
- vp->tval &= ~STR; /* mark string invalid */
+ vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
+ vp->fmt = NULL;
vp->tval |= NUM; /* mark number ok */
if (f == -0) /* who would have thought this possible? */
f = 0;
@@ -318,6 +347,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
{
char *t;
int fldno;
+ Awkfloat f;
dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
@@ -328,20 +358,32 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
- dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
+ dprintf( ("setting field %d to %s (%p)\n", fldno, s, (void *) s) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
+ } else if (vp == ofsloc) {
+ if (donerec == 0)
+ recbld();
}
- t = tostring(s); /* in case it's self-assign */
+ t = s ? tostring(s) : tostring(""); /* in case it's self-assign */
if (freeable(vp))
xfree(vp->sval);
- vp->tval &= ~NUM;
+ vp->tval &= ~(NUM|CONVC|CONVO);
vp->tval |= STR;
- vp->tval &= ~DONTFREE;
+ vp->fmt = NULL;
+ setfree(vp);
dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
- (void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
- return(vp->sval = t);
+ (void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) );
+ vp->sval = t;
+ if (&vp->fval == NF) {
+ donerec = 0; /* mark $0 invalid */
+ f = getfval(vp);
+ setlastfld(f);
+ dprintf( ("setting NF to %g\n", f) );
+ }
+
+ return(vp->sval);
}
Awkfloat getfval(Cell *vp) /* get float val of a Cell */
@@ -364,7 +406,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */
{
- char s[100]; /* BUG: unchecked */
+ char s[256];
double dtemp;
if ((vp->tval & (NUM | STR)) == 0)
@@ -373,19 +415,80 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel
fldbld();
else if (isrec(vp) && donerec == 0)
recbld();
+
+ /*
+ * ADR: This is complicated and more fragile than is desirable.
+ * Retrieving a string value for a number associates the string
+ * value with the scalar. Previously, the string value was
+ * sticky, meaning if converted via OFMT that became the value
+ * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
+ * changed after a string value was retrieved, the original value
+ * was maintained and used. Also not per POSIX.
+ *
+ * We work around this design by adding two additional flags,
+ * CONVC and CONVO, indicating how the string value was
+ * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
+ * of the pointer to the xFMT format string used for the
+ * conversion. This pointer is only read, **never** dereferenced.
+ * The next time we do a conversion, if it's coming from the same
+ * xFMT as last time, and the pointer value is different, we
+ * know that the xFMT format string changed, and we need to
+ * redo the conversion. If it's the same, we don't have to.
+ *
+ * There are also several cases where we don't do a conversion,
+ * such as for a field (see the checks below).
+ */
+
+ /* Don't duplicate the code for actually updating the value */
+#define update_str_val(vp) \
+ { \
+ if (freeable(vp)) \
+ xfree(vp->sval); \
+ if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
+ snprintf(s, sizeof (s), "%.30g", vp->fval); \
+ else \
+ snprintf(s, sizeof (s), *fmt, vp->fval); \
+ vp->sval = tostring(s); \
+ vp->tval &= ~DONTFREE; \
+ vp->tval |= STR; \
+ }
+
if (isstr(vp) == 0) {
- if (freeable(vp))
- xfree(vp->sval);
- if (modf(vp->fval, &dtemp) == 0) /* it's integral */
- sprintf(s, "%.30g", vp->fval);
- else
- sprintf(s, *fmt, vp->fval);
- vp->sval = tostring(s);
- vp->tval &= ~DONTFREE;
- vp->tval |= STR;
+ update_str_val(vp);
+ if (fmt == OFMT) {
+ vp->tval &= ~CONVC;
+ vp->tval |= CONVO;
+ } else {
+ /* CONVFMT */
+ vp->tval &= ~CONVO;
+ vp->tval |= CONVC;
+ }
+ vp->fmt = *fmt;
+ } else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) {
+ goto done;
+ } else if (isstr(vp)) {
+ if (fmt == OFMT) {
+ if ((vp->tval & CONVC) != 0
+ || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
+ update_str_val(vp);
+ vp->tval &= ~CONVC;
+ vp->tval |= CONVO;
+ vp->fmt = *fmt;
+ }
+ } else {
+ /* CONVFMT */
+ if ((vp->tval & CONVO) != 0
+ || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
+ update_str_val(vp);
+ vp->tval &= ~CONVO;
+ vp->tval |= CONVC;
+ vp->fmt = *fmt;
+ }
+ }
}
+done:
dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
- (void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
+ (void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) );
return(vp->sval);
}
@@ -457,3 +560,37 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */
*bp++ = 0;
return (char *) buf;
}
+
+const char *flags2str(int flags)
+{
+ static const struct ftab {
+ const char *name;
+ int value;
+ } flagtab[] = {
+ { "NUM", NUM },
+ { "STR", STR },
+ { "DONTFREE", DONTFREE },
+ { "CON", CON },
+ { "ARR", ARR },
+ { "FCN", FCN },
+ { "FLD", FLD },
+ { "REC", REC },
+ { "CONVC", CONVC },
+ { "CONVO", CONVO },
+ { NULL, 0 }
+ };
+ static char buf[100];
+ int i;
+ char *cp = buf;
+
+ for (i = 0; flagtab[i].name != NULL; i++) {
+ if ((flags & flagtab[i].value) != 0) {
+ if (cp > buf)
+ *cp++ = '|';
+ strcpy(cp, flagtab[i].name);
+ cp += strlen(cp);
+ }
+ }
+
+ return buf;
+}