72 files changed, 1723 insertions, 218 deletions
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 000000000000..fd03b2bbca0b
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,245 @@
+2019-05-29         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* lib.c (isclvar): Remove check for additional '=' after
+	first one. No longer needed.
+
+2019-01-26         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* main.c (version): Updated.
+
+2019-01-25         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* run.c (awkgetline): Check for numeric value in all getline
+	variants. See the numeric-getline.* files in bugs-fixed directory.
+
+2018-08-29         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* REGRESS: Check for existence of a.out. If not there, run
+	make.  Enable core dumps for T.arnold system status test
+	to work on MacOS X.
+
+2018-08-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* awktest.tar (testdir/T.expr): Fix test for unary plus.
+
+2018-08-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* REGRESS: Extract tests if necessary, set PATH to include '.'.
+	* regdir/beebe.tar (Makefile): Fix longwrds test to prefix
+	sort with LC_ALL=C.
+	* awktest.tar: Updated from fixed test suite, directory
+	it extracts is now called 'testdir' to match what's in top-level
+	REGRESS script.
+	* regdir: Removed, as Brian wants to keep the test suite in
+	the tar file.
+
+2018-08-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* FIXES, lib.c, run.c, makefile, main.c: Merge from Brian's tree.
+	* REGRESS: New file, from Brian.
+	* awktest.tar: Restored from Brian's tree.
+
+2018-08-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* awkgram.y (UPLUS): New token. In the grammar, call op1()
+	with it.
+	* maketab.c (proc): Add entry for UPLUS.
+	* run.c (arith): Handle UPLUS.
+	* main.c (version): Updated.
+	* bugs-fixed/unary-plus.awk, bugs-fixed/unary-plus.bad,
+	bugs-fixed/unary-plus.ok: New files.
+
+2018-08-10         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* TODO: Updated.
+	* awk.1: Improve use of macros, add some additional explanation
+	in a few places, alphabetize list of variables.
+
+2018-08-08         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* awk.h (Cell): Add new field `fmt' to track xFMT value used
+	for a string conversion.
+	[CONVC, CONVO]: New flag macros.
+	* bugs-fixed/README: Updated.
+	* bugs-fixed/string-conv.awk, bugs-fixed/string-conv.bad,
+	bugs-fixed/string-conv.ok: New files.
+	* main.c (version): Updated.
+	* proto.h (flags2str): Add declaration.
+	* tran.c (setfval): Clear CONVC and CONVO flags and set vp->fmt
+	to NULL.
+	(setsval): Ditto. Add large comment and new code to manage
+	correct conversion of number to string based on various flags
+	and the value of vp->fmt. The idea is to not convert again
+	if xFMT is the same as before and we're doing the same conversion.
+	Otherwise, clear the old flags, set the new, and reconvert.
+	(flags2str): New function. For debug prints and for use from a debugger.
+
+2018-08-05         Arnold D. Robbins     <arnold@skeeve.com>
+
+	Fix filename conflicts in regdir where the only difference was
+	in letter case. This caused problems on Windows systems.
+
+	* regdir/Compare.T1: Renamed from regdir/Compare.T.
+	* regdir/t.delete0: Renamed from regdir/t.delete.
+	* regdir/t.getline1: Renamed from regdir/t.getline.
+	* regdir/t.redir1: Renamed from regdir/t.redir.
+	* regdir/t.split1: Renamed from regdir/t.split.
+	* regdir/t.sub0: Renamed from regdir/t.sub.
+	* regdir/REGRESS: Adjusted.
+
+2018-08-04         Arnold D. Robbins     <arnold@skeeve.com>
+
+	With scalpel, tweezers, magnifying glass and bated breath,
+	borrow code from the NetBSD version of nawk to fix the years-old
+	bug whereby decrementing the value of NF did not change the
+	record.
+
+	* lib.c (fldbld): Set donerec to 1 when done.
+	(setlastfld): New function.
+	* proto.h (setlastfld): Add declaration.
+	* run.c (copycell): Make code smarter about flags (from NetBSD code).
+	* tran.c (setfree): New function.
+	* tran.c (setfval): Normalize negative zero to positive zero.
+	If setting NF, clear donerec and call setlastfld().
+	(setsval): Remove call to save_old_OFS().  If setting OFS, call
+	recbld(). If setting NF, clear donerec and call setlastfld().
+
+	As part of the process, revert OFS-related changes of 2018-05-22:
+
+	* awk.h (saveOFS, saveOFSlen, save_old_OFS): Remove declarations.
+	* lib.c (recbld): Use *OFS instead of saveOFS.
+	* run.c (saveOFS, saveOFSlen, save_old_OFS): Remove.
+	* tran.c (syminit): Remove initialization of saveOFS and saveOFSlen.
+
+	General stuff that goes along with all this:
+
+	* bugs-fixed/README: Updated.
+	* bugs-fixed/decr-NF.awk, bugs-fixed/decr-NF.bad,
+	bugs-fixed/decr-NF.ok: New files.
+	* main.c (version): Updated.
+	* regdir/README.TESTS: Fix awk book title.
+	* regdir/T.misc: Revise test to match fixed code.
+	* run.c (format): Increase size of buffer used for %a test. (Unrelated
+	to NF or OFS, but fixes a compiler complaint.)
+
+2018-06-07         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* regdir/beebe.tar: Fix longwrds.ok so that the test will pass.
+	The file was incorrectly sorted.
+
+2018-06-06         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* regdir/T.lilly: Fix the bug again in the second instance
+	of the code. Thanks to BWK for pointing this out.
+
+2018-05-31         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* regdir/T.lilly: Fix a syntax error and ordering bug
+	in creating the 'foo' file.
+
+2018-05-23         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* awk.1: Remove standalone 'awk' at the top of file, it messed up
+	the formatting. Arrange built-in variable list in alphabetical
+	order.
+
+2018-05-23         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* main.c (version): Add my email address and a date so that
+	users can tell this isn't straight BWK awk.
+	* README.md: Minor updates.
+	* TODO: Updated.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	Add POSIX-required formats %a and %A.
+
+	* run.c (format): Check for %a support in C library. If there,
+	allow %a and %A as valid formats.
+	* TODO: Updated.
+	* bugs-fixed/README: Updated.
+	* bugs-fixed/a-format.awk, bugs-fixed/a-format.bad,
+	bugs-fixed/a-format.ok: New files.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* FIXES: Restored a line from a much earlier version that
+	apparently got lost when the dates were reordered.
+	* TODO: Updated.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* README.md: New file.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* regdir/echo.c, regdir/time.c: Minor fixes to compile without
+	warning on current GCC / Linux.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* TODO: New file.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* makefile (gitadd, gitpush): Remove these targets. They
+	should not be automated and were incorrect for things that
+	would be done regularly.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	Fix nawk so that [[:blank:]] only matches space and tab instead
+	of any whitespace character, originally made May 10, 2018.
+	See bugs-fixed/space.awk.
+
+	This appears to have been a thinko on Brian's part.
+
+	* b.c (charclasses): Use xisblank() function for [[:blank:]].
+	* bugs-fixed/README: Updated.
+	* bugs-fixed/space.awk, bugs-fixed/space.bad,
+	bugs-fixed/space.ok: New files.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* .gitignore: New file.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	Fix nawk to provide reasonable exit status for system(),
+	a la gawk, originally made March 12, 2016. See
+	bugs-fixed/system-status.awk.
+
+	* run.c (bltin): For FSYSTEM, use the macros defined for wait(2)
+	to produce a reasonable exit value, instead of doing a floating-point
+	division by 256.
+	* awk.1: Document the return status values.
+	* bugs-fixed/README: Updated.
+	* bugs-fixed/system-status.awk, bugs-fixed/system-status.bad,
+	bugs-fixed/system-status.ok: New files.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	Bug fix with respect to rebuilding a record, originally
+	made August 19, 2014. See bugs-fixed/ofs-rebuild.awk.
+
+	* awk.h (saveOFS, saveOFSlen): Declare new variables.
+	* lib.c (recbld): Use them when rebuilding the record.
+	* run.c (saveOFS, saveOFSlen): Define new variables.
+	(save_old_OFS): New function to save OFS aside.
+	* tran.c (syminit): Initialize saveOFS and saveOFSlen.
+	(setsval): If setting a field, call save_old_OFS().
+	* bugs-fixed/README, bugs-fixed/ofs-rebuild.awk,
+	bugs-fixed/ofs-rebuild.bad, bugs-fixed/ofs-rebuild.ok: New files.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* makefile (YACC): Use bison.
+
+2018-05-22         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* ChangeLog: Created.
+	* regdir: Created. Based on contents of awktest.a.
+	* .gitattributes: Created, to preserve CR LF in regdir/t.crlf.
+	* awktest.a: Removed.
+	* regdir/T.gawk, regdir/T.latin1: Updated from awktest.tar.
+	* awktest.tar: Removed.
diff --git a/FIXES b/FIXES
index c78aabc511f3..183eaedee47d 100644
--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,113 @@ THIS SOFTWARE.
 This file lists all bug fixes, changes, etc., made since the AWK book
 was sent to the printers in August, 1987.
 
+May 29, 2019:
+	Fix check for command line arguments to no longer require that
+	first character after '=' not be another '='. Reverts change of
+	August 11, 1989. Thanks to GitHub user Jamie Landeg Jones for
+	pointing out the issue; from Issue #38.
+
+Apr 7, 2019:
+	Update awktest.tar(p.50) to use modern options to sort. Needed
+	for Android development. Thanks to GitHub user mohd-akram (Mohamed
+	Akram).  From Comment #33.
+
+Mar 12, 2019:
+	Added very simplistic support for cross-compiling in the
+	makefile.  We are NOT going to go in the direction of the
+	autotools, though.  Thanks to GitHub user nee-san for
+	the basic change. (Merged from PR #34.)
+
+Mar 5, 2019:
+	Added support for POSIX-standard interval expressions (a.k.a.
+	bounds, a.k.a. repetition expressions) in regular expressions,
+	backported (via NetBSD) from Apple awk-24 (20070501).
+	Thanks to Martijn Dekker <martijn@inlv.org> for the port.
+	(Merged from PR #30.)
+
+Mar 3, 2019:
+	Merge PRs as follows:
+	#12: Avoid undefined behaviour when using ctype(3) functions in
+	     relex(). Thanks to GitHub user iamleot.
+	#31: Make getline handle numeric strings, and update FIXES. Thanks
+	     to GitHub user arnoldrobbins
+	#32: maketab: support build systems with read-only source. Thanks
+	     to GitHub user enh.
+
+Jan 25, 2019:
+	Make getline handle numeric strings properly in all cases.
+	(Thanks, Arnold.)
+
+Jan 21, 2019:
+	Merged a number of small fixes from GitHub pull requests.
+	Thanks to GitHub users Arnold Robbins (arnoldrobbins),
+	Cody Mello (melloc) and Christoph Junghans (junghans).
+	PR numbers: 13-21, 23, 24, 27.
+
+Oct 25, 2018:
+	Added test in maketab.c to prevent generating a proctab entry
+	for YYSTYPE_IS_DEFINED.  It was harmless but some gcc settings
+	generated a warning message.  Thanks to Nan Xiao for report.
+
+Aug 27, 2018:
+	Disallow '$' in printf formats; arguments evaluated in order
+	and printed in order.
+
+	Added some casts to silence warnings on debugging printfs.
+	(Thanks, Arnold.)
+
+Aug 23, 2018:
+        A long list of fixes courtesy of Arnold Robbins,
+        to whom profound thanks.
+
+        1. ofs-rebuild: OFS value used to rebuild the record was incorrect.
+        Fixed August 19, 2014. Revised fix August 2018.
+
+        2. system-status: Instead of a floating-point division by 256, use
+        the wait(2) macros to create a reasonable exit status.
+        Fixed March 12, 2016.
+
+        3. space: Use provided xisblank() function instead of isspace() for
+        matching [[:blank:]].
+
+        4. a-format: Add POSIX standard %a and %A to supported formats. Check
+        at runtime that this format is available.
+
+        5. decr-NF: Decrementing NF did not change $0. This is a decades-old
+        bug. There are interactions with the old and new value of OFS as well.
+        Most of the fix came from the NetBSD awk.
+
+        6. string-conv: String conversions of scalars were sticky.  Once a
+        conversion to string happened, even with OFMT, that value was used until
+        a new numeric value was assigned, even if OFMT differed from CONVFMT,
+        and also if CONVFMT changed.
+
+        7. unary-plus: Unary plus on a string constant returned the string.
+        Instead, it should convert the value to numeric and give that value.
+
+	Also added Arnold's tests for these to awktest.tar as T.arnold.
+
+Aug 15, 2018:
+	fixed mangled awktest.tar (thanks, Arnold), posted all
+	current (very minor) fixes to github / onetrueawk
+
+Jun 7, 2018:
+	(yes, a long layoff)
+	Updated some broken tests (beebe.tar, T.lilly)
+	[thanks to Arnold Robbins]
+
+Mar 26, 2015:
+	buffer overflow in error reporting; thanks to tobias ulmer
+	and john-mark gurney for spotting it and the fix.
+
+Feb 4, 2013:
+	cleaned up a handful of tests that didn't seem to actually
+	test for correct behavior: T.latin1, T.gawk.
+
+Jan 5, 2013:
+	added ,NULL initializer to static Cells in run.c; not really
+	needed but cleaner.  Thanks to Michael Bombardieri.
+
 Dec 20, 2012:
 	fiddled makefile to get correct yacc and bison flags.  pick yacc
 	(linux) or bison (mac) as necessary.
@@ -493,6 +600,8 @@ May 12, 1998:
 Mar 12, 1998:
 	added -V to print version number and die.
 
+[notify dave kerns, dkerns@dacsoup.ih.lucent.com]
+
 Feb 11, 1998:
 	subtle silent bug in lex.c: if the program ended with a number
 	longer than 1 digit, part of the input would be pushed back and
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000000..07dfd7b73b11
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,23 @@
+/****************************************************************
+Copyright (C) Lucent Technologies 1997
+All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that the copyright notice and this
+permission notice and warranty disclaimer appear in supporting
+documentation, and that the name Lucent Technologies or any of
+its entities not be used in advertising or publicity pertaining
+to distribution of the software without specific, written prior
+permission.
+
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+****************************************************************/
diff --git a/REGRESS b/REGRESS
new file mode 100755
index 000000000000..7d3ded69d536
--- /dev/null
+++ b/REGRESS
@@ -0,0 +1,35 @@
+#! /bin/sh
+# Build awk if necessary, unpack the test suite, then run its REGRESS driver.
+case `uname` in
+CYGWIN*)	EXE=a.exe ;;	# uname prints CYGWIN_NT-<version> on Cygwin
+*)	EXE=a.out ;;
+esac
+
+if [ ! -f $EXE ]
+then
+	make || exit 1
+fi
+
+if [ -d testdir ]
+then
+	true	# do nothing
+elif [ -f awktest.tar ]
+then
+	echo extracting testdir
+	tar -xpf awktest.tar
+else
+	echo "$0: No testdir directory and no awktest.tar to extract it from!" >&2
+	exit 1
+fi
+
+cd testdir || exit 1	# else PATH=. below would re-run this very script
+pwd
+PATH=.:$PATH
+export PATH
+if (ulimit -c unlimited > /dev/null 2>&1)
+then
+	# Workaround broken default on MacOS X
+	ulimit -c unlimited
+fi
+
+REGRESS
diff --git a/awk.1 b/awk.1
index 6119613c1aae..18e99ad39496 100644
--- a/awk.1
+++ b/awk.1
@@ -7,7 +7,6 @@
 .fi
 .ft 1
 ..
-awk
 .TH AWK 1
 .CT 1 files prog_other
 .SH NAME
@@ -36,7 +35,7 @@ awk \- pattern-directed scanning and processing language
 scans each input
 .I file
 for lines that match any of a set of patterns specified literally in
-.IR prog
+.I prog
 or in one or more files
 specified as
 .B \-f
@@ -53,7 +52,7 @@ The file name
 .B \-
 means the standard input.
 Any
-.IR file
+.I file
 of the form
 .I var=value
 is treated as an assignment, not a filename,
@@ -70,12 +69,12 @@ any number of
 options may be present.
 The
 .B \-F
-.IR fs
+.I fs
 option defines the input field separator to be the regular expression
-.IR fs.
+.IR fs .
 .PP
 An input line is normally made up of fields separated by white space,
-or by regular expression
+or by the regular expression
 .BR FS .
 The fields are denoted
 .BR $1 ,
@@ -87,7 +86,7 @@ If
 .BR FS
 is null, the input line is split into one field per character.
 .PP
-A pattern-action statement has the form
+A pattern-action statement has the form:
 .IP
 .IB pattern " { " action " }
 .PP
@@ -101,7 +100,7 @@ An action is a sequence of statements.
 A statement can be one of the following:
 .PP
 .EX
-.ta \w'\f(CWdelete array[expression]'u
+.ta \w'\f(CWdelete array[expression]\fR'u
 .RS
 .nf
 .ft CW
@@ -145,7 +144,7 @@ The operators
 are also available in expressions.
 Variables may be scalars, array elements
 (denoted
-.IB x  [ i ] )
+.IB x  [ i ] \fR)
 or fields.
 Variables are initialized to the null string.
 Array subscripts may be any string,
@@ -161,11 +160,11 @@ The
 .B print
 statement prints its arguments on the standard output
 (or on a file if
-.BI > file
+.BI > " file
 or
-.BI >> file
+.BI >> " file
 is present or on a pipe if
-.BI | cmd
+.BI | " cmd
 is present), separated by the current output field separator,
 and terminated by the output record separator.
 .I file
@@ -176,9 +175,10 @@ identical string values in different statements denote
 the same open file.
 The
 .B printf
-statement formats its expression list according to the format
+statement formats its expression list according to the
+.I format
 (see
-.IR printf (3)) .
+.IR printf (3)).
 The built-in function
 .BI close( expr )
 closes the file or pipe
@@ -189,13 +189,13 @@ flushes any buffered output for the file or pipe
 .IR expr .
 .PP
 The mathematical functions
+.BR atan2 ,
+.BR cos ,
 .BR exp ,
 .BR log ,
-.BR sqrt ,
 .BR sin ,
-.BR cos ,
 and
-.BR atan2 
+.B sqrt
 are built in.
 Other built-in functions:
 .TF length
@@ -203,7 +203,8 @@ Other built-in functions:
 .B length
 the length of its argument
 taken as a string,
-or of
+number of elements in an array for an array argument,
+or length of
 .B $0
 if no argument.
 .TP
@@ -218,14 +219,18 @@ and returns the previous seed.
 .B int
 truncates to an integer value
 .TP
-.BI substr( s , " m" , " n\fB)
+\fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR
 the
 .IR n -character
 substring of
 .I s
 that begins at position
-.IR m 
+.I m 
 counted from 1.
+If no
+.IR n ,
+use the rest of the string
+.IR s .
 .TP
 .BI index( s , " t" )
 the position in
@@ -246,14 +251,14 @@ and
 .B RLENGTH
 are set to the position and length of the matched string.
 .TP
-.BI split( s , " a" , " fs\fB)
+\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIfs\^\fR]\fB)\fR
 splits the string
 .I s
 into array elements
-.IB a [1] ,
-.IB a [2] ,
+.IB a [1] \fR,
+.IB a [2] \fR,
 \&...,
-.IB a [ n ] ,
+.IB a [ n ] \fR,
 and returns
 .IR n .
 The separation is done with the regular expression
@@ -266,7 +271,7 @@ is not given.
 An empty string as field separator splits the string
 into one array element per character.
 .TP
-.BI sub( r , " t" , " s\fB)
+\fBsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB)
 substitutes
 .I t
 for the first occurrence of the regular expression
@@ -279,7 +284,7 @@ is not given,
 .B $0
 is used.
 .TP
-.B gsub
+\fBgsub(\fIr\fB, \fIt \fR[, \fIs\^\fR]\fB)
 same as
 .B sub
 except that all occurrences of the regular expression
@@ -289,18 +294,28 @@ and
 .B gsub
 return the number of replacements.
 .TP
-.BI sprintf( fmt , " expr" , " ...\fB )
+.BI sprintf( fmt , " expr" , " ...\fB)
 the string resulting from formatting
 .I expr ...
 according to the
 .IR printf (3)
 format
-.I fmt
+.IR fmt .
 .TP
 .BI system( cmd )
 executes
 .I cmd
-and returns its exit status
+and returns its exit status. This will be \-1 upon error,
+.IR cmd 's
+exit status upon a normal exit,
+256 + 
+.I sig
+upon death-by-signal, where
+.I sig
+is the number of the murdering signal,
+or 512 +
+.I sig
+if there was a core dump.
 .TP
 .BI tolower( str )
 returns a copy of
@@ -321,7 +336,7 @@ sets
 .B $0
 to the next input record from the current input file;
 .B getline
-.BI < file
+.BI < " file
 sets
 .B $0
 to the next record from
@@ -359,7 +374,7 @@ Isolated regular expressions
 in a pattern apply to the entire line.
 Regular expressions may also occur in
 relational expressions, using the operators
-.BR ~
+.B ~
 and
 .BR !~ .
 .BI / re /
@@ -383,8 +398,12 @@ A relational expression is one of the following:
 .br
 .BI ( expr , expr,... ") in " array-name
 .PP
-where a relop is any of the six relational operators in C,
-and a matchop is either
+where a
+.I relop
+is any of the six relational operators in C,
+and a
+.I matchop
+is either
 .B ~
 (matches)
 or
@@ -405,57 +424,68 @@ and after the last.
 and
 .B END
 do not combine with other patterns.
+They may appear multiple times in a program and execute
+in the order they are read by
+.IR awk .
 .PP
 Variable names with special meanings:
 .TF FILENAME
 .TP
+.B ARGC
+argument count, assignable.
+.TP
+.B ARGV
+argument array, assignable;
+non-null members are taken as filenames.
+.TP
 .B CONVFMT
 conversion format used when converting numbers
 (default
-.BR "%.6g" )
+.BR "%.6g" ).
+.TP
+.B ENVIRON
+array of environment variables; subscripts are names.
+.TP
+.B FILENAME
+the name of the current input file.
+.TP
+.B FNR
+ordinal number of the current record in the current file.
 .TP
 .B FS
 regular expression used to separate fields; also settable
 by option
-.BI \-F fs.
+.BI \-F fs\fR.
 .TP
 .BR NF
-number of fields in the current record
+number of fields in the current record.
 .TP
 .B NR
-ordinal number of the current record
-.TP
-.B FNR
-ordinal number of the current record in the current file
-.TP
-.B FILENAME
-the name of the current input file
+ordinal number of the current record.
 .TP
-.B RS
-input record separator (default newline)
+.B OFMT
+output format for numbers (default
+.BR "%.6g" ).
 .TP
 .B OFS
-output field separator (default blank)
+output field separator (default space).
 .TP
 .B ORS
-output record separator (default newline)
+output record separator (default newline).
 .TP
-.B OFMT
-output format for numbers (default
-.BR "%.6g" )
-.TP
-.B SUBSEP
-separates multiple subscripts (default 034)
+.B RLENGTH
+the length of a string matched by
+.BR match .
 .TP
-.B ARGC
-argument count, assignable
+.B RS
+input record separator (default newline).
 .TP
-.B ARGV
-argument array, assignable;
-non-null members are taken as filenames
+.B RSTART
+the start position of a string matched by
+.BR match .
 .TP
-.B ENVIRON
-array of environment variables; subscripts are names.
+.B SUBSEP
+separates multiple subscripts (default 034).
 .PD
 .PP
 Functions may be defined (at the position of a pattern-action statement) thus:
@@ -486,7 +516,7 @@ BEGIN { FS = ",[ \et]*|[ \et]+" }
 .EE
 .ns
 .IP
-Same, with input fields separated by comma and/or blanks and tabs.
+Same, with input fields separated by comma and/or spaces and tabs.
 .PP
 .EX
 .nf
@@ -512,13 +542,13 @@ BEGIN	{	# Simulate echo(1)
 .fi
 .EE
 .SH SEE ALSO
+.IR grep (1), 
 .IR lex (1), 
 .IR sed (1)
 .br
 A. V. Aho, B. W. Kernighan, P. J. Weinberger,
-.I
-The AWK Programming Language,
-Addison-Wesley, 1988.  ISBN 0-201-07981-X
+.IR "The AWK Programming Language" ,
+Addison-Wesley, 1988.  ISBN 0-201-07981-X.
 .SH BUGS
 There are no explicit conversions between numbers and strings.
 To force an expression to be treated as a number add 0 to it;
@@ -527,3 +557,5 @@ to force it to be treated as a string concatenate
 .br
 The scope rules for variables in functions are a botch;
 the syntax is worse.
+.br
+Only eight-bit character sets are handled correctly.
diff --git a/awk.h b/awk.h
index a36cdb151e75..ddf246687969 100644
--- a/awk.h
+++ b/awk.h
@@ -81,7 +81,8 @@ typedef struct Cell {
 	char	*nval;		/* name, for variables only */
 	char	*sval;		/* string value */
 	Awkfloat fval;		/* value as number */
-	int	 tval;		/* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */
+	int	 tval;		/* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */
+	char	*fmt;		/* CONVFMT/OFMT value used to convert from number */
 	struct Cell *cnext;	/* ptr to next if chained */
 } Cell;
 
@@ -96,9 +97,14 @@ extern Array	*symtab;
 
 extern Cell	*nrloc;		/* NR */
 extern Cell	*fnrloc;	/* FNR */
+extern Cell	*fsloc;		/* FS */
 extern Cell	*nfloc;		/* NF */
+extern Cell	*ofsloc;	/* OFS */
+extern Cell	*orsloc;	/* ORS */
+extern Cell	*rsloc;		/* RS */
 extern Cell	*rstartloc;	/* RSTART */
 extern Cell	*rlengthloc;	/* RLENGTH */
+extern Cell	*subseploc;	/* SUBSEP */
 
 /* Cell.tval values: */
 #define	NUM	01	/* number value is valid */
@@ -109,6 +115,8 @@ extern Cell	*rlengthloc;	/* RLENGTH */
 #define	FCN	040	/* this is a function name */
 #define FLD	0100	/* this is a field $1, $2, ... */
 #define	REC	0200	/* this is $0 */
+#define CONVC	0400	/* string was converted from number via CONVFMT */
+#define CONVO	01000	/* string was converted from number via OFMT */
 
 
 /* function types */
diff --git a/awkgram.y b/awkgram.y
index 5b5c461b3eed..e4abeeddcb6a 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -86,7 +86,7 @@ Node	*arglist = 0;	/* list of args for current function */
 %left	CAT
 %left	'+' '-'
 %left	'*' '/' '%'
-%left	NOT UMINUS
+%left	NOT UMINUS UPLUS
 %right	POWER
 %right	DECR INCR
 %left	INDIRECT
@@ -357,7 +357,7 @@ term:
 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
-	| '+' term %prec UMINUS		{ $$ = $2; }
+	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
diff --git a/b.c b/b.c
index 5ccb4b1e5d0f..37ea0a5bb2a7 100644
--- a/b.c
+++ b/b.c
@@ -27,6 +27,7 @@ THIS SOFTWARE.
 #define	DEBUG
 
 #include <ctype.h>
+#include <limits.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
@@ -65,6 +66,11 @@ int	rlxval;
 static uschar	*rlxstr;
 static uschar	*prestr;	/* current position in current re */
 static uschar	*lastre;	/* origin of last re */
+static uschar	*lastatom;	/* origin of last Atom */
+static uschar	*starttok;
+static uschar 	*basestr;	/* starts with original, replaced during
+				   repetition processing */
+static uschar 	*firstbasestr;
 
 static	int setcnt;
 static	int poscnt;
@@ -82,11 +88,11 @@ fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
 	fa *pfa;
 	static int now = 1;
 
-	if (setvec == NULL) {	/* first time through any RE */
+	if (setvec == 0) {	/* first time through any RE */
 		maxsetvec = MAXLIN;
 		setvec = (int *) malloc(maxsetvec * sizeof(int));
 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
-		if (setvec == NULL || tmpset == NULL)
+		if (setvec == 0 || tmpset == 0)
 			overflo("out of space initializing makedfa");
 	}
 
@@ -124,6 +130,8 @@ fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
 	Node *p, *p1;
 	fa *f;
 
+	firstbasestr = (uschar *) s;
+	basestr = firstbasestr;
 	p = reparse(s);
 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
 		/* put ALL STAR in front of reg.  exp. */
@@ -137,7 +145,7 @@ fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
 	cfoll(f, p1);	/* set up follow sets */
 	freetr(p1);
-	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
+	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
 			overflo("out of space in makedfa");
 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
 		overflo("out of space in makedfa");
@@ -145,6 +153,10 @@ fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
 	f->initstat = makeinit(f, anchor);
 	f->anchor = anchor;
 	f->restr = (uschar *) tostring(s);
+	if (firstbasestr != basestr) {
+		if (basestr)
+			xfree(basestr);
+	}
 	return f;
 }
 
@@ -157,7 +169,7 @@ int makeinit(fa *f, int anchor)
 	f->reset = 0;
 	k = *(f->re[0].lfollow);
 	xfree(f->posns[2]);			
-	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
 		overflo("out of space in makeinit");
 	for (i=0; i <= k; i++) {
 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
@@ -290,11 +302,11 @@ char *cclenter(const char *argp)	/* add a character class */
 	int i, c, c2;
 	uschar *p = (uschar *) argp;
 	uschar *op, *bp;
-	static uschar *buf = NULL;
+	static uschar *buf = 0;
 	static int bufsz = 100;
 
 	op = p;
-	if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
+	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
 		FATAL("out of space for character class [%.10s...] 1", p);
 	bp = buf;
 	for (i = 0; (c = *p++) != 0; ) {
@@ -350,14 +362,14 @@ void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfo
 			maxsetvec *= 4;
 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
-			if (setvec == NULL || tmpset == NULL)
+			if (setvec == 0 || tmpset == 0)
 				overflo("out of space in cfoll()");
 		}
 		for (i = 0; i <= f->accept; i++)
 			setvec[i] = 0;
 		setcnt = 0;
 		follow(v);	/* computes setvec and setcnt */
-		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
+		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
 			overflo("out of space building follow set");
 		f->re[info(v)].lfollow = p;
 		*p = setcnt;
@@ -391,7 +403,7 @@ int first(Node *p)	/* collects initially active leaves of p into setvec */
 			maxsetvec *= 4;
 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
-			if (setvec == NULL || tmpset == NULL)
+			if (setvec == 0 || tmpset == 0)
 				overflo("out of space in first()");
 		}
 		if (type(p) == EMPTYRE) {
@@ -531,7 +543,7 @@ int pmatch(fa *f, const char *p0)	/* longest match, for sub */
 			for (i = 2; i <= f->curstat; i++)
 				xfree(f->posns[i]);
 			k = *f->posns[0];			
-			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
 				overflo("out of space in pmatch");
 			for (i = 0; i <= k; i++)
 				(f->posns[2])[i] = (f->posns[0])[i];
@@ -588,7 +600,7 @@ int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
 			for (i = 2; i <= f->curstat; i++)
 				xfree(f->posns[i]);
 			k = *f->posns[0];			
-			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
+			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
 				overflo("out of state space");
 			for (i = 0; i <= k; i++)
 				(f->posns[2])[i] = (f->posns[0])[i];
@@ -628,9 +640,11 @@ Node *regexp(void)	/* top-level parse of reg expr */
 Node *primary(void)
 {
 	Node *np;
+	int savelastatom;
 
 	switch (rtok) {
 	case CHAR:
+		lastatom = starttok;
 		np = op2(CHAR, NIL, itonp(rlxval));
 		rtok = relex();
 		return (unary(np));
@@ -639,16 +653,19 @@ Node *primary(void)
 		return (unary(op2(ALL, NIL, NIL)));
 	case EMPTYRE:
 		rtok = relex();
-		return (unary(op2(ALL, NIL, NIL)));
+		return (unary(op2(EMPTYRE, NIL, NIL)));
 	case DOT:
+		lastatom = starttok;
 		rtok = relex();
 		return (unary(op2(DOT, NIL, NIL)));
 	case CCL:
 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
+		lastatom = starttok;
 		rtok = relex();
 		return (unary(np));
 	case NCCL:
 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
+		lastatom = starttok;
 		rtok = relex();
 		return (unary(np));
 	case '^':
@@ -658,6 +675,8 @@ Node *primary(void)
 		rtok = relex();
 		return (unary(op2(CHAR, NIL, NIL)));
 	case '(':
+		lastatom = starttok;
+		savelastatom = starttok - basestr; /* Retain over recursion */
 		rtok = relex();
 		if (rtok == ')') {	/* special pleading for () */
 			rtok = relex();
@@ -665,6 +684,7 @@ Node *primary(void)
 		}
 		np = regexp();
 		if (rtok == ')') {
+			lastatom = basestr + savelastatom; /* Restore */
 			rtok = relex();
 			return (unary(np));
 		}
@@ -679,8 +699,12 @@ Node *primary(void)
 Node *concat(Node *np)
 {
 	switch (rtok) {
-	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
+	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
 		return (concat(op2(CAT, np, primary())));
+	case EMPTYRE:	/* NOTE(review): np is not used in this branch; the new CAT starts from an empty CCL instead -- confirm dropping np is intended */
+		rtok = relex();	/* consume the EMPTYRE token before parsing the next primary */
+		return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
+				primary())));
 	}
 	return (np);
 }
@@ -749,7 +773,7 @@ struct charclass {
 	{ "alnum",	5,	isalnum },
 	{ "alpha",	5,	isalpha },
 #ifndef HAS_ISBLANK
-	{ "blank",	5,	isspace }, /* was isblank */
+	{ "blank",	5,	xisblank },
 #else
 	{ "blank",	5,	isblank },
 #endif
@@ -765,16 +789,132 @@ struct charclass {
 	{ NULL,		0,	NULL },
 };
 
+#define REPEAT_SIMPLE		0	/* {n} or {n,n}, n >= 1: append n-1 more copies of the atom */
+#define REPEAT_PLUS_APPENDED	1	/* {n,}, n >= 2: append n-1 more copies, then a '+' */
+#define REPEAT_WITH_Q		2	/* {n,m}, n < m: append copies, then optional "x?" copies */
+#define REPEAT_ZERO		3	/* {0} or {0,0}: overwrite the atom with "()" */
+
+static int	/* rebuild basestr with the bound at reptok expanded into plain ERE text; returns 1, or 2 for REPEAT_ZERO */
+replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
+	       int atomlen, int firstnum, int secondnum, int special_case)
+{
+	int i, j;
+	uschar *buf = 0;
+	int ret = 1;
+	int init_q = (firstnum==0);		/* {0,m}: the copy already in the prefix becomes optional */
+	int n_q_reps = secondnum-firstnum;	/* m-n optional copies in total (only meaningful for {n,m}) */
+	int prefix_length = reptok - basestr;	/* prefix includes first rep	*/
+	int suffix_length = strlen((char *) reptok) - reptoklen;	/* string after rep specifier	*/
+	int size = prefix_length +  suffix_length;
+
+	if (firstnum > 1) {	/* add room for reps 2 through firstnum */
+		size += atomlen*(firstnum-1);
+	}
+
+	/* Adjust size of buffer for special cases */
+	if (special_case == REPEAT_PLUS_APPENDED) {
+		size++;		/* for the final + */
+	} else if (special_case == REPEAT_WITH_Q) {
+		size += init_q + (atomlen+1)* (n_q_reps-init_q);	/* '?' for the in-place copy, plus each appended "x?" */
+	} else if (special_case == REPEAT_ZERO) {
+		size += 2;	/* just a null ERE: () */
+	}
+	if ((buf = (uschar *) malloc(size+1)) == NULL)
+		FATAL("out of space in reg expr %.10s..", lastre);
+	memcpy(buf, basestr, prefix_length);	/* copy prefix	*/
+	j = prefix_length;
+	if (special_case == REPEAT_ZERO) {
+		j -= atomlen;	/* back up so that () overwrites the atom copied with the prefix */
+		buf[j++] = '(';
+		buf[j++] = ')';
+	}
+	for (i=1; i < firstnum; i++) {		/* copy x reps 	*/
+		memcpy(&buf[j], atom, atomlen);
+		j += atomlen;
+	}
+	if (special_case == REPEAT_PLUS_APPENDED) {
+		buf[j++] = '+';
+	} else if (special_case == REPEAT_WITH_Q) {
+		if (init_q) buf[j++] = '?';	/* makes the in-place copy the first of the m-n optional ones */
+		for (i=init_q; i < n_q_reps; i++) {	/* copy x? reps; start at init_q so {0,m} yields m optional copies, not m+1 */
+			memcpy(&buf[j], atom, atomlen);
+			j += atomlen;
+			buf[j++] = '?';
+		}
+	}
+	memcpy(&buf[j], reptok+reptoklen, suffix_length);
+	if (special_case == REPEAT_ZERO) {
+		buf[j+suffix_length] = '\0';	/* result can be shorter than size here, so terminate at the real end */
+	} else {
+		buf[size] = '\0';
+	}
+	/* free old basestr, but never the string firstbasestr points at */
+	if (firstbasestr != basestr) {
+		if (basestr)
+			xfree(basestr);
+	}
+	basestr = buf;
+	prestr  = buf + prefix_length;	/* resume scanning right after the in-place atom */
+	if (special_case == REPEAT_ZERO) {
+		prestr  -= atomlen;	/* resume at the inserted () instead */
+		ret++;
+	}
+	return ret;
+}
+
+static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
+		  int atomlen, int firstnum, int secondnum)	/* classify a bound and pass it to replace_repeat() */
+{
+	/*
+	   In general, the repetition specifier or "bound" is replaced here
+	   by an equivalent ERE string, repeating the immediately previous atom
+	   and appending ? and + as needed. Note that the first copy of the
+	   atom is left in place, except in the special case of a zero-repeat
+	   (i.e., {0}). secondnum < 0 means no upper limit was given ({n,}).
+	 */
+	if (secondnum < 0) {	/* means {n,} -> repeat n-1 times followed by PLUS */
+		if (firstnum < 2) {
+			/* 0 or 1: should be handled before you get here */
+			FATAL("internal error");
+		} else {
+			return replace_repeat(reptok, reptoklen, atom, atomlen,
+				firstnum, secondnum, REPEAT_PLUS_APPENDED);
+		}
+	} else if (firstnum == secondnum) {	/* {n} or {n,n} -> simply repeat n-1 times */
+		if (firstnum == 0) {	/* {0} or {0,0} */
+			/* This case is unusual because the resulting
+			   replacement string might actually be SMALLER than
+			   the original ERE */
+			return replace_repeat(reptok, reptoklen, atom, atomlen,
+					firstnum, secondnum, REPEAT_ZERO);
+		} else {		/* (firstnum >= 1) */
+			return replace_repeat(reptok, reptoklen, atom, atomlen,
+					firstnum, secondnum, REPEAT_SIMPLE);
+		}
+	} else if (firstnum < secondnum) {	/* {n,m} -> repeat n-1 times then alternate  */
+		/*  x{n,m}  =>  xx...x{1, m-n+1}  =>  xx...x?x?x?..x?	*/
+		return replace_repeat(reptok, reptoklen, atom, atomlen,
+					firstnum, secondnum, REPEAT_WITH_Q);
+	} else {	/* Error - shouldn't be here (n>m) */
+		FATAL("internal error");
+	}
+	return 0;	/* reached only after one of the FATAL() calls above, i.e. only if FATAL() returns */
+}
 
 int relex(void)		/* lexical analyzer for reparse */
 {
 	int c, n;
 	int cflag;
-	static uschar *buf = NULL;
+	static uschar *buf = 0;
 	static int bufsz = 100;
 	uschar *bp;
 	struct charclass *cc;
 	int i;
+	int num, m, commafound, digitfound;
+	const uschar *startreptok;
+
+rescan:
+	starttok = prestr;
 
 	switch (c = *prestr++) {
 	case '|': return OR;
@@ -795,7 +935,7 @@ int relex(void)		/* lexical analyzer for reparse */
 		rlxval = c;
 		return CHAR;
 	case '[': 
-		if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
+		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
 			FATAL("out of space in reg expr %.10s..", lastre);
 		bp = buf;
 		if (*prestr == '^') {
@@ -823,7 +963,15 @@ int relex(void)		/* lexical analyzer for reparse */
 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
 				    prestr[2 + cc->cc_namelen] == ']') {
 					prestr += cc->cc_namelen + 3;
-					for (i = 0; i < NCHARS; i++) {
+					/*
+					 * BUG: We begin at 1, instead of 0, since we
+					 * would otherwise prematurely terminate the
+					 * string for classes like [[:cntrl:]]. This
+					 * means that we can't match the NUL character,
+					 * not without first adapting the entire
+					 * program to track each string's length.
+					 */
+					for (i = 1; i <= UCHAR_MAX; i++) {
 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
 						    FATAL("out of space for reg expr %.10s...", lastre);
 						if (cc->cc_func(i)) {
@@ -833,6 +981,40 @@ int relex(void)		/* lexical analyzer for reparse */
 					}
 				} else
 					*bp++ = c;
+			} else if (c == '[' && *prestr == '.') {
+				char collate_char;
+				prestr++;
+				collate_char = *prestr++;
+				if (*prestr == '.' && prestr[1] == ']') {
+					prestr += 2;
+					/* Found it: map via locale TBD: for
+					   now, simply return this char.  This
+					   is sufficient to pass conformance
+					   test awk.ex 156
+					 */
+					if (*prestr == ']') {
+						prestr++;
+						rlxval = collate_char;
+						return CHAR;
+					}
+				}
+			} else if (c == '[' && *prestr == '=') {
+				char equiv_char;
+				prestr++;
+				equiv_char = *prestr++;
+				if (*prestr == '=' && prestr[1] == ']') {
+					prestr += 2;
+					/* Found it: map via locale TBD: for now
+					   simply return this char. This is
+					   sufficient to pass conformance test
+					   awk.ex 156
+					 */
+					if (*prestr == ']') {
+						prestr++;
+						rlxval = equiv_char;
+						return CHAR;
+					}
+				}
 			} else if (c == '\0') {
 				FATAL("nonterminated character class %.20s", lastre);
 			} else if (bp == buf) {	/* 1st char is special */
@@ -847,6 +1029,75 @@ int relex(void)		/* lexical analyzer for reparse */
 			} else
 				*bp++ = c;
 		}
+		break;
+	case '{':
+		if (isdigit(*(prestr))) {
+			num = 0;	/* Process as a repetition */
+			n = -1; m = -1;
+			commafound = 0;
+			digitfound = 0;
+			startreptok = prestr-1;
+			/* Remember start of previous atom here ? */
+		} else {        	/* just a { char, not a repetition */
+			rlxval = c;
+			return CHAR;
+                }
+		for (; ; ) {
+			if ((c = *prestr++) == '}') {
+				if (commafound) {
+					if (digitfound) { /* {n,m} */
+						m = num;
+						if (m<n)
+							FATAL("illegal repetition expression: class %.20s",
+								lastre);
+						if ((n==0) && (m==1)) {
+							return QUEST;
+						}
+					} else {	/* {n,} */
+						if (n==0) return STAR;
+						if (n==1) return PLUS;
+					}
+				} else {
+					if (digitfound) { /* {n} same as {n,n} */
+						n = num;
+						m = num;
+					} else {	/* {} */
+						FATAL("illegal repetition expression: class %.20s",
+							lastre);
+					}
+				}
+				if (repeat(starttok, prestr-starttok, lastatom,
+					   startreptok - lastatom, n, m) > 0) {
+					if ((n==0) && (m==0)) {
+						return EMPTYRE;
+					}
+					/* must rescan input for next token */
+					goto rescan;
+				}
+				/* Failed to replace: eat up {...} characters
+				   and treat like just PLUS */
+				return PLUS;
+			} else if (c == '\0') {
+				FATAL("nonterminated character class %.20s",
+					lastre);
+			} else if (isdigit(c)) {
+				num = 10 * num + c - '0';
+				digitfound = 1;
+			} else if (c == ',') {
+				if (commafound)
+					FATAL("illegal repetition expression: class %.20s",
+						lastre);
+				/* looking for {n,} or {n,m} */
+				commafound = 1;
+				n = num;
+				digitfound = 0; /* reset */
+				num = 0;
+			} else {
+				FATAL("illegal repetition expression: class %.20s",
+					lastre);
+			}
+		}
+		break;
 	}
 }
 
@@ -860,7 +1111,7 @@ int cgoto(fa *f, int s, int c)
 		maxsetvec *= 4;
 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
-		if (setvec == NULL || tmpset == NULL)
+		if (setvec == 0 || tmpset == 0)
 			overflo("out of space in cgoto()");
 	}
 	for (i = 0; i <= f->accept; i++)
@@ -882,7 +1133,7 @@ int cgoto(fa *f, int s, int c)
 						maxsetvec *= 4;
 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
 						tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
-						if (setvec == NULL || tmpset == NULL)
+						if (setvec == 0 || tmpset == 0)
 							overflo("cgoto overflow");
 					}
 					if (setvec[q[j]] == 0) {
@@ -925,7 +1176,7 @@ int cgoto(fa *f, int s, int c)
 	for (i = 0; i < NCHARS; i++)
 		f->gototab[f->curstat][i] = 0;
 	xfree(f->posns[f->curstat]);
-	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
+	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
 		overflo("out of space in cgoto");
 
 	f->posns[f->curstat] = p;
diff --git a/bugs-fixed/README b/bugs-fixed/README
new file mode 100644
index 000000000000..2f27c1039873
--- /dev/null
+++ b/bugs-fixed/README
@@ -0,0 +1,57 @@
+List of bugs fixed.
+
+1. ofs-rebuild: OFS value used to rebuild the record was incorrect.
+Fixed August 19, 2014. Revised fix August 2018.
+
+2. system-status: Instead of a floating-point division by 256, use
+the wait(2) macros to create a reasonable exit status. Fixed March 12, 2016.
+
+3. space: Use provided xisblank() function instead of isspace() for
+matching [[:blank:]].
+
+4. a-format: Add POSIX standard %a and %A to supported formats. Check
+at runtime that this format is available.
+
+5. decr-NF: Decrementing NF did not change $0. This is a decades-old
+bug. There are interactions with the old and new value of OFS as well.
+Most of the fix came from the NetBSD awk.
+
+6. string-conv: String conversions of scalars were sticky.  Once a
+conversion to string happened, even with OFMT, that value was used until
+a new numeric value was assigned, even if OFMT differed from CONVFMT,
+and also if CONVFMT changed.
+
+7. unary-plus: Unary plus on a string constant returned the string.
+Instead, it should convert the value to numeric and give that value.
+
+8. concat-assign-same: Concatenation previously evaluated both sides of the
+expression before doing its work, which, since assign() evaluates to the cell
+being assigned to, meant that expressions like "print (a = 1) (a = 2)" would
+print "22" rather than "12".
+
+9. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+10. missing-precision: When using the format string "%*s", the precision
+argument was used without checking if it was present first.
+
+11. fmt-overflow: The buffer used for OFMT/CONVFMT conversions was written
+to with sprintf(), which meant that some conversions could write past the
+end.
+
+12. numeric-subsep, numeric-fs, numeric-output-seps, numeric-rs: If SUBSEP,
+FS, RS, OFS, or ORS were set to a numeric value, then their string values
+wouldn't always be generated before being needed.
+
+13. subsep-overflow: The length of SUBSEP needs to be rechecked after
+calling execute(), in case SUBSEP itself has been changed.
+
+14. split-fs-from-array: If the third argument to split() comes from the
+array passed as the second argument, then split() would previously read
+from the freed memory and possibly produce incorrect results (depending
+on the system's malloc()/free() behaviour.)
+
+15. getline-numeric: The `getline xx < file' syntax did not check if
+values were numeric, in discordance with POSIX. Test case adapted from
+one posted by Ben Bacarisse <ben.usenet@bsb.me.uk> in comp.lang.awk,
+January 2019.
diff --git a/bugs-fixed/a-format.awk b/bugs-fixed/a-format.awk
new file mode 100644
index 000000000000..5b7929ee3eea
--- /dev/null
+++ b/bugs-fixed/a-format.awk
@@ -0,0 +1,3 @@
+BEGIN {
+	printf("%a\n", 42)
+}
diff --git a/bugs-fixed/a-format.bad b/bugs-fixed/a-format.bad
new file mode 100644
index 000000000000..1281825b1111
--- /dev/null
+++ b/bugs-fixed/a-format.bad
@@ -0,0 +1,3 @@
+nawk: weird printf conversion %a
+ source line number 2
+%a42
diff --git a/bugs-fixed/a-format.ok b/bugs-fixed/a-format.ok
new file mode 100644
index 000000000000..e421e2d01ba6
--- /dev/null
+++ b/bugs-fixed/a-format.ok
@@ -0,0 +1 @@
+0x1.5p+5
diff --git a/bugs-fixed/concat-assign-same.awk b/bugs-fixed/concat-assign-same.awk
new file mode 100644
index 000000000000..ed19f35ca835
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.awk
@@ -0,0 +1,4 @@
+BEGIN {
+    print (a = 1) (a = 2) (a = 3) (a = 4) (a = 5);
+    print (a = 1), (a = 2), (a = 3), (a = 4), (a = 5);
+}
diff --git a/bugs-fixed/concat-assign-same.bad b/bugs-fixed/concat-assign-same.bad
new file mode 100644
index 000000000000..294725b28a97
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.bad
@@ -0,0 +1,2 @@
+22345
+1 2 3 4 5
diff --git a/bugs-fixed/concat-assign-same.ok b/bugs-fixed/concat-assign-same.ok
new file mode 100644
index 000000000000..447505259d02
--- /dev/null
+++ b/bugs-fixed/concat-assign-same.ok
@@ -0,0 +1,2 @@
+12345
+1 2 3 4 5
diff --git a/bugs-fixed/decr-NF.awk b/bugs-fixed/decr-NF.awk
new file mode 100644
index 000000000000..7474991d196e
--- /dev/null
+++ b/bugs-fixed/decr-NF.awk
@@ -0,0 +1,11 @@
+BEGIN {
+	$0 = "a b c d e f"
+	print NF
+	OFS = ":"
+	NF--
+	print $0
+	print NF
+	NF++
+	print $0
+	print NF
+}
diff --git a/bugs-fixed/decr-NF.bad b/bugs-fixed/decr-NF.bad
new file mode 100644
index 000000000000..b634e065954c
--- /dev/null
+++ b/bugs-fixed/decr-NF.bad
@@ -0,0 +1,5 @@
+6
+a b c d e f
+5
+a b c d e f
+6
diff --git a/bugs-fixed/decr-NF.ok b/bugs-fixed/decr-NF.ok
new file mode 100644
index 000000000000..3359cf2312d1
--- /dev/null
+++ b/bugs-fixed/decr-NF.ok
@@ -0,0 +1,5 @@
+6
+a:b:c:d:e
+5
+a:b:c:d:e:
+6
diff --git a/bugs-fixed/fmt-overflow.awk b/bugs-fixed/fmt-overflow.awk
new file mode 100644
index 000000000000..bf5877e4abac
--- /dev/null
+++ b/bugs-fixed/fmt-overflow.awk
@@ -0,0 +1 @@
+BEGIN { OFMT = "%.1000f"; print 1.25; }
diff --git a/bugs-fixed/fmt-overflow.ok b/bugs-fixed/fmt-overflow.ok
new file mode 100644
index 000000000000..5f7449e68073
--- /dev/null
+++ b/bugs-fixed/fmt-overflow.ok
@@ -0,0 +1 @@
+1.2500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
diff --git a/bugs-fixed/fs-overflow.awk b/bugs-fixed/fs-overflow.awk
new file mode 100644
index 000000000000..be10f5a46f0d
--- /dev/null
+++ b/bugs-fixed/fs-overflow.awk
@@ -0,0 +1,13 @@
+function foo() {
+    a = "";
+    for (i = 0; i < 10000; i++) {
+        a = a "c";
+    }
+    return a;
+}
+
+BEGIN {
+    FS = foo();
+    $0="foo";
+    print $1;
+}
diff --git a/bugs-fixed/getline-numeric.awk b/bugs-fixed/getline-numeric.awk
new file mode 100644
index 000000000000..5571a9589a3d
--- /dev/null
+++ b/bugs-fixed/getline-numeric.awk
@@ -0,0 +1,6 @@
+{
+    print $0, ($0 <= 50 ? "<=" : ">"), 50
+    getline dd < ARGV[1]
+    print dd, (dd <= 50 ? "<=" : ">"), 50
+    if (dd == $0) print "same"
+}
diff --git a/bugs-fixed/getline-numeric.bad b/bugs-fixed/getline-numeric.bad
new file mode 100644
index 000000000000..d911c774fa9a
--- /dev/null
+++ b/bugs-fixed/getline-numeric.bad
@@ -0,0 +1,3 @@
+120 > 50
+120 <= 50
+same
diff --git a/bugs-fixed/getline-numeric.in b/bugs-fixed/getline-numeric.in
new file mode 100644
index 000000000000..52bd8e43afb0
--- /dev/null
+++ b/bugs-fixed/getline-numeric.in
@@ -0,0 +1 @@
+120
diff --git a/bugs-fixed/getline-numeric.ok b/bugs-fixed/getline-numeric.ok
new file mode 100644
index 000000000000..f7efd3db506f
--- /dev/null
+++ b/bugs-fixed/getline-numeric.ok
@@ -0,0 +1,3 @@
+120 > 50
+120 > 50
+same
diff --git a/bugs-fixed/missing-precision.awk b/bugs-fixed/missing-precision.awk
new file mode 100644
index 000000000000..4e7a74b2c964
--- /dev/null
+++ b/bugs-fixed/missing-precision.awk
@@ -0,0 +1 @@
+BEGIN { printf("%*s"); }
diff --git a/bugs-fixed/missing-precision.ok b/bugs-fixed/missing-precision.ok
new file mode 100644
index 000000000000..608b4fa48666
--- /dev/null
+++ b/bugs-fixed/missing-precision.ok
@@ -0,0 +1,2 @@
+./a.out: not enough args in printf(%*s)
+ source line number 1
diff --git a/bugs-fixed/negative-nf.awk b/bugs-fixed/negative-nf.awk
new file mode 100644
index 000000000000..6caeee4602b5
--- /dev/null
+++ b/bugs-fixed/negative-nf.awk
@@ -0,0 +1 @@
+BEGIN { NF = -5; }
diff --git a/bugs-fixed/negative-nf.ok b/bugs-fixed/negative-nf.ok
new file mode 100644
index 000000000000..71c860468cc0
--- /dev/null
+++ b/bugs-fixed/negative-nf.ok
@@ -0,0 +1,2 @@
+./a.out: cannot set NF to a negative value
+ source line number 1
diff --git a/bugs-fixed/nf-self-assign.awk b/bugs-fixed/nf-self-assign.awk
new file mode 100644
index 000000000000..6ae29eef916d
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.awk
@@ -0,0 +1,6 @@
+BEGIN {
+	$0="a b c";
+	OFS=",";
+	NF = NF;
+	print;
+}
diff --git a/bugs-fixed/nf-self-assign.bad b/bugs-fixed/nf-self-assign.bad
new file mode 100644
index 000000000000..3774da60e546
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.bad
@@ -0,0 +1 @@
+a b c
diff --git a/bugs-fixed/nf-self-assign.ok b/bugs-fixed/nf-self-assign.ok
new file mode 100644
index 000000000000..b2ffb02521e6
--- /dev/null
+++ b/bugs-fixed/nf-self-assign.ok
@@ -0,0 +1 @@
+a,b,c
diff --git a/bugs-fixed/numeric-fs.awk b/bugs-fixed/numeric-fs.awk
new file mode 100644
index 000000000000..01e438d4aa28
--- /dev/null
+++ b/bugs-fixed/numeric-fs.awk
@@ -0,0 +1,5 @@
+BEGIN {
+	FS = 0; split("20202", a); print a[1];
+	FS = 1; $0="31313"; print $1;
+	FS = 2; "echo 42424" | getline; print $1;
+}
diff --git a/bugs-fixed/numeric-fs.ok b/bugs-fixed/numeric-fs.ok
new file mode 100644
index 000000000000..dcf37cd5e262
--- /dev/null
+++ b/bugs-fixed/numeric-fs.ok
@@ -0,0 +1,3 @@
+2
+3
+4
diff --git a/bugs-fixed/numeric-output-seps.awk b/bugs-fixed/numeric-output-seps.awk
new file mode 100644
index 000000000000..daa0f72aa6ff
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.awk
@@ -0,0 +1,8 @@
+BEGIN {
+	$0 = "a b c";
+	OFS = 1;
+	ORS = 2;
+	NF = 2;
+	print;
+	print "d", "e";
+}
diff --git a/bugs-fixed/numeric-output-seps.bad b/bugs-fixed/numeric-output-seps.bad
new file mode 100644
index 000000000000..95310f78a7f3
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.bad
@@ -0,0 +1,2 @@
+a b
+d e
diff --git a/bugs-fixed/numeric-output-seps.ok b/bugs-fixed/numeric-output-seps.ok
new file mode 100644
index 000000000000..de6b2026e539
--- /dev/null
+++ b/bugs-fixed/numeric-output-seps.ok
@@ -0,0 +1 @@
+a1b2d1e2
+\ No newline at end of file
diff --git a/bugs-fixed/numeric-rs.awk b/bugs-fixed/numeric-rs.awk
new file mode 100644
index 000000000000..cc7a0a0c08c2
--- /dev/null
+++ b/bugs-fixed/numeric-rs.awk
@@ -0,0 +1,6 @@
+BEGIN {
+	RS = 1;
+	while ("echo a1b1c1d" | getline > 0) {
+		print $1;
+	}
+}
diff --git a/bugs-fixed/numeric-rs.bad b/bugs-fixed/numeric-rs.bad
new file mode 100644
index 000000000000..2027bc6f27c9
--- /dev/null
+++ b/bugs-fixed/numeric-rs.bad
@@ -0,0 +1 @@
+a1b1c1d
diff --git a/bugs-fixed/numeric-rs.ok b/bugs-fixed/numeric-rs.ok
new file mode 100644
index 000000000000..d68dd4031d2a
--- /dev/null
+++ b/bugs-fixed/numeric-rs.ok
@@ -0,0 +1,4 @@
+a
+b
+c
+d
diff --git a/bugs-fixed/numeric-subsep.awk b/bugs-fixed/numeric-subsep.awk
new file mode 100644
index 000000000000..1252e4a99607
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.awk
@@ -0,0 +1,5 @@
+BEGIN {
+    SUBSEP = 123.456;
+    a["hello", "world"] = "foo";
+    print a["hello" SUBSEP "world"];
+}
diff --git a/bugs-fixed/numeric-subsep.bad b/bugs-fixed/numeric-subsep.bad
new file mode 100644
index 000000000000..8b137891791f
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.bad
@@ -0,0 +1 @@
+
diff --git a/bugs-fixed/numeric-subsep.ok b/bugs-fixed/numeric-subsep.ok
new file mode 100644
index 000000000000..257cc5642cb1
--- /dev/null
+++ b/bugs-fixed/numeric-subsep.ok
@@ -0,0 +1 @@
+foo
diff --git a/bugs-fixed/ofs-rebuild.awk b/bugs-fixed/ofs-rebuild.awk
new file mode 100644
index 000000000000..dd2700031524
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.awk
@@ -0,0 +1,17 @@
+# The bug here is that nawk should use the value of OFS that
+# was current when $0 became invalid to rebuild the record.
+
+BEGIN {
+	OFS = ":"
+	$0 = "a b c d e f g"
+	$3 = "3333"
+	# Conceptually, $0 should now be "a:b:3333:d:e:f:g"
+
+	# Change OFS after (conceptually) rebuilding the record
+	OFS = "<>"
+
+	# Unmodified nawk prints "a<>b<>3333<>d<>e<>f<>g" because
+	# it delays rebuilding $0 until it's needed, and then it uses
+	# the current value of OFS. Oops.
+	print
+}
diff --git a/bugs-fixed/ofs-rebuild.bad b/bugs-fixed/ofs-rebuild.bad
new file mode 100644
index 000000000000..7570811e2c16
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.bad
@@ -0,0 +1 @@
+a<>b<>3333<>d<>e<>f<>g
diff --git a/bugs-fixed/ofs-rebuild.ok b/bugs-fixed/ofs-rebuild.ok
new file mode 100644
index 000000000000..26892181f91b
--- /dev/null
+++ b/bugs-fixed/ofs-rebuild.ok
@@ -0,0 +1 @@
+a:b:3333:d:e:f:g
diff --git a/bugs-fixed/space.awk b/bugs-fixed/space.awk
new file mode 100644
index 000000000000..6aa87d2e6259
--- /dev/null
+++ b/bugs-fixed/space.awk
@@ -0,0 +1,22 @@
+BEGIN {
+	c[" "] = "\" \""
+	c["\a"] = "\\a"
+	c["\b"] = "\\b"
+	c["\f"] = "\\f"
+	c["\n"] = "\\n"
+	c["\r"] = "\\r"
+	c["\t"] = "\\t"
+	c["\v"] = "\\v"
+
+	sort = "LC_ALL=C sort"
+
+	for (i in c)
+		printf("%s %s [[:space:]]\n", c[i],
+			i ~ /[[:space:]]/ ? "~" : "!~") | sort
+
+	for (i in c)
+		printf("%s %s [[:blank:]]\n", c[i],
+			i ~ /[[:blank:]]/ ? "~" : "!~") | sort
+
+	close(sort)
+}
diff --git a/bugs-fixed/space.bad b/bugs-fixed/space.bad
new file mode 100644
index 000000000000..f92055fd0c26
--- /dev/null
+++ b/bugs-fixed/space.bad
@@ -0,0 +1,16 @@
+" " ~ [[:blank:]]
+" " ~ [[:space:]]
+\a !~ [[:blank:]]
+\a !~ [[:space:]]
+\b !~ [[:blank:]]
+\b !~ [[:space:]]
+\f ~ [[:blank:]]
+\f ~ [[:space:]]
+\n ~ [[:blank:]]
+\n ~ [[:space:]]
+\r ~ [[:blank:]]
+\r ~ [[:space:]]
+\t ~ [[:blank:]]
+\t ~ [[:space:]]
+\v ~ [[:blank:]]
+\v ~ [[:space:]]
diff --git a/bugs-fixed/space.ok b/bugs-fixed/space.ok
new file mode 100644
index 000000000000..4278c5c9df3b
--- /dev/null
+++ b/bugs-fixed/space.ok
@@ -0,0 +1,16 @@
+" " ~ [[:blank:]]
+" " ~ [[:space:]]
+\a !~ [[:blank:]]
+\a !~ [[:space:]]
+\b !~ [[:blank:]]
+\b !~ [[:space:]]
+\f !~ [[:blank:]]
+\f ~ [[:space:]]
+\n !~ [[:blank:]]
+\n ~ [[:space:]]
+\r !~ [[:blank:]]
+\r ~ [[:space:]]
+\t ~ [[:blank:]]
+\t ~ [[:space:]]
+\v !~ [[:blank:]]
+\v ~ [[:space:]]
diff --git a/bugs-fixed/split-fs-from-array.awk b/bugs-fixed/split-fs-from-array.awk
new file mode 100644
index 000000000000..fce1607c2a97
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.awk
@@ -0,0 +1,5 @@
+BEGIN {
+        a[1] = "elephantie"
+        a[2] = "e"
+        print split(a[1],a,a[2]), a[2], a[3], split(a[2],a,a[2])
+}
diff --git a/bugs-fixed/split-fs-from-array.ok b/bugs-fixed/split-fs-from-array.ok
new file mode 100644
index 000000000000..9402b94f4fae
--- /dev/null
+++ b/bugs-fixed/split-fs-from-array.ok
@@ -0,0 +1 @@
+4 l phanti 2
diff --git a/bugs-fixed/string-conv.awk b/bugs-fixed/string-conv.awk
new file mode 100644
index 000000000000..a1f04aba354b
--- /dev/null
+++ b/bugs-fixed/string-conv.awk
@@ -0,0 +1,13 @@
+BEGIN {
+	OFMT = ">>%.6g<<"
+        a = 12.1234
+	print "a =", a
+        b = a ""
+        print "1 ->", b
+        CONVFMT = "%2.2f"
+        b = a ""
+        print "2 ->", b
+        CONVFMT = "%.12g"
+        b = a ""
+        print "3 ->", b
+}
diff --git a/bugs-fixed/string-conv.bad b/bugs-fixed/string-conv.bad
new file mode 100644
index 000000000000..2ab95e87d0a8
--- /dev/null
+++ b/bugs-fixed/string-conv.bad
@@ -0,0 +1,4 @@
+a = >>12.1234<<
+1 -> >>12.1234<<
+2 -> >>12.1234<<
+3 -> >>12.1234<<
diff --git a/bugs-fixed/string-conv.ok b/bugs-fixed/string-conv.ok
new file mode 100644
index 000000000000..7c097113207a
--- /dev/null
+++ b/bugs-fixed/string-conv.ok
@@ -0,0 +1,4 @@
+a = >>12.1234<<
+1 -> 12.1234
+2 -> 12.12
+3 -> 12.1234
diff --git a/bugs-fixed/subsep-overflow.awk b/bugs-fixed/subsep-overflow.awk
new file mode 100644
index 000000000000..66c7c24db0e6
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.awk
@@ -0,0 +1,24 @@
+function foo(c, n) {
+    s = "";
+    for (i = 0; i < n; i++) {
+        s = s c;
+    }
+    return s;
+}
+
+BEGIN {
+    str1 = foo("a", 4500);
+    str2 = foo("b", 9000);
+
+    a[(SUBSEP = str1), (SUBSEP = str2), "c"] = 1;
+
+    for (k in a) {
+        print length(k);
+    }
+
+    print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+    print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+    delete a[(SUBSEP = str1), (SUBSEP = str2), "c"];
+    print (((SUBSEP = str1), (SUBSEP = str2), "c") in a);
+    print (((SUBSEP = str1) SUBSEP (SUBSEP = str2) SUBSEP "c") in a);
+}
diff --git a/bugs-fixed/subsep-overflow.ok b/bugs-fixed/subsep-overflow.ok
new file mode 100644
index 000000000000..ddbbd78707ee
--- /dev/null
+++ b/bugs-fixed/subsep-overflow.ok
@@ -0,0 +1,5 @@
+27001
+1
+1
+0
+0
diff --git a/bugs-fixed/system-status.awk b/bugs-fixed/system-status.awk
new file mode 100644
index 000000000000..8daf563e6f4f
--- /dev/null
+++ b/bugs-fixed/system-status.awk
@@ -0,0 +1,19 @@
+# Unmodified nawk prints the 16 bit exit status divided by 256, but
+# does so using floating point arithmetic, yielding strange results.
+#
+# The fix is to use the various macros defined for wait(2) and to
+# use the signal number + 256 for death by signal, or signal number + 512
+# for death by signal with core dump.
+
+BEGIN {
+	status = system("exit 42")
+	print "normal status", status
+
+	status = system("kill -HUP $$")
+	print "death by signal status", status
+
+	status = system("kill -ABRT $$")
+	print "death by signal with core dump status", status
+
+	system("rm -f core*")
+}
diff --git a/bugs-fixed/system-status.bad b/bugs-fixed/system-status.bad
new file mode 100644
index 000000000000..a1317dba54a8
--- /dev/null
+++ b/bugs-fixed/system-status.bad
@@ -0,0 +1,3 @@
+normal status 42
+death by signal status 0.00390625
+death by signal with core dump status 0.523438
diff --git a/bugs-fixed/system-status.ok b/bugs-fixed/system-status.ok
new file mode 100644
index 000000000000..737828f5ed7a
--- /dev/null
+++ b/bugs-fixed/system-status.ok
@@ -0,0 +1,3 @@
+normal status 42
+death by signal status 257
+death by signal with core dump status 518
diff --git a/bugs-fixed/unary-plus.awk b/bugs-fixed/unary-plus.awk
new file mode 100644
index 000000000000..ba6185b96704
--- /dev/null
+++ b/bugs-fixed/unary-plus.awk
@@ -0,0 +1,4 @@
+BEGIN {
+	print +"q"
+    print +"43.12345678912345678"
+}
diff --git a/bugs-fixed/unary-plus.bad b/bugs-fixed/unary-plus.bad
new file mode 100644
index 000000000000..76f57d5d580c
--- /dev/null
+++ b/bugs-fixed/unary-plus.bad
@@ -0,0 +1,2 @@
+q
+43.12345678912345678
diff --git a/bugs-fixed/unary-plus.ok b/bugs-fixed/unary-plus.ok
new file mode 100644
index 000000000000..90f97afc5c44
--- /dev/null
+++ b/bugs-fixed/unary-plus.ok
@@ -0,0 +1,2 @@
+0
+43.1235
diff --git a/lex.c b/lex.c
index 0c65a9fe3292..ad8e878a247d 100644
--- a/lex.c
+++ b/lex.c
@@ -170,10 +170,10 @@ int	reg	= 0;	/* 1 => return a REGEXPR now */
 int yylex(void)
 {
 	int c;
-	static char *buf = NULL;
+	static char *buf = 0;
 	static int bufsize = 5; /* BUG: setting this small causes core dump! */
 
-	if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
+	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
 		FATAL( "out of space in yylex" );
 	if (sc) {
 		sc = 0;
@@ -198,6 +198,7 @@ int yylex(void)
 		yylval.i = c;
 		switch (c) {
 		case '\n':	/* {EOL} */
+			lineno++;
 			RET(NL);
 		case '\r':	/* assume \n is coming */
 		case ' ':	/* {WS}+ */
@@ -213,6 +214,7 @@ int yylex(void)
 		case '\\':
 			if (peek() == '\n') {
 				input();
+				lineno++;
 			} else if (peek() == '\r') {
 				input(); input();	/* \n */
 				lineno++;
@@ -358,10 +360,10 @@ int string(void)
 {
 	int c, n;
 	char *s, *bp;
-	static char *buf = NULL;
+	static char *buf = 0;
 	static int bufsz = 500;
 
-	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
+	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
 		FATAL("out of space for strings");
 	for (bp = buf; (c = input()) != '"'; ) {
 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
@@ -370,10 +372,11 @@ int string(void)
 		case '\n':
 		case '\r':
 		case 0:
+			*bp = '\0';
 			SYNTAX( "non-terminated string %.10s...", buf );
-			lineno++;
 			if (c == 0)	/* hopeless */
 				FATAL( "giving up" );
+			lineno++;
 			break;
 		case '\\':
 			c = input();
@@ -504,17 +507,18 @@ void startreg(void)	/* next call to yylex will return a regular expression */
 int regexpr(void)
 {
 	int c;
-	static char *buf = NULL;
+	static char *buf = 0;
 	static int bufsz = 500;
 	char *bp;
 
-	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
+	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
 		FATAL("out of space for rex expr");
 	bp = buf;
 	for ( ; (c = input()) != '/' && c != 0; ) {
 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
 			FATAL("out of space for reg expr %.10s...", buf);
 		if (c == '\n') {
+			*bp = '\0';
 			SYNTAX( "newline in regular expression %.10s...", buf ); 
 			unput('\n');
 			break;
@@ -539,7 +543,7 @@ char	ebuf[300];
 char	*ep = ebuf;
 char	yysbuf[100];	/* pushback buffer */
 char	*yysptr = yysbuf;
-FILE	*yyin = NULL;
+FILE	*yyin = 0;
 
 int input(void)	/* get next lexical input character */
 {
@@ -553,19 +557,19 @@ int input(void)	/* get next lexical input character */
 			lexprog++;
 	} else				/* awk -f ... */
 		c = pgetc();
-	if (c == '\n')
-		lineno++;
-	else if (c == EOF)
+	if (c == EOF)
 		c = 0;
 	if (ep >= ebuf + sizeof ebuf)
 		ep = ebuf;
-	return *ep++ = c;
+	*ep = c;
+	if (c != 0) {
+		ep++;
+	}
+	return (c);
 }
 
 void unput(int c)	/* put lexical character back on input */
 {
-	if (c == '\n')
-		lineno--;
 	if (yysptr >= yysbuf + sizeof(yysbuf))
 		FATAL("pushed back too much: %.20s...", yysbuf);
 	*yysptr++ = c;
diff --git a/lib.c b/lib.c
index 5eeb53d4679d..a365245a0e1a 100644
--- a/lib.c
+++ b/lib.c
@@ -59,7 +59,7 @@ void recinit(unsigned int n)
 {
 	if ( (record = (char *) malloc(n)) == NULL
 	  || (fields = (char *) malloc(n+1)) == NULL
-	  || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
+	  || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL
 	  || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
 		FATAL("out of space for $0 and fields");
 	*fldtab[0] = dollar0;
@@ -189,12 +189,13 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf)	/* read one record into buf *
 	int sep, c;
 	char *rr, *buf = *pbuf;
 	int bufsize = *pbufsize;
+	char *rs = getsval(rsloc);
 
-	if (strlen(*FS) >= sizeof(inputFS))
+	if (strlen(getsval(fsloc)) >= sizeof (inputFS))
 		FATAL("field separator %.10s... is too long", *FS);
 	/*fflush(stdout); avoids some buffering problem but makes it 25% slower*/
 	strcpy(inputFS, *FS);	/* for subsequent field splitting */
-	if ((sep = **RS) == 0) {
+	if ((sep = *rs) == 0) {
 		sep = '\n';
 		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
 			;
@@ -208,7 +209,7 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf)	/* read one record into buf *
 					FATAL("input record `%.30s...' too long", buf);
 			*rr++ = c;
 		}
-		if (**RS == sep || c == EOF)
+		if (*rs == sep || c == EOF)
 			break;
 		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
 			break;
@@ -283,6 +284,8 @@ void fldbld(void)	/* create fields from current record */
 	}
 	fr = fields;
 	i = 0;	/* number of fields accumulated here */
+	if (strlen(getsval(fsloc)) >= sizeof (inputFS))
+		FATAL("field separator %.10s... is too long", *FS);
 	strcpy(inputFS, *FS);
 	if (strlen(inputFS) > 1) {	/* it's a regular expression */
 		i = refldbld(r, inputFS);
@@ -356,6 +359,7 @@ void fldbld(void)	/* create fields from current record */
 		}
 	}
 	setfval(nfloc, (Awkfloat) lastfld);
+	donerec = 1; /* restore */
 	if (dbg) {
 		for (j = 0; j <= lastfld; j++) {
 			p = fldtab[j];
@@ -387,6 +391,21 @@ void newfld(int n)	/* add field n after end of existing lastfld */
 	setfval(nfloc, (Awkfloat) n);
 }
 
+void setlastfld(int n)	/* set lastfld to n, cleaning fldtab cells if necessary; FATAL if n < 0 */
+{
+	if (n < 0)
+		FATAL("cannot set NF to a negative value");
+	if (n > nfields)	/* n is beyond the current nfields limit */
+		growfldtab(n);	/* NOTE(review): presumably enlarges fldtab to hold n fields -- confirm in growfldtab */
+
+	if (lastfld < n)	/* field count grows: hand the newly included range lastfld+1..n to cleanfld */
+	    cleanfld(lastfld+1, n);
+	else			/* shrinks or stays the same: hand the dropped range n+1..lastfld to cleanfld */
+	    cleanfld(n+1, lastfld);
+
+	lastfld = n;	/* record the new last-field index only after the cells were cleaned */
+}
+
 Cell *fieldadr(int n)	/* get nth field */
 {
 	if (n < 0)
@@ -465,6 +484,7 @@ void recbld(void)	/* create $0 from $1..$NF if necessary */
 {
 	int i;
 	char *r, *p;
+	char *sep = getsval(ofsloc);
 
 	if (donerec == 1)
 		return;
@@ -476,9 +496,9 @@ void recbld(void)	/* create $0 from $1..$NF if necessary */
 		while ((*r = *p++) != 0)
 			r++;
 		if (i < *NF) {
-			if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
+			if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
 				FATAL("created $0 `%.30s...' too long", record);
-			for (p = *OFS; (*r = *p++) != 0; )
+			for (p = sep; (*r = *p++) != 0; )
 				r++;
 		}
 	}
@@ -618,6 +638,8 @@ void eprint(void)	/* try to print context around error */
 
 	if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
 		return;
+	if (ebuf == ep)
+		return;
 	p = ep - 1;
 	if (p > ebuf && *p == '\n')
 		p--;
@@ -681,7 +703,7 @@ int isclvar(const char *s)	/* is s of form var=something ? */
 	for ( ; *s; s++)
 		if (!(isalnum((uschar) *s) || *s == '_'))
 			break;
-	return *s == '=' && s > os && *(s+1) != '=';
+	return *s == '=' && s > os;
 }
 
 /* strtod is supposed to be a proper test of what's a valid number */
diff --git a/main.c b/main.c
index 4b659974b056..98661fcd7829 100644
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-const char	*version = "version 20121220";
+const char	*version = "version 20190529";
 
 #define DEBUG
 #include <stdio.h>
@@ -54,6 +54,13 @@ int	curpfile = 0;	/* current filename */
 
 int	safe	= 0;	/* 1 => "safe" mode */
 
+/* Can this work with recursive calls?  I don't think so.
+void segvcatch(int n)
+{
+	FATAL("segfault.  Do you have an unbounded recursive call?", n);
+}
+*/
+
 int main(int argc, char *argv[])
 {
 	const char *fs = NULL;
@@ -68,6 +75,7 @@ int main(int argc, char *argv[])
 		exit(1);
 	}
 	signal(SIGFPE, fpecatch);
+	/*signal(SIGSEGV, segvcatch); experiment */
 
 	srand_seed = 1;
 	srand(srand_seed);
@@ -80,7 +88,7 @@ int main(int argc, char *argv[])
 			exit(0);
 			break;
 		}
-		if (strncmp(argv[1], "--", 2) == 0) {	/* explicit end of args */
+		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
 			argc--;
 			argv++;
 			break;
diff --git a/makefile b/makefile
index 88f992421561..3c0b62e1df9e 100644
--- a/makefile
+++ b/makefile
@@ -23,18 +23,21 @@
 # ****************************************************************/
 
 CFLAGS = -g
-CFLAGS = -O2
 CFLAGS =
+CFLAGS = -O2
 
-CC = gcc -Wall -g -Wwrite-strings
-CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
-CC = gcc -g -Wall -pedantic 
-CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
+# compiler options
+#CC = gcc -Wall -g -Wwrite-strings
+#CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing
+#CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
+HOSTCC = gcc -g -Wall -pedantic 
+CC = $(HOSTCC)  # change this if cross-compiling.
 
-YACC = bison -d -y
-YACC = yacc -d -S
+# yacc options.  pick one; this varies a lot by system.
 #YFLAGS = -d -S
-		# -S uses sprintf in yacc parser instead of sprint
+YACC = bison -d -y
+#YACC = yacc -d
+#		-S uses sprintf in yacc parser instead of sprint
 
 OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o
 
@@ -44,7 +47,7 @@ SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \
 LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
 	lib.c run.c tran.c 
 
-SHIP = README FIXES $(SOURCE) ytab[ch].bak makefile  \
+SHIP = README LICENSE FIXES $(SOURCE) ytab[ch].bak makefile  \
 	 awk.1
 
 a.out:	ytab.o $(OFILES)
@@ -52,17 +55,23 @@ a.out:	ytab.o $(OFILES)
 
 $(OFILES):	awk.h ytab.h proto.h
 
-ytab.o:	awk.h proto.h awkgram.y
+#Clear dependency for parallel build: (make -j)
+#YACC generates y.tab.c and y.tab.h at the same time;
+#this needs to be a pattern rule, otherwise multiple targets
+#are mapped onto multiple executions of yacc, which overwrite
+#each other's outputs.
+y%.c y%.h:	awk.h proto.h awkgram.y
 	$(YACC) $(YFLAGS) awkgram.y
-	mv y.tab.c ytab.c
-	mv y.tab.h ytab.h
-	$(CC) $(CFLAGS) -c ytab.c
+	mv y.$*.c y$*.c
+	mv y.$*.h y$*.h
+
+ytab.h:	ytab.c
 
 proctab.c:	maketab
-	./maketab >proctab.c
+	./maketab ytab.h >proctab.c
 
 maketab:	ytab.h maketab.c
-	$(CC) $(CFLAGS) maketab.c -o maketab
+	$(HOSTCC) $(CFLAGS) maketab.c -o maketab
 
 bundle:
 	@cp ytab.h ytabh.bak
@@ -79,8 +88,22 @@ tar:
 	@zip awk.zip $(SHIP)
 	ls -l awk.zip
 
+gitadd:
+	git add README LICENSE FIXES \
+           awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
+	   lib.c run.c tran.c \
+	   makefile awk.1 awktest.tar
+
+gitpush:
+	# only do this once: 
+	# git remote add origin https://github.com/onetrueawk/awk.git
+	git push -u origin master
+
 names:
 	@echo $(LISTING)
 
 clean:
 	rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda # proctab.c
+
+cleaner:
+	rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c ytab*
diff --git a/maketab.c b/maketab.c
index b59e81592fdb..dbe3d241fcc8 100644
--- a/maketab.c
+++ b/maketab.c
@@ -62,6 +62,7 @@ struct xx
 	{ DIVIDE, "arith", " / " },
 	{ MOD, "arith", " % " },
 	{ UMINUS, "arith", " -" },
+	{ UPLUS, "arith", " +" },
 	{ POWER, "arith", " **" },
 	{ PREINCR, "incrdecr", "++" },
 	{ POSTINCR, "incrdecr", "++" },
@@ -124,8 +125,12 @@ int main(int argc, char *argv[])
 	for (i = SIZE; --i >= 0; )
 		names[i] = "";
 
-	if ((fp = fopen("ytab.h", "r")) == NULL) {
-		fprintf(stderr, "maketab can't open ytab.h!\n");
+	if (argc != 2) {
+		fprintf(stderr, "usage: maketab YTAB_H\n");
+		exit(1);
+	}
+	if ((fp = fopen(argv[1], "r")) == NULL) {
+		fprintf(stderr, "maketab can't open %s!\n", argv[1]);
 		exit(1);
 	}
 	printf("static char *printname[%d] = {\n", SIZE);
@@ -134,6 +139,8 @@ int main(int argc, char *argv[])
 		n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
 		if (c != '#' || (n != 4 && strcmp(def,"define") != 0))	/* not a valid #define */
 			continue;
+		if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0)
+			continue;
 		if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
 			/* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */
 			continue;
@@ -149,7 +156,7 @@ int main(int argc, char *argv[])
 		table[p->token-FIRSTTOKEN] = p->name;
 	printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
 	for (i=0; i<SIZE; i++)
-		if (table[i]==NULL)
+		if (table[i]==0)
 			printf("\tnullproc,\t/* %s */\n", names[i]);
 		else
 			printf("\t%s,\t/* %s */\n", table[i], names[i]);
diff --git a/parse.c b/parse.c
index 753a50def1fb..8304ded837ba 100644
--- a/parse.c
+++ b/parse.c
@@ -259,7 +259,7 @@ int isarg(const char *s)		/* is s in argument list for current function? */
 	Node *p = arglist;
 	int n;
 
-	for (n = 0; p != NULL; p = p->nnext, n++)
+	for (n = 0; p != 0; p = p->nnext, n++)
 		if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0)
 			return n;
 	return -1;
diff --git a/proctab.c b/proctab.c
new file mode 100644
index 000000000000..ff212c416c3a
--- /dev/null
+++ b/proctab.c
@@ -0,0 +1,209 @@
+#include <stdio.h>
+#include "awk.h"
+#include "ytab.h"
+
+static char *printname[94] = {
+	(char *) "FIRSTTOKEN",	/* 258 */
+	(char *) "PROGRAM",	/* 259 */
+	(char *) "PASTAT",	/* 260 */
+	(char *) "PASTAT2",	/* 261 */
+	(char *) "XBEGIN",	/* 262 */
+	(char *) "XEND",	/* 263 */
+	(char *) "NL",	/* 264 */
+	(char *) "ARRAY",	/* 265 */
+	(char *) "MATCH",	/* 266 */
+	(char *) "NOTMATCH",	/* 267 */
+	(char *) "MATCHOP",	/* 268 */
+	(char *) "FINAL",	/* 269 */
+	(char *) "DOT",	/* 270 */
+	(char *) "ALL",	/* 271 */
+	(char *) "CCL",	/* 272 */
+	(char *) "NCCL",	/* 273 */
+	(char *) "CHAR",	/* 274 */
+	(char *) "OR",	/* 275 */
+	(char *) "STAR",	/* 276 */
+	(char *) "QUEST",	/* 277 */
+	(char *) "PLUS",	/* 278 */
+	(char *) "EMPTYRE",	/* 279 */
+	(char *) "AND",	/* 280 */
+	(char *) "BOR",	/* 281 */
+	(char *) "APPEND",	/* 282 */
+	(char *) "EQ",	/* 283 */
+	(char *) "GE",	/* 284 */
+	(char *) "GT",	/* 285 */
+	(char *) "LE",	/* 286 */
+	(char *) "LT",	/* 287 */
+	(char *) "NE",	/* 288 */
+	(char *) "IN",	/* 289 */
+	(char *) "ARG",	/* 290 */
+	(char *) "BLTIN",	/* 291 */
+	(char *) "BREAK",	/* 292 */
+	(char *) "CLOSE",	/* 293 */
+	(char *) "CONTINUE",	/* 294 */
+	(char *) "DELETE",	/* 295 */
+	(char *) "DO",	/* 296 */
+	(char *) "EXIT",	/* 297 */
+	(char *) "FOR",	/* 298 */
+	(char *) "FUNC",	/* 299 */
+	(char *) "SUB",	/* 300 */
+	(char *) "GSUB",	/* 301 */
+	(char *) "IF",	/* 302 */
+	(char *) "INDEX",	/* 303 */
+	(char *) "LSUBSTR",	/* 304 */
+	(char *) "MATCHFCN",	/* 305 */
+	(char *) "NEXT",	/* 306 */
+	(char *) "NEXTFILE",	/* 307 */
+	(char *) "ADD",	/* 308 */
+	(char *) "MINUS",	/* 309 */
+	(char *) "MULT",	/* 310 */
+	(char *) "DIVIDE",	/* 311 */
+	(char *) "MOD",	/* 312 */
+	(char *) "ASSIGN",	/* 313 */
+	(char *) "ASGNOP",	/* 314 */
+	(char *) "ADDEQ",	/* 315 */
+	(char *) "SUBEQ",	/* 316 */
+	(char *) "MULTEQ",	/* 317 */
+	(char *) "DIVEQ",	/* 318 */
+	(char *) "MODEQ",	/* 319 */
+	(char *) "POWEQ",	/* 320 */
+	(char *) "PRINT",	/* 321 */
+	(char *) "PRINTF",	/* 322 */
+	(char *) "SPRINTF",	/* 323 */
+	(char *) "ELSE",	/* 324 */
+	(char *) "INTEST",	/* 325 */
+	(char *) "CONDEXPR",	/* 326 */
+	(char *) "POSTINCR",	/* 327 */
+	(char *) "PREINCR",	/* 328 */
+	(char *) "POSTDECR",	/* 329 */
+	(char *) "PREDECR",	/* 330 */
+	(char *) "VAR",	/* 331 */
+	(char *) "IVAR",	/* 332 */
+	(char *) "VARNF",	/* 333 */
+	(char *) "CALL",	/* 334 */
+	(char *) "NUMBER",	/* 335 */
+	(char *) "STRING",	/* 336 */
+	(char *) "REGEXPR",	/* 337 */
+	(char *) "GETLINE",	/* 338 */
+	(char *) "RETURN",	/* 339 */
+	(char *) "SPLIT",	/* 340 */
+	(char *) "SUBSTR",	/* 341 */
+	(char *) "WHILE",	/* 342 */
+	(char *) "CAT",	/* 343 */
+	(char *) "NOT",	/* 344 */
+	(char *) "UMINUS",	/* 345 */
+	(char *) "UPLUS",	/* 346 */
+	(char *) "POWER",	/* 347 */
+	(char *) "DECR",	/* 348 */
+	(char *) "INCR",	/* 349 */
+	(char *) "INDIRECT",	/* 350 */
+	(char *) "LASTTOKEN",	/* 351 */
+};
+
+
+Cell *(*proctab[94])(Node **, int) = {
+	nullproc,	/* FIRSTTOKEN */
+	program,	/* PROGRAM */
+	pastat,	/* PASTAT */
+	dopa2,	/* PASTAT2 */
+	nullproc,	/* XBEGIN */
+	nullproc,	/* XEND */
+	nullproc,	/* NL */
+	array,	/* ARRAY */
+	matchop,	/* MATCH */
+	matchop,	/* NOTMATCH */
+	nullproc,	/* MATCHOP */
+	nullproc,	/* FINAL */
+	nullproc,	/* DOT */
+	nullproc,	/* ALL */
+	nullproc,	/* CCL */
+	nullproc,	/* NCCL */
+	nullproc,	/* CHAR */
+	nullproc,	/* OR */
+	nullproc,	/* STAR */
+	nullproc,	/* QUEST */
+	nullproc,	/* PLUS */
+	nullproc,	/* EMPTYRE */
+	boolop,	/* AND */
+	boolop,	/* BOR */
+	nullproc,	/* APPEND */
+	relop,	/* EQ */
+	relop,	/* GE */
+	relop,	/* GT */
+	relop,	/* LE */
+	relop,	/* LT */
+	relop,	/* NE */
+	instat,	/* IN */
+	arg,	/* ARG */
+	bltin,	/* BLTIN */
+	jump,	/* BREAK */
+	closefile,	/* CLOSE */
+	jump,	/* CONTINUE */
+	awkdelete,	/* DELETE */
+	dostat,	/* DO */
+	jump,	/* EXIT */
+	forstat,	/* FOR */
+	nullproc,	/* FUNC */
+	sub,	/* SUB */
+	gsub,	/* GSUB */
+	ifstat,	/* IF */
+	sindex,	/* INDEX */
+	nullproc,	/* LSUBSTR */
+	matchop,	/* MATCHFCN */
+	jump,	/* NEXT */
+	jump,	/* NEXTFILE */
+	arith,	/* ADD */
+	arith,	/* MINUS */
+	arith,	/* MULT */
+	arith,	/* DIVIDE */
+	arith,	/* MOD */
+	assign,	/* ASSIGN */
+	nullproc,	/* ASGNOP */
+	assign,	/* ADDEQ */
+	assign,	/* SUBEQ */
+	assign,	/* MULTEQ */
+	assign,	/* DIVEQ */
+	assign,	/* MODEQ */
+	assign,	/* POWEQ */
+	printstat,	/* PRINT */
+	awkprintf,	/* PRINTF */
+	awksprintf,	/* SPRINTF */
+	nullproc,	/* ELSE */
+	intest,	/* INTEST */
+	condexpr,	/* CONDEXPR */
+	incrdecr,	/* POSTINCR */
+	incrdecr,	/* PREINCR */
+	incrdecr,	/* POSTDECR */
+	incrdecr,	/* PREDECR */
+	nullproc,	/* VAR */
+	nullproc,	/* IVAR */
+	getnf,	/* VARNF */
+	call,	/* CALL */
+	nullproc,	/* NUMBER */
+	nullproc,	/* STRING */
+	nullproc,	/* REGEXPR */
+	awkgetline,	/* GETLINE */
+	jump,	/* RETURN */
+	split,	/* SPLIT */
+	substr,	/* SUBSTR */
+	whilestat,	/* WHILE */
+	cat,	/* CAT */
+	boolop,	/* NOT */
+	arith,	/* UMINUS */
+	arith,	/* UPLUS */
+	arith,	/* POWER */
+	nullproc,	/* DECR */
+	nullproc,	/* INCR */
+	indirect,	/* INDIRECT */
+	nullproc,	/* LASTTOKEN */
+};
+
+char *tokname(int n)	/* return the printable name of token n, or "token <n>" if n is out of range */
+{
+	static char buf[100];	/* static because it is returned; overwritten by the next out-of-range call */
+
+	if (n < FIRSTTOKEN || n > LASTTOKEN) {	/* not a token covered by printname[] */
+		sprintf(buf, "token %d", n);
+		return buf;
+	}
+	return printname[n-FIRSTTOKEN];	/* printname[0] corresponds to FIRSTTOKEN */
+}
diff --git a/proto.h b/proto.h
index 9a657ef73ec1..ad6f2e80a594 100644
--- a/proto.h
+++ b/proto.h
@@ -124,6 +124,7 @@ extern	void	setclvar(char *);
 extern	void	fldbld(void);
 extern	void	cleanfld(int, int);
 extern	void	newfld(int);
+extern	void	setlastfld(int);
 extern	int	refldbld(const char *, const char *);
 extern	void	recbld(void);
 extern	Cell	*fieldadr(int);
@@ -193,3 +194,5 @@ extern	Cell	*gsub(Node **, int);
 
 extern	FILE	*popen(const char *, const char *);
 extern	int	pclose(FILE *);
+
+extern  const char	*flags2str(int flags);
diff --git a/run.c b/run.c
index 5342fe0d0f8b..2dfb3e6c383d 100644
--- a/run.c
+++ b/run.c
@@ -31,6 +31,8 @@ THIS SOFTWARE.
 #include <string.h>
 #include <stdlib.h>
 #include <time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 #include "awk.h"
 #include "ytab.h"
 
@@ -71,23 +73,23 @@ extern	Awkfloat	srand_seed;
 Node	*winner = NULL;	/* root of parse tree */
 Cell	*tmps;		/* free temporary cells for execution */
 
-static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
+static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL };
 Cell	*True	= &truecell;
-static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM };
+static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL };
 Cell	*False	= &falsecell;
-static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM };
+static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL };
 Cell	*jbreak	= &breakcell;
-static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM };
+static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL };
 Cell	*jcont	= &contcell;
-static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM };
+static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL };
 Cell	*jnext	= &nextcell;
-static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM };
+static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL };
 Cell	*jnextfile	= &nextfilecell;
-static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM };
+static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL };
 Cell	*jexit	= &exitcell;
-static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM };
+static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL };
 Cell	*jret	= &retcell;
-static Cell	tempcell	={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE };
+static Cell	tempcell	={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
 
 Node	*curnode = NULL;	/* the node being executed, for debugging */
 
@@ -112,7 +114,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
 		if (rminlen)
 			minlen += quantum - rminlen;
 		tbuf = (char *) realloc(*pbuf, minlen);
-		dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
+		dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void *) *pbuf, (void *) tbuf) );
 		if (tbuf == NULL) {
 			if (whatrtn)
 				FATAL("out of memory in %s", whatrtn);
@@ -221,7 +223,7 @@ struct Frame *fp = NULL;	/* frame pointer. bottom level unused */
 
 Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
 {
-	static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE };
+	static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
 	int i, ncall, ndef;
 	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
 	Node *x;
@@ -323,14 +325,18 @@ Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
 {
 	Cell *y;
 
+	/* copy is not constant or field */
+
 	y = gettemp();
+	y->tval = x->tval & ~(CON|FLD|REC);
 	y->csub = CCOPY;	/* prevents freeing until call is over */
 	y->nval = x->nval;	/* BUG? */
-	if (isstr(x))
+	if (isstr(x) /* || x->ctype == OCELL */) {
 		y->sval = tostring(x->sval);
+		y->tval &= ~DONTFREE;
+	} else
+		y->tval |= DONTFREE;
 	y->fval = x->fval;
-	y->tval = x->tval & ~(CON|FLD|REC|DONTFREE);	/* copy is not constant or field */
-							/* is DONTFREE right? */
 	return y;
 }
 
@@ -419,6 +425,10 @@ Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
 		} else if (a[0] != NULL) {	/* getline var <file */
 			x = execute(a[0]);
 			setsval(x, buf);
+			if (is_number(x->sval)) {
+				x->fval = atof(x->sval);
+				x->tval |= NUM;
+			}
 			tempfree(x);
 		} else {			/* getline <file */
 			setsval(fldtab[0], buf);
@@ -434,6 +444,10 @@ Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
 			n = getrec(&buf, &bufsize, 0);
 			x = execute(a[0]);
 			setsval(x, buf);
+			if (is_number(x->sval)) {
+				x->fval = atof(x->sval);
+				x->tval |= NUM;
+			}
 			tempfree(x);
 		}
 	}
@@ -456,7 +470,7 @@ Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
 	Node *np;
 	char *buf;
 	int bufsz = recsize;
-	int nsub = strlen(*SUBSEP);
+	int nsub;
 
 	if ((buf = (char *) malloc(bufsz)) == NULL)
 		FATAL("out of memory in array");
@@ -466,6 +480,7 @@ Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
 	for (np = a[1]; np; np = np->nnext) {
 		y = execute(np);	/* subscript */
 		s = getsval(y);
+		nsub = strlen(getsval(subseploc));
 		if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
 			FATAL("out of memory for %s[%s...]", x->nval, buf);
 		strcat(buf, s);
@@ -494,12 +509,12 @@ Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts *
 	Cell *x, *y;
 	Node *np;
 	char *s;
-	int nsub = strlen(*SUBSEP);
+	int nsub;
 
 	x = execute(a[0]);	/* Cell* for symbol table */
 	if (!isarr(x))
 		return True;
-	if (a[1] == NULL) {	/* delete the elements, not the table */
+	if (a[1] == 0) {	/* delete the elements, not the table */
 		freesymtab(x);
 		x->tval &= ~STR;
 		x->tval |= ARR;
@@ -513,9 +528,10 @@ Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts *
 		for (np = a[1]; np; np = np->nnext) {
 			y = execute(np);	/* subscript */
 			s = getsval(y);
+			nsub = strlen(getsval(subseploc));
 			if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
 				FATAL("out of memory deleting %s[%s...]", x->nval, buf);
-			strcat(buf, s);	
+			strcat(buf, s);
 			if (np->nnext)
 				strcat(buf, *SUBSEP);
 			tempfree(y);
@@ -534,7 +550,7 @@ Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
 	char *buf;
 	char *s;
 	int bufsz = recsize;
-	int nsub = strlen(*SUBSEP);
+	int nsub;
 
 	ap = execute(a[1]);	/* array name */
 	if (!isarr(ap)) {
@@ -552,6 +568,7 @@ Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
 	for (p = a[0]; p; p = p->nnext) {
 		x = execute(p);	/* expr */
 		s = getsval(x);
+		nsub = strlen(getsval(subseploc));
 		if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
 			FATAL("out of memory deleting %s[%s...]", x->nval, buf);
 		strcat(buf, s);
@@ -583,7 +600,7 @@ Cell *matchop(Node **a, int n)	/* ~ and match() */
 	}
 	x = execute(a[1]);	/* a[1] = target text */
 	s = getsval(x);
-	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
+	if (a[0] == 0)		/* a[0] == 0: a[2] is already-compiled reg expr */
 		i = (*mf)((fa *) a[2], s);
 	else {
 		y = execute(a[2]);	/* a[2] = regular expr */
@@ -699,7 +716,7 @@ Cell *gettemp(void)	/* get a tempcell */
 			FATAL("out of space for temporaries");
 		for(i = 1; i < 100; i++)
 			tmps[i-1].cnext = &tmps[i];
-		tmps[i-1].cnext = NULL;
+		tmps[i-1].cnext = 0;
 	}
 	x = tmps;
 	tmps = x->cnext;
@@ -734,18 +751,18 @@ Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
 	int k, m, n;
 	char *s;
 	int temp;
-	Cell *x, *y, *z = NULL;
+	Cell *x, *y, *z = 0;
 
 	x = execute(a[0]);
 	y = execute(a[1]);
-	if (a[2] != NULL)
+	if (a[2] != 0)
 		z = execute(a[2]);
 	s = getsval(x);
 	k = strlen(s) + 1;
 	if (k <= 1) {
 		tempfree(x);
 		tempfree(y);
-		if (a[2] != NULL) {
+		if (a[2] != 0) {
 			tempfree(z);
 		}
 		x = gettemp();
@@ -758,7 +775,7 @@ Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
 	else if (m > k)
 		m = k;
 	tempfree(y);
-	if (a[2] != NULL) {
+	if (a[2] != 0) {
 		n = (int) getfval(z);
 		tempfree(z);
 	} else
@@ -817,6 +834,17 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like co
 	char *buf = *pbuf;
 	int bufsize = *pbufsize;
 
+	static int first = 1;
+	static int have_a_format = 0;
+
+	if (first) {
+		char buf[100];
+
+		sprintf(buf, "%a", 42.0);
+		have_a_format = (strcmp(buf, "0x1.5p+5") == 0);
+		first = 0;
+	}
+
 	os = s;
 	p = buf;
 	if ((fmt = (char *) malloc(fmtsz)) == NULL)
@@ -842,7 +870,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like co
 				FATAL("format item %.30s... ran format() out of memory", os);
 			if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L')
 				break;	/* the ansi panoply */
+			if (*s == '$') {
+				FATAL("'$' not permitted in awk formats");
+			}
 			if (*s == '*') {
+				if (a == NULL) {
+					FATAL("not enough args in printf(%s)", os);
+				}
 				x = execute(a);
 				a = a->nnext;
 				sprintf(t-1, "%d", fmtwd=(int) getfval(x));
@@ -857,8 +891,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like co
 		if (fmtwd < 0)
 			fmtwd = -fmtwd;
 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
-
 		switch (*s) {
+		case 'a': case 'A':
+			if (have_a_format)
+				flag = *s;
+			else
+				flag = 'f';
+			break;
 		case 'f': case 'e': case 'g': case 'E': case 'G':
 			flag = 'f';
 			break;
@@ -901,6 +940,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like co
 			p += strlen(p);
 			sprintf(p, "%s", t);
 			break;
+		case 'a':
+		case 'A':
 		case 'f':	sprintf(p, fmt, getfval(x)); break;
 		case 'd':	sprintf(p, fmt, (long) getfval(x)); break;
 		case 'u':	sprintf(p, fmt, (int) getfval(x)); break;
@@ -1003,7 +1044,7 @@ Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
 	x = execute(a[0]);
 	i = getfval(x);
 	tempfree(x);
-	if (n != UMINUS) {
+	if (n != UMINUS && n != UPLUS) {
 		y = execute(a[1]);
 		j = getfval(y);
 		tempfree(y);
@@ -1033,6 +1074,8 @@ Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
 	case UMINUS:
 		i = -i;
 		break;
+    case UPLUS: /* handled by getfval(), above */
+		break;
 	case POWER:
 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
 			i = ipow(i, (int) j);
@@ -1088,8 +1131,8 @@ Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
 	y = execute(a[1]);
 	x = execute(a[0]);
 	if (n == ASSIGN) {	/* ordinary assignment */
-		if (x == y && !(x->tval & (FLD|REC)))	/* self-assignment: */
-			;		/* leave alone unless it's a field */
+		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
+			;	/* self-assignment: leave alone unless it's a field or NF */
 		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
 			setsval(x, getsval(y));
 			x->fval = getfval(y);
@@ -1146,25 +1189,26 @@ Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
 {
 	Cell *x, *y, *z;
 	int n1, n2;
-	char *s;
+	char *s = NULL;
+	int ssz = 0;
 
 	x = execute(a[0]);
+	n1 = strlen(getsval(x));
+	adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
+	(void) strncpy(s, x->sval, ssz);
+
 	y = execute(a[1]);
-	getsval(x);
-	getsval(y);
-	n1 = strlen(x->sval);
-	n2 = strlen(y->sval);
-	s = (char *) malloc(n1 + n2 + 1);
-	if (s == NULL)
-		FATAL("out of space concatenating %.15s... and %.15s...",
-			x->sval, y->sval);
-	strcpy(s, x->sval);
-	strcpy(s+n1, y->sval);
+	n2 = strlen(getsval(y));
+	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
+	(void) strncpy(s + n1, y->sval, ssz - n1);
+
 	tempfree(x);
 	tempfree(y);
+
 	z = gettemp();
 	z->sval = s;
 	z->tval = STR;
+
 	return(z);
 }
 
@@ -1172,7 +1216,7 @@ Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
 {
 	Cell *x;
 
-	if (a[0] == NULL)
+	if (a[0] == 0)
 		x = execute(a[1]);
 	else {
 		x = execute(a[0]);
@@ -1209,20 +1253,22 @@ Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
 
 Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
 {
-	Cell *x = NULL, *y, *ap;
+	Cell *x = 0, *y, *ap;
 	char *s, *origs;
+	char *fs, *origfs = NULL;
 	int sep;
-	char *t, temp, num[50], *fs = NULL;
+	char *t, temp, num[50];
 	int n, tempstat, arg3type;
 
 	y = execute(a[0]);	/* source string */
 	origs = s = strdup(getsval(y));
 	arg3type = ptoi(a[3]);
-	if (a[2] == NULL)		/* fs string */
-		fs = *FS;
+	if (a[2] == 0)		/* fs string */
+		fs = getsval(fsloc);
 	else if (arg3type == STRING) {	/* split(str,arr,"string") */
 		x = execute(a[2]);
-		fs = getsval(x);
+		origfs = fs = strdup(getsval(x));
+		tempfree(x);
 	} else if (arg3type == REGEXPR)
 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
 	else
@@ -1337,9 +1383,7 @@ Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
 	tempfree(ap);
 	tempfree(y);
 	free(origs);
-	if (a[2] != NULL && arg3type == STRING) {
-		tempfree(x);
-	}
+	free(origfs);
 	x = gettemp();
 	x->tval = NUM;
 	x->fval = n;
@@ -1369,7 +1413,7 @@ Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
 	if (istrue(x)) {
 		tempfree(x);
 		x = execute(a[1]);
-	} else if (a[2] != NULL) {
+	} else if (a[2] != 0) {
 		tempfree(x);
 		x = execute(a[2]);
 	}
@@ -1421,7 +1465,7 @@ Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
 	x = execute(a[0]);
 	tempfree(x);
 	for (;;) {
-		if (a[1]!=NULL) {
+		if (a[1]!=0) {
 			x = execute(a[1]);
 			if (!istrue(x)) return(x);
 			else tempfree(x);
@@ -1479,6 +1523,7 @@ Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg lis
 	Node *nextarg;
 	FILE *fp;
 	void flush_all(void);
+	int status = 0;
 
 	t = ptoi(a[0]);
 	x = execute(a[1]);
@@ -1503,7 +1548,7 @@ Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg lis
 	case FCOS:
 		u = cos(getfval(x)); break;
 	case FATAN:
-		if (nextarg == NULL) {
+		if (nextarg == 0) {
 			WARNING("atan2 requires two arguments; returning 1.0");
 			u = 1.0;
 		} else {
@@ -1515,7 +1560,20 @@ Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg lis
 		break;
 	case FSYSTEM:
 		fflush(stdout);		/* in case something is buffered already */
-		u = (Awkfloat) system(getsval(x)) / 256;   /* 256 is unix-dep */
+		status = system(getsval(x));
+		u = status;
+		if (status != -1) {
+			if (WIFEXITED(status)) {
+				u = WEXITSTATUS(status);
+			} else if (WIFSIGNALED(status)) {
+				u = WTERMSIG(status) + 256;
+#ifdef WCOREDUMP
+				if (WCOREDUMP(status))
+					u += 256;
+#endif
+			} else	/* something else?!? */
+				u = 0;
+		}
 		break;
 	case FRAND:
 		/* in principle, rand() returns something in 0..RAND_MAX */
@@ -1564,7 +1622,7 @@ Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg lis
 	tempfree(x);
 	x = gettemp();
 	setfval(x, u);
-	if (nextarg != NULL) {
+	if (nextarg != 0) {
 		WARNING("warning: function has too many arguments");
 		for ( ; nextarg; nextarg = nextarg->nnext)
 			execute(nextarg);
@@ -1578,7 +1636,7 @@ Cell *printstat(Node **a, int n)	/* print a[0] */
 	Cell *y;
 	FILE *fp;
 
-	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
+	if (a[1] == 0)	/* a[1] is redirection operator, a[2] is file */
 		fp = stdout;
 	else
 		fp = redirect(ptoi(a[1]), a[2]);
@@ -1587,11 +1645,11 @@ Cell *printstat(Node **a, int n)	/* print a[0] */
 		fputs(getpssval(y), fp);
 		tempfree(y);
 		if (x->nnext == NULL)
-			fputs(*ORS, fp);
+			fputs(getsval(orsloc), fp);
 		else
-			fputs(*OFS, fp);
+			fputs(getsval(ofsloc), fp);
 	}
-	if (a[1] != NULL)
+	if (a[1] != 0)
 		fflush(fp);
 	if (ferror(fp))
 		FATAL("write error on %s", filename(fp));
@@ -1600,8 +1658,6 @@ Cell *printstat(Node **a, int n)	/* print a[0] */
 
 Cell *nullproc(Node **a, int n)
 {
-	n = n;
-	a = a;
 	return 0;
 }
 
@@ -1650,7 +1706,7 @@ FILE *openfile(int a, const char *us)
 {
 	const char *s = us;
 	int i, m;
-	FILE *fp = NULL;
+	FILE *fp = 0;
 
 	if (*s == '\0')
 		FATAL("null file name in print or getline");
@@ -1665,7 +1721,7 @@ FILE *openfile(int a, const char *us)
 		return NULL;
 
 	for (i=0; i < nfiles; i++)
-		if (files[i].fp == NULL)
+		if (files[i].fp == 0)
 			break;
 	if (i >= nfiles) {
 		struct files *nf;
@@ -1715,7 +1771,6 @@ Cell *closefile(Node **a, int n)
 	Cell *x;
 	int i, stat;
 
-	n = n;
 	x = execute(a[0]);
 	getsval(x);
 	stat = -1;
@@ -1782,7 +1837,7 @@ Cell *sub(Node **a, int nnn)	/* substitute command */
 		FATAL("out of memory in sub");
 	x = execute(a[3]);	/* target string */
 	t = getsval(x);
-	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
+	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
 		pfa = (fa *) a[1];	/* regular expression */
 	else {
 		y = execute(a[1]);
@@ -1822,7 +1877,7 @@ Cell *sub(Node **a, int nnn)	/* substitute command */
 		if (pb > buf + bufsz)
 			FATAL("sub result2 %.30s too big; can't happen", buf);
 		setsval(x, buf);	/* BUG: should be able to avoid copy */
-		result = True;
+		result = True;;
 	}
 	tempfree(x);
 	tempfree(y);
@@ -1845,7 +1900,7 @@ Cell *gsub(Node **a, int nnn)	/* global substitute */
 	num = 0;
 	x = execute(a[3]);	/* target string */
 	t = getsval(x);
-	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
+	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
 		pfa = (fa *) a[1];	/* regular expression */
 	else {
 		y = execute(a[1]);
diff --git a/tran.c b/tran.c
index e364ebd81928..d1dfe2b2f176 100644
--- a/tran.c
+++ b/tran.c
@@ -55,10 +55,14 @@ Cell	*fsloc;		/* FS */
 Cell	*nrloc;		/* NR */
 Cell	*nfloc;		/* NF */
 Cell	*fnrloc;	/* FNR */
+Cell	*ofsloc;	/* OFS */
+Cell	*orsloc;	/* ORS */
+Cell	*rsloc;		/* RS */
 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
 Cell	*rstartloc;	/* RSTART */
 Cell	*rlengthloc;	/* RLENGTH */
+Cell	*subseploc;	/* SUBSEP */
 Cell	*symtabloc;	/* SYMTAB */
 
 Cell	*nullloc;	/* a guaranteed empty cell */
@@ -67,6 +71,18 @@ Cell	*literal0;
 
 extern Cell **fldtab;
 
+static void
+setfree(Cell *vp)	/* DONTFREE is set iff a builtin string pointer refers to vp->sval */
+{
+	char **slot = &vp->sval;	/* FS, RS, ... each hold the address of some cell's sval */
+	int builtin = slot == FS || slot == RS || slot == OFS || slot == ORS ||
+	    slot == OFMT || slot == CONVFMT || slot == FILENAME || slot == SUBSEP;
+	if (builtin)
+		vp->tval |= DONTFREE;
+	else
+		vp->tval &= ~DONTFREE;
+}
+
 void syminit(void)	/* initialize symbol table with builtin vars */
 {
 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
@@ -76,9 +92,12 @@ void syminit(void)	/* initialize symbol table with builtin vars */
 
 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
 	FS = &fsloc->sval;
-	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
-	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
-	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
+	rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
+	RS = &rsloc->sval;
+	ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
+	OFS = &ofsloc->sval;
+	orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
+	ORS = &orsloc->sval;
 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
@@ -88,7 +107,8 @@ void syminit(void)	/* initialize symbol table with builtin vars */
 	NR = &nrloc->fval;
 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
 	FNR = &fnrloc->fval;
-	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
+	subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
+	SUBSEP = &subseploc->sval;
 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
 	RSTART = &rstartloc->fval;
 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
@@ -174,7 +194,7 @@ void freesymtab(Cell *ap)	/* free a symbol table */
 			free(cp); 
 			tp->nelem--;
 		}
-		tp->tab[i] = NULL;
+		tp->tab[i] = 0;
 	}
 	if (tp->nelem != 0)
 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
@@ -282,6 +302,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
 {
 	int fldno;
 
+	f += 0.0;		/* normalise negative zero to positive zero */
 	if ((vp->tval & (NUM | STR)) == 0) 
 		funnyvar(vp, "assign to");
 	if (isfld(vp)) {
@@ -290,13 +311,21 @@ Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
 		if (fldno > *NF)
 			newfld(fldno);
 		   dprintf( ("setting field %d to %g\n", fldno, f) );
+	} else if (&vp->fval == NF) {
+		donerec = 0;	/* mark $0 invalid */
+		setlastfld(f);
+		dprintf( ("setting NF to %g\n", f) );
 	} else if (isrec(vp)) {
 		donefld = 0;	/* mark $1... invalid */
 		donerec = 1;
+	} else if (vp == ofsloc) {
+		if (donerec == 0)
+			recbld();
 	}
 	if (freeable(vp))
 		xfree(vp->sval); /* free any previous string */
-	vp->tval &= ~STR;	/* mark string invalid */
+	vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
+	vp->fmt = NULL;
 	vp->tval |= NUM;	/* mark number ok */
 	if (f == -0)  /* who would have thought this possible? */
 		f = 0;
@@ -318,6 +347,7 @@ char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
 {
 	char *t;
 	int fldno;
+	Awkfloat f;
 
 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", 
 		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
@@ -328,20 +358,32 @@ char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
 		fldno = atoi(vp->nval);
 		if (fldno > *NF)
 			newfld(fldno);
-		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
+		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, (void *) s) );
 	} else if (isrec(vp)) {
 		donefld = 0;	/* mark $1... invalid */
 		donerec = 1;
+	} else if (vp == ofsloc) {
+		if (donerec == 0)
+			recbld();
 	}
-	t = tostring(s);	/* in case it's self-assign */
+	t = s ? tostring(s) : tostring("");	/* in case it's self-assign */
 	if (freeable(vp))
 		xfree(vp->sval);
-	vp->tval &= ~NUM;
+	vp->tval &= ~(NUM|CONVC|CONVO);
 	vp->tval |= STR;
-	vp->tval &= ~DONTFREE;
+	vp->fmt = NULL;
+	setfree(vp);
 	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", 
-		(void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
-	return(vp->sval = t);
+		(void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) );
+	vp->sval = t;
+	if (&vp->fval == NF) {
+		donerec = 0;	/* mark $0 invalid */
+		f = getfval(vp);
+		setlastfld(f);
+		dprintf( ("setting NF to %g\n", f) );
+	}
+
+	return(vp->sval);
 }
 
 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
@@ -364,7 +406,7 @@ Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
 
 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
 {
-	char s[100];	/* BUG: unchecked */
+	char s[256];
 	double dtemp;
 
 	if ((vp->tval & (NUM | STR)) == 0)
@@ -373,19 +415,80 @@ static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cel
 		fldbld();
 	else if (isrec(vp) && donerec == 0)
 		recbld();
+
+	/*
+	 * ADR: This is complicated and more fragile than is desirable.
+	 * Retrieving a string value for a number associates the string
+	 * value with the scalar.  Previously, the string value was
+	 * sticky, meaning if converted via OFMT that became the value
+	 * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
+	 * changed after a string value was retrieved, the original value
+	 * was maintained and used.  Also not per POSIX.
+	 *
+	 * We work around this design by adding two additional flags,
+	 * CONVC and CONVO, indicating how the string value was
+	 * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
+	 * of the pointer to the xFMT format string used for the
+	 * conversion.  This pointer is only read, **never** dereferenced.
+	 * The next time we do a conversion, if it's coming from the same
+	 * xFMT as last time, and the pointer value is different, we
+	 * know that the xFMT format string changed, and we need to
+	 * redo the conversion. If it's the same, we don't have to.
+	 *
+	 * There are also several cases where we don't do a conversion,
+	 * such as for a field (see the checks below).
+	 */
+
+	/* Don't duplicate the code for actually updating the value */
+#define update_str_val(vp) \
+	{ \
+		if (freeable(vp)) \
+			xfree(vp->sval); \
+		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */ \
+			snprintf(s, sizeof (s), "%.30g", vp->fval); \
+		else \
+			snprintf(s, sizeof (s), *fmt, vp->fval); \
+		vp->sval = tostring(s); \
+		vp->tval &= ~DONTFREE; \
+		vp->tval |= STR; \
+	}
+
 	if (isstr(vp) == 0) {
-		if (freeable(vp))
-			xfree(vp->sval);
-		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
-			sprintf(s, "%.30g", vp->fval);
-		else
-			sprintf(s, *fmt, vp->fval);
-		vp->sval = tostring(s);
-		vp->tval &= ~DONTFREE;
-		vp->tval |= STR;
+		update_str_val(vp);
+		if (fmt == OFMT) {
+			vp->tval &= ~CONVC;
+			vp->tval |= CONVO;
+		} else {
+			/* CONVFMT */
+			vp->tval &= ~CONVO;
+			vp->tval |= CONVC;
+		}
+		vp->fmt = *fmt;
+	} else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) {
+		goto done;
+	} else if (isstr(vp)) {
+		if (fmt == OFMT) {
+			if ((vp->tval & CONVC) != 0
+			    || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
+				update_str_val(vp);
+				vp->tval &= ~CONVC;
+				vp->tval |= CONVO;
+				vp->fmt = *fmt;
+			}
+		} else {
+			/* CONVFMT */
+			if ((vp->tval & CONVO) != 0
+			    || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
+				update_str_val(vp);
+				vp->tval &= ~CONVO;
+				vp->tval |= CONVC;
+				vp->fmt = *fmt;
+			}
+		}
 	}
+done:
 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
-		(void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
+		(void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) );
 	return(vp->sval);
 }
 
@@ -457,3 +560,37 @@ char *qstring(const char *is, int delim)	/* collect string up to next delim */
 	*bp++ = 0;
 	return (char *) buf;
 }
+
+const char *flags2str(int flags)	/* names of the bits set in flags, joined by '|' */
+{
+	static const struct ftab {
+		const char *name;
+		int value;
+	} flagtab[] = {
+		{ "NUM", NUM },
+		{ "STR", STR },
+		{ "DONTFREE", DONTFREE },
+		{ "CON", CON },
+		{ "ARR", ARR },
+		{ "FCN", FCN },
+		{ "FLD", FLD },
+		{ "REC", REC },
+		{ "CONVC", CONVC },
+		{ "CONVO", CONVO },
+		{ NULL, 0 }
+	};
+	static char buf[100];	/* result storage: overwritten by every call */
+	int i;
+	char *cp = buf;
+
+	buf[0] = '\0';	/* buf is static: if no bit matches, don't return the last call's text */
+	for (i = 0; flagtab[i].name != NULL; i++) {
+		if ((flags & flagtab[i].value) != 0) {
+			if (cp > buf)
+				*cp++ = '|';	/* separator before every name but the first */
+			strcpy(cp, flagtab[i].name);
+			cp += strlen(cp);
+		}
+	}
+	return buf;
+}