about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- contrib/one-true-awk/ChangeLog | 24
-rw-r--r-- contrib/one-true-awk/FIXES | 14
-rw-r--r-- contrib/one-true-awk/awk.1 | 52
-rw-r--r-- contrib/one-true-awk/awk.h | 8
-rw-r--r-- contrib/one-true-awk/awkgram.y | 20
-rw-r--r-- contrib/one-true-awk/b.c | 34
-rwxr-xr-x contrib/one-true-awk/bugs-fixed/REGRESS | 2
-rw-r--r-- contrib/one-true-awk/bugs-fixed/system-status.ok2 | 3
-rw-r--r-- contrib/one-true-awk/lex.c | 9
-rw-r--r-- contrib/one-true-awk/main.c | 2
-rw-r--r-- contrib/one-true-awk/maketab.c | 1
-rw-r--r-- contrib/one-true-awk/parse.c | 23
-rw-r--r-- contrib/one-true-awk/proto.h | 3
-rw-r--r-- contrib/one-true-awk/run.c | 244
14 files changed, 44 insertions, 395 deletions
diff --git a/contrib/one-true-awk/ChangeLog b/contrib/one-true-awk/ChangeLog
index dea4ed7e3187..6ce9417c10da 100644
--- a/contrib/one-true-awk/ChangeLog
+++ b/contrib/one-true-awk/ChangeLog
@@ -47,30 +47,6 @@
* test/T.lilly: Remove gawk warnings from output, improves
portability.
-2019-10-17 Arnold D. Robbins <arnold@skeeve.com>
-
- Pull in systime() and strftime() from the NetBSD awk.
-
- * awk.1: Document the functions.
- * run.c (bltin): Implement the functions.
- * awk.h: Add defines for systime and strftime.
- * lex.c: Add support for systime and strftime.
-
-2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
-
- Integrate features from different *BSD versions of awk.
- Gensub support from NetBSD. Bitwise functions from OpenBSD.
-
- * awk.h: Add defines for and, or, xor, compl, lshift and rshift.
- * awkgram.y: Add support for gensub.
- * maketab.c: Ditto.
- * lex.c: Add support for gensub and bitwise functions.
- * parse.c (node5, op5): New functions.
- * proto.h (node5, op5): New declarations.
- * run.c (bltin): Implement the bitwise functions.
- (gensub): New function.
- * awk.1: Document additional functions.
-
2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
* b.c (fnematch): Change type of pbuf from unsigned char to char.
diff --git a/contrib/one-true-awk/FIXES b/contrib/one-true-awk/FIXES
index a043b356fafa..c4eef3bd8ea0 100644
--- a/contrib/one-true-awk/FIXES
+++ b/contrib/one-true-awk/FIXES
@@ -25,6 +25,20 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the
second edition of the AWK book was published in September 2023.
+Apr 22, 2024:
+ fixed regex engine gototab reallocation issue that was
+ introduced during the Nov 24 rewrite. Thanks to Arnold Robbins.
+ Fixed a scan bug in split in the case the separator is a single
+ character. thanks to Oguz Ismail for spotting the issue.
+
+Mar 10, 2024:
+ fixed use-after-free bug in fnematch due to adjbuf invalidating
+ the pointers to buf. thanks to github user caffe3 for spotting
+ the issue and providing a fix, and to Miguel Pineiro Jr.
+ for the alternative fix.
+ MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max.
+ thanks to Miguel Pineiro Jr.
+
Jan 22, 2024:
Restore the ability to compile with g++. Thanks to
Arnold Robbins.
diff --git a/contrib/one-true-awk/awk.1 b/contrib/one-true-awk/awk.1
index 496a2a652379..ef40a0104468 100644
--- a/contrib/one-true-awk/awk.1
+++ b/contrib/one-true-awk/awk.1
@@ -305,25 +305,6 @@ and
.B gsub
return the number of replacements.
.TP
-\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR
-replaces instances of
-.I pat
-in
-.I target
-with
-.IR repl .
-If
-.I how
-is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise,
-.I how
-is a number indicating which occurrence to replace. If no
-.IR target ,
-use
-.BR $0 .
-Return the resulting string;
-.I target
-is not modified.
-.TP
.BI sprintf( fmt , " expr" , " ...\fB)
the string resulting from formatting
.I expr ...
@@ -332,28 +313,6 @@ according to the
format
.IR fmt .
.TP
-.B systime()
-returns the current date and time as a standard
-``seconds since the epoch'' value.
-.TP
-.BI strftime( fmt ", " timestamp\^ )
-formats
-.I timestamp
-(a value in seconds since the epoch)
-according to
-.IR fmt ,
-which is a format string as supported by
-.IR strftime (3).
-Both
-.I timestamp
-and
-.I fmt
-may be omitted; if no
-.IR timestamp ,
-the current time of day is used, and if no
-.IR fmt ,
-a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used.
-.TP
.BI system( cmd )
executes
.I cmd
@@ -413,17 +372,6 @@ In all cases,
returns 1 for a successful input,
0 for end of file, and \-1 for an error.
.PP
-The functions
-.BR compl ,
-.BR and ,
-.BR or ,
-.BR xor ,
-.BR lshift ,
-and
-.B rshift
-peform the corresponding bitwise operations on their
-operands, which are first truncated to integer.
-.PP
Patterns are arbitrary Boolean combinations
(with
.BR "! || &&" )
diff --git a/contrib/one-true-awk/awk.h b/contrib/one-true-awk/awk.h
index 740447ee2167..76180e47f16a 100644
--- a/contrib/one-true-awk/awk.h
+++ b/contrib/one-true-awk/awk.h
@@ -154,14 +154,6 @@ extern Cell *symtabloc; /* SYMTAB */
#define FTOUPPER 12
#define FTOLOWER 13
#define FFLUSH 14
-#define FAND 15
-#define FFOR 16
-#define FXOR 17
-#define FCOMPL 18
-#define FLSHIFT 19
-#define FRSHIFT 20
-#define FSYSTIME 21
-#define FSTRFTIME 22
/* Node: parse tree is made of nodes, with Cell's at bottom */
diff --git a/contrib/one-true-awk/awkgram.y b/contrib/one-true-awk/awkgram.y
index 233253a4307b..db804e117e19 100644
--- a/contrib/one-true-awk/awkgram.y
+++ b/contrib/one-true-awk/awkgram.y
@@ -53,7 +53,7 @@ Node *arglist = 0; /* list of args for current function */
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
-%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
+%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF
@@ -377,24 +377,6 @@ term:
| INCR var { $$ = op1(PREINCR, $2); }
| var DECR { $$ = op1(POSTDECR, $1); }
| var INCR { $$ = op1(POSTINCR, $1); }
- | GENSUB '(' reg_expr comma pattern comma pattern ')'
- { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
- | GENSUB '(' pattern comma pattern comma pattern ')'
- { if (constnode($3)) {
- $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
- free($3);
- } else
- $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
- }
- | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
- { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
- | GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
- { if (constnode($3)) {
- $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
- free($3);
- } else
- $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
- }
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
diff --git a/contrib/one-true-awk/b.c b/contrib/one-true-awk/b.c
index 4c438fab4cd4..f650269753c7 100644
--- a/contrib/one-true-awk/b.c
+++ b/contrib/one-true-awk/b.c
@@ -651,8 +651,8 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem
if (tab->inuse + 1 >= tab->allocated)
resize_gototab(f, state);
- f->gototab[state].entries[f->gototab[state].inuse-1].ch = ch;
- f->gototab[state].entries[f->gototab[state].inuse-1].state = val;
+ f->gototab[state].entries[f->gototab[state].inuse].ch = ch;
+ f->gototab[state].entries[f->gototab[state].inuse].state = val;
f->gototab[state].inuse++;
return val;
} else {
@@ -677,9 +677,9 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem
gtt *tab = & f->gototab[state];
if (tab->inuse + 1 >= tab->allocated)
resize_gototab(f, state);
- ++tab->inuse;
f->gototab[state].entries[tab->inuse].ch = ch;
f->gototab[state].entries[tab->inuse].state = val;
+ ++tab->inuse;
qsort(f->gototab[state].entries,
f->gototab[state].inuse, sizeof(gtte), entry_cmp);
@@ -830,8 +830,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
}
-#define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long
-
/*
* NAME
* fnematch
@@ -868,16 +866,28 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
do {
/*
- * Call u8_rune with at least MAX_UTF_BYTES ahead in
+ * Call u8_rune with at least awk_mb_cur_max ahead in
* the buffer until EOF interferes.
*/
- if (k - j < MAX_UTF_BYTES) {
- if (k + MAX_UTF_BYTES > buf + bufsize) {
+ if (k - j < awk_mb_cur_max) {
+ if (k + awk_mb_cur_max > buf + bufsize) {
+ char *obuf = buf;
adjbuf((char **) &buf, &bufsize,
- bufsize + MAX_UTF_BYTES,
+ bufsize + awk_mb_cur_max,
quantum, 0, "fnematch");
+
+ /* buf resized, maybe moved. update pointers */
+ *pbufsize = bufsize;
+ if (obuf != buf) {
+ i = buf + (i - obuf);
+ j = buf + (j - obuf);
+ k = buf + (k - obuf);
+ *pbuf = buf;
+ if (patlen)
+ patbeg = buf + (patbeg - obuf);
+ }
}
- for (n = MAX_UTF_BYTES ; n > 0; n--) {
+ for (n = awk_mb_cur_max ; n > 0; n--) {
*k++ = (c = getc(f)) != EOF ? c : 0;
if (c == EOF) {
if (ferror(f))
@@ -914,10 +924,6 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
s = 2;
} while (1);
- /* adjbuf() may have relocated a resized buffer. Inform the world. */
- *pbuf = buf;
- *pbufsize = bufsize;
-
if (patlen) {
/*
* Under no circumstances is the last character fed to
diff --git a/contrib/one-true-awk/bugs-fixed/REGRESS b/contrib/one-true-awk/bugs-fixed/REGRESS
index acdbeebb6271..30bdc7cd5c0f 100755
--- a/contrib/one-true-awk/bugs-fixed/REGRESS
+++ b/contrib/one-true-awk/bugs-fixed/REGRESS
@@ -27,6 +27,6 @@ do
then
rm -f $OUT
else
- echo '++++ $i failed!'
+ echo "+++ $i failed!"
fi
done
diff --git a/contrib/one-true-awk/bugs-fixed/system-status.ok2 b/contrib/one-true-awk/bugs-fixed/system-status.ok2
new file mode 100644
index 000000000000..f1f631e1cb33
--- /dev/null
+++ b/contrib/one-true-awk/bugs-fixed/system-status.ok2
@@ -0,0 +1,3 @@
+normal status 42
+death by signal status 257
+death by signal with core dump status 262
diff --git a/contrib/one-true-awk/lex.c b/contrib/one-true-awk/lex.c
index 141cc81d2b59..0473a338c906 100644
--- a/contrib/one-true-awk/lex.c
+++ b/contrib/one-true-awk/lex.c
@@ -47,11 +47,9 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "BEGIN", XBEGIN, XBEGIN },
{ "END", XEND, XEND },
{ "NF", VARNF, VARNF },
- { "and", FAND, BLTIN },
{ "atan2", FATAN, BLTIN },
{ "break", BREAK, BREAK },
{ "close", CLOSE, CLOSE },
- { "compl", FCOMPL, BLTIN },
{ "continue", CONTINUE, CONTINUE },
{ "cos", FCOS, BLTIN },
{ "delete", DELETE, DELETE },
@@ -63,7 +61,6 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "for", FOR, FOR },
{ "func", FUNC, FUNC },
{ "function", FUNC, FUNC },
- { "gensub", GENSUB, GENSUB },
{ "getline", GETLINE, GETLINE },
{ "gsub", GSUB, GSUB },
{ "if", IF, IF },
@@ -72,30 +69,24 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "int", FINT, BLTIN },
{ "length", FLENGTH, BLTIN },
{ "log", FLOG, BLTIN },
- { "lshift", FLSHIFT, BLTIN },
{ "match", MATCHFCN, MATCHFCN },
{ "next", NEXT, NEXT },
{ "nextfile", NEXTFILE, NEXTFILE },
- { "or", FFOR, BLTIN },
{ "print", PRINT, PRINT },
{ "printf", PRINTF, PRINTF },
{ "rand", FRAND, BLTIN },
{ "return", RETURN, RETURN },
- { "rshift", FRSHIFT, BLTIN },
{ "sin", FSIN, BLTIN },
{ "split", SPLIT, SPLIT },
{ "sprintf", SPRINTF, SPRINTF },
{ "sqrt", FSQRT, BLTIN },
{ "srand", FSRAND, BLTIN },
- { "strftime", FSTRFTIME, BLTIN },
{ "sub", SUB, SUB },
{ "substr", SUBSTR, SUBSTR },
{ "system", FSYSTEM, BLTIN },
- { "systime", FSYSTIME, BLTIN },
{ "tolower", FTOLOWER, BLTIN },
{ "toupper", FTOUPPER, BLTIN },
{ "while", WHILE, WHILE },
- { "xor", FXOR, BLTIN },
};
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
diff --git a/contrib/one-true-awk/main.c b/contrib/one-true-awk/main.c
index 73af89ec1058..0e70288a92f3 100644
--- a/contrib/one-true-awk/main.c
+++ b/contrib/one-true-awk/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20240122";
+const char *version = "version 20240422";
#define DEBUG
#include <stdio.h>
diff --git a/contrib/one-true-awk/maketab.c b/contrib/one-true-awk/maketab.c
index 3a80c87725ac..3747efa03702 100644
--- a/contrib/one-true-awk/maketab.c
+++ b/contrib/one-true-awk/maketab.c
@@ -104,7 +104,6 @@ struct xx
{ ARG, "arg", "arg" },
{ VARNF, "getnf", "NF" },
{ GETLINE, "awkgetline", "getline" },
- { GENSUB, "gensub", "gensub" },
{ 0, "", "" },
};
diff --git a/contrib/one-true-awk/parse.c b/contrib/one-true-awk/parse.c
index 2b7fd1928930..14608be7570a 100644
--- a/contrib/one-true-awk/parse.c
+++ b/contrib/one-true-awk/parse.c
@@ -93,20 +93,6 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
-Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
-{
- Node *x;
-
- x = nodealloc(5);
- x->nobj = a;
- x->narg[0] = b;
- x->narg[1] = c;
- x->narg[2] = d;
- x->narg[3] = e;
- x->narg[4] = f;
- return(x);
-}
-
Node *stat1(int a, Node *b)
{
Node *x;
@@ -179,15 +165,6 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
-Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
-{
- Node *x;
-
- x = node5(a,b,c,d,e,f);
- x->ntype = NEXPR;
- return(x);
-}
-
Node *celltonode(Cell *a, int b)
{
Node *x;
diff --git a/contrib/one-true-awk/proto.h b/contrib/one-true-awk/proto.h
index b44f9e7a5599..ed63e7875da3 100644
--- a/contrib/one-true-awk/proto.h
+++ b/contrib/one-true-awk/proto.h
@@ -73,14 +73,12 @@ extern Node *node1(int, Node *);
extern Node *node2(int, Node *, Node *);
extern Node *node3(int, Node *, Node *, Node *);
extern Node *node4(int, Node *, Node *, Node *, Node *);
-extern Node *node5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat3(int, Node *, Node *, Node *);
extern Node *op2(int, Node *, Node *);
extern Node *op1(int, Node *);
extern Node *stat1(int, Node *);
extern Node *op3(int, Node *, Node *, Node *);
extern Node *op4(int, Node *, Node *, Node *, Node *);
-extern Node *op5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat2(int, Node *, Node *);
extern Node *stat4(int, Node *, Node *, Node *, Node *);
extern Node *celltonode(Cell *, int);
@@ -199,7 +197,6 @@ extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
extern Cell *dosub(Node **, int);
-extern Cell *gensub(Node **, int);
extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);
diff --git a/contrib/one-true-awk/run.c b/contrib/one-true-awk/run.c
index ede3ba7d9827..99306992df41 100644
--- a/contrib/one-true-awk/run.c
+++ b/contrib/one-true-awk/run.c
@@ -1827,7 +1827,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
for (;;) {
n++;
t = s;
- while (*s != sep && *s != '\n' && *s != '\0')
+ while (*s != sep && *s != '\0')
s++;
temp = *s;
setptr(s, '\0');
@@ -2062,14 +2062,12 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
{
Cell *x, *y;
Awkfloat u;
- int t, sz;
+ int t;
Awkfloat tmp;
- char *buf, *fmt;
+ char *buf;
Node *nextarg;
FILE *fp;
int status = 0;
- time_t tv;
- struct tm *tm;
int estatus = 0;
t = ptoi(a[0]);
@@ -2111,64 +2109,6 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
nextarg = nextarg->nnext;
}
break;
- case FCOMPL:
- u = ~((int)getfval(x));
- break;
- case FAND:
- if (nextarg == 0) {
- WARNING("and requires two arguments; returning 0");
- u = 0;
- break;
- }
- y = execute(a[1]->nnext);
- u = ((int)getfval(x)) & ((int)getfval(y));
- tempfree(y);
- nextarg = nextarg->nnext;
- break;
- case FFOR:
- if (nextarg == 0) {
- WARNING("or requires two arguments; returning 0");
- u = 0;
- break;
- }
- y = execute(a[1]->nnext);
- u = ((int)getfval(x)) | ((int)getfval(y));
- tempfree(y);
- nextarg = nextarg->nnext;
- break;
- case FXOR:
- if (nextarg == 0) {
- WARNING("xor requires two arguments; returning 0");
- u = 0;
- break;
- }
- y = execute(a[1]->nnext);
- u = ((int)getfval(x)) ^ ((int)getfval(y));
- tempfree(y);
- nextarg = nextarg->nnext;
- break;
- case FLSHIFT:
- if (nextarg == 0) {
- WARNING("lshift requires two arguments; returning 0");
- u = 0;
- break;
- }
- y = execute(a[1]->nnext);
- u = ((int)getfval(x)) << ((int)getfval(y));
- tempfree(y);
- nextarg = nextarg->nnext;
- break;
- case FRSHIFT:
- if (nextarg == 0) {
- WARNING("rshift requires two arguments; returning 0");
- u = 0;
- break;
- }
- y = execute(a[1]->nnext);
- u = ((int)getfval(x)) >> ((int)getfval(y));
- tempfree(y);
- nextarg = nextarg->nnext;
- break;
case FSYSTEM:
fflush(stdout); /* in case something is buffered already */
estatus = status = system(getsval(x));
@@ -2223,41 +2163,6 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
else
u = fflush(fp);
break;
- case FSYSTIME:
- u = time((time_t *) 0);
- break;
- case FSTRFTIME:
- /* strftime([format [,timestamp]]) */
- if (nextarg) {
- y = execute(nextarg);
- nextarg = nextarg->nnext;
- tv = (time_t) getfval(y);
- tempfree(y);
- } else
- tv = time((time_t *) 0);
- tm = localtime(&tv);
- if (tm == NULL)
- FATAL("bad time %ld", (long)tv);
-
- if (isrec(x)) {
- /* format argument not provided, use default */
- fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
- } else
- fmt = tostring(getsval(x));
-
- sz = 32;
- buf = NULL;
- do {
- if ((buf = realloc(buf, (sz *= 2))) == NULL)
- FATAL("out of memory in strftime");
- } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
-
- y = gettemp();
- setsval(y, buf);
- free(fmt);
- free(buf);
-
- return y;
default: /* can't happen */
FATAL("illegal function type %d", t);
break;
@@ -2501,7 +2406,7 @@ void backsub(char **pb_ptr, const char **sptr_ptr);
Cell *dosub(Node **a, int subop) /* sub and gsub */
{
fa *pfa;
- int tempstat;
+ int tempstat = 0;
char *repl;
Cell *x;
@@ -2637,147 +2542,6 @@ next_search:
return x;
}
-Cell *gensub(Node **a, int nnn) /* global selective substitute */
- /* XXX incomplete - doesn't support backreferences \0 ... \9 */
-{
- Cell *x, *y, *res, *h;
- char *rptr;
- const char *sptr;
- char *buf, *pb;
- const char *t, *q;
- fa *pfa;
- int mflag, tempstat, num, whichm;
- int bufsz = recsize;
-
- if ((buf = malloc(bufsz)) == NULL)
- FATAL("out of memory in gensub");
- mflag = 0; /* if mflag == 0, can replace empty string */
- num = 0;
- x = execute(a[4]); /* source string */
- t = getsval(x);
- res = copycell(x); /* target string - initially copy of source */
- res->csub = CTEMP; /* result values are temporary */
- if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
- pfa = (fa *) a[1]; /* regular expression */
- else {
- y = execute(a[1]);
- pfa = makedfa(getsval(y), 1);
- tempfree(y);
- }
- y = execute(a[2]); /* replacement string */
- h = execute(a[3]); /* which matches should be replaced */
- sptr = getsval(h);
- if (sptr[0] == 'g' || sptr[0] == 'G')
- whichm = -1;
- else {
- /*
- * The specified number is index of replacement, starting
- * from 1. GNU awk treats index lower than 0 same as
- * 1, we do same for compatibility.
- */
- whichm = (int) getfval(h) - 1;
- if (whichm < 0)
- whichm = 0;
- }
- tempfree(h);
-
- if (pmatch(pfa, t)) {
- char *sl;
-
- tempstat = pfa->initstat;
- pfa->initstat = 2;
- pb = buf;
- rptr = getsval(y);
- /*
- * XXX if there are any backreferences in subst string,
- * complain now.
- */
- for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
- if (strchr("0123456789", sl[1])) {
- FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
- }
- }
-
- do {
- if (whichm >= 0 && whichm != num) {
- num++;
- adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
-
- /* copy the part of string up to and including
- * match to output buffer */
- while (t < patbeg + patlen)
- *pb++ = *t++;
- continue;
- }
-
- if (patlen == 0 && *patbeg != 0) { /* matched empty string */
- if (mflag == 0) { /* can replace empty */
- num++;
- sptr = rptr;
- while (*sptr != 0) {
- adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
- if (*sptr == '\\') {
- backsub(&pb, &sptr);
- } else if (*sptr == '&') {
- sptr++;
- adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
- for (q = patbeg; q < patbeg+patlen; )
- *pb++ = *q++;
- } else
- *pb++ = *sptr++;
- }
- }
- if (*t == 0) /* at end */
- goto done;
- adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
- *pb++ = *t++;
- if (pb > buf + bufsz) /* BUG: not sure of this test */
- FATAL("gensub result0 %.30s too big; can't happen", buf);
- mflag = 0;
- }
- else { /* matched nonempty string */
- num++;
- sptr = t;
- adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
- while (sptr < patbeg)
- *pb++ = *sptr++;
- sptr = rptr;
- while (*sptr != 0) {
- adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
- if (*sptr == '\\') {
- backsub(&pb, &sptr);
- } else if (*sptr == '&') {
- sptr++;
- adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
- for (q = patbeg; q < patbeg+patlen; )
- *pb++ = *q++;
- } else
- *pb++ = *sptr++;
- }
- t = patbeg + patlen;
- if (patlen == 0 || *t == 0 || *(t-1) == 0)
- goto done;
- if (pb > buf + bufsz)
- FATAL("gensub result1 %.30s too big; can't happen", buf);
- mflag = 1;
- }
- } while (pmatch(pfa,t));
- sptr = t;
- adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
- while ((*pb++ = *sptr++) != 0)
- ;
- done: if (pb > buf + bufsz)
- FATAL("gensub result2 %.30s too big; can't happen", buf);
- *pb = '\0';
- setsval(res, buf);
- pfa->initstat = tempstat;
- }
- tempfree(x);
- tempfree(y);
- free(buf);
- return(res);
-}
-
void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
{ /* sptr[0] == '\\' */
char *pb = *pb_ptr;