summary refs log tree commit diff
diff options
context:
space:
mode:
author Kyle Evans <kevans@FreeBSD.org> 2020-12-05 03:16:05 +0000
committer Kyle Evans <kevans@FreeBSD.org> 2020-12-05 03:16:05 +0000
commit 6b986646d434baa21ae3d74d6a662ad206c7ddbd (patch)
tree 11153e51ca240d2b5256c0f35e6d0f9feeaeca1b
parent ca53e5aedfebcc1b4091b68e01b2d5cae923f85e (diff)
Notes
-rw-r--r-- contrib/netbsd-tests/lib/libc/regex/data/meta.in 2
-rw-r--r-- lib/libc/regex/engine.c 37
-rw-r--r-- lib/libc/regex/regcomp.c 16
-rw-r--r-- lib/libc/regex/regex2.h 2
-rw-r--r-- lib/libregex/tests/gnuext.in 12
5 files changed, 61 insertions, 8 deletions
diff --git a/contrib/netbsd-tests/lib/libc/regex/data/meta.in b/contrib/netbsd-tests/lib/libc/regex/data/meta.in
index eb24075aea62..b8f14aad8c74 100644
--- a/contrib/netbsd-tests/lib/libc/regex/data/meta.in
+++ b/contrib/netbsd-tests/lib/libc/regex/data/meta.in
@@ -5,7 +5,7 @@ a\*c & a*c a*c
a\\b & a\b a\b
a\\\*b & a\*b a\*b
# Begin FreeBSD
-a\bc &C EESCAPE
+a\bc & abc
# End FreeBSD
a\ &C EESCAPE
a\\bc & a\bc a\bc
diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c
index 79af9a4790b3..bb40018c07e1 100644
--- a/lib/libc/regex/engine.c
+++ b/lib/libc/regex/engine.c
@@ -118,6 +118,7 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_
#define BOW (BOL-4)
#define EOW (BOL-5)
#define BADCHAR (BOL-6)
+#define NWBND (BOL-7)
#define NONCHAR(c) ((c) <= OUT)
/* sflags */
#define SBOS 0x0001
@@ -463,6 +464,8 @@ dissect(struct match *m,
case OEOW:
case OBOS:
case OEOS:
+ case OWBND:
+ case ONWBND:
break;
case OANY:
case OANYOF:
@@ -691,6 +694,21 @@ backref(struct match *m,
else
return(NULL);
break;
+ case OWBND:
+ if (ISBOW(m, sp) || ISEOW(m, sp))
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case ONWBND:
+ if (((sp == m->beginp) && !ISWORD(*sp)) ||
+ (sp == m->endp && !ISWORD(*(sp - 1))))
+ { /* yes, beginning/end of subject */ }
+ else if (ISWORD(*(sp - 1)) == ISWORD(*sp))
+ { /* yes, both neighbors are word chars or both are non-word chars */ }
+ else
+ return(NULL);
+ break;
case OBOW:
if (ISBOW(m, sp))
{ /* yes */ }
@@ -916,6 +934,17 @@ walk(struct match *m, const char *start, const char *stop, sopno startst,
st = step(m->g, startst, stopst, st, flagch, st, sflags);
SP("sboweow", st, c);
}
+ if (lastc != OUT && c != OUT &&
+ ISWORD(lastc) == ISWORD(c)) {
+ flagch = NWBND;
+ } else if ((lastc == OUT && !ISWORD(c)) ||
+ (c == OUT && !ISWORD(lastc))) {
+ flagch = NWBND;
+ }
+ if (flagch == NWBND) {
+ st = step(m->g, startst, stopst, st, flagch, st, sflags);
+ SP("snwbnd", st, c);
+ }
/* are we done? */
if (ISSET(st, stopst)) {
@@ -1017,6 +1046,14 @@ step(struct re_guts *g,
if (ch == EOW)
FWD(aft, bef, 1);
break;
+ case OWBND:
+ if (ch == BOW || ch == EOW)
+ FWD(aft, bef, 1);
+ break;
+ case ONWBND:
+ if (ch == NWBND)
+ FWD(aft, aft, 1);
+ break;
case OANY:
if (!NONCHAR(ch))
FWD(aft, bef, 1);
diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c
index fd44fd60cc65..0eb4b4430996 100644
--- a/lib/libc/regex/regcomp.c
+++ b/lib/libc/regex/regcomp.c
@@ -486,6 +486,12 @@ p_ere_exp(struct parse *p, struct branchc *bc)
case '\'':
EMIT(OEOS, 0);
break;
+ case 'B':
+ EMIT(ONWBND, 0);
+ break;
+ case 'b':
+ EMIT(OWBND, 0);
+ break;
case 'W':
case 'w':
case 'S':
@@ -845,6 +851,12 @@ p_simp_re(struct parse *p, struct branchc *bc)
case BACKSL|'\'':
EMIT(OEOS, 0);
break;
+ case BACKSL|'B':
+ EMIT(ONWBND, 0);
+ break;
+ case BACKSL|'b':
+ EMIT(OWBND, 0);
+ break;
case BACKSL|'W':
case BACKSL|'w':
case BACKSL|'S':
@@ -1892,6 +1904,8 @@ findmust(struct parse *p, struct re_guts *g)
case OEOL:
case OBOS:
case OEOS:
+ case OWBND:
+ case ONWBND:
case O_QUEST:
case O_CH:
case OEND:
@@ -2043,6 +2057,8 @@ altoffset(sop *scan, int offset)
try++;
case OBOW:
case OEOW:
+ case OWBND:
+ case ONWBND:
case OLPAREN:
case ORPAREN:
case OOR2:
diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h
index 1c41656694f3..012823d3261a 100644
--- a/lib/libc/regex/regex2.h
+++ b/lib/libc/regex/regex2.h
@@ -106,6 +106,8 @@ typedef unsigned long sopno;
#define OEOW (20L<<OPSHIFT) /* end word - */
#define OBOS (21L<<OPSHIFT) /* begin subj. - */
#define OEOS (22L<<OPSHIFT) /* end subj. - */
+#define OWBND (23L<<OPSHIFT) /* word bound - */
+#define ONWBND (24L<<OPSHIFT) /* not bound - */
/*
* Structures for [] character-set representation.
diff --git a/lib/libregex/tests/gnuext.in b/lib/libregex/tests/gnuext.in
index ebd052fb8b75..8f49854235a9 100644
--- a/lib/libregex/tests/gnuext.in
+++ b/lib/libregex/tests/gnuext.in
@@ -17,14 +17,12 @@ a\|b\|c b abc a
\s\+ b aSNTb SNT
# Word boundaries (\b, \B, \<, \>, \`, \')
# (is/not boundary, start/end word, start/end subject string)
-# Most of these are disabled for the moment, and will be re-enabled as
-# we become feature complete.
-#\babc\b & <abc> abc
+\babc\b & <abc> abc
\<abc\> & <abc> abc
-#\Babc\B & abc
-#\B[abc]\B & <abc> b
-#\B[abc]+ - <abc> bc
-#\B[abc]\+ b <abc> bc
+\Babc\B & abc
+\B[abc]\B & <abc> b
+\B[abc]+ - <abc> bc
+\B[abc]\+ b <abc> bc
\`abc & abc abc
abc\' & abc abc
\`abc\' & abc abc