diff options
author | Kyle Evans <kevans@FreeBSD.org> | 2020-12-05 03:16:05 +0000 |
---|---|---|
committer | Kyle Evans <kevans@FreeBSD.org> | 2020-12-05 03:16:05 +0000 |
commit | 6b986646d434baa21ae3d74d6a662ad206c7ddbd (patch) | |
tree | 11153e51ca240d2b5256c0f35e6d0f9feeaeca1b | |
parent | ca53e5aedfebcc1b4091b68e01b2d5cae923f85e (diff) |
Notes
-rw-r--r-- | contrib/netbsd-tests/lib/libc/regex/data/meta.in | 2 | ||||
-rw-r--r-- | lib/libc/regex/engine.c | 37 | ||||
-rw-r--r-- | lib/libc/regex/regcomp.c | 16 | ||||
-rw-r--r-- | lib/libc/regex/regex2.h | 2 | ||||
-rw-r--r-- | lib/libregex/tests/gnuext.in | 12 |
5 files changed, 61 insertions, 8 deletions
diff --git a/contrib/netbsd-tests/lib/libc/regex/data/meta.in b/contrib/netbsd-tests/lib/libc/regex/data/meta.in index eb24075aea62..b8f14aad8c74 100644 --- a/contrib/netbsd-tests/lib/libc/regex/data/meta.in +++ b/contrib/netbsd-tests/lib/libc/regex/data/meta.in @@ -5,7 +5,7 @@ a\*c & a*c a*c a\\b & a\b a\b a\\\*b & a\*b a\*b # Begin FreeBSD -a\bc &C EESCAPE +a\bc & abc # End FreeBSD a\ &C EESCAPE a\\bc & a\bc a\bc diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c index 79af9a4790b3..bb40018c07e1 100644 --- a/lib/libc/regex/engine.c +++ b/lib/libc/regex/engine.c @@ -118,6 +118,7 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_ #define BOW (BOL-4) #define EOW (BOL-5) #define BADCHAR (BOL-6) +#define NWBND (BOL-7) #define NONCHAR(c) ((c) <= OUT) /* sflags */ #define SBOS 0x0001 @@ -463,6 +464,8 @@ dissect(struct match *m, case OEOW: case OBOS: case OEOS: + case OWBND: + case ONWBND: break; case OANY: case OANYOF: @@ -691,6 +694,21 @@ backref(struct match *m, else return(NULL); break; + case OWBND: + if (ISBOW(m, sp) || ISEOW(m, sp)) + { /* yes */ } + else + return(NULL); + break; + case ONWBND: + if (((sp == m->beginp) && !ISWORD(*sp)) || + (sp == m->endp && !ISWORD(*(sp - 1)))) + { /* yes, beginning/end of subject */ } + else if (ISWORD(*(sp - 1)) == ISWORD(*sp)) + { /* yes, beginning/end of subject */ } + else + return(NULL); + break; case OBOW: if (ISBOW(m, sp)) { /* yes */ } @@ -916,6 +934,17 @@ walk(struct match *m, const char *start, const char *stop, sopno startst, st = step(m->g, startst, stopst, st, flagch, st, sflags); SP("sboweow", st, c); } + if (lastc != OUT && c != OUT && + ISWORD(lastc) == ISWORD(c)) { + flagch = NWBND; + } else if ((lastc == OUT && !ISWORD(c)) || + (c == OUT && !ISWORD(lastc))) { + flagch = NWBND; + } + if (flagch == NWBND) { + st = step(m->g, startst, stopst, st, flagch, st, sflags); + SP("snwbnd", st, c); + } /* are we done? */ if (ISSET(st, stopst)) { @@ -1017,6 +1046,14 @@ step(struct re_guts *g, if (ch == EOW) FWD(aft, bef, 1); break; + case OWBND: + if (ch == BOW || ch == EOW) + FWD(aft, bef, 1); + break; + case ONWBND: + if (ch == NWBND) + FWD(aft, aft, 1); + break; case OANY: if (!NONCHAR(ch)) FWD(aft, bef, 1); diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c index fd44fd60cc65..0eb4b4430996 100644 --- a/lib/libc/regex/regcomp.c +++ b/lib/libc/regex/regcomp.c @@ -486,6 +486,12 @@ p_ere_exp(struct parse *p, struct branchc *bc) case '\'': EMIT(OEOS, 0); break; + case 'B': + EMIT(ONWBND, 0); + break; + case 'b': + EMIT(OWBND, 0); + break; case 'W': case 'w': case 'S': @@ -845,6 +851,12 @@ p_simp_re(struct parse *p, struct branchc *bc) case BACKSL|'\'': EMIT(OEOS, 0); break; + case BACKSL|'B': + EMIT(ONWBND, 0); + break; + case BACKSL|'b': + EMIT(OWBND, 0); + break; case BACKSL|'W': case BACKSL|'w': case BACKSL|'S': @@ -1892,6 +1904,8 @@ findmust(struct parse *p, struct re_guts *g) case OEOL: case OBOS: case OEOS: + case OWBND: + case ONWBND: case O_QUEST: case O_CH: case OEND: @@ -2043,6 +2057,8 @@ altoffset(sop *scan, int offset) try++; case OBOW: case OEOW: + case OWBND: + case ONWBND: case OLPAREN: case ORPAREN: case OOR2: diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h index 1c41656694f3..012823d3261a 100644 --- a/lib/libc/regex/regex2.h +++ b/lib/libc/regex/regex2.h @@ -106,6 +106,8 @@ typedef unsigned long sopno; #define OEOW (20L<<OPSHIFT) /* end word - */ #define OBOS (21L<<OPSHIFT) /* begin subj. - */ #define OEOS (22L<<OPSHIFT) /* end subj. - */ +#define OWBND (23L<<OPSHIFT) /* word bound - */ +#define ONWBND (24L<<OPSHIFT) /* not bound - */ /* * Structures for [] character-set representation. diff --git a/lib/libregex/tests/gnuext.in b/lib/libregex/tests/gnuext.in index ebd052fb8b75..8f49854235a9 100644 --- a/lib/libregex/tests/gnuext.in +++ b/lib/libregex/tests/gnuext.in @@ -17,14 +17,12 @@ a\|b\|c b abc a \s\+ b aSNTb SNT # Word boundaries (\b, \B, \<, \>, \`, \') # (is/not boundary, start/end word, start/end subject string) -# Most of these are disabled for the moment, and will be re-enabled as -# we become feature complete. -#\babc\b & <abc> abc +\babc\b & <abc> abc \<abc\> & <abc> abc -#\Babc\B & abc -#\B[abc]\B & <abc> b -#\B[abc]+ - <abc> bc -#\B[abc]\+ b <abc> bc +\Babc\B & abc +\B[abc]\B & <abc> b +\B[abc]+ - <abc> bc +\B[abc]\+ b <abc> bc \`abc & abc abc abc\' & abc abc \`abc\' & abc abc |