aboutsummaryrefslogtreecommitdiff
path: root/usr.bin/grep
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/grep')
-rw-r--r--usr.bin/grep/Makefile61
-rw-r--r--usr.bin/grep/Makefile.depend18
-rw-r--r--usr.bin/grep/Makefile.depend.options7
-rw-r--r--usr.bin/grep/file.c248
-rw-r--r--usr.bin/grep/grep.1592
-rw-r--r--usr.bin/grep/grep.c724
-rw-r--r--usr.bin/grep/grep.h159
-rw-r--r--usr.bin/grep/queue.c142
-rw-r--r--usr.bin/grep/tests/Makefile55
-rw-r--r--usr.bin/grep/tests/Makefile.depend10
-rwxr-xr-xusr.bin/grep/tests/grep_freebsd_test.sh127
-rw-r--r--usr.bin/grep/util.c856
-rw-r--r--usr.bin/grep/zgrep.1112
-rwxr-xr-xusr.bin/grep/zgrep.sh230
14 files changed, 3341 insertions, 0 deletions
diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile
new file mode 100644
index 000000000000..c72b86656148
--- /dev/null
+++ b/usr.bin/grep/Makefile
@@ -0,0 +1,61 @@
+# $NetBSD: Makefile,v 1.4 2011/02/16 01:31:33 joerg Exp $
+# $OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
+
+.include <src.opts.mk>
+
+PACKAGE= runtime
+
+PROG= grep
+MAN= grep.1 zgrep.1
+
+SRCS= file.c grep.c queue.c util.c
+
+SCRIPTS= zgrep.sh
+LINKS= ${BINDIR}/zgrep ${BINDIR}/zfgrep \
+ ${BINDIR}/zgrep ${BINDIR}/zegrep \
+ ${BINDIR}/zgrep ${BINDIR}/bzgrep \
+ ${BINDIR}/zgrep ${BINDIR}/bzegrep \
+ ${BINDIR}/zgrep ${BINDIR}/bzfgrep \
+ ${BINDIR}/zgrep ${BINDIR}/lzgrep \
+ ${BINDIR}/zgrep ${BINDIR}/lzegrep \
+ ${BINDIR}/zgrep ${BINDIR}/lzfgrep \
+ ${BINDIR}/zgrep ${BINDIR}/xzgrep \
+ ${BINDIR}/zgrep ${BINDIR}/xzegrep \
+ ${BINDIR}/zgrep ${BINDIR}/xzfgrep \
+ ${BINDIR}/zgrep ${BINDIR}/zstdgrep \
+ ${BINDIR}/zgrep ${BINDIR}/zstdegrep \
+ ${BINDIR}/zgrep ${BINDIR}/zstdfgrep
+
+LINKS+= ${BINDIR}/grep ${BINDIR}/egrep \
+ ${BINDIR}/grep ${BINDIR}/fgrep \
+ ${BINDIR}/grep ${BINDIR}/rgrep \
+
+MLINKS= zgrep.1 zfgrep.1 \
+ zgrep.1 zegrep.1 \
+ zgrep.1 bzgrep.1 \
+ zgrep.1 bzegrep.1 \
+ zgrep.1 bzfgrep.1 \
+ zgrep.1 lzgrep.1 \
+ zgrep.1 lzegrep.1 \
+ zgrep.1 lzfgrep.1 \
+ zgrep.1 xzgrep.1 \
+ zgrep.1 xzegrep.1 \
+ zgrep.1 xzfgrep.1 \
+ zgrep.1 zstdgrep.1 \
+ zgrep.1 zstdegrep.1 \
+ zgrep.1 zstdfgrep.1
+
+MLINKS+= grep.1 egrep.1 \
+ grep.1 fgrep.1 \
+ grep.1 rgrep.1
+
+CFLAGS.gcc+= --param max-inline-insns-single=500
+
+.if !defined(BOOTSTRAPPING)
+LIBADD+= regex
+.endif
+
+HAS_TESTS=
+SUBDIR.${MK_TESTS}+= tests
+
+.include <bsd.prog.mk>
diff --git a/usr.bin/grep/Makefile.depend b/usr.bin/grep/Makefile.depend
new file mode 100644
index 000000000000..92fa8c2d80f7
--- /dev/null
+++ b/usr.bin/grep/Makefile.depend
@@ -0,0 +1,18 @@
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libbz2 \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/libregex \
+ lib/libz \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/usr.bin/grep/Makefile.depend.options b/usr.bin/grep/Makefile.depend.options
new file mode 100644
index 000000000000..b62a74f3895c
--- /dev/null
+++ b/usr.bin/grep/Makefile.depend.options
@@ -0,0 +1,7 @@
+# This file is not autogenerated - take care!
+
+DIRDEPS_OPTIONS= GNU_GREP_COMPAT
+
+DIRDEPS.GNU_GREP_COMPAT.yes= gnu/lib/libregex
+
+.include <dirdeps-options.mk>
diff --git a/usr.bin/grep/file.c b/usr.bin/grep/file.c
new file mode 100644
index 000000000000..0b8240504ce6
--- /dev/null
+++ b/usr.bin/grep/file.c
@@ -0,0 +1,248 @@
+/* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */
+/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 1999 James Howard and Dag-Erling Smørgrav
+ * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "grep.h"
+
+#define MAXBUFSIZ (32 * 1024) /* size of the heap read buffer and of each read(2) request */
+#define LNBUFBUMP 80 /* extra room requested when growing lnbuf for an unterminated line */
+
+static char *buffer; /* read buffer: heap allocation or mmap'ed file contents */
+static char *bufpos; /* next unconsumed byte in buffer */
+static size_t bufrem; /* number of unconsumed bytes starting at bufpos */
+static size_t fsiz; /* length of the mapping in buffer, kept for munmap(2) */
+
+static char *lnbuf; /* NUL-terminated copy of the line handed out by grep_fgetln() */
+static size_t lnbuflen; /* allocated size of lnbuf in bytes */
+/*
+ * Reloads the read buffer from f->fd with a single read(2) of at most
+ * MAXBUFSIZ bytes.  bufpos is reset to the start of the buffer and bufrem
+ * is set to the number of bytes read.  Nothing is done when filebehave
+ * is FILE_MMAP.
+ *
+ * Returns 0 on success, including end of file (bufrem is then left at 0),
+ * and -1 when read(2) fails.  An EISDIR failure is treated like end of
+ * file rather than as an error.
+ */
+static inline int
+grep_refill(struct file *f)
+{
+ ssize_t nr;
+
+ if (filebehave == FILE_MMAP) /* grep_open() mapped the whole file already */
+ return (0);
+
+ bufpos = buffer;
+ bufrem = 0; /* stays 0 if the read below fails or hits EOF */
+
+ nr = read(f->fd, buffer, MAXBUFSIZ);
+ if (nr < 0 && errno == EISDIR) /* a directory reads as an empty file */
+ nr = 0;
+ if (nr < 0)
+ return (-1);
+
+ bufrem = nr;
+ return (0);
+}
+/*
+ * Makes sure the line buffer can hold at least newlen bytes, enlarging it
+ * through grep_realloc() when it is currently smaller.  The buffer is
+ * never shrunk.
+ *
+ * Always returns 0.  NOTE(review): grep_realloc() is defined outside this
+ * file and presumably does not return on allocation failure -- confirm.
+ */
+static inline int
+grep_lnbufgrow(size_t newlen)
+{
+
+ if (lnbuflen < newlen) {
+ lnbuf = grep_realloc(lnbuf, newlen);
+ lnbuflen = newlen;
+ }
+
+ return (0);
+}
+/*
+ * Fetches the next line of f, where lines are terminated by fileeol.
+ *
+ * On success the line is copied into the shared line buffer, a NUL byte
+ * is appended, and the line buffer is returned; pc->ln.len receives the
+ * line length, which counts the fileeol byte when one was found.  A last
+ * line without fileeol is returned as it is.  The line buffer is
+ * overwritten (and possibly reallocated) by the next call.
+ *
+ * At end of file pc->ln.len is set to 0 and the current read position
+ * (bufpos) is returned.  On a read error pc->ln.len is set to 0 and NULL
+ * is returned.
+ */
+char *
+grep_fgetln(struct file *f, struct parsec *pc)
+{
+ char *p;
+ size_t len;
+ size_t off;
+ ptrdiff_t diff;
+
+ /* Fill the buffer, if necessary */
+ if (bufrem == 0 && grep_refill(f) != 0)
+ goto error;
+
+ if (bufrem == 0) {
+ /* Return zero length to indicate EOF */
+ pc->ln.len= 0;
+ return (bufpos);
+ }
+
+ /* Look for the line terminator (fileeol) in the remaining part of the buffer */
+ if ((p = memchr(bufpos, fileeol, bufrem)) != NULL) {
+ ++p; /* advance over newline */
+ len = p - bufpos;
+ if (grep_lnbufgrow(len + 1)) /* one extra byte for the NUL terminator */
+ goto error;
+ memcpy(lnbuf, bufpos, len);
+ bufrem -= len;
+ bufpos = p;
+ pc->ln.len = len;
+ lnbuf[len] = '\0';
+ return (lnbuf);
+ }
+
+ /*
+ * No terminator in the buffered data: gather the buffered bytes in the
+ * line buffer and keep refilling until fileeol or EOF shows up.
+ * len is the line length collected so far; off is the part of it that
+ * has already been copied into lnbuf.
+ */
+ for (len = bufrem, off = 0; ; len += bufrem) {
+ /* Make sure there is room for more data */
+ if (grep_lnbufgrow(len + LNBUFBUMP))
+ goto error;
+ memcpy(lnbuf + off, bufpos, len - off);
+ /* With FILE_MMAP, this is EOF; there's no more to refill */
+ if (filebehave == FILE_MMAP) {
+ bufrem -= len; /* first pass, so len == bufrem: nothing is left */
+ break;
+ }
+ off = len;
+ /* Fetch more to try and find EOL/EOF */
+ if (grep_refill(f) != 0)
+ goto error;
+ if (bufrem == 0)
+ /* EOF: return partial line */
+ break;
+ if ((p = memchr(bufpos, fileeol, bufrem)) == NULL)
+ continue;
+ /* got it: finish up the line (like code above) */
+ ++p;
+ diff = p - bufpos;
+ len += diff;
+ if (grep_lnbufgrow(len + 1)) /* one extra byte for the NUL terminator */
+ goto error;
+ memcpy(lnbuf + off, bufpos, diff);
+ bufrem -= diff;
+ bufpos = p;
+ break;
+ }
+ pc->ln.len = len;
+ lnbuf[len] = '\0';
+ return (lnbuf);
+
+error:
+ pc->ln.len = 0;
+ return (NULL);
+}
+
+/*
+ * Opens a file for processing.
+ *
+ * A NULL path selects standard input and sets lbflag.  When filebehave
+ * is FILE_MMAP and the descriptor refers to a regular file that can be
+ * mapped, the whole file becomes the read buffer; otherwise filebehave
+ * is switched to FILE_STDIO and a MAXBUFSIZ heap buffer is allocated
+ * unless one already exists.  The initial buffer contents are scanned
+ * for NUL bytes to set f->binary, except when binbehave is BINFILE_TEXT
+ * or fileeol itself is NUL.
+ *
+ * Returns the newly allocated struct file, or NULL when open(2) or the
+ * initial read fails.
+ *
+ * NOTE(review): filebehave is not restored after the FILE_STDIO
+ * fallback, so it appears to stay in effect for later files -- confirm
+ * that this is intended.
+ */
+struct file *
+grep_open(const char *path)
+{
+ struct file *f;
+
+ f = grep_malloc(sizeof *f);
+ memset(f, 0, sizeof *f);
+ if (path == NULL) {
+ /* Processing stdin implies --line-buffered. */
+ lbflag = true;
+ f->fd = STDIN_FILENO;
+ } else if ((f->fd = open(path, O_RDONLY)) == -1)
+ goto error1;
+
+ if (filebehave == FILE_MMAP) {
+ struct stat st;
+
+ if (fstat(f->fd, &st) == -1 || !S_ISREG(st.st_mode)) /* only regular files are mapped */
+ filebehave = FILE_STDIO;
+ else {
+ int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
+#ifdef MAP_PREFAULT_READ
+ flags |= MAP_PREFAULT_READ;
+#endif
+ fsiz = st.st_size;
+ buffer = mmap(NULL, fsiz, PROT_READ, flags,
+ f->fd, (off_t)0);
+ if (buffer == MAP_FAILED) /* mapping failed: fall back to read(2) */
+ filebehave = FILE_STDIO;
+ else {
+ bufrem = st.st_size;
+ bufpos = buffer;
+ madvise(buffer, st.st_size, MADV_SEQUENTIAL);
+ }
+ }
+ }
+
+ if ((buffer == NULL) || (buffer == MAP_FAILED)) /* no usable buffer yet: allocate the heap one */
+ buffer = grep_malloc(MAXBUFSIZ);
+
+ /* Fill read buffer, also catches errors early */
+ if (bufrem == 0 && grep_refill(f) != 0)
+ goto error2;
+
+ /* Check for binary stuff, if necessary */
+ if (binbehave != BINFILE_TEXT && fileeol != '\0' &&
+ memchr(bufpos, '\0', bufrem) != NULL)
+ f->binary = true;
+
+ return (f);
+
+error2:
+ close(f->fd);
+error1:
+ free(f);
+ return (NULL);
+}
+
+/*
+ * Closes a file.
+ *
+ * Closes f->fd, unmaps the read buffer when filebehave is FILE_MMAP,
+ * marks the read buffer as empty and releases the line buffer.  The
+ * struct file itself is not freed here.
+ */
+void
+grep_close(struct file *f)
+{
+
+ close(f->fd);
+
+ /* Reset read buffer and line buffer */
+ if (filebehave == FILE_MMAP) {
+ munmap(buffer, fsiz);
+ buffer = NULL;
+ }
+ bufpos = buffer; /* a heap read buffer is kept so grep_open() can reuse it */
+ bufrem = 0;
+
+ free(lnbuf);
+ lnbuf = NULL;
+ lnbuflen = 0;
+}
diff --git a/usr.bin/grep/grep.1 b/usr.bin/grep/grep.1
new file mode 100644
index 000000000000..9b50565ec605
--- /dev/null
+++ b/usr.bin/grep/grep.1
@@ -0,0 +1,592 @@
+.\" $NetBSD: grep.1,v 1.2 2011/02/16 01:31:33 joerg Exp $
+.\" $OpenBSD: grep.1,v 1.38 2010/04/05 06:30:59 jmc Exp $
+.\" Copyright (c) 1980, 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd December 18, 2022
+.Dt GREP 1
+.Os
+.Sh NAME
+.Nm grep ,
+.Nm egrep ,
+.Nm fgrep ,
+.Nm rgrep
+.Nd file pattern searcher
+.Sh SYNOPSIS
+.Nm grep
+.Bk -words
+.Op Fl abcdDEFGHhIiLlmnOopqRSsUVvwxz
+.Op Fl A Ar num
+.Op Fl B Ar num
+.Op Fl C Ar num
+.Op Fl e Ar pattern
+.Op Fl f Ar file
+.Op Fl Fl binary-files= Ns Ar value
+.Op Fl Fl color Ns Op Cm = Ns Ar when
+.Op Fl Fl colour Ns Op Cm = Ns Ar when
+.Op Fl Fl context= Ns Ar num
+.Op Fl Fl label
+.Op Fl Fl line-buffered
+.Op Fl Fl null
+.Op Ar pattern
+.Op Ar
+.Ek
+.Sh DESCRIPTION
+The
+.Nm grep
+utility searches any given input files,
+selecting lines that match one or more patterns.
+By default, a pattern matches an input line if the regular expression
+(RE) in the pattern matches the input line
+without its trailing newline.
+An empty expression matches every line.
+Each input line that matches at least one of the patterns is written
+to the standard output.
+.Pp
+.Nm grep
+is used for simple patterns and
+basic regular expressions
+.Pq BREs ;
+.Nm egrep
+can handle extended regular expressions
+.Pq EREs .
+See
+.Xr re_format 7
+for more information on regular expressions.
+.Nm fgrep
+is quicker than both
+.Nm grep
+and
+.Nm egrep ,
+but can only handle fixed patterns
+(i.e., it does not interpret regular expressions).
+Patterns may consist of one or more lines,
+allowing any of the pattern lines to match a portion of the input.
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl A Ar num , Fl Fl after-context= Ns Ar num
+Print
+.Ar num
+lines of trailing context after each match.
+See also the
+.Fl B
+and
+.Fl C
+options.
+.It Fl a , Fl Fl text
+Treat all files as ASCII text.
+Normally
+.Nm
+will simply print
+.Dq Binary file ... matches
+if files contain binary characters.
+Use of this option forces
+.Nm
+to output lines matching the specified pattern.
+.It Fl B Ar num , Fl Fl before-context= Ns Ar num
+Print
+.Ar num
+lines of leading context before each match.
+See also the
+.Fl A
+and
+.Fl C
+options.
+.It Fl b , Fl Fl byte-offset
+The offset in bytes of a matched pattern is
+displayed in front of the respective matched line.
+.It Fl C Ar num , Fl Fl context= Ns Ar num
+Print
+.Ar num
+lines of leading and trailing context surrounding each match.
+See also the
+.Fl A
+and
+.Fl B
+options.
+.It Fl c , Fl Fl count
+Only a count of selected lines is written to standard output.
+.It Fl Fl colour= Ns Oo Ar when Oc , Fl Fl color= Ns Oo Ar when Oc
+Mark up the matching text with the expression stored in the
+.Ev GREP_COLOR
+environment variable.
+The possible values of
+.Ar when
+are
+.Dq Cm never ,
+.Dq Cm always
+and
+.Dq Cm auto .
+.It Fl D Ar action , Fl Fl devices= Ns Ar action
+Specify the demanded
+.Ar action
+for devices, FIFOs and sockets.
+The default
+.Ar action
+is
+.Dq Cm read ,
+which means that they are read as if they were normal files.
+If the
+.Ar action
+is set to
+.Dq Cm skip ,
+devices are silently skipped.
+.It Fl d Ar action , Fl Fl directories= Ns Ar action
+Specify the demanded
+.Ar action
+for directories.
+It is
+.Dq Cm read
+by default, which means that the directories
+are read in the same manner as normal files.
+Other possible values are
+.Dq Cm skip
+to silently ignore the directories, and
+.Dq Cm recurse
+to read them recursively, which has the same effect as the
+.Fl R
+and
+.Fl r
+options.
+.It Fl E , Fl Fl extended-regexp
+Interpret
+.Ar pattern
+as an extended regular expression
+(i.e., force
+.Nm grep
+to behave as
+.Nm egrep ) .
+.It Fl e Ar pattern , Fl Fl regexp= Ns Ar pattern
+Specify a
+.Ar pattern
+used during the search of the input:
+an input line is selected if it matches any of the specified patterns.
+This option is most useful when multiple
+.Fl e
+options are used to specify multiple patterns,
+or when a
+.Ar pattern
+begins with a dash
+.Pq Sq - .
+.It Fl Fl exclude Ar pattern
+If specified, it excludes files matching the given
+filename
+.Ar pattern
+from the search.
+Note that
+.Fl Fl exclude
+and
+.Fl Fl include
+patterns are processed in the order given.
+If a name matches multiple patterns, the latest matching rule wins.
+If no
+.Fl Fl include
+pattern is specified, all files are searched that are
+not excluded.
+Patterns are matched to the full path specified,
+not only to the filename component.
+.It Fl Fl exclude-dir Ar pattern
+If
+.Fl R
+is specified, it excludes directories matching the
+given filename
+.Ar pattern
+from the search.
+Note that
+.Fl Fl exclude-dir
+and
+.Fl Fl include-dir
+patterns are processed in the order given.
+If a name matches multiple patterns, the latest matching rule wins.
+If no
+.Fl Fl include-dir
+pattern is specified, all directories are searched that are
+not excluded.
+.It Fl F , Fl Fl fixed-strings
+Interpret
+.Ar pattern
+as a set of fixed strings
+(i.e., force
+.Nm grep
+to behave as
+.Nm fgrep ) .
+.It Fl f Ar file , Fl Fl file= Ns Ar file
+Read one or more newline separated patterns from
+.Ar file .
+Empty pattern lines match every input line.
+Newlines are not considered part of a pattern.
+If
+.Ar file
+is empty, nothing is matched.
+.It Fl G , Fl Fl basic-regexp
+Interpret
+.Ar pattern
+as a basic regular expression
+(i.e., force
+.Nm grep
+to behave as traditional
+.Nm grep ) .
+.It Fl H
+Always print filename headers with output lines.
+.It Fl h , Fl Fl no-filename
+Never print filename headers
+.Pq i.e., filenames
+with output lines.
+.It Fl Fl help
+Print a brief help message.
+.It Fl I
+Ignore binary files.
+This option is equivalent to the
+.Dq Fl Fl binary-files= Ns Cm without-match
+option.
+.It Fl i , Fl Fl ignore-case
+Perform case insensitive matching.
+By default,
+.Nm grep
+is case sensitive.
+.It Fl Fl include Ar pattern
+If specified, only files matching the given filename
+.Ar pattern
+are searched.
+Note that
+.Fl Fl include
+and
+.Fl Fl exclude
+patterns are processed in the order given.
+If a name matches multiple patterns, the latest matching rule wins.
+Patterns are matched to the full path specified,
+not only to the filename component.
+.It Fl Fl include-dir Ar pattern
+If
+.Fl R
+is specified, only directories matching the given filename
+.Ar pattern
+are searched.
+Note that
+.Fl Fl include-dir
+and
+.Fl Fl exclude-dir
+patterns are processed in the order given.
+If a name matches multiple patterns, the latest matching rule wins.
+.It Fl L , Fl Fl files-without-match
+Only the names of files not containing selected lines are written to
+standard output.
+Pathnames are listed once per file searched.
+If the standard input is searched, the string
+.Dq (standard input)
+is written unless a
+.Fl Fl label
+is specified.
+.It Fl l , Fl Fl files-with-matches
+Only the names of files containing selected lines are written to
+standard output.
+.Nm grep
+will only search a file until a match has been found,
+making searches potentially less expensive.
+Pathnames are listed once per file searched.
+If the standard input is searched, the string
+.Dq (standard input)
+is written unless a
+.Fl Fl label
+is specified.
+.It Fl Fl label
+Label to use in place of
+.Dq (standard input)
+for a file name where a file name would normally be printed.
+This option applies to
+.Fl H ,
+.Fl L ,
+and
+.Fl l .
+.It Fl Fl mmap
+Use
+.Xr mmap 2
+instead of
+.Xr read 2
+to read input, which can result in better performance under some
+circumstances but can cause undefined behaviour.
+.It Fl m Ar num , Fl Fl max-count= Ns Ar num
+Stop reading the file after
+.Ar num
+matches.
+.It Fl n , Fl Fl line-number
+Each output line is preceded by its relative line number in the file,
+starting at line 1.
+The line number counter is reset for each file processed.
+This option is ignored if
+.Fl c ,
+.Fl L ,
+.Fl l ,
+or
+.Fl q
+is
+specified.
+.It Fl Fl null
+Prints a zero-byte after the file name.
+.It Fl O
+If
+.Fl R
+is specified, follow symbolic links only if they were explicitly listed
+on the command line.
+The default is not to follow symbolic links.
+.It Fl o , Fl Fl only-matching
+Prints only the matching part of the lines.
+.It Fl p
+If
+.Fl R
+is specified, no symbolic links are followed.
+This is the default.
+.It Fl q , Fl Fl quiet , Fl Fl silent
+Quiet mode:
+suppress normal output.
+.Nm grep
+will only search a file until a match has been found,
+making searches potentially less expensive.
+.It Fl R , Fl r , Fl Fl recursive
+Recursively search subdirectories listed
+(i.e., force
+.Nm grep
+to behave as
+.Nm rgrep ) .
+.It Fl S
+If
+.Fl R
+is specified, all symbolic links are followed.
+The default is not to follow symbolic links.
+.It Fl s , Fl Fl no-messages
+Silent mode.
+Nonexistent and unreadable files are ignored
+(i.e., their error messages are suppressed).
+.It Fl U , Fl Fl binary
+Search binary files, but do not attempt to print them.
+.It Fl u
+This option has no effect and is provided only for compatibility with GNU grep.
+.It Fl V , Fl Fl version
+Display version information and exit.
+.It Fl v , Fl Fl invert-match
+Selected lines are those
+.Em not
+matching any of the specified patterns.
+.It Fl w , Fl Fl word-regexp
+The expression is searched for as a word (as if surrounded by
+.Sq [[:<:]]
+and
+.Sq [[:>:]] ;
+see
+.Xr re_format 7 ) .
+This option has no effect if
+.Fl x
+is also specified.
+.It Fl x , Fl Fl line-regexp
+Only input lines selected against an entire fixed string or regular
+expression are considered to be matching lines.
+.It Fl y
+Equivalent to
+.Fl i .
+Obsoleted.
+.It Fl z , Fl Fl null-data
+Treat input and output data as sequences of lines terminated by a
+zero-byte instead of a newline.
+.It Fl Fl binary-files= Ns Ar value
+Controls searching and printing of binary files.
+Options are:
+.Bl -tag -compact -width "binary (default)"
+.It Cm binary No (default)
+Search binary files but do not print them.
+.It Cm without-match
+Do not search binary files.
+.It Cm text
+Treat all files as text.
+.El
+.It Fl Fl line-buffered
+Force output to be line buffered.
+By default, output is line buffered when standard output is a terminal
+and block buffered otherwise.
+.El
+.Pp
+If no file arguments are specified, the standard input is used.
+Additionally,
+.Dq Cm -
+may be used in place of a file name, anywhere that a file name is accepted, to
+read from standard input.
+This includes both
+.Fl f
+and file arguments.
+.Sh ENVIRONMENT
+The following environment variables affect the execution of
+.Nm :
+.Bl -tag -width "GREP_OPTIONS"
+.It Ev GREP_COLOR
+This variable specifies the color used to highlight matched (non-empty) text.
+.It Ev GREP_OPTIONS
+This variable specifies default options
+to be placed in front of any explicit options.
+It may cause problems with portable scripts.
+.It Ev TERM
+This variable specifies the type name of the terminal, console or display-device
+type to be used.
+See
+.Xr term 7 .
+.El
+.Sh EXIT STATUS
+The
+.Nm grep
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width flag -compact
+.It Li 0
+One or more lines were selected.
+.It Li 1
+No lines were selected.
+.It Li \*(Gt1
+An error occurred.
+.El
+.Sh EXAMPLES
+.Bl -dash
+.It
+Find all occurrences of the pattern
+.Sq patricia
+in a file:
+.Pp
+.Dl $ grep 'patricia' myfile
+.It
+Same as above but looking only for complete words:
+.Pp
+.Dl $ grep -w 'patricia' myfile
+.It
+Count occurrences of the exact pattern
+.Sq FOO :
+.Pp
+.Dl $ grep -c FOO myfile
+.It
+Same as above but ignoring case:
+.Pp
+.Dl $ grep -c -i FOO myfile
+.It
+Find all occurrences of the pattern
+.Ql .Pp
+at the beginning of a line:
+.Pp
+.Dl $ grep '^\e.Pp' myfile
+.Pp
+The apostrophes ensure the entire expression is evaluated by
+.Nm grep
+instead of by the user's shell.
+The caret
+.Ql ^
+matches the null string at the beginning of a line,
+and the
+.Ql \e
+escapes the
+.Ql \&. ,
+which would otherwise match any character.
+.It
+Find all lines in a file which do not contain the words
+.Sq foo
+or
+.Sq bar :
+.Pp
+.Dl $ grep -v -e 'foo' -e 'bar' myfile
+.It
+Peruse the file
+.Sq calendar
+looking for either 19, 20, or 25 using extended regular expressions:
+.Pp
+.Dl $ egrep '19|20|25' calendar
+.It
+Show matching lines and the name of the
+.Sq *.h
+files which contain the pattern
+.Sq FIXME .
+Do the search recursively from the
+.Pa /usr/src/sys/arm
+directory:
+.Pp
+.Dl $ grep -H -R FIXME --include="*.h" /usr/src/sys/arm/
+.It
+Same as above but show only the name of the matching file:
+.Pp
+.Dl $ grep -l -R FIXME --include="*.h" /usr/src/sys/arm/
+.It
+Show lines containing the text
+.Sq foo .
+The matching part of the output is colored and every line is prefixed with
+the line number and the offset in the file for those lines that matched.
+.Pp
+.Dl $ grep -b --colour -n foo myfile
+.It
+Show lines that match the extended regular expression patterns read from the
+standard input:
+.Pp
+.Dl $ echo -e 'Free\enBSD\enAll.*reserved' | grep -E -f - myfile
+.It
+Show lines from the output of the
+.Xr pciconf 8
+command matching the specified extended regular expression along with
+three lines of leading context and one line of trailing context:
+.Pp
+.Dl $ pciconf -lv | grep -B3 -A1 -E 'class.*=.*storage'
+.It
+Suppress any output and use the exit status to show an appropriate message:
+.Pp
+.Dl $ grep -q foo myfile && echo File matches
+.El
+.Sh SEE ALSO
+.Xr ed 1 ,
+.Xr ex 1 ,
+.Xr sed 1 ,
+.Xr zgrep 1 ,
+.Xr re_format 7
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
+.Pp
+The flags
+.Op Fl AaBbCDdGHhILmopRSUVw
+are extensions to that specification, and the behaviour of the
+.Fl f
+flag when used with an empty pattern file is left undefined.
+.Pp
+All long options are provided for compatibility with
+GNU versions of this utility.
+.Pp
+Historic versions of the
+.Nm grep
+utility also supported the flags
+.Op Fl ruy .
+This implementation supports those options;
+however, their use is strongly discouraged.
+.Sh HISTORY
+The
+.Nm grep
+command first appeared in
+.At v6 .
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
new file mode 100644
index 000000000000..feaf17d7c1e1
--- /dev/null
+++ b/usr.bin/grep/grep.c
@@ -0,0 +1,724 @@
+/* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
+/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 1999 James Howard and Dag-Erling Smørgrav
+ * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <libgen.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "grep.h"
+
+const char *errstr[] = { /* messages addressed by index; usage() prints 3-6 */
+ "",
+/* 1*/ "(standard input)",
+/* 2*/ "unknown %s option", /* %s: long option name, e.g. --devices */
+/* 3*/ "usage: %s [-abcDEFGHhIiLlmnOopqRSsUVvwxz] [-A num] [-B num] [-C num]\n",
+/* 4*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
+/* 5*/ "\t[--context=num] [--directories=action] [--label] [--line-buffered]\n",
+/* 6*/ "\t[--null] [pattern] [file ...]\n",
+/* 7*/ "Binary file %s matches\n",
+/* 8*/ "%s (BSD grep, GNU compatible) %s\n", /* program name, VERSION */
+};
+
+/* Flags passed to regcomp() and regexec() */
+int cflags = REG_NOSUB | REG_NEWLINE;
+int eflags = REG_STARTEND;
+
+bool matchall;
+
+/* Searching patterns */
+unsigned int patterns;
+static unsigned int pattern_sz;
+struct pat *pattern;
+regex_t *r_pattern;
+
+/* Filename exclusion/inclusion patterns */
+unsigned int fpatterns, dpatterns;
+static unsigned int fpattern_sz, dpattern_sz;
+struct epat *dpattern, *fpattern;
+
+/* For regex errors */
+char re_error[RE_ERROR_BUF + 1];
+
+/* Command-line flags */
+long long Aflag; /* -A x: print x lines trailing each match */
+long long Bflag; /* -B x: print x lines leading each match */
+bool Hflag; /* -H: always print file name */
+bool Lflag; /* -L: only show names of files with no matches */
+bool bflag; /* -b: show the byte offset of each match (--byte-offset) */
+bool cflag; /* -c: only show a count of matching lines */
+bool hflag; /* -h: don't print filename headers */
+bool iflag; /* -i: ignore case */
+bool lflag; /* -l: only show names of files with matches */
+bool mflag; /* -m x: stop reading the files after x matches */
+long long mcount; /* count for -m */
+long long mlimit; /* requested value for -m */
+char fileeol; /* line terminator: '\n' by default, '\0' with -z */
+bool nflag; /* -n: show line numbers in front of matching lines */
+bool oflag; /* -o: print only matching part */
+bool qflag; /* -q: quiet mode (don't output anything) */
+bool sflag; /* -s: silent mode (ignore errors) */
+bool vflag; /* -v: only show non-matching lines */
+bool wflag; /* -w: pattern must start and end on word boundaries */
+bool xflag; /* -x: pattern must match entire line */
+bool lbflag; /* --line-buffered */
+bool nullflag; /* --null */
+char *label; /* --label */
+const char *color; /* --color */
+int grepbehave = GREP_BASIC; /* -EFG: type of the regex */
+int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
+int filebehave = FILE_STDIO; /* -u, --mmap: switched to FILE_MMAP */
+int devbehave = DEV_READ; /* -D: handling of devices */
+int dirbehave = DIR_READ; /* -dRr: handling of directories */
+int linkbehave = LINK_SKIP; /* -OpS: handling of symlinks */
+
+bool dexclude, dinclude; /* --exclude-dir and --include-dir */
+bool fexclude, finclude; /* --exclude and --include */
+
+enum { /* getopt_long() return values for the long options handled in main() */
+ BIN_OPT = CHAR_MAX + 1, /* above CHAR_MAX: no clash with a short option */
+ COLOR_OPT,
+ HELP_OPT,
+ MMAP_OPT,
+ LINEBUF_OPT,
+ LABEL_OPT,
+ NULL_OPT,
+ R_EXCLUDE_OPT,
+ R_INCLUDE_OPT,
+ R_DEXCLUDE_OPT,
+ R_DINCLUDE_OPT
+};
+
+static inline const char *init_color(const char *);
+
+/* Housekeeping */
+bool file_err; /* file reading error */
+
+/*
+ * Prints usage information and exits with status 2.
+ */
+static void
+usage(void)
+{
+	fprintf(stderr, errstr[3], getprogname());
+	/* The remaining usage lines carry no conversion specifiers. */
+	for (int i = 4; i <= 6; i++)
+		fprintf(stderr, "%s", errstr[i]);
+	exit(2);
+}
+
+static const char *optstr = "0123456789A:B:C:D:EFGHILOSRUVabcd:e:f:hilm:nopqrsuvwxyz";
+
+static const struct option long_options[] = /* long names and the code main() sees */
+{
+ {"binary-files", required_argument, NULL, BIN_OPT},
+ {"help", no_argument, NULL, HELP_OPT},
+ {"mmap", no_argument, NULL, MMAP_OPT},
+ {"line-buffered", no_argument, NULL, LINEBUF_OPT},
+ {"label", required_argument, NULL, LABEL_OPT},
+ {"null", no_argument, NULL, NULL_OPT},
+ {"color", optional_argument, NULL, COLOR_OPT},
+ {"colour", optional_argument, NULL, COLOR_OPT}, /* alternate spelling of --color */
+ {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
+ {"include", required_argument, NULL, R_INCLUDE_OPT},
+ {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
+ {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
+ {"after-context", required_argument, NULL, 'A'},
+ {"text", no_argument, NULL, 'a'},
+ {"before-context", required_argument, NULL, 'B'},
+ {"byte-offset", no_argument, NULL, 'b'},
+ {"context", optional_argument, NULL, 'C'}, /* without a value main() uses 2 */
+ {"count", no_argument, NULL, 'c'},
+ {"devices", required_argument, NULL, 'D'},
+ {"directories", required_argument, NULL, 'd'},
+ {"extended-regexp", no_argument, NULL, 'E'},
+ {"regexp", required_argument, NULL, 'e'},
+ {"fixed-strings", no_argument, NULL, 'F'},
+ {"file", required_argument, NULL, 'f'},
+ {"basic-regexp", no_argument, NULL, 'G'},
+ {"no-filename", no_argument, NULL, 'h'},
+ {"with-filename", no_argument, NULL, 'H'},
+ {"ignore-case", no_argument, NULL, 'i'},
+ {"files-with-matches", no_argument, NULL, 'l'},
+ {"files-without-match", no_argument, NULL, 'L'},
+ {"max-count", required_argument, NULL, 'm'},
+ {"line-number", no_argument, NULL, 'n'},
+ {"only-matching", no_argument, NULL, 'o'},
+ {"quiet", no_argument, NULL, 'q'},
+ {"silent", no_argument, NULL, 'q'}, /* same code as --quiet */
+ {"recursive", no_argument, NULL, 'r'}, /* main() treats 'r' like 'R' */
+ {"no-messages", no_argument, NULL, 's'},
+ {"binary", no_argument, NULL, 'U'},
+ {"unix-byte-offsets", no_argument, NULL, 'u'}, /* main() handles 'u' like --mmap */
+ {"invert-match", no_argument, NULL, 'v'},
+ {"version", no_argument, NULL, 'V'},
+ {"word-regexp", no_argument, NULL, 'w'},
+ {"line-regexp", no_argument, NULL, 'x'},
+ {"null-data", no_argument, NULL, 'z'},
+ {NULL, no_argument, NULL, 0} /* terminator */
+};
+
+/*
+ * Stores a NUL-terminated copy of pat; an empty pattern only sets matchall.
+ */
+static void
+add_pattern(char *pat, size_t len)
+{
+	struct pat *slot;
+
+	if (len == 0) {
+		matchall = true;
+		return;
+	}
+	/* Grow the array to 2n + 1 slots once every slot is in use. */
+	if (patterns == pattern_sz) {
+		pattern_sz = 2 * pattern_sz + 1;
+		pattern = grep_realloc(pattern, pattern_sz * sizeof(*pattern));
+	}
+	/* Drop one trailing newline; len is known to be non-zero here. */
+	if (pat[len - 1] == '\n')
+		len--;
+	slot = &pattern[patterns++];
+	/* pat may not be NUL-terminated, so copy by length and terminate. */
+	slot->pat = grep_malloc(len + 1);
+	memcpy(slot->pat, pat, len);
+	slot->pat[len] = '\0';
+	slot->len = len;
+}
+
+/*
+ * Records pat as a file name include/exclude pattern with the given mode.
+ */
+static void
+add_fpattern(const char *pat, int mode)
+{
+	struct epat *slot;
+
+	/* Grow the array to 2n + 1 slots once every slot is in use. */
+	if (fpatterns == fpattern_sz) {
+		fpattern_sz = 2 * fpattern_sz + 1;
+		fpattern = grep_realloc(fpattern, fpattern_sz * sizeof(*fpattern));
+	}
+	slot = &fpattern[fpatterns++];
+	slot->pat = grep_strdup(pat);
+	slot->mode = mode;
+}
+
+/*
+ * Records pat as a directory include/exclude pattern with the given mode.
+ */
+static void
+add_dpattern(const char *pat, int mode)
+{
+	struct epat *slot;
+
+	/* Grow the array to 2n + 1 slots once every slot is in use. */
+	if (dpatterns == dpattern_sz) {
+		dpattern_sz = 2 * dpattern_sz + 1;
+		dpattern = grep_realloc(dpattern, dpattern_sz * sizeof(*dpattern));
+	}
+	slot = &dpattern[dpatterns++];
+	slot->pat = grep_strdup(pat);
+	slot->mode = mode;
+}
+
+/*
+ * Feeds every line of the file fn ("-" means standard input) to add_pattern().
+ */
+static void
+read_patterns(const char *fn)
+{
+	struct stat sb;
+	FILE *fp;
+	char *buf = NULL;
+	size_t bufsz = 0;
+	ssize_t nread;
+	bool from_stdin = (strcmp(fn, "-") == 0);
+
+	if (from_stdin)
+		fp = stdin;
+	else if ((fp = fopen(fn, "r")) == NULL)
+		err(2, "%s", fn);
+	/* A directory, or anything fstat() rejects, yields no patterns. */
+	if (fstat(fileno(fp), &sb) == -1 || S_ISDIR(sb.st_mode)) {
+		fclose(fp);
+		return;
+	}
+	while ((nread = getline(&buf, &bufsz, fp)) != -1) {
+		/* Skip lines starting with NUL; a bare newline is an empty pattern. */
+		if (buf[0] == '\0')
+			continue;
+		add_pattern(buf, buf[0] == '\n' ? 0 : (size_t)nread);
+	}
+	free(buf);
+	if (ferror(fp))
+		err(2, "%s", fn);
+	if (!from_stdin)
+		fclose(fp);
+}
+
+static inline const char *
+init_color(const char *d)
+{
+	const char *env = getenv("GREP_COLOR");
+
+	/* An unset or empty GREP_COLOR falls back to the default d. */
+	return ((env == NULL || env[0] == '\0') ? d : env);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char **aargv, **eargv, *eopts;
+ char *ep;
+ const char *pn;
+ long long l;
+ unsigned int aargc, eargc, i;
+ int c, lastc, needpattern, newarg, prevoptind;
+ bool matched;
+
+ setlocale(LC_ALL, "");
+
+ /*
+ * Check how we've been invoked to determine the behavior we should
+ * exhibit. In this way we can have all the functionalities in one
+ * binary without the need of scripting and using ugly hacks.
+ */
+ pn = getprogname();
+ switch (pn[0]) {
+ case 'e':
+ grepbehave = GREP_EXTENDED;
+ break;
+ case 'f':
+ grepbehave = GREP_FIXED;
+ break;
+ case 'r':
+ dirbehave = DIR_RECURSE;
+ Hflag = true;
+ break;
+ }
+
+ lastc = '\0';
+ newarg = 1;
+ prevoptind = 1;
+ needpattern = 1;
+ fileeol = '\n';
+
+ eopts = getenv("GREP_OPTIONS");
+
+ /* support for extra arguments in GREP_OPTIONS */
+ eargc = 0;
+ if (eopts != NULL && eopts[0] != '\0') {
+ char *str;
+
+ /* count the spaces: there is at most one more argument than that */
+ for (unsigned int j = 0; j < strlen(eopts); j++)
+ if (eopts[j] == ' ')
+ eargc++;
+
+ eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
+
+ eargc = 0;
+ /* parse extra arguments */
+ while ((str = strsep(&eopts, " ")) != NULL)
+ if (str[0] != '\0')
+ eargv[eargc++] = grep_strdup(str);
+
+ aargv = (char **)grep_calloc(eargc + argc + 1,
+ sizeof(char *));
+
+ aargv[0] = argv[0];
+ for (i = 0; i < eargc; i++)
+ aargv[i + 1] = eargv[i];
+ for (int j = 1; j < argc; j++, i++)
+ aargv[i + 1] = argv[j];
+
+ aargc = eargc + argc;
+ } else {
+ aargv = argv;
+ aargc = argc;
+ }
+
+ while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
+ -1)) {
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (newarg || !isdigit(lastc)) /* first digit of a new number */
+ Aflag = 0;
+ else if (Aflag > LLONG_MAX / 10 - 1) {
+ errno = ERANGE;
+ err(2, NULL);
+ }
+
+ Aflag = Bflag = (Aflag * 10) + (c - '0');
+ break;
+ case 'C':
+ if (optarg == NULL) { /* --context given without a value */
+ Aflag = Bflag = 2;
+ break;
+ }
+ /* FALLTHROUGH */
+ case 'A':
+ /* FALLTHROUGH */
+ case 'B':
+ errno = 0;
+ l = strtoll(optarg, &ep, 10);
+ if (errno == ERANGE || errno == EINVAL)
+ err(2, NULL);
+ else if (ep[0] != '\0') {
+ errno = EINVAL;
+ err(2, NULL);
+ } else if (l < 0) {
+ errno = EINVAL;
+ err(2, "context argument must be non-negative");
+ }
+
+ if (c == 'A')
+ Aflag = l;
+ else if (c == 'B')
+ Bflag = l;
+ else
+ Aflag = Bflag = l;
+ break;
+ case 'a':
+ binbehave = BINFILE_TEXT;
+ break;
+ case 'b':
+ bflag = true;
+ break;
+ case 'c':
+ cflag = true;
+ break;
+ case 'D':
+ if (strcasecmp(optarg, "skip") == 0)
+ devbehave = DEV_SKIP;
+ else if (strcasecmp(optarg, "read") == 0)
+ devbehave = DEV_READ;
+ else
+ errx(2, errstr[2], "--devices");
+ break;
+ case 'd':
+ if (strcasecmp("recurse", optarg) == 0) {
+ Hflag = true;
+ dirbehave = DIR_RECURSE;
+ } else if (strcasecmp("skip", optarg) == 0)
+ dirbehave = DIR_SKIP;
+ else if (strcasecmp("read", optarg) == 0)
+ dirbehave = DIR_READ;
+ else
+ errx(2, errstr[2], "--directories");
+ break;
+ case 'E':
+ grepbehave = GREP_EXTENDED;
+ break;
+ case 'e':
+ {
+ char *token;
+ char *string = optarg;
+
+ while ((token = strsep(&string, "\n")) != NULL)
+ add_pattern(token, strlen(token));
+ }
+ needpattern = 0;
+ break;
+ case 'F':
+ grepbehave = GREP_FIXED;
+ break;
+ case 'f':
+ read_patterns(optarg);
+ needpattern = 0;
+ break;
+ case 'G':
+ grepbehave = GREP_BASIC;
+ break;
+ case 'H':
+ Hflag = true;
+ break;
+ case 'h':
+ Hflag = false;
+ hflag = true;
+ break;
+ case 'I':
+ binbehave = BINFILE_SKIP;
+ break;
+ case 'i':
+ case 'y':
+ iflag = true;
+ cflags |= REG_ICASE;
+ break;
+ case 'L':
+ lflag = false;
+ Lflag = true;
+ break;
+ case 'l':
+ Lflag = false;
+ lflag = true;
+ break;
+ case 'm':
+ mflag = true;
+ errno = 0;
+ mlimit = mcount = strtoll(optarg, &ep, 10);
+ if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
+ ((errno == EINVAL) && (mcount == 0)))
+ err(2, NULL);
+ else if (ep[0] != '\0') {
+ errno = EINVAL;
+ err(2, NULL);
+ }
+ break;
+ case 'n':
+ nflag = true;
+ break;
+ case 'O':
+ linkbehave = LINK_EXPLICIT;
+ break;
+ case 'o':
+ oflag = true;
+ cflags &= ~REG_NOSUB;
+ break;
+ case 'p':
+ linkbehave = LINK_SKIP;
+ break;
+ case 'q':
+ qflag = true;
+ break;
+ case 'S':
+ linkbehave = LINK_READ;
+ break;
+ case 'R':
+ case 'r':
+ dirbehave = DIR_RECURSE;
+ Hflag = true;
+ break;
+ case 's':
+ sflag = true;
+ break;
+ case 'U':
+ binbehave = BINFILE_BIN;
+ break;
+ case 'u':
+ case MMAP_OPT:
+ filebehave = FILE_MMAP;
+ break;
+ case 'V':
+ printf(errstr[8], getprogname(), VERSION);
+ exit(0);
+ case 'v':
+ vflag = true;
+ break;
+ case 'w':
+ wflag = true;
+ cflags &= ~REG_NOSUB;
+ break;
+ case 'x':
+ xflag = true;
+ cflags &= ~REG_NOSUB;
+ break;
+ case 'z':
+ fileeol = '\0';
+ cflags &= ~REG_NEWLINE;
+ break;
+ case BIN_OPT:
+ if (strcasecmp("binary", optarg) == 0)
+ binbehave = BINFILE_BIN;
+ else if (strcasecmp("without-match", optarg) == 0)
+ binbehave = BINFILE_SKIP;
+ else if (strcasecmp("text", optarg) == 0)
+ binbehave = BINFILE_TEXT;
+ else
+ errx(2, errstr[2], "--binary-files");
+ break;
+ case COLOR_OPT:
+ color = NULL;
+ if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
+ strcasecmp("tty", optarg) == 0 ||
+ strcasecmp("if-tty", optarg) == 0) {
+ char *term;
+
+ term = getenv("TERM");
+ if (isatty(STDOUT_FILENO) && term != NULL &&
+ strcasecmp(term, "dumb") != 0)
+ color = init_color("01;31");
+ } else if (strcasecmp("always", optarg) == 0 ||
+ strcasecmp("yes", optarg) == 0 ||
+ strcasecmp("force", optarg) == 0) {
+ color = init_color("01;31");
+ } else if (strcasecmp("never", optarg) != 0 &&
+ strcasecmp("none", optarg) != 0 &&
+ strcasecmp("no", optarg) != 0)
+ errx(2, errstr[2], "--color");
+ cflags &= ~REG_NOSUB;
+ break;
+ case LABEL_OPT:
+ label = optarg;
+ break;
+ case LINEBUF_OPT:
+ lbflag = true;
+ break;
+ case NULL_OPT:
+ nullflag = true;
+ break;
+ case R_INCLUDE_OPT:
+ finclude = true;
+ add_fpattern(optarg, INCL_PAT);
+ break;
+ case R_EXCLUDE_OPT:
+ fexclude = true;
+ add_fpattern(optarg, EXCL_PAT);
+ break;
+ case R_DINCLUDE_OPT:
+ dinclude = true;
+ add_dpattern(optarg, INCL_PAT);
+ break;
+ case R_DEXCLUDE_OPT:
+ dexclude = true;
+ add_dpattern(optarg, EXCL_PAT);
+ break;
+ case HELP_OPT:
+ default:
+ usage();
+ }
+ lastc = c;
+ newarg = optind != prevoptind;
+ prevoptind = optind;
+ }
+ aargc -= optind;
+ aargv += optind;
+
+ /* xflag takes precedence, don't confuse the matching bits. */
+ if (wflag && xflag)
+ wflag = false;
+
+ /* Fail if we don't have any pattern */
+ if (aargc == 0 && needpattern)
+ usage();
+
+ /* Process patterns from command line */
+ if (aargc != 0 && needpattern) {
+ char *token;
+ char *string = *aargv;
+
+ while ((token = strsep(&string, "\n")) != NULL)
+ add_pattern(token, strlen(token));
+ --aargc;
+ ++aargv;
+ }
+
+ switch (grepbehave) {
+ case GREP_BASIC:
+ break;
+ case GREP_FIXED:
+ /*
+ * regex(3) implementations that support fixed-string searches generally
+ * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
+ * here. If neither are defined, GREP_FIXED later implies that the
+ * internal literal matcher should be used. Other cflags that have
+ * the same interpretation as REG_NOSPEC and REG_LITERAL should be
+ * similarly added here, and grep.h should be amended to take this into
+ * consideration when defining WITH_INTERNAL_NOSPEC.
+ */
+#if defined(REG_NOSPEC)
+ cflags |= REG_NOSPEC;
+#elif defined(REG_LITERAL)
+ cflags |= REG_LITERAL;
+#endif
+ break;
+ case GREP_EXTENDED:
+ cflags |= REG_EXTENDED;
+ break;
+ default:
+ /* NOTREACHED */
+ usage();
+ }
+
+ r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
+
+#ifdef WITH_INTERNAL_NOSPEC
+ if (grepbehave != GREP_FIXED) {
+#else
+ {
+#endif
+ /* Compile every pattern with regcomp(); a failure is fatal. */
+ for (i = 0; i < patterns; ++i) {
+ c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+ if (c != 0) {
+ regerror(c, &r_pattern[i], re_error,
+ RE_ERROR_BUF);
+ errx(2, "%s", re_error);
+ }
+ }
+ }
+
+ if (lbflag)
+ setlinebuf(stdout);
+
+ if ((aargc == 0 || aargc == 1) && !Hflag) /* at most one file operand */
+ hflag = true;
+
+ initqueue();
+
+ if (aargc == 0 && dirbehave != DIR_RECURSE)
+ exit(!procfile("-"));
+
+ if (dirbehave == DIR_RECURSE)
+ matched = grep_tree(aargv);
+ else
+ for (matched = false; aargc--; ++aargv) {
+ if ((finclude || fexclude) && !file_matching(*aargv))
+ continue;
+ if (procfile(*aargv))
+ matched = true;
+ }
+
+ if (Lflag)
+ matched = !matched;
+
+ /*
+ * Exit 0 on a match, 1 on no match, and 2 if a file error occurred;
+ * with -q a match still exits 0 despite file errors.
+ */
+ exit(matched ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));
+}
diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h
new file mode 100644
index 000000000000..211fe1ae75c8
--- /dev/null
+++ b/usr.bin/grep/grep.h
@@ -0,0 +1,159 @@
+/* $NetBSD: grep.h,v 1.5 2011/02/27 17:33:37 joerg Exp $ */
+/* $OpenBSD: grep.h,v 1.15 2010/04/05 03:03:55 tedu Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 1999 James Howard and Dag-Erling Smørgrav
+ * Copyright (c) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <bzlib.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <zlib.h>
+
+extern const char *errstr[];
+
+#define VERSION "2.6.0-FreeBSD"
+
+#define GREP_FIXED 0
+#define GREP_BASIC 1
+#define GREP_EXTENDED 2
+
+#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
+#define WITH_INTERNAL_NOSPEC
+#endif
+
+#define BINFILE_BIN 0
+#define BINFILE_SKIP 1
+#define BINFILE_TEXT 2
+
+#define FILE_STDIO 0
+#define FILE_MMAP 1
+
+#define DIR_READ 0
+#define DIR_SKIP 1
+#define DIR_RECURSE 2
+
+#define DEV_READ 0
+#define DEV_SKIP 1
+
+#define LINK_READ 0
+#define LINK_EXPLICIT 1
+#define LINK_SKIP 2
+
+#define EXCL_PAT 0
+#define INCL_PAT 1
+
+#define MAX_MATCHES 32
+
+struct file {
+ int fd;
+ bool binary;
+};
+
+struct str {
+ off_t boff; /* NOTE(review): presumably a byte offset -- confirm in util.c */
+ off_t off; /* NOTE(review): presumably a byte offset -- confirm in util.c */
+ size_t len; /* number of bytes in dat */
+ char *dat; /* line data; enqueue() stores a NUL-terminated copy */
+ char *file; /* enqueue() copies this pointer, not the string */
+ int line_no;
+};
+
+struct pat {
+ char *pat; /* NUL-terminated copy made by add_pattern() */
+ int len; /* length of pat, not counting the NUL */
+};
+
+struct epat {
+ char *pat; /* copy made with grep_strdup() */
+ int mode; /* EXCL_PAT or INCL_PAT */
+};
+
+/*
+ * Parsing context; used to hold things like matches made and
+ * other useful bits
+ */
+struct parsec {
+ regmatch_t matches[MAX_MATCHES]; /* Matches made */
+ /* XXX TODO: This should be a chunk, not a line */
+ struct str ln; /* Current line */
+ size_t lnstart; /* Position in line */
+ size_t matchidx; /* Latest match index */
+ int printed; /* Metadata printed? */
+ bool binary; /* Binary file? */
+ bool cntlines; /* Count lines? */
+};
+
+/* Flags passed to regcomp() and regexec() */
+extern int cflags, eflags;
+
+/* Command line flags. NOTE(review): grep.c defines no Eflag, Fflag or Gflag -- confirm they are still needed. */
+extern bool Eflag, Fflag, Gflag, Hflag, Lflag,
+ bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag,
+ qflag, sflag, vflag, wflag, xflag;
+extern bool dexclude, dinclude, fexclude, finclude, lbflag, nullflag;
+extern long long Aflag, Bflag;
+extern long long mcount;
+extern long long mlimit;
+extern char fileeol;
+extern char *label;
+extern const char *color;
+extern int binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave;
+
+extern bool file_err, matchall;
+extern unsigned int dpatterns, fpatterns, patterns;
+extern struct pat *pattern;
+extern struct epat *dpattern, *fpattern;
+extern regex_t *er_pattern, *r_pattern; /* NOTE(review): grep.c defines only r_pattern -- confirm er_pattern */
+
+/* For regex errors */
+#define RE_ERROR_BUF 512
+extern char re_error[RE_ERROR_BUF + 1]; /* Seems big enough */
+
+/* util.c */
+bool file_matching(const char *fname);
+bool procfile(const char *fn);
+bool grep_tree(char **argv);
+void *grep_malloc(size_t size);
+void *grep_calloc(size_t nmemb, size_t size);
+void *grep_realloc(void *ptr, size_t size);
+char *grep_strdup(const char *str);
+void grep_printline(struct str *line, int sep);
+
+/* queue.c */
+void initqueue(void);
+bool enqueue(struct str *x);
+void printqueue(void);
+void clearqueue(void);
+
+/* file.c */
+void grep_close(struct file *f);
+struct file *grep_open(const char *path);
+char *grep_fgetln(struct file *f, struct parsec *pc);
diff --git a/usr.bin/grep/queue.c b/usr.bin/grep/queue.c
new file mode 100644
index 000000000000..296e19fb0f6d
--- /dev/null
+++ b/usr.bin/grep/queue.c
@@ -0,0 +1,142 @@
+/* $NetBSD: queue.c,v 1.5 2011/08/31 16:24:57 plunky Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 1999 James Howard and Dag-Erling Smørgrav
+ * All rights reserved.
+ * Copyright (c) 2020 Kyle Evans <kevans@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * A really poor man's queue. It does only what it has to and gets out of
+ * Dodge. It is used in place of <sys/queue.h> to get better performance.
+ */
+
+#include <sys/param.h>
+#include <sys/queue.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "grep.h"
+
+typedef struct str qentry_t;
+
+static long long filled;
+static qentry_t *qend, *qpool;
+
+/*
+ * qnext is the next entry to populate. qlist is where the list actually
+ * starts, for the purposes of printing.
+ */
+static qentry_t *qlist, *qnext;
+
+void
+initqueue(void)
+{
+	/* One slot per -B line; qend stays inside the pool even if Bflag is 0. */
+	qlist = qnext = qpool = grep_calloc(Bflag, sizeof(*qpool));
+	qend = (Bflag > 0) ? qpool + (Bflag - 1) : qpool;
+}
+
+static qentry_t *
+advqueue(qentry_t *itemp)
+{
+	/*
+	 * Step to the next slot, wrapping from the last slot to the first.
+	 */
+	return (itemp != qend ? itemp + 1 : qpool);
+}
+
+/*
+ * Copy line x into the next queue slot.  Returns true if a previously
+ * queued line was dequeued to make room.
+ */
+bool
+enqueue(struct str *x)
+{
+	qentry_t *slot = qnext;
+	bool evicted = false;
+
+	qnext = advqueue(qnext);
+	if (filled < Bflag) {
+		filled++;
+	} else if (filled == Bflag) {
+		/* Full on entry: release the line this slot holds and rotate. */
+		qlist = advqueue(qlist);
+		free(slot->dat);
+		evicted = true;
+	}
+
+	/* Keep a private copy of the data; len + 1 leaves room for the NUL. */
+	slot->dat = grep_malloc(sizeof(char) * x->len + 1);
+	memcpy(slot->dat, x->dat, x->len);
+	slot->dat[x->len] = '\0';
+
+	slot->len = x->len;
+	slot->line_no = x->line_no;
+	slot->boff = x->boff;
+	slot->off = x->off;
+	slot->file = x->file;
+
+	return (evicted);
+}
+
+void
+printqueue(void)
+{
+	qentry_t *cur = qlist;
+
+	/*
+	 * Print and free lines from qlist up to an empty slot or one full lap.
+	 */
+	while (cur->dat != NULL) {
+		grep_printline(cur, '-');
+		free(cur->dat);
+		cur->dat = NULL;
+		cur = advqueue(cur);
+		if (cur == qlist)
+			break;
+	}
+
+	qlist = qnext = qpool;
+	filled = 0;
+}
+
+void
+clearqueue(void)
+{
+	qentry_t *cur = qlist;
+
+	/* Release every slot's line, walking exactly one lap of the ring. */
+	do {
+		free(cur->dat);
+		cur->dat = NULL;
+	} while ((cur = advqueue(cur)) != qlist);
+
+	/* Reset to an empty queue. */
+	qlist = qnext = qpool;
+	filled = 0;
+}
diff --git a/usr.bin/grep/tests/Makefile b/usr.bin/grep/tests/Makefile
new file mode 100644
index 000000000000..b3c79657e53c
--- /dev/null
+++ b/usr.bin/grep/tests/Makefile
@@ -0,0 +1,55 @@
+PACKAGE= tests
+
+ATF_TESTS_SH+= grep_freebsd_test
+NETBSD_ATF_TESTS_SH= grep_test
+
+${PACKAGE}FILES+= d_basic.out
+${PACKAGE}FILES+= d_begin_end_a.out
+${PACKAGE}FILES+= d_begin_end_b.out
+${PACKAGE}FILES+= d_binary.out
+${PACKAGE}FILES+= d_color_a.in
+${PACKAGE}FILES+= d_color_a.out
+${PACKAGE}FILES+= d_color_b.in
+${PACKAGE}FILES+= d_color_b.out
+${PACKAGE}FILES+= d_color_c.out
+${PACKAGE}FILES+= d_context2_a.out
+${PACKAGE}FILES+= d_context2_b.out
+${PACKAGE}FILES+= d_context2_c.out
+${PACKAGE}FILES+= d_context_a.in
+${PACKAGE}FILES+= d_context_a.out
+${PACKAGE}FILES+= d_context_b.in
+${PACKAGE}FILES+= d_context_e.in
+${PACKAGE}FILES+= d_context_b.out
+${PACKAGE}FILES+= d_context_c.out
+${PACKAGE}FILES+= d_context_d.out
+${PACKAGE}FILES+= d_context_e.out
+${PACKAGE}FILES+= d_context_f.out
+${PACKAGE}FILES+= d_context_g.out
+${PACKAGE}FILES+= d_egrep.out
+${PACKAGE}FILES+= d_escmap.in
+${PACKAGE}FILES+= d_f_file_empty.in
+${PACKAGE}FILES+= d_file_exp.in
+${PACKAGE}FILES+= d_file_exp.out
+${PACKAGE}FILES+= d_ignore_case.out
+${PACKAGE}FILES+= d_input
+${PACKAGE}FILES+= d_invert.in
+${PACKAGE}FILES+= d_invert.out
+${PACKAGE}FILES+= d_oflag_zerolen_a.in
+${PACKAGE}FILES+= d_oflag_zerolen_a.out
+${PACKAGE}FILES+= d_oflag_zerolen_b.in
+${PACKAGE}FILES+= d_oflag_zerolen_b.out
+${PACKAGE}FILES+= d_oflag_zerolen_c.in
+${PACKAGE}FILES+= d_oflag_zerolen_c.out
+${PACKAGE}FILES+= d_oflag_zerolen_d.in
+${PACKAGE}FILES+= d_oflag_zerolen_e.in
+${PACKAGE}FILES+= d_oflag_zerolen_e.out
+${PACKAGE}FILES+= d_recurse.out
+${PACKAGE}FILES+= d_recurse_symlink.err
+${PACKAGE}FILES+= d_recurse_symlink.out
+${PACKAGE}FILES+= d_whole_line.out
+${PACKAGE}FILES+= d_word_regexps.out
+${PACKAGE}FILES+= d_zgrep.out
+
+.include <netbsd-tests.test.mk>
+
+.include <bsd.test.mk>
diff --git a/usr.bin/grep/tests/Makefile.depend b/usr.bin/grep/tests/Makefile.depend
new file mode 100644
index 000000000000..11aba52f82cf
--- /dev/null
+++ b/usr.bin/grep/tests/Makefile.depend
@@ -0,0 +1,10 @@
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/usr.bin/grep/tests/grep_freebsd_test.sh b/usr.bin/grep/tests/grep_freebsd_test.sh
new file mode 100755
index 000000000000..906b70645151
--- /dev/null
+++ b/usr.bin/grep/tests/grep_freebsd_test.sh
@@ -0,0 +1,127 @@
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2017 Kyle Evans <kevans@FreeBSD.org>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+
+# grep_type reports, as its return status, which grep(1) is being tested:
+# - 0 : bsdgrep
+# - 1 : gnu grep (ports)
+GREP_TYPE_BSD=0
+GREP_TYPE_GNU=1
+
+grep_type()
+{
+ local grep_version=$(grep --version)
+
+ case "$grep_version" in
+ *"BSD grep"*)
+ return $GREP_TYPE_BSD
+ ;;
+ *"GNU grep"*)
+ return $GREP_TYPE_GNU
+ ;;
+ esac
+ atf_fail "unknown grep type: $grep_version" # neither banner matched
+}
+
+atf_test_case grep_r_implied
+grep_r_implied_body()
+{
+ grep_type
+ if [ $? -ne $GREP_TYPE_BSD ]; then
+ atf_skip "this test only works with bsdgrep(1)"
+ fi
+ # Reference output: recursive search with an explicit "." operand.
+ (cd "$(atf_get_srcdir)" && grep -r --exclude="*.out" -e "test" .) > d_grep_r_implied.out
+ # The same search without a file operand must produce identical output.
+ atf_check -s exit:0 -x \
+ "(cd $(atf_get_srcdir) && grep -r --exclude=\"*.out\" -e \"test\") | diff d_grep_r_implied.out -"
+}
+
+atf_test_case rgrep # rgrep must print exactly what grep -r prints
+rgrep_head()
+{
+ atf_set "require.progs" "rgrep"
+}
+rgrep_body()
+{
+ atf_check -o save:d_grep_r_implied.out grep -r --exclude="*.out" -e "test" "$(atf_get_srcdir)"
+ atf_check -o file:d_grep_r_implied.out rgrep --exclude="*.out" -e "test" "$(atf_get_srcdir)"
+}
+
+atf_test_case gnuext
+gnuext_body()
+{
+ grep_type
+ _type=$? # NOTE(review): _type is not used anywhere below
+ # Each escape must print the same matches as the bracket expression before it.
+ atf_check -o save:grep_alnum.out grep -o '[[:alnum:]]' /COPYRIGHT
+ atf_check -o file:grep_alnum.out grep -o '\w' /COPYRIGHT
+
+ atf_check -o save:grep_nalnum.out grep -o '[^[:alnum:]]' /COPYRIGHT
+ atf_check -o file:grep_nalnum.out grep -o '\W' /COPYRIGHT
+
+ atf_check -o save:grep_space.out grep -o '[[:space:]]' /COPYRIGHT
+ atf_check -o file:grep_space.out grep -o '\s' /COPYRIGHT
+
+ atf_check -o save:grep_nspace.out grep -o '[^[:space:]]' /COPYRIGHT
+ atf_check -o file:grep_nspace.out grep -o '\S' /COPYRIGHT
+
+}
+
+atf_test_case zflag
+zflag_body()
+{
+
+ # The -z flag should pick up 'foo' and 'bar' as on the same line with
+ # 'some kind of junk' in between; a bug was present that instead made
+ # it process this incorrectly.
+ printf "foo\nbar\0" > in
+ # -q prints nothing, so only the exit status is checked.
+ atf_check grep -qz "foo.*bar" in
+}
+
+atf_test_case color_dupe
+color_dupe_body()
+{
+
+ # This assumes a MAX_MATCHES of exactly 32. Previously buggy procline()
+ # calls would terminate the line prematurely every MAX_MATCHES matches,
+ # meaning we'd see the line be output again for the next MAX_MATCHES
+ # number of matches.
+ jot -nb 'A' -s '' 33 > in
+ # The 33-match input line must appear in the output exactly once.
+ atf_check -o save:color.out grep --color=always . in
+ atf_check -o match:"^ +1 color.out" wc -l color.out
+}
+
+atf_init_test_cases() # registers each test case declared in this file
+{
+ atf_add_test_case grep_r_implied
+ atf_add_test_case rgrep
+ atf_add_test_case gnuext
+ atf_add_test_case zflag
+ atf_add_test_case color_dupe
+}
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
new file mode 100644
index 000000000000..5b40405852b3
--- /dev/null
+++ b/usr.bin/grep/util.c
@@ -0,0 +1,856 @@
+/* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */
+/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 1999 James Howard and Dag-Erling Smørgrav
+ * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fnmatch.h>
+#include <fts.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "grep.h"
+
+static bool first_match = true;
+
+/*
+ * Match printing context: per-file output state. procfile() zeroes one of
+ * these for each file and passes it to procmatches() for every line read.
+ */
+struct mprintc {
+ long long tail; /* Trailing context lines left to print; set from Aflag on a match */
+ int last_outed; /* Number of lines since last output */
+ bool doctx; /* Printing context? */
+ bool printmatch; /* Printing matches? */
+ bool same_file; /* Same file as previously printed? */
+};
+
+static void procmatch_match(struct mprintc *mc, struct parsec *pc);
+static void procmatch_nomatch(struct mprintc *mc, struct parsec *pc);
+static bool procmatches(struct mprintc *mc, struct parsec *pc, bool matched);
+#ifdef WITH_INTERNAL_NOSPEC
+static int litexec(const struct pat *pat, const char *string,
+ size_t nmatch, regmatch_t pmatch[]);
+#endif
+static bool procline(struct parsec *pc);
+static bool printline(struct parsec *pc, int sep, size_t *last_out);
+static void printline_metadata(struct str *line, int sep);
+
+/*
+ * Decides whether a file name passes the file patterns in fpattern[].
+ *
+ * Each pattern is tried against the name as given and against its basename.
+ * The verdict of the last pattern that matches is returned; if none match,
+ * the file is accepted unless 'finclude' is set.
+ */
+bool
+file_matching(const char *fname)
+{
+	char *copy, *base;
+	bool accept;
+	unsigned int n;
+
+	/* basename(3) may modify its argument, so hand it a private copy. */
+	copy = strdup(fname);
+	if (copy == NULL)
+		err(2, "strdup");
+	base = basename(copy);
+
+	accept = !finclude;
+	for (n = 0; n < fpatterns; n++) {
+		if (fnmatch(fpattern[n].pat, fname, 0) != 0 &&
+		    fnmatch(fpattern[n].pat, base, 0) != 0)
+			continue;
+		/* No early exit: a later matching pattern overrides this one. */
+		accept = (fpattern[n].mode != EXCL_PAT);
+	}
+
+	free(copy);
+	return (accept);
+}
+
+/*
+ * Decides whether a directory name passes the directory patterns in
+ * dpattern[]. The last pattern that matches decides; with no match the
+ * directory is accepted unless 'dinclude' is set.
+ */
+static inline bool
+dir_matching(const char *dname)
+{
+	bool accept = !dinclude;
+	unsigned int n;
+
+	/* A NULL name cannot match any pattern, so the default stands. */
+	if (dname == NULL)
+		return (accept);
+
+	for (n = 0; n < dpatterns; n++) {
+		if (fnmatch(dpattern[n].pat, dname, 0) == 0)
+			accept = (dpattern[n].mode != EXCL_PAT);
+	}
+	return (accept);
+}
+
+/*
+ * Processes a directory when a recursive search is performed with
+ * the -R option. Each appropriate file is passed to procfile().
+ *
+ * argv is the NULL-terminated list of paths to walk; if it is empty, the
+ * current directory (".") is walked instead. Returns true if procfile()
+ * reported a match for at least one visited file. A failure of fts_open()
+ * or fts_read() ends the program through err(2, ...).
+ */
+bool
+grep_tree(char **argv)
+{
+ FTS *fts;
+ FTSENT *p;
+ int fts_flags;
+ bool matched, ok;
+ const char *wd[] = { ".", NULL };
+
+ matched = false;
+
+ /* This switch effectively initializes 'fts_flags' */
+ switch(linkbehave) {
+ case LINK_EXPLICIT:
+ fts_flags = FTS_COMFOLLOW | FTS_PHYSICAL;
+ break;
+ case LINK_SKIP:
+ fts_flags = FTS_PHYSICAL;
+ break;
+ default:
+ fts_flags = FTS_LOGICAL | FTS_NOSTAT;
+ }
+
+ fts_flags |= FTS_NOCHDIR;
+
+ fts = fts_open((argv[0] == NULL) ?
+ __DECONST(char * const *, wd) : argv, fts_flags, NULL);
+ if (fts == NULL)
+ err(2, "fts_open");
+ /*
+ * errno is cleared before every fts_read() so that, once NULL is
+ * returned, the check after the loop can tell an error from the end
+ * of the walk.
+ */
+ while (errno = 0, (p = fts_read(fts)) != NULL) {
+ switch (p->fts_info) {
+ case FTS_DNR:
+ case FTS_ERR:
+ case FTS_NS:
+ /* Record the failure; the warning is skipped when sflag is set */
+ file_err = true;
+ if(!sflag)
+ warnc(p->fts_errno, "%s", p->fts_path);
+ break;
+ case FTS_D:
+ /*
+ * Skip the whole subtree if either the directory's name
+ * or its path fails the directory patterns.
+ */
+ if (dexclude || dinclude)
+ if (!dir_matching(p->fts_name) ||
+ !dir_matching(p->fts_path))
+ fts_set(fts, p, FTS_SKIP);
+ break;
+ case FTS_DC:
+ /* Print a warning for recursive directory loop */
+ warnx("warning: %s: recursive directory loop",
+ p->fts_path);
+ break;
+ case FTS_DP:
+ break;
+ case FTS_SL:
+ /*
+ * Skip symlinks for LINK_EXPLICIT and
+ * LINK_SKIP. Note that due to FTS_COMFOLLOW,
+ * symlinks on the command line are followed
+ * for LINK_EXPLICIT and not reported as
+ * symlinks.
+ */
+ break;
+ default:
+ /* Check for file exclusion/inclusion */
+ ok = true;
+ if (fexclude || finclude)
+ ok &= file_matching(p->fts_path);
+
+ if (ok && procfile(p->fts_path))
+ matched = true;
+ break;
+ }
+ }
+ if (errno != 0)
+ err(2, "fts_read");
+
+ fts_close(fts);
+ return (matched);
+}
+
+/*
+ * Handles a line that counts as a match. With context printing enabled it
+ * emits the "--" separator where needed, prints the queued lines when
+ * Bflag > 0 and resets the trailing-context counter to Aflag. It then prints
+ * the line itself unless mc->printmatch is false.
+ */
+static void
+procmatch_match(struct mprintc *mc, struct parsec *pc)
+{
+
+ if (mc->doctx) {
+ /*
+ * A separator goes between output groups: never before the very
+ * first match, and otherwise when the file changed or lines were
+ * skipped since the last output.
+ */
+ if (!first_match && (!mc->same_file || mc->last_outed > 0))
+ printf("--\n");
+ if (Bflag > 0)
+ printqueue();
+ mc->tail = Aflag;
+ }
+
+ /* Print the matching line, but only if not quiet/binary */
+ if (mc->printmatch) {
+ size_t last_out;
+ bool terminated;
+
+ last_out = 0;
+ terminated = printline(pc, ':', &last_out);
+ /*
+ * A full match array means procline() may have stopped early;
+ * keep rescanning from where it left off and printing the next
+ * batch until a scan no longer fills the array.
+ */
+ while (pc->matchidx >= MAX_MATCHES) {
+ /* Reset matchidx and try again */
+ pc->matchidx = 0;
+ if (procline(pc) == !vflag)
+ terminated = printline(pc, ':', &last_out);
+ else
+ break;
+ }
+
+ /*
+ * The above loop processes the entire line as long as we keep
+ * hitting the maximum match count. At this point, we know
+ * that there's nothing left to be printed and can terminate the
+ * line.
+ */
+ if (!terminated)
+ printline(pc, ':', &last_out);
+
+ first_match = false;
+ mc->same_file = true;
+ mc->last_outed = 0;
+ }
+}
+
+/*
+ * Handles a non-matching line while context printing is active: the line is
+ * either printed as trailing context, or queued as possible leading context
+ * for a later match.
+ */
+static void
+procmatch_nomatch(struct mprintc *mc, struct parsec *pc)
+{
+
+	if (mc->tail > 0) {
+		/* Trailing context is still owed: print the line with '-'. */
+		grep_printline(&pc->ln, '-');
+		--mc->tail;
+		if (Bflag > 0)
+			clearqueue();
+		return;
+	}
+
+	/*
+	 * Otherwise queue the line for leading context. last_outed grows
+	 * when there is no leading context at all, or when enqueue() reports
+	 * that a line was rotated out of the queue; either way a gap now
+	 * separates the previous output from whatever is printed next.
+	 */
+	if (Bflag == 0 || (Bflag > 0 && enqueue(&pc->ln)))
+		mc->last_outed++;
+}
+
+/*
+ * Acts on the outcome of matching the current line and tells the caller
+ * whether to keep reading: true means continue, false means stop.
+ */
+static bool
+procmatches(struct mprintc *mc, struct parsec *pc, bool matched)
+{
+
+	if (mflag && mcount <= 0) {
+		/*
+		 * The match limit was reached earlier; only the remaining
+		 * trailing context is still being flushed.
+		 */
+		grep_printline(&pc->ln, '-');
+		--mc->tail;
+		return (mc->tail != 0);
+	}
+
+	/*
+	 * XXX TODO: This should loop over pc->matches and handle things on a
+	 * line-by-line basis, setting up a `struct str` as needed.
+	 */
+	if (!matched) {
+		/* Non-matching lines only matter when printing context. */
+		if (mc->doctx)
+			procmatch_nomatch(mc, pc);
+		return (true);
+	}
+
+	/* Deal with any -B context or context separators */
+	procmatch_match(mc, pc);
+
+	/* Without a match limit there is nothing to count. */
+	if (!mflag)
+		return (true);
+
+	/* XXX TODO: Decrement by number of matched lines */
+	--mcount;
+	if (mcount > 0)
+		return (true);
+
+	/* Limit reached: carry on only while trailing context is owed. */
+	return (mc->tail != 0);
+}
+
+/*
+ * Opens a file and processes it. Each file is processed line-by-line
+ * passing the lines to procline().
+ *
+ * fn is the path to search; "-" selects grep_open(NULL) and is reported
+ * under 'label', or errstr[1] when no label is set. Returns true if at least
+ * one line counted as a match and false otherwise, including when the file
+ * is skipped or cannot be opened.
+ */
+bool
+procfile(const char *fn)
+{
+ struct parsec pc;
+ struct mprintc mc;
+ struct file *f;
+ struct stat sb;
+ mode_t s;
+ int lines;
+ bool line_matched;
+
+ if (strcmp(fn, "-") == 0) {
+ fn = label != NULL ? label : errstr[1];
+ f = grep_open(NULL);
+ } else {
+ if (stat(fn, &sb) == 0) {
+ /* Check if we need to process the file */
+ s = sb.st_mode & S_IFMT;
+ if (dirbehave == DIR_SKIP && s == S_IFDIR)
+ return (false);
+ if (devbehave == DEV_SKIP && (s == S_IFIFO ||
+ s == S_IFCHR || s == S_IFBLK || s == S_IFSOCK))
+ return (false);
+ }
+ f = grep_open(fn);
+ }
+ if (f == NULL) {
+ file_err = true;
+ if (!sflag)
+ warn("%s", fn);
+ return (false);
+ }
+
+ pc.ln.file = grep_strdup(fn);
+ pc.ln.line_no = 0;
+ pc.ln.len = 0;
+ pc.ln.boff = 0;
+ /* Starts at -1 so the first "off += len + 1" in the loop yields 0 */
+ pc.ln.off = -1;
+ pc.binary = f->binary;
+ pc.cntlines = false;
+ memset(&mc, 0, sizeof(mc));
+ /* Lines are printed unless an option asks for a summary or silence */
+ mc.printmatch = true;
+ if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag ||
+ lflag || Lflag)
+ mc.printmatch = false;
+ if (mc.printmatch && (Aflag != 0 || Bflag != 0))
+ mc.doctx = true;
+ if (mc.printmatch && (Aflag != 0 || Bflag != 0 || mflag || nflag))
+ pc.cntlines = true;
+ mcount = mlimit;
+
+ /* With lflag or qflag the loop ends after the first matching line */
+ for (lines = 0; lines == 0 || !(lflag || qflag); ) {
+ /*
+ * XXX TODO: We need to revisit this in a chunking world. We're
+ * not going to be doing per-line statistics because of the
+ * overhead involved. procmatches can figure that stuff out as
+ * needed. */
+ /* Reset per-line statistics */
+ pc.printed = 0;
+ pc.matchidx = 0;
+ pc.lnstart = 0;
+ pc.ln.boff = 0;
+ pc.ln.off += pc.ln.len + 1;
+ /* XXX TODO: Grab a chunk */
+ if ((pc.ln.dat = grep_fgetln(f, &pc)) == NULL ||
+ pc.ln.len == 0)
+ break;
+
+ /* Drop the trailing end-of-line byte from the length, if present */
+ if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol)
+ --pc.ln.len;
+ pc.ln.line_no++;
+
+ /* Return if we need to skip a binary file */
+ if (pc.binary && binbehave == BINFILE_SKIP) {
+ grep_close(f);
+ free(pc.ln.file);
+ free(f);
+ return (0);
+ }
+
+ if (mflag && mcount <= 0) {
+ /*
+ * Short-circuit, already hit match count and now we're
+ * just picking up any remaining pieces.
+ */
+ if (!procmatches(&mc, &pc, false))
+ break;
+ continue;
+ }
+ /* With vflag set, a line counts when procline() reports no match */
+ line_matched = procline(&pc) == !vflag;
+ if (line_matched)
+ ++lines;
+
+ /* Halt processing if we hit our match limit */
+ if (!procmatches(&mc, &pc, line_matched))
+ break;
+ }
+ if (Bflag > 0)
+ clearqueue();
+ /* f is still read below (f->binary); it is released by free(f) at the end */
+ grep_close(f);
+
+ if (cflag && !qflag) {
+ if (!hflag)
+ printf("%s:", pc.ln.file);
+ /* NOTE(review): 'lines' is an int but is printed with %u */
+ printf("%u\n", lines);
+ }
+ if (lflag && !qflag && lines != 0)
+ printf("%s%c", fn, nullflag ? 0 : '\n');
+ if (Lflag && !qflag && lines == 0)
+ printf("%s%c", fn, nullflag ? 0 : '\n');
+ /* Binary file with a match: print the errstr[7] message instead of lines */
+ if (lines != 0 && !cflag && !lflag && !Lflag &&
+ binbehave == BINFILE_BIN && f->binary && !qflag)
+ printf(errstr[7], fn);
+
+ free(pc.ln.file);
+ free(f);
+ return (lines != 0);
+}
+
+#ifdef WITH_INTERNAL_NOSPEC
+/*
+ * Internal implementation of literal string search within a string, modeled
+ * after regexec(3), for use when the regex(3) implementation doesn't offer
+ * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD
+ * config, but in other scenarios such as building against libgnuregex or on
+ * some non-FreeBSD OSes.
+ *
+ * On entry pmatch[0].rm_so and pmatch[0].rm_eo give the start and end offsets
+ * of the region of 'string' to search. On success up to 'nmatch' matches are
+ * stored in pmatch[], with unused slots set to -1; successive matches are
+ * looked for starting one byte past the start of the previous one.
+ *
+ * Returns 0 if the literal was found, REG_NOMATCH if it was not, and
+ * REG_ESPACE if the working copy of the subject could not be allocated.
+ */
+static int
+litexec(const struct pat *pat, const char *string, size_t nmatch,
+    regmatch_t pmatch[])
+{
+	char *(*strstr_fn)(const char *, const char *);
+	char *sub, *subject;
+	const char *search;
+	size_t i, idx, nfound, ofs, stringlen;
+
+	if (cflags & REG_ICASE)
+		strstr_fn = strcasestr;
+	else
+		strstr_fn = strstr;
+	idx = 0;
+	nfound = 0;
+	ofs = pmatch[0].rm_so;
+	stringlen = pmatch[0].rm_eo;
+	if (ofs >= stringlen)
+		return (REG_NOMATCH);
+	/* Work on a NUL-terminated copy cut off at the end offset. */
+	subject = strndup(string, stringlen);
+	if (subject == NULL)
+		return (REG_ESPACE);
+	while (ofs < stringlen) {
+		search = (subject + ofs);
+		if ((unsigned long)pat->len > strlen(search))
+			break;
+		sub = strstr_fn(search, pat->pat);
+		/*
+		 * Ignoring the empty string possibility due to context: grep
+		 * optimizes for empty patterns and will never reach this point.
+		 */
+		if (sub == NULL)
+			break;
+		++nfound;
+		/* We only needed to know if we match or not */
+		if (nmatch == 0)
+			break;
+		/* Fill in pmatch */
+		pmatch[idx].rm_so = ofs + (sub - search);
+		pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len;
+		/*
+		 * Resume one byte past the start of the match just recorded.
+		 * This has to be computed before idx moves on: the next slot
+		 * has not been written yet.
+		 */
+		ofs = pmatch[idx].rm_so + 1;
+		if (++idx == nmatch)
+			break;
+	}
+	free(subject);
+	if (nfound == 0)
+		return (REG_NOMATCH);
+
+	/* Mark the slots that received no match as unused. */
+	for (i = idx; i < nmatch; ++i)
+		pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+
+	return (0);
+}
+#endif /* WITH_INTERNAL_NOSPEC */
+
+#define iswword(x) (iswalnum((x)) || (x) == L'_')
+
+/*
+ * Processes a line comparing it with the specified patterns. Each pattern
+ * is looped to be compared along with the full string, saving each and every
+ * match, which is necessary to colorize the output and to count the
+ * matches. The matching lines are passed to printline() to display the
+ * appropriate output.
+ *
+ * Scanning starts at pc->lnstart. Matches are stored in pc->matches[] from
+ * index pc->matchidx onwards, and both fields are updated so that a later
+ * call can resume on the same line. Returns true if the line matched; the
+ * callers in this file always enter with pc->matchidx == 0, which is the
+ * condition under which a pattern hit sets the result.
+ */
+static bool
+procline(struct parsec *pc)
+{
+ regmatch_t pmatch, lastmatch, chkmatch;
+ wchar_t wbegin, wend;
+ size_t st, nst;
+ unsigned int i;
+ int r = 0, leflags = eflags;
+ size_t startm = 0, matchidx;
+ unsigned int retry;
+ bool lastmatched, matched;
+
+ matchidx = pc->matchidx;
+
+ /* Null pattern shortcuts. */
+ if (matchall) {
+ if (xflag && pc->ln.len == 0) {
+ /* Matches empty lines (-x). */
+ return (true);
+ } else if (!wflag && !xflag) {
+ /* Matches every line (no -w or -x). */
+ return (true);
+ }
+
+ /*
+ * If we only have the NULL pattern, whether we match or not
+ * depends on if we got here with -w or -x. If either is set,
+ * the answer is no. If we have other patterns, we'll defer
+ * to them.
+ */
+ if (patterns == 0) {
+ return (!(wflag || xflag));
+ }
+ } else if (patterns == 0) {
+ /* Pattern file with no patterns. */
+ return (false);
+ }
+
+ matched = false;
+ st = pc->lnstart;
+ nst = 0;
+ /* Initialize to avoid a false positive warning from GCC. */
+ lastmatch.rm_so = lastmatch.rm_eo = 0;
+
+ /* Loop to process the whole line */
+ while (st <= pc->ln.len) {
+ lastmatched = false;
+ /* Matches stored at index startm or later belong to this pass */
+ startm = matchidx;
+ retry = 0;
+ /* Not at a line start: stop '^' from matching at offset st */
+ if (st > 0 && pc->ln.dat[st - 1] != fileeol)
+ leflags |= REG_NOTBOL;
+ /* Loop to compare with all the patterns */
+ for (i = 0; i < patterns; i++) {
+ /* The search window [st, len) is passed in through pmatch */
+ pmatch.rm_so = st;
+ pmatch.rm_eo = pc->ln.len;
+#ifdef WITH_INTERNAL_NOSPEC
+ if (grepbehave == GREP_FIXED)
+ r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
+ else
+#endif
+ r = regexec(&r_pattern[i], pc->ln.dat, 1, &pmatch,
+ leflags);
+ if (r != 0)
+ continue;
+ /* Check for full match */
+ if (xflag && (pmatch.rm_so != 0 ||
+ (size_t)pmatch.rm_eo != pc->ln.len))
+ continue;
+ /* Check for whole word match */
+ if (wflag) {
+ /*
+ * Read the characters just before and just after the
+ * match; a space stands in at either end of the line.
+ */
+ wbegin = wend = L' ';
+ if (pmatch.rm_so != 0 &&
+ sscanf(&pc->ln.dat[pmatch.rm_so - 1],
+ "%lc", &wbegin) != 1)
+ r = REG_NOMATCH;
+ else if ((size_t)pmatch.rm_eo !=
+ pc->ln.len &&
+ sscanf(&pc->ln.dat[pmatch.rm_eo],
+ "%lc", &wend) != 1)
+ r = REG_NOMATCH;
+ else if (iswword(wbegin) ||
+ iswword(wend))
+ r = REG_NOMATCH;
+ /*
+ * If we're doing whole word matching and we
+ * matched once, then we should try the pattern
+ * again after advancing just past the start of
+ * the earliest match. This allows the pattern
+ * to match later on in the line and possibly
+ * still match a whole word.
+ *
+ * NOTE(review): retry is reset to 0 on every pass, so
+ * the "retry == pc->lnstart" test below only acts as
+ * a "not set yet" check while lnstart is 0 -- confirm.
+ */
+ if (r == REG_NOMATCH &&
+ (retry == pc->lnstart ||
+ (unsigned int)pmatch.rm_so + 1 < retry))
+ retry = pmatch.rm_so + 1;
+ if (r == REG_NOMATCH)
+ continue;
+ }
+ lastmatched = true;
+ lastmatch = pmatch;
+
+ if (matchidx == 0)
+ matched = true;
+
+ /*
+ * Replace previous match if the new one is earlier
+ * and/or longer. This will lead to some amount of
+ * extra work if -o/--color are specified, but it's
+ * worth it from a correctness point of view.
+ */
+ if (matchidx > startm) {
+ chkmatch = pc->matches[matchidx - 1];
+ if (pmatch.rm_so < chkmatch.rm_so ||
+ (pmatch.rm_so == chkmatch.rm_so &&
+ (pmatch.rm_eo - pmatch.rm_so) >
+ (chkmatch.rm_eo - chkmatch.rm_so))) {
+ pc->matches[matchidx - 1] = pmatch;
+ nst = pmatch.rm_eo;
+ }
+ } else {
+ /* Advance as normal if not */
+ pc->matches[matchidx++] = pmatch;
+ nst = pmatch.rm_eo;
+ }
+ /* avoid excessive matching - skip further patterns */
+ if ((color == NULL && !oflag) || qflag || lflag ||
+ matchidx >= MAX_MATCHES) {
+ /*
+ * Remember where to resume and clear lastmatched so
+ * that the outer loop ends after this pass.
+ */
+ pc->lnstart = nst;
+ lastmatched = false;
+ break;
+ }
+ }
+
+ /*
+ * Advance to just past the start of the earliest match, try
+ * again just in case we still have a chance to match later in
+ * the string.
+ */
+ if (!lastmatched && retry > pc->lnstart) {
+ st = retry;
+ continue;
+ }
+
+ /* XXX TODO: We will need to keep going, since we're chunky */
+ /* One pass if we are not recording matches */
+ if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag))
+ break;
+
+ /* If we didn't have any matches or REG_NOSUB set */
+ if (!lastmatched || (cflags & REG_NOSUB))
+ nst = pc->ln.len;
+
+ if (!lastmatched)
+ /* No matches */
+ break;
+ else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo)
+ /* Zero-length match -- advance one more so we don't get stuck */
+ nst++;
+
+ /* Advance st based on previous matches */
+ st = nst;
+ pc->lnstart = st;
+ }
+
+ /* Reflect the new matchidx in the context */
+ pc->matchidx = matchidx;
+ return matched;
+}
+
+/*
+ * malloc() wrapper for internal use: a zero-sized request yields NULL, and an
+ * allocation failure ends the program through err(2, ...).
+ */
+void *
+grep_malloc(size_t size)
+{
+	void *mem;
+
+	if (size == 0)
+		return (NULL);
+
+	mem = malloc(size);
+	if (mem == NULL)
+		err(2, "malloc");
+	return (mem);
+}
+
+/*
+ * calloc() wrapper for internal use: a request with a zero count or a zero
+ * element size yields NULL, and an allocation failure ends the program
+ * through err(2, ...).
+ */
+void *
+grep_calloc(size_t nmemb, size_t size)
+{
+	void *mem;
+
+	if (nmemb == 0 || size == 0)
+		return (NULL);
+
+	mem = calloc(nmemb, size);
+	if (mem == NULL)
+		err(2, "calloc");
+	return (mem);
+}
+
+/*
+ * realloc() wrapper for internal use: a NULL result from realloc() ends the
+ * program through err(2, ...), so the caller always gets a usable pointer.
+ */
+void *
+grep_realloc(void *ptr, size_t size)
+{
+	void *grown;
+
+	grown = realloc(ptr, size);
+	if (grown == NULL)
+		err(2, "realloc");
+	return (grown);
+}
+
+/*
+ * strdup() wrapper for internal use: an allocation failure ends the program
+ * through err(2, ...). The caller owns the returned copy.
+ */
+char *
+grep_strdup(const char *str)
+{
+	char *copy;
+
+	copy = strdup(str);
+	if (copy == NULL)
+		err(2, "strdup");
+	return (copy);
+}
+
+/*
+ * Writes one whole line to stdout without highlighting any matches: the
+ * metadata prefix, the line's bytes, then the end-of-line byte. This is the
+ * routine used for context lines. stdout is flushed after each line.
+ */
+void
+grep_printline(struct str *line, int sep)
+{
+
+	printline_metadata(line, sep);
+	fwrite(line->dat, line->len, 1, stdout);
+	fputc(fileeol, stdout);
+
+	fflush(stdout);
+}
+
+/*
+ * Writes the prefix that goes before a line's text: the file name (unless
+ * hflag), the line number (nflag) and the byte offset (bflag). Fields are
+ * separated by 'sep', and a final 'sep' follows the last field printed.
+ * With nullflag, the file name is followed by a NUL byte and no separator.
+ */
+static void
+printline_metadata(struct str *line, int sep)
+{
+	bool pending = false;	/* Is a separator owed before the next field? */
+
+	if (!hflag) {
+		if (nullflag) {
+			printf("%s", line->file);
+			putchar(0);
+		} else {
+			fputs(line->file, stdout);
+			pending = true;
+		}
+	}
+
+	if (nflag) {
+		if (pending)
+			putchar(sep);
+		printf("%d", line->line_no);
+		pending = true;
+	}
+
+	if (bflag) {
+		if (pending)
+			putchar(sep);
+		printf("%lld", (long long)(line->off + line->boff));
+		pending = true;
+	}
+
+	if (pending)
+		putchar(sep);
+}
+
+/*
+ * Prints a matching line according to the command line options. We need
+ * *last_out to be populated on entry in case this is just a continuation of
+ * matches within the same line.
+ *
+ * On return *last_out holds the offset up to which the line has been
+ * written, so that a follow-up call can continue from there.
+ *
+ * Returns true if the line was terminated, false if it was not.
+ */
+static bool
+printline(struct parsec *pc, int sep, size_t *last_out)
+{
+ size_t a = *last_out;
+ size_t i, matchidx;
+ regmatch_t match;
+ bool terminated;
+
+ /*
+ * Nearly all paths below will terminate the line by default, but it is
+ * avoided in some circumstances in case we don't have the full context
+ * available here.
+ */
+ terminated = true;
+
+ /* If matchall, everything matches but don't actually print for -o */
+ if (oflag && matchall)
+ return (terminated);
+
+ matchidx = pc->matchidx;
+
+ /* --color and -o */
+ if ((oflag || color) && (pc->printed > 0 || matchidx > 0)) {
+ /* Only print metadata once per line if --color */
+ if (!oflag && pc->printed == 0) {
+ printline_metadata(&pc->ln, sep);
+ }
+ for (i = 0; i < matchidx; i++) {
+ match = pc->matches[i];
+ /* Don't output zero length matches */
+ if (match.rm_so == match.rm_eo)
+ continue;
+ /*
+ * Metadata is printed on a per-line basis, so every
+ * match gets file metadata with the -o flag.
+ */
+ if (oflag) {
+ pc->ln.boff = match.rm_so;
+ printline_metadata(&pc->ln, sep);
+ } else {
+ /* Unmatched text between the last output and this match */
+ fwrite(pc->ln.dat + a, match.rm_so - a, 1,
+ stdout);
+ }
+ /* The match itself, wrapped in escape sequences when coloring */
+ if (color)
+ fprintf(stdout, "\33[%sm\33[K", color);
+ fwrite(pc->ln.dat + match.rm_so,
+ match.rm_eo - match.rm_so, 1, stdout);
+ if (color)
+ fprintf(stdout, "\33[m\33[K");
+ a = match.rm_eo;
+ if (oflag)
+ putchar('\n');
+ }
+
+ /*
+ * Don't terminate if we reached the match limit; we may have
+ * other matches on this line to process.
+ */
+ *last_out = a;
+ if (!oflag && matchidx != MAX_MATCHES) {
+ /* Write whatever follows the last match, then end the line */
+ if (pc->ln.len - a > 0) {
+ fwrite(pc->ln.dat + a, pc->ln.len - a, 1,
+ stdout);
+ *last_out = pc->ln.len;
+ }
+ putchar('\n');
+ fflush(stdout);
+ } else if (!oflag) {
+ /*
+ * -o is terminated on every match output, so this
+ * branch is only designed to capture MAX_MATCHES in a
+ * line which may be a signal to us for a lack of
+ * context. The caller will know more and call us again
+ * to terminate if it needs to.
+ */
+ terminated = false;
+ } else {
+ fflush(stdout);
+ }
+ } else
+ /* Neither -o nor --color, or nothing recorded: print the line as-is */
+ grep_printline(&pc->ln, sep);
+ pc->printed++;
+ return (terminated);
+}
diff --git a/usr.bin/grep/zgrep.1 b/usr.bin/grep/zgrep.1
new file mode 100644
index 000000000000..76eb145db82d
--- /dev/null
+++ b/usr.bin/grep/zgrep.1
@@ -0,0 +1,112 @@
+.\" Copyright (c) 2018 Baptiste Daroussin <bapt@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd July 20, 2020
+.Dt ZGREP 1
+.Os
+.Sh NAME
+.Nm zgrep ,
+.Nm zegrep ,
+.Nm zfgrep ,
+.Nm bzgrep ,
+.Nm bzegrep ,
+.Nm bzfgrep ,
+.Nm lzgrep ,
+.Nm lzegrep ,
+.Nm lzfgrep ,
+.Nm xzgrep ,
+.Nm xzegrep ,
+.Nm xzfgrep ,
+.Nm zstdgrep ,
+.Nm zstdegrep ,
+.Nm zstdfgrep
+.Nd grep compressed files
+.Sh SYNOPSIS
+.Nm
+.Oo Ar flags Oc Ar files
+.Nm zegrep
+.Oo Ar flags Oc Ar files
+.Nm zfgrep
+.Oo Ar flags Oc Ar files
+.Pp
+.Nm bzgrep
+.Oo Ar flags Oc Ar files
+.Nm bzegrep
+.Oo Ar flags Oc Ar files
+.Nm bzfgrep
+.Oo Ar flags Oc Ar files
+.Pp
+.Nm lzgrep
+.Oo Ar flags Oc Ar files
+.Nm lzegrep
+.Oo Ar flags Oc Ar files
+.Nm lzfgrep
+.Oo Ar flags Oc Ar files
+.Pp
+.Nm xzgrep
+.Oo Ar flags Oc Ar files
+.Nm xzegrep
+.Oo Ar flags Oc Ar files
+.Nm xzfgrep
+.Oo Ar flags Oc Ar files
+.Pp
+.Nm zstdgrep
+.Oo Ar flags Oc Ar files
+.Nm zstdegrep
+.Oo Ar flags Oc Ar files
+.Nm zstdfgrep
+.Oo Ar flags Oc Ar files
+.Sh DESCRIPTION
+Allow
+.Xr grep 1
+to read compressed files.
+.Sh SEE ALSO
+.Xr bzip2 1 ,
+.Xr grep 1 ,
+.Xr gzip 1 ,
+.Xr xz 1 ,
+.Xr zstd 1
+.Sh AUTHORS
+This version of the
+.Nm
+utility was written by
+.An Thomas Klausner Aq Mt wiz@NetBSD.org .
+.Sh BUGS
+.Xr zgrep 1
+does not handle flags that take arguments if there is no whitespace
+between the flag and the argument, for example:
+.Pp
+.Dl "zgrep -enfs /etc/rpc"
+.Pp
+When more than one
+.Fl e
+flag is used matching
+should occur for any of the patterns (similar to multiple patterns
+supplied in a file with the
+.Fl f
+flag).
+.Xr zgrep 1
+only matches the last
+.Fl e
+pattern.
diff --git a/usr.bin/grep/zgrep.sh b/usr.bin/grep/zgrep.sh
new file mode 100755
index 000000000000..8bd630726647
--- /dev/null
+++ b/usr.bin/grep/zgrep.sh
@@ -0,0 +1,230 @@
+#!/bin/sh
+#
+# Copyright (c) 2003 Thomas Klausner.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Treat any reference to an unset variable as an error.
+set -u
+grep=grep
+# NOTE(review): zcat is not referenced anywhere else in this script.
+zcat=zstdcat
+
+# State filled in by the option loop below.
+endofopts=0
+pattern_file=0
+pattern_found=0
+grep_args=""
+hyphen=0
+silent=0
+
+# Name this script was invoked under, without any leading directories.
+prg=${0##*/}
+
+# handle being called 'zegrep' or 'zfgrep'
+case ${prg} in
+*egrep)
+ grep_args="-E";;
+*fgrep)
+ grep_args="-F";;
+esac
+
+# Choose the decompressor from the invocation name. The zstd* entry has to
+# come before z*, which would otherwise match those names as well.
+catargs="-f"
+case ${prg} in
+zstd*)
+ cattool="/usr/bin/zstdcat"
+ catargs="-fq"
+ ;;
+bz*)
+ cattool="/usr/bin/bzcat"
+ ;;
+z*)
+ cattool="/usr/bin/zcat"
+ ;;
+xz*)
+ cattool="/usr/bin/xzcat"
+ ;;
+lz*)
+ cattool="/usr/bin/lzcat"
+ ;;
+*)
+ echo "Invalid command: ${prg}" >&2
+ exit 1
+ ;;
+esac
+
+# skip all options and pass them on to grep taking care of options
+# with arguments, and if -e was supplied
+#
+# Options are consumed until "--" or the first word that is not an option.
+# A pattern given with -e/--regexp is held in ${pattern}; when another one
+# arrives, the one held so far is appended to grep_args as "-e <pattern>".
+
+while [ $# -gt 0 -a ${endofopts} -eq 0 ]
+do
+	case $1 in
+	# from GNU grep-2.6.0 -- keep in sync!
+	--)
+		shift
+		endofopts=1
+		;;
+	--file=*)
+		pattern_file=1
+		grep_args="${grep_args} ${1}"
+		shift
+		;;
+	--regexp=*)
+		if [ ${pattern_found} -ne 0 ]; then
+			grep_args="${grep_args} -e ${pattern}"
+		fi
+		pattern="${1#--regexp=}"
+		pattern_found=1
+		shift
+		;;
+	-h|--no-filename)
+		silent=1
+		shift
+		;;
+	-V|--version)
+		exec ${grep} -V
+		;;
+	--*)
+		grep_args="${grep_args} $1"
+		shift
+		;;
+	-[EFGHILOSUVabchilnopqsuvwxyz]*)
+		# Peel the first flag off a bundle such as "-il": "-i" goes to
+		# grep_args and "-l" is pushed back to be examined next.  When
+		# nothing is left, the pushed-back word is a bare "-", which
+		# the "-)" case below consumes.
+		post="${1#-?}"
+		pre=${1%${post}}
+		grep_args="${grep_args} ${pre}"
+		shift
+		# Put back partial arg.  "$@" keeps every remaining argument
+		# as one word; an unquoted $* would re-split and glob any
+		# pattern or file name containing blanks or wildcards.
+		set -- "-${post}" "$@"
+		;;
+	-[ABCDdefm])
+		# Flag whose argument is the following word.
+		if [ $# -lt 2 ]
+		then
+			echo "${prg}: missing argument for $1 flag" >&2
+			exit 1
+		fi
+		case $1 in
+		-e)
+			if [ ${pattern_found} -ne 0 ]; then
+				grep_args="${grep_args} -e ${pattern}"
+			fi
+			pattern="$2"
+			pattern_found=1
+			shift 2
+			continue
+			;;
+		-f)
+			pattern_file=1
+			;;
+		*)
+			;;
+		esac
+		grep_args="${grep_args} $1 $2"
+		shift 2
+		;;
+	-[ABCDdefm]*)
+		# Flag with its argument attached.  Stripping "-e" only changes
+		# the word for -e itself; any other flag is passed on unchanged.
+		post="${1#-e}"
+		case ${1} in
+		-e*)
+			if [ ${pattern_found} -ne 0 ]; then
+				grep_args="${grep_args} -e ${pattern}"
+			fi
+			pattern="${post}"
+			pattern_found=1
+			shift
+			continue
+			;;
+		-f*)
+			pattern_file=1
+			;;
+		*)
+			;;
+		esac
+		grep_args="${grep_args} ${post}"
+		shift
+		;;
+	-)
+		hyphen=1
+		shift
+		;;
+	-r|-R)
+		echo "${prg}: the ${1} flag is not currently supported" >&2
+		exit 1
+		;;
+	-?)
+		grep_args="${grep_args} $1"
+		shift
+		;;
+	*)
+		# pattern to grep for
+		endofopts=1
+		;;
+	esac
+done
+
+# When neither -e/--regexp nor a pattern file supplied a pattern, the next
+# argument is the pattern; failing that, a lone "-" seen earlier is used.
+if [ ${pattern_file} -eq 0 ] && [ ${pattern_found} -eq 0 ]; then
+	if [ $# -gt 0 ]; then
+		pattern="$1"
+		shift
+	elif [ ${hyphen} -ge 1 ]; then
+		pattern="-"
+	else
+		echo "${prg}: missing pattern" >&2
+		exit 1
+	fi
+	pattern_found=1
+fi
+
+# Drop the blank that may lead the collected grep arguments
+grep_args="${grep_args# }"
+
+# call grep ...
+# Each input is decompressed by ${cattool} and piped into grep, which reads
+# it from standard input ("-"). The script's exit status is taken from those
+# pipelines.
+if [ $# -lt 1 ]
+then
+ # ... on stdin
+ if [ ${pattern_file} -eq 0 ]; then
+ ${cattool} ${catargs} - | ${grep} ${grep_args} -e "${pattern}" -- -
+ else
+ ${cattool} ${catargs} - | ${grep} ${grep_args} -- -
+ fi
+ # $? here is the status of the pipeline run inside the if above.
+ ret=$?
+else
+ # ... on all files given on the command line
+ # With several files, add -H to the grep arguments unless -h was given.
+ if [ ${silent} -lt 1 -a $# -gt 1 ]; then
+ grep_args="-H ${grep_args}"
+ fi
+ # Succeed if any file matches. First assume no match.
+ ret=1
+ for file; do
+ # --label makes grep report the real file name for its stdin.
+ if [ ${pattern_file} -eq 0 ]; then
+ ${cattool} ${catargs} -- "${file}" |
+ ${grep} --label="${file}" ${grep_args} -e "${pattern}" -- -
+ else
+ ${cattool} ${catargs} -- "${file}" |
+ ${grep} --label="${file}" ${grep_args} -- -
+ fi
+ this_ret=$?
+ # A match (0) overrides a no-match (1). An error (>=2) overrides all.
+ if [ ${this_ret} -eq 0 -a ${ret} -eq 1 ] || [ ${this_ret} -ge 2 ]; then
+ ret=${this_ret}
+ fi
+ done
+fi
+
+exit ${ret}