aboutsummaryrefslogtreecommitdiff
path: root/usr.bin/gzip
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/gzip')
-rw-r--r--usr.bin/gzip/Makefile36
-rw-r--r--usr.bin/gzip/Makefile.depend18
-rw-r--r--usr.bin/gzip/Makefile.depend.options7
-rw-r--r--usr.bin/gzip/gzexe178
-rw-r--r--usr.bin/gzip/gzexe.171
-rw-r--r--usr.bin/gzip/gzip.1246
-rw-r--r--usr.bin/gzip/gzip.c2261
-rw-r--r--usr.bin/gzip/tests/Makefile12
-rw-r--r--usr.bin/gzip/tests/Makefile.depend10
-rw-r--r--usr.bin/gzip/tests/foo.diff2
-rw-r--r--usr.bin/gzip/tests/zdiff_test.sh125
-rw-r--r--usr.bin/gzip/unbzip2.c143
-rw-r--r--usr.bin/gzip/unlz.c642
-rw-r--r--usr.bin/gzip/unpack.c334
-rw-r--r--usr.bin/gzip/unxz.c474
-rw-r--r--usr.bin/gzip/unzstd.c89
-rw-r--r--usr.bin/gzip/zdiff141
-rw-r--r--usr.bin/gzip/zdiff.1140
-rw-r--r--usr.bin/gzip/zforce54
-rw-r--r--usr.bin/gzip/zforce.151
-rw-r--r--usr.bin/gzip/zmore81
-rw-r--r--usr.bin/gzip/zmore.1108
-rw-r--r--usr.bin/gzip/znew136
-rw-r--r--usr.bin/gzip/znew.169
-rw-r--r--usr.bin/gzip/zuncompress.c397
25 files changed, 5825 insertions, 0 deletions
diff --git a/usr.bin/gzip/Makefile b/usr.bin/gzip/Makefile
new file mode 100644
index 000000000000..33fbdb85d78c
--- /dev/null
+++ b/usr.bin/gzip/Makefile
@@ -0,0 +1,36 @@
+# $NetBSD: Makefile,v 1.18 2013/11/13 11:12:24 pettai Exp $
+
+.include <src.opts.mk>
+
+PROG= gzip
+MAN= gzip.1 gzexe.1 zdiff.1 zforce.1 zmore.1 znew.1
+
+LIBADD= z lzma zstd
+
+.if ${MK_BZIP2_SUPPORT} != "no"
+LIBADD+= bz2
+.else
+CFLAGS+= -DNO_BZIP2_SUPPORT
+.endif
+
+CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib
+
+SCRIPTS= gzexe zdiff zforce zmore znew
+
+MLINKS+= gzip.1 gunzip.1 \
+ gzip.1 gzcat.1 \
+ gzip.1 zcat.1 \
+ zdiff.1 zcmp.1 \
+ zdiff.1 xzdiff.1 \
+ zmore.1 zless.1
+
+LINKS+= ${BINDIR}/gzip ${BINDIR}/gunzip \
+ ${BINDIR}/gzip ${BINDIR}/gzcat \
+ ${BINDIR}/gzip ${BINDIR}/zcat \
+ ${BINDIR}/zdiff ${BINDIR}/xzdiff \
+ ${BINDIR}/zdiff ${BINDIR}/zcmp
+
+HAS_TESTS=
+SUBDIR.${MK_TESTS}+= tests
+
+.include <bsd.prog.mk>
diff --git a/usr.bin/gzip/Makefile.depend b/usr.bin/gzip/Makefile.depend
new file mode 100644
index 000000000000..bc41ed39450b
--- /dev/null
+++ b/usr.bin/gzip/Makefile.depend
@@ -0,0 +1,18 @@
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ lib/liblzma \
+ lib/libz \
+ lib/libzstd \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/usr.bin/gzip/Makefile.depend.options b/usr.bin/gzip/Makefile.depend.options
new file mode 100644
index 000000000000..2370c8a69e35
--- /dev/null
+++ b/usr.bin/gzip/Makefile.depend.options
@@ -0,0 +1,7 @@
+# This file is not autogenerated - take care!
+
+DIRDEPS_OPTIONS= BZIP2_SUPPORT
+
+DIRDEPS.BZIP2_SUPPORT.yes= lib/libbz2
+
+.include <dirdeps-options.mk>
diff --git a/usr.bin/gzip/gzexe b/usr.bin/gzip/gzexe
new file mode 100644
index 000000000000..ca96dfc83bce
--- /dev/null
+++ b/usr.bin/gzip/gzexe
@@ -0,0 +1,178 @@
+#!/bin/sh -
+#
+# $NetBSD: gzexe,v 1.3 2004/05/01 08:22:41 wiz Exp $
+# $OpenBSD: gzexe,v 1.3 2003/08/05 18:22:17 deraadt Exp $
+#
+#-
+# Copyright (c) 2003 Otto Moerbeek <otto@drijf.net>
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+
+# The number of lines plus one in the on-the-fly decompression script
+lines=19
+
+# A simple string to recognize already compressed files
+magic="# compressed by gzexe"
+
+# Write the decompression script to stdout.
+# The emitted script must stay exactly $lines - 1 lines long, because it
+# finds its own gzip payload with "tail +$lines".
+header () {
+ # first section needs variable expansion, second not
+ cat <<- EOF
+ #!/bin/sh -
+ $magic
+ lines=$lines
+ EOF
+ cat <<- 'EOF'
+ prog=`/usr/bin/basename "$0"`
+ tmp=`/usr/bin/mktemp -d /tmp/gzexeXXXXXXXXXX` || {
+ /bin/echo "$prog: cannot create tmp dir"; exit 1
+ }
+ trap '/bin/rm -rf "$tmp"' 0
+ if /usr/bin/tail +$lines "$0" |
+ /usr/bin/gzip -dc > "$tmp/$prog" 2> /dev/null; then
+ /bin/chmod u+x "$tmp/$prog"
+ "$tmp/$prog" ${1+"$@"}
+ ret=$?
+ else
+ /bin/echo "$prog: cannot decompress $0"
+ ret=1
+ fi
+ exit $ret
+ EOF
+}
+
+# Succeed when the second line of file $1 is the gzexe magic marker
+compressed () {
+	[ "X$(sed -n 2p "$1" 2> /dev/null)" = "X$magic" ]
+}
+
+# Decompress a file in place.
+# $1 is a gzexe-compressed executable.  A temporary copy is taken first so
+# that the file can be put back if gzip fails.  Returns 0 on success and 1
+# on failure; the temporary copy is removed on every path.
+decompress () {
+	tmp=`mktemp /tmp/gzexeXXXXXXXXXX` || {
+		echo "$prog: cannot create tmp file"
+		return 1
+	}
+	if ! cp "$1" "$tmp"; then
+		echo "$prog: cannot copy $1 to $tmp"
+		rm -f "$tmp"
+		return 1
+	fi
+	# Skip the shell prologue and inflate the payload over the original.
+	if ! tail +$lines "$tmp" | gzip -vdc > "$1"; then
+		echo "$prog: cannot decompress $1"
+		cp "$tmp" "$1"
+		rm -f "$tmp"
+		return 1
+	fi
+	# Success: the temporary copy is no longer needed.
+	rm -f "$tmp"
+	return 0
+}
+
+# Perform some sanity checks on the file
+check () {
+ if test ! -e "$1"; then
+ echo "$prog: cannot compress non-existing file $1"
+ return 1
+ fi
+
+ if test ! -f "$1"; then
+ echo "$prog: cannot compress non-regular file $1"
+ return 1
+ fi
+
+ case `basename "$1"` in
+ sh | mktemp | rm | echo | tail | gzip | chmod)
+ echo "$prog: cannot compress $1, I depend on it"
+ return 1
+ esac
+
+ if test ! -x "$1"; then
+ echo "$prog: cannot compress $1, it is not executable"
+ return 1
+ fi
+
+ if test -u "$1" -o -g "$1"; then
+ echo "$prog: cannot compress $1, it has an s bit set"
+ return 1
+ fi
+}
+
+# Compress a file
+compress () {
+ tmp=`mktemp /tmp/gzexeXXXXXXXXXX` || {
+ echo "$prog: cannot create tmp file"
+ return 1
+ }
+ if ! cp "$1" "$tmp"; then
+ echo "$prog: cannot copy $1 to $tmp"
+ rm -f "$tmp"
+ return 1
+ fi
+ if ! cp "$1" "$1"~; then
+ echo "$prog: cannot create backup copy $1~"
+ rm -f "$1"~ "$tmp"
+ return 1
+ fi
+
+ # Use cp to overwrite the existing file preserving mode and owner
+ # if possible. If the file is not writable, this will produce an
+ # error.
+
+ if header "$1" > "$tmp" && gzip -vc "$1" >> "$tmp"; then
+ if ! cp "$tmp" "$1"; then
+ echo "$prog: cannot copy $tmp to $1"
+ rm -f "$tmp"
+ return 1
+ fi
+ else
+ echo "$prog: cannot compress $1"
+ rm -f "$1"~ "$tmp"
+ return 1
+ fi
+}
+
+# Is the -d flag specified?
+dflag=
+
+# Return value
+rc=0
+
+if test "X$1" = X-d; then
+	dflag=1
+	shift
+fi
+
+prog=`basename "$0"`
+USAGE="usage: $prog [-d] file ..."
+if test $# -eq 0; then
+	# Quoted so that "[-d]" is never expanded as a filename pattern.
+	echo "$USAGE"
+	exit 1
+fi
+
+# Process every operand.  A failure on one file does not stop the others
+# but makes the final exit status 1.  rc is assigned a literal 1: inside
+# the then-branch of a "!"-negated condition "$?" is always 0, so copying
+# it would hide the failure and could even reset an earlier rc=1.
+while test $# -ne 0; do
+	if test $dflag; then
+		if ! compressed "$1"; then
+			echo "$prog: $1 is not compressed"
+			rc=1
+		elif ! decompress "$1"; then
+			rc=1
+		fi
+	else
+		if compressed "$1"; then
+			echo "$prog: $1 is already compressed"
+			rc=1
+		elif ! check "$1" || ! compress "$1"; then
+			rc=1
+		fi
+	fi
+	shift
+done
+exit $rc
diff --git a/usr.bin/gzip/gzexe.1 b/usr.bin/gzip/gzexe.1
new file mode 100644
index 000000000000..7247714b1f05
--- /dev/null
+++ b/usr.bin/gzip/gzexe.1
@@ -0,0 +1,71 @@
+.\" $NetBSD: gzexe.1,v 1.3 2003/12/28 12:49:41 wiz Exp $
+.\" $OpenBSD: gzexe.1,v 1.1 2003/07/31 07:32:47 otto Exp $
+.\"
+.\" Copyright (c) 2003 Otto Moerbeek <otto@drijf.net>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.Dd January 26, 2007
+.Dt GZEXE 1
+.Os
+.Sh NAME
+.Nm gzexe
+.Nd create auto-decompressing executables
+.Sh SYNOPSIS
+.Nm gzexe
+.Op Fl d
+.Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility uses
+.Xr gzip 1
+to compress executables, producing executables that decompress on-the-fly
+when executed.
+This saves disk space, at the cost of slower execution times.
+The original executables are saved by copying each of them to a file with
+the same name with a
+.Sq ~
+suffix appended.
+After verifying that the compressed executables work as expected, the backup
+files can be removed.
+.Pp
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl d
+Decompress executables previously compressed by
+.Nm .
+.El
+.Pp
+The
+.Nm
+program refuses to compress non-regular or non-executable files,
+files with a setuid or setgid bit set, files that are already
+compressed using
+.Nm
+or programs it needs to perform on-the-fly decompression:
+.Xr sh 1 ,
+.Xr mktemp 1 ,
+.Xr rm 1 ,
+.Xr echo 1 ,
+.Xr tail 1 ,
+.Xr gzip 1 ,
+and
+.Xr chmod 1 .
+.Sh SEE ALSO
+.Xr gzip 1
+.Sh CAVEATS
+The
+.Nm
+utility replaces files by overwriting them with the generated
+compressed executable.
+To be able to do this, it is required that the original files are writable.
diff --git a/usr.bin/gzip/gzip.1 b/usr.bin/gzip/gzip.1
new file mode 100644
index 000000000000..86df53f59669
--- /dev/null
+++ b/usr.bin/gzip/gzip.1
@@ -0,0 +1,246 @@
+.\" $NetBSD: gzip.1,v 1.31 2018/10/26 22:10:15 christos Exp $
+.\"
+.\" Copyright (c) 1997, 2003, 2004, 2008, 2009, 2015, 2017 Matthew R. Green
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.Dd November 2, 2022
+.Dt GZIP 1
+.Os
+.Sh NAME
+.Nm gzip ,
+.Nm gunzip ,
+.Nm zcat
+.Nd compression/decompression tool using Lempel-Ziv coding (LZ77)
+.Sh SYNOPSIS
+.Nm
+.Op Fl cdfhkLlNnqrtVv
+.Op Fl S Ar suffix
+.Ar file
+.Oo
+.Ar file Oo ...
+.Oc
+.Oc
+.Nm gunzip
+.Op Fl cfhkLNqrtVv
+.Op Fl S Ar suffix
+.Ar file
+.Oo
+.Ar file Oo ...
+.Oc
+.Oc
+.Nm zcat
+.Op Fl fhV
+.Ar file
+.Oo
+.Ar file Oo ...
+.Oc
+.Oc
+.Sh DESCRIPTION
+The
+.Nm
+program compresses and decompresses files using Lempel-Ziv coding
+(LZ77).
+If no
+.Ar files
+are specified,
+.Nm
+will compress from standard input, or decompress to standard output.
+When in compression mode, each
+.Ar file
+will be replaced with another file with the suffix, set by the
+.Fl S Ar suffix
+option, added, if possible.
+.Pp
+In decompression mode, each
+.Ar file
+will be checked for existence, as will the file with the suffix
+added.
+Each
+.Ar file
+argument must contain a separate complete archive;
+when multiple
+.Ar files
+are indicated, each is decompressed in turn.
+.Pp
+In the case of
+.Nm gzcat
+the resulting data is then concatenated in the manner of
+.Xr cat 1 .
+.Pp
+If invoked as
+.Nm gunzip
+then the
+.Fl d
+option is enabled.
+If invoked as
+.Nm zcat
+or
+.Nm gzcat
+then both the
+.Fl c
+and
+.Fl d
+options are enabled.
+.Pp
+This version of
+.Nm
+is also capable of decompressing files compressed using
+.Xr compress 1 ,
+.Xr bzip2 1 ,
+.Ar lzip ,
+.Xr zstd 1 ,
+or
+.Xr xz 1 .
+.Sh OPTIONS
+The following options are available:
+.Bl -tag -width XXrXXXrecursiveX
+.It Fl 1 , Fl Fl fast
+.It Fl 2 , 3 , 4 , 5 , 6 , 7 , 8
+.It Fl 9 , Fl Fl best
+These options change the compression level used, with the
+.Fl 1
+option being the fastest, with less compression, and the
+.Fl 9
+option being the slowest, with optimal compression.
+The default compression level is 6.
+.It Fl c , Fl Fl stdout , Fl Fl to-stdout
+This option specifies that output will go to the standard output
+stream, leaving files intact.
+.It Fl d , Fl Fl decompress , Fl Fl uncompress
+This option selects decompression rather than compression.
+.It Fl f , Fl Fl force
+This option turns on force mode.
+This allows files with multiple links, symbolic links to regular files,
+overwriting of pre-existing files, reading from or writing to a terminal,
+and when combined with the
+.Fl c
+option, allowing non-compressed data to pass through unchanged.
+.It Fl h , Fl Fl help
+This option prints a usage summary and exits.
+.It Fl k , Fl Fl keep
+This option prevents
+.Nm
+from deleting input files after (de)compression.
+.It Fl L , Fl Fl license
+This option prints
+.Nm
+license.
+.It Fl l , Fl Fl list
+This option displays the compressed size, uncompressed size,
+compression ratio, and uncompressed name of each file.
+With the
+.Fl v
+option, it also displays the compression method, CRC, date and time
+embedded in the file.
+.It Fl N , Fl Fl name
+This option causes the stored filename in the input file to be used
+as the output file.
+.It Fl n , Fl Fl no-name
+This option stops the filename and timestamp from being stored in
+the output file.
+.It Fl q , Fl Fl quiet
+With this option, no warnings or errors are printed.
+.It Fl r , Fl Fl recursive
+This option is used to
+.Nm
+the files in a directory tree individually, using the
+.Xr fts 3
+library.
+.It Fl S Ar suffix , Fl Fl suffix Ar suffix
+This option changes the default suffix from .gz to
+.Ar suffix .
+.It Fl t , Fl Fl test
+This option will test compressed files for integrity.
+.It Fl V , Fl Fl version
+This option prints the version of the
+.Nm
+program.
+.It Fl v , Fl Fl verbose
+This option turns on verbose mode, which prints the compression
+ratio for each file compressed.
+.El
+.Sh ENVIRONMENT
+If the environment variable
+.Ev GZIP
+is set, it is parsed as a white-space separated list of options
+handled before any options on the command line.
+Options on the command line will override anything in
+.Ev GZIP .
+.Sh EXIT STATUS
+The
+.Nm
+utility exits 0 on success,
+1 on errors,
+and 2 if a warning occurs.
+.Sh SIGNALS
+.Nm
+responds to the following signals:
+.Bl -tag -width indent
+.It Dv SIGINFO
+Report progress to standard error.
+.El
+.Sh SEE ALSO
+.Xr bzip2 1 ,
+.Xr compress 1 ,
+.Xr xz 1 ,
+.Xr zstd 1 ,
+.Xr fts 3 ,
+.Xr zlib 3
+.Sh HISTORY
+The
+.Nm
+program was originally written by Jean-loup Gailly, licensed under
+the GNU General Public License.
+Matthew R. Green wrote a simple front end for
+.Nx 1.3
+distribution media, based on the freely re-distributable zlib library.
+It was enhanced to be mostly feature-compatible with the original
+GNU
+.Nm
+program for
+.Nx 2.0 .
+.Pp
+This implementation of
+.Nm
+was ported based on the
+.Nx
+.Nm
+version 20181111,
+and first appeared in
+.Fx 7.0 .
+.Sh AUTHORS
+.An -nosplit
+This implementation of
+.Nm
+was written by
+.An Matthew R. Green Aq Mt mrg@eterna.com.au
+with unpack support written by
+.An Xin LI Aq Mt delphij@FreeBSD.org .
+.Sh BUGS
+According to RFC 1952, the recorded file size is stored in a 32-bit
+integer, therefore, it cannot represent files larger than 4GB.
+This limitation also applies to
+.Fl l
+option of
+.Nm
+utility.
diff --git a/usr.bin/gzip/gzip.c b/usr.bin/gzip/gzip.c
new file mode 100644
index 000000000000..fd8026af6a81
--- /dev/null
+++ b/usr.bin/gzip/gzip.c
@@ -0,0 +1,2261 @@
+/* $NetBSD: gzip.c,v 1.116 2018/10/27 11:39:12 skrll Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 1997, 1998, 2003, 2004, 2006, 2008, 2009, 2010, 2011, 2015, 2017
+ * Matthew R. Green
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * gzip.c -- GPL free gzip using zlib.
+ *
+ * RFC 1950 covers the zlib format
+ * RFC 1951 covers the deflate format
+ * RFC 1952 covers the gzip format
+ *
+ * TODO:
+ * - use mmap where possible
+ * - make bzip2/compress -v/-t/-l support work as well as possible
+ */
+
+#include <sys/endian.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <zlib.h>
+#include <fts.h>
+#include <libgen.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <time.h>
+
+/*
+ * what type of file are we dealing with
+ *
+ * FT_GZIP is always present; every other format is compiled in unless
+ * its NO_*_SUPPORT macro is defined.  FT_LAST and FT_UNKNOWN come after
+ * all of the real formats.
+ */
+enum filetype {
+ FT_GZIP,
+#ifndef NO_BZIP2_SUPPORT
+ FT_BZIP2,
+#endif
+#ifndef NO_COMPRESS_SUPPORT
+ FT_Z,
+#endif
+#ifndef NO_PACK_SUPPORT
+ FT_PACK,
+#endif
+#ifndef NO_XZ_SUPPORT
+ FT_XZ,
+#endif
+#ifndef NO_LZ_SUPPORT
+ FT_LZ,
+#endif
+#ifndef NO_ZSTD_SUPPORT
+ FT_ZSTD,
+#endif
+ FT_LAST,
+ FT_UNKNOWN
+};
+
+#ifndef NO_BZIP2_SUPPORT
+#include <bzlib.h>
+
+#define BZ2_SUFFIX ".bz2"
+#define BZIP2_MAGIC "BZh"
+#endif
+
+#ifndef NO_COMPRESS_SUPPORT
+#define Z_SUFFIX ".Z"
+#define Z_MAGIC "\037\235"
+#endif
+
+#ifndef NO_PACK_SUPPORT
+#define PACK_MAGIC "\037\036"
+#endif
+
+#ifndef NO_XZ_SUPPORT
+#include <lzma.h>
+#define XZ_SUFFIX ".xz"
+#define XZ_MAGIC "\3757zXZ"
+#endif
+
+#ifndef NO_LZ_SUPPORT
+#define LZ_SUFFIX ".lz"
+#define LZ_MAGIC "LZIP"
+#endif
+
+#ifndef NO_ZSTD_SUPPORT
+#include <zstd.h>
+#define ZSTD_SUFFIX ".zst"
+#define ZSTD_MAGIC "\050\265\057\375"
+#endif
+
+#define GZ_SUFFIX ".gz"
+
+#define BUFLEN (64 * 1024)
+
+#define GZIP_MAGIC0 0x1F
+#define GZIP_MAGIC1 0x8B
+#define GZIP_OMAGIC1 0x9E
+
+#define GZIP_TIMESTAMP (off_t)4
+#define GZIP_ORIGNAME (off_t)10
+
+#define HEAD_CRC 0x02
+#define EXTRA_FIELD 0x04
+#define ORIG_NAME 0x08
+#define COMMENT 0x10
+
+#define OS_CODE 3 /* Unix */
+
+typedef struct {
+ const char *zipped; /* suffix carried by the compressed file name */
+ int ziplen; /* length of zipped without the NUL (sizeof - 1 in SUFFIX) */
+ const char *normal; /* for unzip - must not be longer than zipped */
+} suffixes_t;
+static suffixes_t suffixes[] = {
+#define SUFFIX(Z, N) {Z, sizeof Z - 1, N}
+ SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S .xxx */
+ SUFFIX(GZ_SUFFIX, ""),
+ SUFFIX(".z", ""),
+ SUFFIX("-gz", ""),
+ SUFFIX("-z", ""),
+ SUFFIX("_z", ""),
+ SUFFIX(".taz", ".tar"),
+ SUFFIX(".tgz", ".tar"),
+#ifndef NO_BZIP2_SUPPORT
+ SUFFIX(BZ2_SUFFIX, ""),
+ SUFFIX(".tbz", ".tar"),
+ SUFFIX(".tbz2", ".tar"),
+#endif
+#ifndef NO_COMPRESS_SUPPORT
+ SUFFIX(Z_SUFFIX, ""),
+#endif
+#ifndef NO_XZ_SUPPORT
+ SUFFIX(XZ_SUFFIX, ""),
+#endif
+#ifndef NO_LZ_SUPPORT
+ SUFFIX(LZ_SUFFIX, ""),
+#endif
+#ifndef NO_ZSTD_SUPPORT
+ SUFFIX(ZSTD_SUFFIX, ""),
+#endif
+ SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */
+#undef SUFFIX
+};
+#define NUM_SUFFIXES (nitems(suffixes))
+#define SUFFIX_MAXLEN 30 /* longest -S argument main() accepts */
+
+static const char gzip_version[] = "FreeBSD gzip 20190107";
+
+static const char gzip_copyright[] = \
+" Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green\n"
+" All rights reserved.\n"
+"\n"
+" Redistribution and use in source and binary forms, with or without\n"
+" modification, are permitted provided that the following conditions\n"
+" are met:\n"
+" 1. Redistributions of source code must retain the above copyright\n"
+" notice, this list of conditions and the following disclaimer.\n"
+" 2. Redistributions in binary form must reproduce the above copyright\n"
+" notice, this list of conditions and the following disclaimer in the\n"
+" documentation and/or other materials provided with the distribution.\n"
+"\n"
+" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
+" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
+" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
+" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
+" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n"
+" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n"
+" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n"
+" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n"
+" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n"
+" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n"
+" SUCH DAMAGE.";
+
+static int cflag; /* stdout mode */
+static int dflag; /* decompress mode */
+static int lflag; /* list mode */
+static int numflag = 6; /* gzip -1..-9 value */
+
+static const char *remove_file = NULL; /* file to be removed upon SIGINT */
+
+static int fflag; /* force mode */
+static int kflag; /* don't delete input files */
+static int nflag; /* don't save name/timestamp */
+static int Nflag; /* restore name/timestamp (set by -N, cleared by -n) */
+static int qflag; /* quiet mode */
+static int rflag; /* recursive mode */
+static int tflag; /* test */
+static int vflag; /* verbose mode */
+static sig_atomic_t print_info = 0; /* NOTE(review): presumably raised by the SIGINFO handler - confirm */
+
+static int exit_value = 0; /* exit value */
+
+static const char *infile; /* name of file coming in */
+
+static void maybe_err(const char *fmt, ...) __printflike(1, 2) __dead2;
+#if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \
+ !defined(NO_XZ_SUPPORT) || !defined(NO_ZSTD_SUPPORT)
+static void maybe_errx(const char *fmt, ...) __printflike(1, 2) __dead2;
+#endif
+static void maybe_warn(const char *fmt, ...) __printflike(1, 2);
+static void maybe_warnx(const char *fmt, ...) __printflike(1, 2);
+static enum filetype file_gettype(u_char *);
+static off_t gz_compress(int, int, off_t *, const char *, uint32_t);
+static off_t gz_uncompress(int, int, char *, size_t, off_t *, const char *);
+static off_t file_compress(char *, char *, size_t);
+static off_t file_uncompress(char *, char *, size_t);
+static void handle_pathname(char *);
+static void handle_file(char *, struct stat *);
+static void handle_stdin(void);
+static void handle_stdout(void);
+static void print_ratio(off_t, off_t, FILE *);
+static void print_list(int fd, off_t, const char *, time_t);
+static void usage(void) __dead2;
+static void display_version(void) __dead2;
+static void display_license(void);
+static const suffixes_t *check_suffix(char *, int);
+static ssize_t read_retry(int, void *, size_t);
+static ssize_t write_retry(int, const void *, size_t);
+static void print_list_out(off_t, off_t, const char*);
+
+static void infile_set(const char *newinfile, off_t total);
+
+static off_t infile_total; /* total expected to read/write */
+static off_t infile_current; /* current read/write */
+
+static void check_siginfo(void);
+static off_t cat_fd(unsigned char *, size_t, off_t *, int fd);
+static void prepend_gzip(char *, int *, char ***);
+static void handle_dir(char *);
+static void print_verbage(const char *, const char *, off_t, off_t);
+static void print_test(const char *, int);
+static void copymodes(int fd, const struct stat *, const char *file);
+static int check_outfile(const char *outfile);
+static void setup_signals(void);
+static void infile_newdata(size_t newdata);
+static void infile_clear(void);
+
+#ifndef NO_BZIP2_SUPPORT
+static off_t unbzip2(int, int, char *, size_t, off_t *);
+#endif
+
+#ifndef NO_COMPRESS_SUPPORT
+static FILE *zdopen(int);
+static off_t zuncompress(FILE *, FILE *, char *, size_t, off_t *);
+#endif
+
+#ifndef NO_PACK_SUPPORT
+static off_t unpack(int, int, char *, size_t, off_t *);
+#endif
+
+#ifndef NO_XZ_SUPPORT
+static off_t unxz(int, int, char *, size_t, off_t *);
+static off_t unxz_len(int);
+#endif
+
+#ifndef NO_LZ_SUPPORT
+static off_t unlz(int, int, char *, size_t, off_t *);
+#endif
+
+#ifndef NO_ZSTD_SUPPORT
+static off_t unzstd(int, int, char *, size_t, off_t *);
+#endif
+
+static const struct option longopts[] = { /* long name -> short option letter switched on in main() */
+ { "stdout", no_argument, 0, 'c' },
+ { "to-stdout", no_argument, 0, 'c' },
+ { "decompress", no_argument, 0, 'd' },
+ { "uncompress", no_argument, 0, 'd' },
+ { "force", no_argument, 0, 'f' },
+ { "help", no_argument, 0, 'h' },
+ { "keep", no_argument, 0, 'k' },
+ { "list", no_argument, 0, 'l' },
+ { "no-name", no_argument, 0, 'n' },
+ { "name", no_argument, 0, 'N' },
+ { "quiet", no_argument, 0, 'q' },
+ { "recursive", no_argument, 0, 'r' },
+ { "suffix", required_argument, 0, 'S' },
+ { "test", no_argument, 0, 't' },
+ { "verbose", no_argument, 0, 'v' },
+ { "version", no_argument, 0, 'V' },
+ { "fast", no_argument, 0, '1' },
+ { "best", no_argument, 0, '9' },
+ { "ascii", no_argument, 0, 'a' },
+ { "license", no_argument, 0, 'L' },
+ { NULL, no_argument, 0, 0 }, /* terminator */
+};
+
+/*
+ * Entry point.
+ *
+ * Options found in $GZIP are spliced in ahead of the command line, the
+ * invocation name (gunzip, zcat, gzcat) selects implicit -d / -c, and the
+ * options are parsed into the file-scope flags.  Each remaining operand
+ * is passed to handle_pathname(); with no operands, handle_stdin() is
+ * used when decompressing and handle_stdout() otherwise.  The process
+ * exits with the accumulated exit_value.
+ */
+int
+main(int argc, char **argv)
+{
+	const char *progname = getprogname();
+	char *gzip;
+	int len;
+	int ch;
+
+	setup_signals();
+
+	if ((gzip = getenv("GZIP")) != NULL)
+		prepend_gzip(gzip, &argc, &argv);
+
+	/*
+	 * XXX
+	 * handle being called `gunzip', `zcat' and `gzcat'
+	 */
+	if (strcmp(progname, "gunzip") == 0)
+		dflag = 1;
+	else if (strcmp(progname, "zcat") == 0 ||
+	    strcmp(progname, "gzcat") == 0)
+		dflag = cflag = 1;
+
+#define OPT_LIST "123456789acdfhklLNnqrS:tVv"
+
+	while ((ch = getopt_long(argc, argv, OPT_LIST, longopts, NULL)) != -1) {
+		switch (ch) {
+		case '1': case '2': case '3':
+		case '4': case '5': case '6':
+		case '7': case '8': case '9':
+			numflag = ch - '0';
+			break;
+		case 'c':
+			cflag = 1;
+			break;
+		case 'd':
+			dflag = 1;
+			break;
+		case 'l':
+			/* listing reads the compressed input, so it implies -d */
+			lflag = 1;
+			dflag = 1;
+			break;
+		case 'V':
+			display_version();
+			/* NOTREACHED */
+		case 'a':
+			fprintf(stderr, "%s: option --ascii ignored on this system\n", progname);
+			break;
+		case 'f':
+			fflag = 1;
+			break;
+		case 'k':
+			kflag = 1;
+			break;
+		case 'L':
+			display_license();
+			/*
+			 * display_license() is not declared __dead2, so do
+			 * not rely on it exiting: never fall through into -N.
+			 */
+			break;
+		case 'N':
+			nflag = 0;
+			Nflag = 1;
+			break;
+		case 'n':
+			nflag = 1;
+			Nflag = 0;
+			break;
+		case 'q':
+			qflag = 1;
+			break;
+		case 'r':
+			rflag = 1;
+			break;
+		case 'S':
+			len = strlen(optarg);
+			if (len != 0) {
+				if (len > SUFFIX_MAXLEN)
+					errx(1, "incorrect suffix: '%s': too long", optarg);
+				/* a non-empty suffix replaces the first table slot */
+				suffixes[0].zipped = optarg;
+				suffixes[0].ziplen = len;
+			} else {
+				/* -S "" replaces the last table slot instead */
+				suffixes[NUM_SUFFIXES - 1].zipped = "";
+				suffixes[NUM_SUFFIXES - 1].ziplen = 0;
+			}
+			break;
+		case 't':
+			/* testing decompresses in stdout mode */
+			cflag = 1;
+			tflag = 1;
+			dflag = 1;
+			break;
+		case 'v':
+			vflag = 1;
+			break;
+		default:
+			usage();
+			/* NOTREACHED */
+		}
+	}
+	argv += optind;
+	argc -= optind;
+
+	if (argc == 0) {
+		if (dflag)	/* stdin mode */
+			handle_stdin();
+		else		/* stdout mode */
+			handle_stdout();
+	} else {
+		do {
+			handle_pathname(argv[0]);
+		} while (*++argv);
+	}
+	/* a totals line is only printed when several files were listed */
+	if (qflag == 0 && lflag && argc > 1)
+		print_list(-1, 0, "(totals)", 0);
+	exit(exit_value);
+}
+
+/*
+ * Print a warning followed by the errno text unless -q was given, and
+ * make sure the eventual exit status is non-zero.
+ */
+void
+maybe_warn(const char *fmt, ...)
+{
+	if (!qflag) {
+		va_list args;
+
+		va_start(args, fmt);
+		vwarn(fmt, args);
+		va_end(args);
+	}
+
+	/* keep a status that was already recorded */
+	if (!exit_value)
+		exit_value = 1;
+}
+
+/*
+ * Same as maybe_warn() but no errno text is appended to the message.
+ */
+void
+maybe_warnx(const char *fmt, ...)
+{
+	if (!qflag) {
+		va_list args;
+
+		va_start(args, fmt);
+		vwarnx(fmt, args);
+		va_end(args);
+	}
+
+	/* keep a status that was already recorded */
+	if (!exit_value)
+		exit_value = 1;
+}
+
+/*
+ * Fatal error: print the message with the errno text unless -q was
+ * given, then terminate the process with status 2.
+ */
+void
+maybe_err(const char *fmt, ...)
+{
+	if (!qflag) {
+		va_list args;
+
+		va_start(args, fmt);
+		vwarn(fmt, args);
+		va_end(args);
+	}
+
+	exit(2);
+}
+
+#if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \
+ !defined(NO_XZ_SUPPORT) || !defined(NO_ZSTD_SUPPORT)
+/*
+ * Same as maybe_err() but no errno text is appended to the message.
+ */
+void
+maybe_errx(const char *fmt, ...)
+{
+	if (!qflag) {
+		va_list args;
+
+		va_start(args, fmt);
+		vwarnx(fmt, args);
+		va_end(args);
+	}
+
+	exit(2);
+}
+#endif
+
+/*
+ * Split $GZIP on blanks and tabs and insert the resulting words between
+ * argv[0] and the original arguments.  *argc and *argv are updated to
+ * describe the new, NULL-terminated vector; nothing is changed when the
+ * string holds no words.  The words point into a private copy of the
+ * string that is kept for the life of the process.
+ */
+static void
+prepend_gzip(char *gzip, int *argc, char ***argv)
+{
+	static const char blanks[] = " \t";
+	char **old_argv, **new_argv, *cursor;
+	int extra, slot;
+
+	/* first pass: count the words */
+	extra = 0;
+	cursor = gzip + strspn(gzip, blanks);
+	while (*cursor != '\0') {
+		extra++;
+		cursor += strcspn(cursor, blanks);
+		cursor += strspn(cursor, blanks);
+	}
+
+	/* nothing to add */
+	if (extra == 0)
+		return;
+
+	*argc += extra;
+	old_argv = *argv;
+
+	new_argv = malloc((*argc + 1) * sizeof(*new_argv));
+	if (new_argv == NULL)
+		maybe_err("malloc");
+
+	/* publish the new vector */
+	*argv = new_argv;
+
+	/* the program name stays in front */
+	slot = 0;
+	new_argv[slot++] = *old_argv++;
+
+	/* second pass: cut a private copy of $GZIP into words */
+	cursor = strdup(gzip);
+	if (cursor == NULL)
+		maybe_err("strdup");
+	cursor += strspn(cursor, blanks);
+	while (*cursor != '\0') {
+		new_argv[slot++] = cursor;
+		cursor += strcspn(cursor, blanks);
+		if (*cursor == '\0')
+			break;		/* last word ends at the NUL */
+		*cursor++ = '\0';	/* terminate the word in place */
+		cursor += strspn(cursor, blanks);
+	}
+
+	/* append the original arguments and the closing NULL */
+	for (; *old_argv != NULL; old_argv++)
+		new_argv[slot++] = *old_argv;
+	new_argv[slot] = NULL;
+}
+
+/*
+ * Compress everything readable from descriptor `in' and write it to
+ * descriptor `out' as one gzip member: a 10 byte header, the original
+ * name when one is stored, a raw deflate stream, then the CRC32 and the
+ * low 32 bits of the input length.
+ *
+ * origname and mtime go into the header unless nflag is set, in which
+ * case an empty name and a zero timestamp are used.  If gsizep is not
+ * NULL, *gsizep receives the number of bytes written, or -1 when a write
+ * failed before the trailer.
+ *
+ * Returns the number of bytes read, or -1 on a read, deflate or trailer
+ * write error.  A write failure before the trailer is reported through
+ * *gsizep only.  Allocation and header formatting failures terminate the
+ * process through maybe_err().
+ */
+static off_t
+gz_compress(int in, int out, off_t *gsizep, const char *origname, uint32_t mtime)
+{
+	z_stream z;
+	char *outbufp, *inbufp;
+	off_t in_tot = 0, out_tot = 0;
+	ssize_t in_size;
+	int i, error;
+	int z_active = 0;	/* deflateInit2() done, deflateEnd() still owed */
+	uLong crc;
+
+	outbufp = malloc(BUFLEN);
+	inbufp = malloc(BUFLEN);
+	if (outbufp == NULL || inbufp == NULL) {
+		maybe_err("malloc failed");
+		goto out;
+	}
+
+	memset(&z, 0, sizeof z);
+	z.zalloc = Z_NULL;
+	z.zfree = Z_NULL;
+	z.opaque = 0;
+
+	if (nflag != 0) {
+		mtime = 0;
+		origname = "";
+	}
+
+	/*
+	 * Header: magic, method, flags, little-endian mtime, extra flags
+	 * (4 for -1, 2 for -9), OS code, then the name.
+	 */
+	i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c%c%c%s",
+		     GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED,
+		     *origname ? ORIG_NAME : 0,
+		     mtime & 0xff,
+		     (mtime >> 8) & 0xff,
+		     (mtime >> 16) & 0xff,
+		     (mtime >> 24) & 0xff,
+		     numflag == 1 ? 4 : numflag == 9 ? 2 : 0,
+		     OS_CODE, origname);
+	if (i >= BUFLEN)
+		/* this needs PATH_MAX > BUFLEN ... */
+		maybe_err("snprintf");
+	/* a stored name is followed by its terminating NUL */
+	if (*origname)
+		i++;
+
+	z.next_out = (unsigned char *)outbufp + i;
+	z.avail_out = BUFLEN - i;
+
+	/* negative window bits: raw deflate, the gzip framing is done here */
+	error = deflateInit2(&z, numflag, Z_DEFLATED,
+			     (-MAX_WBITS), 8, Z_DEFAULT_STRATEGY);
+	if (error != Z_OK) {
+		maybe_warnx("deflateInit2 failed");
+		in_tot = -1;
+		goto out;
+	}
+	z_active = 1;
+
+	crc = crc32(0L, Z_NULL, 0);
+	for (;;) {
+		/* output buffer full: flush it and start over */
+		if (z.avail_out == 0) {
+			if (write_retry(out, outbufp, BUFLEN) != BUFLEN) {
+				maybe_warn("write");
+				out_tot = -1;
+				goto out;
+			}
+
+			out_tot += BUFLEN;
+			z.next_out = (unsigned char *)outbufp;
+			z.avail_out = BUFLEN;
+		}
+
+		/* input consumed: read the next chunk, stop at EOF */
+		if (z.avail_in == 0) {
+			in_size = read(in, inbufp, BUFLEN);
+			if (in_size < 0) {
+				maybe_warn("read");
+				in_tot = -1;
+				goto out;
+			}
+			if (in_size == 0)
+				break;
+			infile_newdata(in_size);
+
+			crc = crc32(crc, (const Bytef *)inbufp, (unsigned)in_size);
+			in_tot += in_size;
+			z.next_in = (unsigned char *)inbufp;
+			z.avail_in = in_size;
+		}
+
+		error = deflate(&z, Z_NO_FLUSH);
+		if (error != Z_OK && error != Z_STREAM_END) {
+			maybe_warnx("deflate failed");
+			in_tot = -1;
+			goto out;
+		}
+	}
+
+	/* clean up */
+	for (;;) {
+		size_t len;
+		ssize_t w;
+
+		error = deflate(&z, Z_FINISH);
+		if (error != Z_OK && error != Z_STREAM_END) {
+			maybe_warnx("deflate failed");
+			in_tot = -1;
+			goto out;
+		}
+
+		len = (char *)z.next_out - outbufp;
+
+		w = write_retry(out, outbufp, len);
+		if (w == -1 || (size_t)w != len) {
+			maybe_warn("write");
+			out_tot = -1;
+			goto out;
+		}
+		out_tot += len;
+		z.next_out = (unsigned char *)outbufp;
+		z.avail_out = BUFLEN;
+
+		if (error == Z_STREAM_END)
+			break;
+	}
+
+	/* the stream is released here whether or not deflateEnd() succeeds */
+	z_active = 0;
+	if (deflateEnd(&z) != Z_OK) {
+		maybe_warnx("deflateEnd failed");
+		in_tot = -1;
+		goto out;
+	}
+
+	/* trailer: CRC32 and input length modulo 2^32, both little-endian */
+	i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c",
+		 (int)crc & 0xff,
+		 (int)(crc >> 8) & 0xff,
+		 (int)(crc >> 16) & 0xff,
+		 (int)(crc >> 24) & 0xff,
+		 (int)in_tot & 0xff,
+		 (int)(in_tot >> 8) & 0xff,
+		 (int)(in_tot >> 16) & 0xff,
+		 (int)(in_tot >> 24) & 0xff);
+	if (i != 8)
+		maybe_err("snprintf");
+	if (write_retry(out, outbufp, i) != i) {
+		maybe_warn("write");
+		in_tot = -1;
+	} else
+		out_tot += i;
+
+out:
+	/* error paths after deflateInit2() must still release zlib's state */
+	if (z_active)
+		(void)deflateEnd(&z);
+	free(inbufp);
+	free(outbufp);
+	if (gsizep)
+		*gsizep = out_tot;
+	return in_tot;
+}
+
+/*
+ * Uncompress the gzip stream read from `in' and write the result to `out'.
+ * The `prelen' bytes at `pre' are consumed first, as if they had been read
+ * from `in'.  The input is parsed by a state machine that goes back to
+ * GZSTATE_MAGIC0 after each trailer, so concatenated members are handled.
+ * Returns the number of uncompressed bytes produced (counted even with
+ * tflag, when nothing is written), or -1 on failure.  The compressed byte
+ * count is stored in `*gsizep' when it is non-NULL.  `filename' is only
+ * used in diagnostics.
+ * NOTE(review): no close() of `in' is visible in this function; the caller
+ * is responsible for closing it.
+ */
+static off_t
+gz_uncompress(int in, int out, char *pre, size_t prelen, off_t *gsizep,
+ const char *filename)
+{
+ z_stream z;
+ char *outbufp, *inbufp;
+ off_t out_tot = -1, in_tot = 0;
+ uint32_t out_sub_tot = 0; /* bytes produced by the current member */
+ enum {
+ GZSTATE_MAGIC0,
+ GZSTATE_MAGIC1,
+ GZSTATE_METHOD,
+ GZSTATE_FLAGS,
+ GZSTATE_SKIPPING,
+ GZSTATE_EXTRA,
+ GZSTATE_EXTRA2,
+ GZSTATE_EXTRA3,
+ GZSTATE_ORIGNAME,
+ GZSTATE_COMMENT,
+ GZSTATE_HEAD_CRC1,
+ GZSTATE_HEAD_CRC2,
+ GZSTATE_INIT,
+ GZSTATE_READ,
+ GZSTATE_CRC,
+ GZSTATE_LEN,
+ } state = GZSTATE_MAGIC0;
+ int flags = 0, skip_count = 0;
+ int error = Z_STREAM_ERROR, done_reading = 0;
+ uLong crc = 0;
+ ssize_t wr;
+ int needmore = 0; /* trailer states ask for a refill with data pending */
+ /* ADVANCE() consumes exactly one input byte. */
+#define ADVANCE() { z.next_in++; z.avail_in--; }
+
+ if ((outbufp = malloc(BUFLEN)) == NULL) {
+ maybe_err("malloc failed");
+ goto out2;
+ }
+ if ((inbufp = malloc(BUFLEN)) == NULL) {
+ maybe_err("malloc failed");
+ goto out1;
+ }
+
+ memset(&z, 0, sizeof z);
+ z.avail_in = prelen; /* start with the caller-supplied prefix */
+ z.next_in = (unsigned char *)pre;
+ z.avail_out = BUFLEN;
+ z.next_out = (unsigned char *)outbufp;
+ z.zalloc = NULL;
+ z.zfree = NULL;
+ z.opaque = 0;
+
+ in_tot = prelen;
+ out_tot = 0;
+
+ for (;;) {
+ check_siginfo();
+ if ((z.avail_in == 0 || needmore) && done_reading == 0) {
+ ssize_t in_size;
+
+ if (z.avail_in > 0) { /* keep unconsumed bytes at the buffer start */
+ memmove(inbufp, z.next_in, z.avail_in);
+ }
+ z.next_in = (unsigned char *)inbufp;
+ in_size = read(in, z.next_in + z.avail_in,
+ BUFLEN - z.avail_in);
+
+ if (in_size == -1) {
+ maybe_warn("failed to read stdin");
+ goto stop_and_fail;
+ } else if (in_size == 0) {
+ done_reading = 1;
+ }
+ infile_newdata(in_size);
+
+ z.avail_in += in_size;
+ needmore = 0;
+
+ in_tot += in_size;
+ }
+ if (z.avail_in == 0) { /* nothing left: clean EOF only between members */
+ if (done_reading && state != GZSTATE_MAGIC0) {
+ maybe_warnx("%s: unexpected end of file",
+ filename);
+ goto stop_and_fail;
+ }
+ goto stop;
+ }
+ switch (state) {
+ case GZSTATE_MAGIC0:
+ if (*z.next_in != GZIP_MAGIC0) {
+ if (in_tot > 0) {
+ maybe_warnx("%s: trailing garbage "
+ "ignored", filename);
+ exit_value = 2;
+ goto stop;
+ }
+ maybe_warnx("input not gziped (MAGIC0)");
+ goto stop_and_fail;
+ }
+ ADVANCE();
+ state++;
+ out_sub_tot = 0; /* new member: reset per-member length and CRC */
+ crc = crc32(0L, Z_NULL, 0);
+ break;
+
+ case GZSTATE_MAGIC1:
+ if (*z.next_in != GZIP_MAGIC1 &&
+ *z.next_in != GZIP_OMAGIC1) {
+ maybe_warnx("input not gziped (MAGIC1)");
+ goto stop_and_fail;
+ }
+ ADVANCE();
+ state++;
+ break;
+
+ case GZSTATE_METHOD:
+ if (*z.next_in != Z_DEFLATED) {
+ maybe_warnx("unknown compression method");
+ goto stop_and_fail;
+ }
+ ADVANCE();
+ state++;
+ break;
+
+ case GZSTATE_FLAGS:
+ flags = *z.next_in;
+ ADVANCE();
+ skip_count = 6; /* six more fixed header bytes are skipped unread */
+ state++;
+ break;
+
+ case GZSTATE_SKIPPING:
+ if (skip_count > 0) {
+ skip_count--;
+ ADVANCE();
+ } else
+ state++;
+ break;
+
+ case GZSTATE_EXTRA:
+ if ((flags & EXTRA_FIELD) == 0) {
+ state = GZSTATE_ORIGNAME;
+ break;
+ }
+ skip_count = *z.next_in; /* low byte of the extra field length */
+ ADVANCE();
+ state++;
+ break;
+
+ case GZSTATE_EXTRA2:
+ skip_count |= ((*z.next_in) << 8); /* high byte of the length */
+ ADVANCE();
+ state++;
+ break;
+
+ case GZSTATE_EXTRA3:
+ if (skip_count > 0) {
+ skip_count--;
+ ADVANCE();
+ } else
+ state++;
+ break;
+
+ case GZSTATE_ORIGNAME:
+ if ((flags & ORIG_NAME) == 0) {
+ state++;
+ break;
+ }
+ if (*z.next_in == 0) /* the NUL ends the stored name */
+ state++;
+ ADVANCE();
+ break;
+
+ case GZSTATE_COMMENT:
+ if ((flags & COMMENT) == 0) {
+ state++;
+ break;
+ }
+ if (*z.next_in == 0) /* the NUL ends the comment */
+ state++;
+ ADVANCE();
+ break;
+
+ case GZSTATE_HEAD_CRC1:
+ if (flags & HEAD_CRC)
+ skip_count = 2;
+ else
+ skip_count = 0;
+ state++;
+ break;
+
+ case GZSTATE_HEAD_CRC2:
+ if (skip_count > 0) {
+ skip_count--;
+ ADVANCE();
+ } else
+ state++;
+ break;
+
+ case GZSTATE_INIT:
+ if (inflateInit2(&z, -MAX_WBITS) != Z_OK) {
+ maybe_warnx("failed to inflateInit");
+ goto stop_and_fail;
+ }
+ state++;
+ break;
+
+ case GZSTATE_READ:
+ error = inflate(&z, Z_FINISH);
+ switch (error) {
+ /* Z_BUF_ERROR goes with Z_FINISH... */
+ case Z_BUF_ERROR:
+ if (z.avail_out > 0 && !done_reading)
+ continue;
+ /* FALLTHROUGH */
+ case Z_STREAM_END:
+ case Z_OK:
+ break;
+
+ case Z_NEED_DICT:
+ maybe_warnx("Z_NEED_DICT error");
+ goto stop_and_fail;
+ case Z_DATA_ERROR:
+ maybe_warnx("data stream error");
+ goto stop_and_fail;
+ case Z_STREAM_ERROR:
+ maybe_warnx("internal stream error");
+ goto stop_and_fail;
+ case Z_MEM_ERROR:
+ maybe_warnx("memory allocation error");
+ goto stop_and_fail;
+
+ default:
+ maybe_warn("unknown error from inflate(): %d",
+ error);
+ }
+ wr = BUFLEN - z.avail_out; /* bytes inflate() produced this round */
+
+ if (wr != 0) {
+ crc = crc32(crc, (const Bytef *)outbufp, (unsigned)wr);
+ if (
+ /* don't write anything with -t */
+ tflag == 0 &&
+ write_retry(out, outbufp, wr) != wr) {
+ maybe_warn("error writing to output");
+ goto stop_and_fail;
+ }
+
+ out_tot += wr;
+ out_sub_tot += wr;
+ }
+
+ if (error == Z_STREAM_END) {
+ inflateEnd(&z);
+ state++;
+ }
+
+ z.next_out = (unsigned char *)outbufp;
+ z.avail_out = BUFLEN;
+
+ break;
+ case GZSTATE_CRC:
+ {
+ uLong origcrc;
+
+ if (z.avail_in < 4) { /* the stored CRC needs four bytes */
+ if (!done_reading) {
+ needmore = 1;
+ continue;
+ }
+ maybe_warnx("truncated input");
+ goto stop_and_fail;
+ }
+ origcrc = le32dec(&z.next_in[0]);
+ if (origcrc != crc) {
+ maybe_warnx("invalid compressed"
+ " data--crc error");
+ goto stop_and_fail;
+ }
+ }
+
+ z.avail_in -= 4;
+ z.next_in += 4;
+
+ if (!z.avail_in && done_reading) { /* input ended right after the CRC */
+ goto stop;
+ }
+ state++;
+ break;
+ case GZSTATE_LEN:
+ {
+ uLong origlen;
+
+ if (z.avail_in < 4) { /* the stored length needs four bytes */
+ if (!done_reading) {
+ needmore = 1;
+ continue;
+ }
+ maybe_warnx("truncated input");
+ goto stop_and_fail;
+ }
+ origlen = le32dec(&z.next_in[0]);
+
+ if (origlen != out_sub_tot) {
+ maybe_warnx("invalid compressed"
+ " data--length error");
+ goto stop_and_fail;
+ }
+ }
+
+ z.avail_in -= 4;
+ z.next_in += 4;
+
+ if (error < 0) {
+ maybe_warnx("decompression error");
+ goto stop_and_fail;
+ }
+ state = GZSTATE_MAGIC0; /* look for another concatenated member */
+ break;
+ }
+ continue;
+stop_and_fail:
+ out_tot = -1;
+stop:
+ break;
+ }
+ if (state > GZSTATE_INIT) /* the inflate state was set up at some point */
+ inflateEnd(&z);
+
+ free(inbufp);
+out1:
+ free(outbufp);
+out2:
+ if (gsizep)
+ *gsizep = in_tot;
+ return (out_tot);
+}
+
+/*
+ * Give the open descriptor `fd' the owner, permission bits, timestamps and
+ * file flags recorded in `sbp'.  `file' only appears in warning messages.
+ */
+static void
+copymodes(int fd, const struct stat *sbp, const char *file)
+{
+	const mode_t allowed = S_ISUID | S_ISGID | S_IRWXU | S_IRWXG | S_IRWXO;
+	struct timespec times[2];
+	struct stat sb;
+	mode_t mask;
+
+	/* No information about the input: apply defaults and stop. */
+	if (sbp == NULL) {
+		mask = umask(022);
+		(void)fchmod(fd, DEFFILEMODE & ~mask);
+		(void)umask(mask);
+		return;
+	}
+	sb = *sbp;
+
+	/* if the chown fails, remove set-id bits as-per compress(1) */
+	if (fchown(fd, sb.st_uid, sb.st_gid) < 0) {
+		if (errno != EPERM)
+			maybe_warn("couldn't fchown: %s", file);
+		sb.st_mode &= ~(S_ISUID|S_ISGID);
+	}
+
+	/* we only allow set-id and the 9 normal permission bits */
+	sb.st_mode &= allowed;
+	if (fchmod(fd, sb.st_mode) < 0)
+		maybe_warn("couldn't fchmod: %s", file);
+
+	/* access time first, modification time second */
+	times[0] = sb.st_atim;
+	times[1] = sb.st_mtim;
+	if (futimens(fd, times) < 0)
+		maybe_warn("couldn't futimens: %s", file);
+
+	/* only try flags if they exist already */
+	if (sb.st_flags == 0)
+		return;
+	if (fchflags(fd, sb.st_flags) < 0)
+		maybe_warn("couldn't fchflags: %s", file);
+}
+
+/*
+ * Classify a file from the magic number at the start of `buf'.  The
+ * checks look at up to the first four bytes; the first match wins and
+ * FT_UNKNOWN is returned when nothing matches.
+ */
+static enum filetype
+file_gettype(u_char *buf)
+{
+
+	if (buf[0] == GZIP_MAGIC0 &&
+	    (buf[1] == GZIP_MAGIC1 || buf[1] == GZIP_OMAGIC1))
+		return FT_GZIP;
+#ifndef NO_BZIP2_SUPPORT
+	if (memcmp(buf, BZIP2_MAGIC, 3) == 0 &&
+	    buf[3] >= '0' && buf[3] <= '9')
+		return FT_BZIP2;
+#endif
+#ifndef NO_COMPRESS_SUPPORT
+	if (memcmp(buf, Z_MAGIC, 2) == 0)
+		return FT_Z;
+#endif
+#ifndef NO_PACK_SUPPORT
+	if (memcmp(buf, PACK_MAGIC, 2) == 0)
+		return FT_PACK;
+#endif
+#ifndef NO_XZ_SUPPORT
+	/* XXX: We only have 4 bytes */
+	if (memcmp(buf, XZ_MAGIC, 4) == 0)
+		return FT_XZ;
+#endif
+#ifndef NO_LZ_SUPPORT
+	if (memcmp(buf, LZ_MAGIC, 4) == 0)
+		return FT_LZ;
+#endif
+#ifndef NO_ZSTD_SUPPORT
+	if (memcmp(buf, ZSTD_MAGIC, 4) == 0)
+		return FT_ZSTD;
+#endif
+	return FT_UNKNOWN;
+}
+
+/*
+ * Decide whether `outfile' may be created.  Returns 1 when it is fine to
+ * go ahead (an existing file has then been unlinked), 0 when the file
+ * must be left alone.
+ */
+static int
+check_outfile(const char *outfile)
+{
+	struct stat sb;
+	char ans[10] = { 'n', '\0' };	/* default */
+
+	/* listing only, or nothing is in the way */
+	if (lflag != 0 || stat(outfile, &sb) != 0)
+		return 1;
+
+	/* -f: overwrite without asking */
+	if (fflag) {
+		unlink(outfile);
+		return 1;
+	}
+
+	/* nobody to ask */
+	if (!isatty(STDIN_FILENO)) {
+		maybe_warnx("%s already exists -- skipping", outfile);
+		return 0;
+	}
+
+	fprintf(stderr, "%s already exists -- do you wish to "
+	    "overwrite (y or n)? " , outfile);
+	(void)fgets(ans, sizeof(ans) - 1, stdin);
+	if (ans[0] == 'y' || ans[0] == 'Y') {
+		unlink(outfile);
+		return 1;
+	}
+	fprintf(stderr, "\tnot overwriting\n");
+	return 0;
+}
+
+/*
+ * Remove the input `file' unless -k was given.  The file is only
+ * unlinked when it still has the device and inode recorded in `sb'.
+ */
+static void
+unlink_input(const char *file, const struct stat *sb)
+{
+	struct stat nsb;
+
+	/* keep requested, or the file must be gone already */
+	if (kflag || stat(file, &nsb) != 0)
+		return;
+	/* anything else is definitely a different file */
+	if (nsb.st_dev == sb->st_dev && nsb.st_ino == sb->st_ino)
+		unlink(file);
+}
+
+/*
+ * SIGINT handler: unlink the file named by remove_file, if one is set,
+ * and leave immediately with status 2.
+ */
+static void
+got_sigint(int signo __unused)
+{
+
+	if (remove_file)
+		unlink(remove_file);
+	_exit(2);
+}
+
+/* SIGINFO handler: only raise the flag that check_siginfo() polls. */
+static void
+got_siginfo(int signo __unused)
+{
+
+	print_info = 1;
+}
+
+/* Install the handlers for SIGINT and SIGINFO. */
+static void
+setup_signals(void)
+{
+
+	signal(SIGINT, got_sigint);
+	signal(SIGINFO, got_siginfo);
+}
+
+/* Account for `newdata' more bytes consumed from the current input. */
+static void
+infile_newdata(size_t newdata)
+{
+
+	infile_current = infile_current + newdata;
+}
+
+/*
+ * Record the expected total size of the current input and, when
+ * `newinfile' is not NULL, its name as well.
+ */
+static void
+infile_set(const char *newinfile, off_t total)
+{
+
+	infile_total = total;
+	if (newinfile != NULL)
+		infile = newinfile;
+}
+
+/* Forget the current input: no name, and both counters back to zero. */
+static void
+infile_clear(void)
+{
+
+	infile = NULL;
+	infile_current = 0;
+	infile_total = 0;
+}
+
+/*
+ * Find the entry of the suffixes table whose zipped suffix ends `file'.
+ * With `xlate' set the suffix in `file' is overwritten in place with the
+ * entry's normal form.  Returns the matching entry, or NULL if none.
+ */
+static const suffixes_t *
+check_suffix(char *file, int xlate)
+{
+	const suffixes_t *s;
+	int len = strlen(file);
+	char *sp;
+
+	s = suffixes;
+	while (s != suffixes + NUM_SUFFIXES) {
+		/* the suffix has to fit in "a.suf" to be worth comparing */
+		if (s->ziplen < len) {
+			sp = file + len - s->ziplen;
+			if (strcmp(s->zipped, sp) == 0) {
+				if (xlate)
+					strcpy(sp, s->normal);
+				return s;
+			}
+		}
+		s++;
+	}
+	return NULL;
+}
+
+/*
+ * compress the given file: create a corresponding .gz file and remove the
+ * original.
+ *
+ * With cflag set the data goes to stdout and the input is kept.  Otherwise
+ * the output name is built in `outfile' (capacity `outsize').  Returns the
+ * compressed size, or -1 on any failure; on failure an output file that
+ * was created here is removed and the input is left in place.
+ */
+static off_t
+file_compress(char *file, char *outfile, size_t outsize)
+{
+	int in;
+	int out;
+	off_t size, in_size;
+	struct stat isb, osb;
+	const suffixes_t *suff;
+
+	in = open(file, O_RDONLY);
+	if (in == -1) {
+		maybe_warn("can't open %s", file);
+		return (-1);
+	}
+
+	if (fstat(in, &isb) != 0) {
+		maybe_warn("couldn't stat: %s", file);
+		close(in);
+		return (-1);
+	}
+	infile_set(file, isb.st_size);
+
+	if (cflag == 0) {
+		if (isb.st_nlink > 1 && fflag == 0) {
+			/* st_nlink - 1 links are reported: singular only for 2 */
+			maybe_warnx("%s has %ju other link%s -- "
+				    "skipping", file,
+				    (uintmax_t)isb.st_nlink - 1,
+				    isb.st_nlink == 2 ? "" : "s");
+			close(in);
+			return -1;
+		}
+
+		if (fflag == 0 && (suff = check_suffix(file, 0)) &&
+		    suff->zipped[0] != 0) {
+			maybe_warnx("%s already has %s suffix -- unchanged",
+			    file, suff->zipped);
+			close(in);
+			return (-1);
+		}
+
+		/* Add (usually) .gz to filename */
+		if ((size_t)snprintf(outfile, outsize, "%s%s",
+		    file, suffixes[0].zipped) >= outsize)
+			/* truncated: force the suffix over the tail */
+			memcpy(outfile + outsize - suffixes[0].ziplen - 1,
+			    suffixes[0].zipped, suffixes[0].ziplen + 1);
+
+		if (check_outfile(outfile) == 0) {
+			close(in);
+			return (-1);
+		}
+	}
+
+	if (cflag == 0) {
+		out = open(outfile, O_WRONLY | O_CREAT | O_EXCL, 0600);
+		if (out == -1) {
+			maybe_warn("could not create output: %s", outfile);
+			/* release the input descriptor, not stdin */
+			close(in);
+			return (-1);
+		}
+		remove_file = outfile;
+	} else
+		out = STDOUT_FILENO;
+
+	in_size = gz_compress(in, out, &size, basename(file), (uint32_t)isb.st_mtime);
+
+	(void)close(in);
+
+	/*
+	 * If there was an error, in_size will be -1.
+	 * If we compressed to stdout, just return the size.
+	 * Otherwise stat the file and check it is the correct size.
+	 * We only blow away the file if we can stat the output and it
+	 * has the expected size.
+	 */
+	if (cflag != 0)
+		return in_size == -1 ? -1 : size;
+
+	/*
+	 * A failed compression can still leave an output file whose size
+	 * matches the byte count written so far; never treat that as good.
+	 */
+	if (in_size == -1)
+		goto bad_outfile;
+
+	if (fstat(out, &osb) != 0) {
+		maybe_warn("couldn't stat: %s", outfile);
+		goto bad_outfile;
+	}
+
+	if (osb.st_size != size) {
+		maybe_warnx("output file: %s wrong size (%ju != %ju), deleting",
+		    outfile, (uintmax_t)osb.st_size, (uintmax_t)size);
+		goto bad_outfile;
+	}
+
+	copymodes(out, &isb, outfile);
+	remove_file = NULL;
+	if (close(out) == -1)
+		maybe_warn("couldn't close output");
+
+	/* output is good, ok to delete input */
+	unlink_input(file, &isb);
+	return (size);
+
+ bad_outfile:
+	if (close(out) == -1)
+		maybe_warn("couldn't close output");
+
+	maybe_warnx("leaving original %s", file);
+	unlink(outfile);
+	/* the output is gone: nothing left for the SIGINT handler to remove */
+	remove_file = NULL;
+	return (-1);
+}
+
+/*
+ * uncompress the given file and remove the original
+ *
+ * The output name is derived in `outfile' (capacity `outsize') from the
+ * suffix of `file', or from the stored name when Nflag is set.  With cflag
+ * the data goes to stdout; with lflag the file is only listed.  Returns the
+ * uncompressed size, or -1 on failure and on the listing-only paths.
+ */
+static off_t
+file_uncompress(char *file, char *outfile, size_t outsize)
+{
+ struct stat isb, osb;
+ off_t size;
+ ssize_t rbytes;
+ unsigned char fourbytes[4]; /* first bytes of the file, used to pick the format */
+ enum filetype method;
+ int fd, ofd, zfd = -1;
+ int error;
+ size_t in_size;
+ ssize_t rv;
+ time_t timestamp = 0;
+ char name[PATH_MAX + 1];
+
+ /* gather the old name info */
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ maybe_warn("can't open %s", file);
+ goto lose;
+ }
+ if (fstat(fd, &isb) != 0) {
+ maybe_warn("can't stat %s", file);
+ goto lose;
+ }
+ if (S_ISREG(isb.st_mode))
+ in_size = isb.st_size;
+ else
+ in_size = 0;
+ infile_set(file, in_size);
+
+ strlcpy(outfile, file, outsize);
+ if (check_suffix(outfile, 1) == NULL && !(cflag || lflag)) {
+ maybe_warnx("%s: unknown suffix -- ignored", file);
+ goto lose;
+ }
+
+ rbytes = read(fd, fourbytes, sizeof fourbytes);
+ if (rbytes != sizeof fourbytes) {
+ /* we don't want to fail here. */
+ if (fflag)
+ goto lose;
+ if (rbytes == -1)
+ maybe_warn("can't read %s", file);
+ else
+ goto unexpected_EOF;
+ goto lose;
+ }
+ infile_newdata(rbytes);
+
+ method = file_gettype(fourbytes);
+ if (fflag == 0 && method == FT_UNKNOWN) {
+ maybe_warnx("%s: not in gzip format", file);
+ goto lose;
+ }
+
+ /* -N: fetch the stored timestamp and, if present, the stored name */
+ if (method == FT_GZIP && Nflag) {
+ unsigned char ts[4]; /* timestamp */
+
+ rv = pread(fd, ts, sizeof ts, GZIP_TIMESTAMP);
+ if (rv >= 0 && rv < (ssize_t)(sizeof ts))
+ goto unexpected_EOF;
+ if (rv == -1) {
+ if (!fflag)
+ maybe_warn("can't read %s", file);
+ goto lose;
+ }
+ infile_newdata(rv);
+ timestamp = le32dec(&ts[0]);
+
+ if (fourbytes[3] & ORIG_NAME) {
+ rbytes = pread(fd, name, sizeof(name) - 1, GZIP_ORIGNAME);
+ if (rbytes < 0) {
+ maybe_warn("can't read %s", file);
+ goto lose;
+ }
+ if (name[0] != '\0') {
+ char *dp, *nf;
+
+ /* Make sure that name is NUL-terminated */
+ name[rbytes] = '\0';
+
+ /* strip saved directory name */
+ nf = strrchr(name, '/');
+ if (nf == NULL)
+ nf = name;
+ else
+ nf++;
+
+ /* preserve original directory name */
+ dp = strrchr(file, '/');
+ if (dp == NULL)
+ dp = file;
+ else
+ dp++;
+ snprintf(outfile, outsize, "%.*s%.*s",
+ (int) (dp - file),
+ file, (int) rbytes, nf);
+ }
+ }
+ }
+ lseek(fd, 0, SEEK_SET); /* rewind past the bytes peeked at above */
+
+ if (cflag == 0 || lflag) {
+ if (isb.st_nlink > 1 && lflag == 0 && fflag == 0) {
+ maybe_warnx("%s has %ju other links -- skipping",
+ file, (uintmax_t)isb.st_nlink - 1);
+ goto lose;
+ }
+ if (nflag == 0 && timestamp)
+ isb.st_mtime = timestamp;
+ if (check_outfile(outfile) == 0)
+ goto lose;
+ }
+
+ if (cflag)
+ zfd = STDOUT_FILENO;
+ else if (lflag)
+ zfd = -1;
+ else {
+ zfd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600);
+ if (zfd == STDOUT_FILENO) {
+ /* We won't close STDOUT_FILENO later... */
+ zfd = dup(zfd);
+ close(STDOUT_FILENO);
+ }
+ if (zfd == -1) {
+ maybe_warn("can't open %s", outfile);
+ goto lose;
+ }
+ remove_file = outfile;
+ }
+
+ switch (method) {
+#ifndef NO_BZIP2_SUPPORT
+ case FT_BZIP2:
+ /* XXX */
+ if (lflag) {
+ maybe_warnx("no -l with bzip2 files");
+ goto lose;
+ }
+
+ size = unbzip2(fd, zfd, NULL, 0, NULL);
+ break;
+#endif
+
+#ifndef NO_COMPRESS_SUPPORT
+ case FT_Z: {
+ FILE *in, *out;
+
+ /* XXX */
+ if (lflag) {
+ maybe_warnx("no -l with Lempel-Ziv files");
+ goto lose;
+ }
+
+ if ((in = zdopen(fd)) == NULL) {
+ maybe_warn("zdopen for read: %s", file);
+ goto lose;
+ }
+
+ out = fdopen(dup(zfd), "w");
+ if (out == NULL) {
+ maybe_warn("fdopen for write: %s", outfile);
+ fclose(in);
+ goto lose;
+ }
+
+ size = zuncompress(in, out, NULL, 0, NULL);
+ /* need to fclose() if ferror() is true... */
+ error = ferror(in);
+ if (error | fclose(in)) {
+ if (error)
+ maybe_warn("failed infile");
+ else
+ maybe_warn("failed infile fclose");
+ if (cflag == 0)
+ unlink(outfile);
+ (void)fclose(out);
+ goto lose;
+ }
+ if (fclose(out) != 0) {
+ maybe_warn("failed outfile fclose");
+ if (cflag == 0)
+ unlink(outfile);
+ goto lose;
+ }
+ break;
+ }
+#endif
+
+#ifndef NO_PACK_SUPPORT
+ case FT_PACK:
+ if (lflag) {
+ maybe_warnx("no -l with packed files");
+ goto lose;
+ }
+
+ size = unpack(fd, zfd, NULL, 0, NULL);
+ break;
+#endif
+
+#ifndef NO_XZ_SUPPORT
+ case FT_XZ:
+ if (lflag) {
+ size = unxz_len(fd);
+ if (!tflag) {
+ print_list_out(in_size, size, file);
+ close(fd);
+ return -1;
+ }
+ } else
+ size = unxz(fd, zfd, NULL, 0, NULL);
+ break;
+#endif
+
+#ifndef NO_LZ_SUPPORT
+ case FT_LZ:
+ if (lflag) {
+ maybe_warnx("no -l with lzip files");
+ goto lose;
+ }
+ size = unlz(fd, zfd, NULL, 0, NULL);
+ break;
+#endif
+
+#ifndef NO_ZSTD_SUPPORT
+ case FT_ZSTD:
+ if (lflag) {
+ maybe_warnx("no -l with zstd files");
+ goto lose;
+ }
+ size = unzstd(fd, zfd, NULL, 0, NULL);
+ break;
+#endif
+ case FT_UNKNOWN:
+ if (lflag) {
+ maybe_warnx("no -l for unknown filetypes");
+ goto lose;
+ }
+ size = cat_fd(NULL, 0, NULL, fd);
+ break;
+ default: /* every remaining type is handled as gzip */
+ if (lflag) {
+ print_list(fd, in_size, outfile, isb.st_mtime);
+ if (!tflag) {
+ close(fd);
+ return -1; /* XXX */
+ }
+ }
+
+ size = gz_uncompress(fd, zfd, NULL, 0, NULL, file);
+ break;
+ }
+
+ if (close(fd) != 0)
+ maybe_warn("couldn't close input");
+ if (zfd != STDOUT_FILENO && close(zfd) != 0)
+ maybe_warn("couldn't close output");
+
+ if (size == -1) {
+ if (cflag == 0)
+ unlink(outfile);
+ maybe_warnx("%s: uncompress failed", file);
+ return -1;
+ }
+
+ /* if testing, or we uncompressed to stdout, this is all we need */
+ if (tflag)
+ return size;
+ /* if we are uncompressing to stdout, don't remove the file. */
+ if (cflag)
+ return size;
+
+ /*
+ * if we create a file...
+ */
+ /*
+ * if we can't stat the file don't remove the file.
+ */
+
+ ofd = open(outfile, O_RDWR, 0);
+ if (ofd == -1) {
+ maybe_warn("couldn't open (leaving original): %s",
+ outfile);
+ return -1;
+ }
+ if (fstat(ofd, &osb) != 0) {
+ maybe_warn("couldn't stat (leaving original): %s",
+ outfile);
+ close(ofd);
+ return -1;
+ }
+ if (osb.st_size != size) {
+ maybe_warnx("stat gave different size: %ju != %ju (leaving original)",
+ (uintmax_t)size, (uintmax_t)osb.st_size);
+ close(ofd);
+ unlink(outfile);
+ return -1;
+ }
+ copymodes(ofd, &isb, outfile);
+ remove_file = NULL;
+ close(ofd);
+ unlink_input(file, &isb);
+ return size;
+
+ unexpected_EOF:
+ maybe_warnx("%s: unexpected end of file", file);
+ lose: /* common failure exit: release whatever descriptors were opened */
+ if (fd != -1)
+ close(fd);
+ if (zfd != -1 && zfd != STDOUT_FILENO)
+ close(zfd);
+ return -1;
+}
+
+/*
+ * If the print_info flag is set, report progress on the current input
+ * to stderr and clear the flag again.
+ */
+static void
+check_siginfo(void)
+{
+	int pcent;
+
+	if (!print_info)
+		return;
+
+	if (infile != NULL && infile_total != 0) {
+		/* total known: show a percentage as well */
+		pcent = (int)((100.0 * infile_current) / infile_total);
+		fprintf(stderr, "%s: done %llu/%llu bytes %d%%\n",
+		    infile, (unsigned long long)infile_current,
+		    (unsigned long long)infile_total, pcent);
+	} else if (infile != NULL) {
+		fprintf(stderr, "%s: done %llu bytes\n",
+		    infile, (unsigned long long)infile_current);
+	}
+	print_info = 0;
+}
+
+/*
+ * Copy `count' bytes from `prepend' and then everything readable from
+ * `fd' to standard output, unchanged.  The number of bytes copied is
+ * stored in `*gsizep' when it is non-NULL.  Returns that byte count, or
+ * -1 if any read or write failed.
+ */
+static off_t
+cat_fd(unsigned char * prepend, size_t count, off_t *gsizep, int fd)
+{
+	char buf[BUFLEN];
+	off_t in_tot;
+	ssize_t w;
+	int failed = 0;
+
+	in_tot = count;
+	w = write_retry(STDOUT_FILENO, prepend, count);
+	if (w == -1 || (size_t)w != count) {
+		maybe_warn("write to stdout");
+		return -1;
+	}
+	for (;;) {
+		ssize_t rv;
+
+		rv = read(fd, buf, sizeof buf);
+		if (rv == 0)
+			break;
+		if (rv < 0) {
+			maybe_warn("read from fd %d", fd);
+			failed = 1;
+			break;
+		}
+		infile_newdata(rv);
+
+		if (write_retry(STDOUT_FILENO, buf, rv) != rv) {
+			maybe_warn("write to stdout");
+			failed = 1;
+			break;
+		}
+		in_tot += rv;
+	}
+
+	if (gsizep)
+		*gsizep = in_tot;
+	/* a truncated copy must not be reported as a success */
+	return (failed ? -1 : in_tot);
+}
+
+/*
+ * Decompress standard input to standard output (or list it with -l).
+ * The decoder is picked from the first four bytes of the stream, which
+ * are handed on to it as already-read data.
+ */
+static void
+handle_stdin(void)
+{
+	struct stat st;
+	unsigned char magic[4];
+	size_t in_size;
+	off_t usize, gsize;
+	enum filetype method;
+	ssize_t got;
+#ifndef NO_COMPRESS_SUPPORT
+	FILE *in;
+#endif
+
+	if (fflag == 0 && lflag == 0 && isatty(STDIN_FILENO)) {
+		maybe_warnx("standard input is a terminal -- ignoring");
+		goto out;
+	}
+
+	if (fstat(STDIN_FILENO, &st) < 0) {
+		maybe_warn("fstat");
+		goto out;
+	}
+	/* only a regular file has a meaningful total size */
+	in_size = S_ISREG(st.st_mode) ? st.st_size : 0;
+	infile_set("(stdin)", in_size);
+
+	if (lflag) {
+		print_list(STDIN_FILENO, in_size, infile, st.st_mtime);
+		goto out;
+	}
+
+	got = read_retry(STDIN_FILENO, magic, sizeof magic);
+	if (got == -1) {
+		maybe_warn("can't read stdin");
+		goto out;
+	} else if (got != sizeof(magic)) {
+		maybe_warnx("(stdin): unexpected end of file");
+		goto out;
+	}
+
+	method = file_gettype(magic);
+	switch (method) {
+	case FT_GZIP:
+		usize = gz_uncompress(STDIN_FILENO, STDOUT_FILENO,
+		    (char *)magic, sizeof magic, &gsize, "(stdin)");
+		break;
+#ifndef NO_BZIP2_SUPPORT
+	case FT_BZIP2:
+		usize = unbzip2(STDIN_FILENO, STDOUT_FILENO,
+		    (char *)magic, sizeof magic, &gsize);
+		break;
+#endif
+#ifndef NO_COMPRESS_SUPPORT
+	case FT_Z:
+		if ((in = zdopen(STDIN_FILENO)) == NULL) {
+			maybe_warnx("zopen of stdin");
+			goto out;
+		}
+
+		usize = zuncompress(in, stdout, (char *)magic,
+		    sizeof magic, &gsize);
+		fclose(in);
+		break;
+#endif
+#ifndef NO_PACK_SUPPORT
+	case FT_PACK:
+		usize = unpack(STDIN_FILENO, STDOUT_FILENO,
+		    (char *)magic, sizeof magic, &gsize);
+		break;
+#endif
+#ifndef NO_XZ_SUPPORT
+	case FT_XZ:
+		usize = unxz(STDIN_FILENO, STDOUT_FILENO,
+		    (char *)magic, sizeof magic, &gsize);
+		break;
+#endif
+#ifndef NO_LZ_SUPPORT
+	case FT_LZ:
+		usize = unlz(STDIN_FILENO, STDOUT_FILENO,
+		    (char *)magic, sizeof magic, &gsize);
+		break;
+#endif
+#ifndef NO_ZSTD_SUPPORT
+	case FT_ZSTD:
+		usize = unzstd(STDIN_FILENO, STDOUT_FILENO,
+		    (char *)magic, sizeof magic, &gsize);
+		break;
+#endif
+	default:
+		/* anything unrecognised is only passed through with -f */
+		if (fflag == 0) {
+			maybe_warnx("unknown compression format");
+			goto out;
+		}
+		usize = cat_fd(magic, sizeof magic, &gsize, STDIN_FILENO);
+		break;
+	}
+
+	if (vflag) {
+		if (tflag)
+			print_test("(stdin)", usize != -1);
+		else if (usize != -1 && gsize != -1)
+			print_verbage(NULL, NULL, usize, gsize);
+	}
+
+out:
+	infile_clear();
+}
+
+/*
+ * Compress standard input to standard output.  The header timestamp is
+ * the mtime of stdin when that is a regular file, else the current time.
+ */
+static void
+handle_stdout(void)
+{
+	struct stat sb;
+	off_t usize, gsize;
+	time_t now;
+	uint32_t mtime;
+
+	infile_set("(stdout)", 0);
+
+	if (fflag == 0 && isatty(STDOUT_FILENO)) {
+		maybe_warnx("standard output is a terminal -- ignoring");
+		return;
+	}
+
+	/* If stdin is a file use its mtime, otherwise use current time */
+	if (fstat(STDIN_FILENO, &sb) < 0) {
+		maybe_warn("Can't stat stdin");
+		return;
+	}
+
+	if (!S_ISREG(sb.st_mode)) {
+		now = time(NULL);
+		if (now == -1) {
+			maybe_warn("time");
+			return;
+		}
+		mtime = (uint32_t)now;
+	} else {
+		infile_set("(stdout)", sb.st_size);
+		mtime = (uint32_t)sb.st_mtime;
+	}
+
+	usize = gz_compress(STDIN_FILENO, STDOUT_FILENO, &gsize, "", mtime);
+	if (vflag && !tflag && usize != -1 && gsize != -1)
+		print_verbage(NULL, NULL, usize, gsize);
+}
+
+/*
+ * Process one command line operand: "-" means stdin/stdout, a directory
+ * is descended with -r, a regular file is (de)compressed.  When
+ * decompressing a name that does not exist, the name with the default
+ * suffix appended is tried once.
+ */
+static void
+handle_pathname(char *path)
+{
+	char *opath = path, *s = NULL;
+	ssize_t len;
+	int slen;
+	struct stat sb;
+
+	/* check for stdout/stdin */
+	if (path[0] == '-' && path[1] == '\0') {
+		if (dflag)
+			handle_stdin();
+		else
+			handle_stdout();
+		return;
+	}
+
+	for (;;) {
+		if (stat(path, &sb) == 0 && (fflag != 0 || cflag != 0 ||
+		    lstat(path, &sb) == 0))
+			break;
+
+		/* lets try <path>.gz if we're decompressing */
+		if (!(dflag && s == NULL && errno == ENOENT)) {
+			maybe_warn("can't stat: %s", opath);
+			goto out;
+		}
+		len = strlen(path);
+		slen = suffixes[0].ziplen;
+		s = malloc(len + slen + 1);
+		if (s == NULL)
+			maybe_err("malloc");
+		memcpy(s, path, len);
+		/* slen + 1 copies the suffix's terminating NUL too */
+		memcpy(s + len, suffixes[0].zipped, slen + 1);
+		path = s;
+	}
+
+	if (S_ISDIR(sb.st_mode)) {
+		if (rflag)
+			handle_dir(path);
+		else
+			maybe_warnx("%s is a directory", path);
+	} else if (S_ISREG(sb.st_mode)) {
+		handle_file(path, &sb);
+	} else {
+		maybe_warnx("%s is not a regular file", path);
+	}
+
+out:
+	free(s);
+}
+
+/*
+ * compress/decompress a file
+ *
+ * `sbp' is the stat information the caller already has for `file'; its
+ * size is used for the progress total and for the -v statistics.
+ */
+static void
+handle_file(char *file, struct stat *sbp)
+{
+	off_t usize, gsize;
+	char outfile[PATH_MAX];
+
+	infile_set(file, sbp->st_size);
+	if (dflag) {
+		usize = file_uncompress(file, outfile, sizeof(outfile));
+		if (vflag && tflag)
+			print_test(file, usize != -1);
+		if (usize == -1) {
+			/* do not leave the progress state on a finished file */
+			infile_clear();
+			return;
+		}
+		gsize = sbp->st_size;
+	} else {
+		gsize = file_compress(file, outfile, sizeof(outfile));
+		if (gsize == -1) {
+			infile_clear();
+			return;
+		}
+		usize = sbp->st_size;
+	}
+	infile_clear();
+
+	if (vflag && !tflag)
+		print_verbage(file, (cflag) ? NULL : outfile, usize, gsize);
+}
+
+/*
+ * Used with -r: walk the tree rooted at `dir' and hand every regular
+ * file found to handle_file().
+ */
+static void
+handle_dir(char *dir)
+{
+	char *roots[2] = { dir, 0 };
+	FTS *fts;
+	FTSENT *entry;
+
+	fts = fts_open(roots, FTS_PHYSICAL | FTS_NOCHDIR, NULL);
+	if (fts == NULL) {
+		warn("couldn't fts_open %s", dir);
+		return;
+	}
+
+	for (;;) {
+		/* errno tells end of tree and failure apart below */
+		errno = 0;
+		entry = fts_read(fts);
+		if (entry == NULL)
+			break;
+
+		switch(entry->fts_info) {
+		case FTS_DNR:
+		case FTS_ERR:
+		case FTS_NS:
+			maybe_warn("%s", entry->fts_path);
+			break;
+		case FTS_F:
+			handle_file(entry->fts_path, entry->fts_statp);
+			break;
+		case FTS_D:
+		case FTS_DP:
+			/* directories themselves need no work */
+			break;
+		}
+	}
+	if (errno != 0)
+		warn("error with fts_read %s", dir);
+	(void)fts_close(fts);
+}
+
+/*
+ * Print the size reduction as a percentage of the uncompressed size,
+ * with one decimal, to `where'.  `in' is the uncompressed and `out' the
+ * compressed size.
+ */
+static void
+print_ratio(off_t in, off_t out, FILE *where)
+{
+	int percent10 = 0;	/* 10 * percent */
+	off_t diff = in - out/2;
+	char buff[8];
+	int len;
+	char last;
+
+	if (in == 0 && out == 0) {
+		percent10 = 0;
+	} else if (diff < 0) {
+		/*
+		 * Output is more than double size of input! print -99.9%
+		 * Quite possibly we've failed to get the original size.
+		 */
+		percent10 = -999;
+	} else {
+		/*
+		 * We only need 12 bits of result from the final division,
+		 * so reduce the values until a 32bit division will suffice.
+		 */
+		for (; in > 0x100000; in >>= 1)
+			diff >>= 1;
+		if (in != 0)
+			percent10 = ((u_int)diff * 2000) / (u_int)in - 1000;
+	}
+
+	len = snprintf(buff, sizeof buff, "%2.2d.", percent10);
+	/* Move the '.' to before the last digit */
+	last = buff[len - 2];
+	buff[len - 2] = '.';
+	buff[len - 1] = last;
+	fprintf(where, "%5s%%", buff);
+}
+
+/*
+ * Print the -v statistics line on stderr: the file name when there is
+ * one, the ratio, and the new name when there is one.
+ */
+static void
+print_verbage(const char *file, const char *nfile, off_t usize, off_t gsize)
+{
+	const char *pad;
+
+	if (file != NULL) {
+		/* short names get one more tab */
+		pad = strlen(file) < 7 ? "\t\t" : "\t";
+		fprintf(stderr, "%s:%s ", file, pad);
+	}
+	print_ratio(usize, gsize, stderr);
+	if (nfile != NULL)
+		fprintf(stderr, " -- replaced with %s", nfile);
+	fprintf(stderr, "\n");
+	fflush(stderr);
+}
+
+/*
+ * Print the -t verdict for `file' on stderr.  A failed test turns an
+ * exit value of 0 into 1.
+ */
+static void
+print_test(const char *file, int ok)
+{
+	const char *pad = strlen(file) < 7 ? "\t\t" : "\t";
+	const char *verdict = ok ? "OK" : "NOT OK";
+
+	if (ok == 0 && exit_value == 0)
+		exit_value = 1;
+	fprintf(stderr, "%s:%s %s\n", file, pad, verdict);
+	fflush(stderr);
+}
+
+/* print a file's info ala --list */
+/* eg:
+ compressed uncompressed ratio uncompressed_name
+ 354841 1679360 78.8% /usr/pkgsrc/distfiles/libglade-2.0.1.tar
+*/
+/*
+ * `fd' is the compressed file, `out' its compressed size, `outfile' the
+ * name to show and `ts' the time stamp printed with -v.  The sizes are
+ * accumulated across calls; calling with fd == -1 prints those totals.
+ */
+static void
+print_list(int fd, off_t out, const char *outfile, time_t ts)
+{
+	static int first = 1;
+	static off_t in_tot, out_tot;
+	uint32_t crc = 0;
+	off_t in = 0, rv;
+
+	if (first) {
+		if (vflag)
+			printf("method crc date time ");
+		if (qflag == 0)
+			printf(" compressed uncompressed "
+			       "ratio uncompressed_name\n");
+	}
+	first = 0;
+
+	/* print totals? */
+	if (fd == -1) {
+		in = in_tot;
+		out = out_tot;
+	} else
+	{
+		/*
+		 * read the last 8 bytes - the CRC followed by the
+		 * uncompressed size
+		 */
+		rv = lseek(fd, (off_t)(-8), SEEK_END);
+		if (rv != -1) {
+			unsigned char buf[8];
+			uint32_t usize;
+
+			rv = read(fd, (char *)buf, sizeof(buf));
+			if (rv == -1)
+				maybe_warn("read of uncompressed size");
+			else if (rv != sizeof(buf))
+				maybe_warnx("read of uncompressed size");
+
+			else {
+				usize = le32dec(&buf[4]);
+				in = (off_t)usize;
+				crc = le32dec(&buf[0]);
+			}
+		}
+	}
+
+	if (vflag && fd == -1)
+		printf(" ");
+	else if (vflag) {
+		const char *stamp = "";
+		char *date = ctime(&ts);
+
+		/* ctime() may fail; print an empty date rather than crash */
+		if (date != NULL) {
+			/* skip the day, 1/100th second, and year */
+			date += 4;
+			date[12] = 0;
+			stamp = date;
+		}
+		printf("%5s %08x %11s ", "defla"/*XXX*/, crc, stamp);
+	}
+	in_tot += in;
+	out_tot += out;
+	print_list_out(out, in, outfile);
+}
+
+/*
+ * Print one --list row on stdout: compressed size `out', uncompressed
+ * size `in', the ratio and the name.
+ */
+static void
+print_list_out(off_t out, off_t in, const char *outfile)
+{
+	unsigned long long csize = (unsigned long long)out;
+	unsigned long long usize = (unsigned long long)in;
+
+	printf("%12llu %12llu ", csize, usize);
+	print_ratio(in, out, stdout);
+	printf(" %s\n", outfile);
+}
+
+/*
+ * Print the version string and the option summary on stderr, then exit
+ * with status 0.
+ */
+static void
+usage(void)
+{
+	const char *prog;
+
+	fprintf(stderr, "%s\n", gzip_version);
+	prog = getprogname();
+	fprintf(stderr,
+    "usage: %s [-123456789acdfhklLNnqrtVv] [-S .suffix] [<file> [<file> ...]]\n"
+    " -1 --fast fastest (worst) compression\n"
+    " -2 .. -8 set compression level\n"
+    " -9 --best best (slowest) compression\n"
+    " -c --stdout write to stdout, keep original files\n"
+    " --to-stdout\n"
+    " -d --decompress uncompress files\n"
+    " --uncompress\n"
+    " -f --force force overwriting & compress links\n"
+    " -h --help display this help\n"
+    " -k --keep don't delete input files during operation\n"
+    " -l --list list compressed file contents\n"
+    " -N --name save or restore original file name and time stamp\n"
+    " -n --no-name don't save original file name or time stamp\n"
+    " -q --quiet output no warnings\n"
+    " -r --recursive recursively compress files in directories\n"
+    " -S .suf use suffix .suf instead of .gz\n"
+    " --suffix .suf\n"
+    " -t --test test compressed file\n"
+    " -V --version display program version\n"
+    " -v --verbose print extra statistics\n",
+	    prog);
+	exit(0);
+}
+
+/*
+ * Print the version line and the copyright text on stderr, then exit
+ * with status 0.
+ */
+static void
+display_license(void)
+{
+
+	(void)fprintf(stderr, "%s (based on NetBSD gzip 20150113)\n",
+	    gzip_version);
+	(void)fprintf(stderr, "%s\n", gzip_copyright);
+	exit(0);
+}
+
+/* Print the version string on stderr, then exit with status 0. */
+static void
+display_version(void)
+{
+
+	(void)fprintf(stderr, "%s\n", gzip_version);
+	exit(0);
+}
+
+#ifndef NO_BZIP2_SUPPORT
+#include "unbzip2.c"
+#endif
+#ifndef NO_COMPRESS_SUPPORT
+#include "zuncompress.c"
+#endif
+#ifndef NO_PACK_SUPPORT
+#include "unpack.c"
+#endif
+#ifndef NO_XZ_SUPPORT
+#include "unxz.c"
+#endif
+#ifndef NO_LZ_SUPPORT
+#include "unlz.c"
+#endif
+#ifndef NO_ZSTD_SUPPORT
+#include "unzstd.c"
+#endif
+
+/*
+ * Read from `fd' into `buf' until `sz' bytes (capped at SSIZE_MAX) have
+ * arrived or end of file is reached.  Returns the number of bytes read,
+ * which is short only at EOF, or -1 if a read() fails.
+ */
+static ssize_t
+read_retry(int fd, void *buf, size_t sz)
+{
+	char *cp = buf;
+	/* what can actually be reported back through an ssize_t */
+	const size_t want = MIN(sz, (size_t) SSIZE_MAX);
+	size_t left = want;
+
+	while (left > 0) {
+		ssize_t ret;
+
+		ret = read(fd, cp, left);
+		if (ret == -1) {
+			return ret;
+		} else if (ret == 0) {
+			break; /* EOF */
+		}
+		cp += ret;
+		left -= ret;
+	}
+
+	/* measured against the capped request, not the raw `sz' */
+	return want - left;
+}
+
+/*
+ * Write `sz' bytes (capped at SSIZE_MAX) from `buf' to `fd', repeating
+ * short writes until everything is out.  Returns the number of bytes
+ * written, or -1 if a write() fails.
+ */
+static ssize_t
+write_retry(int fd, const void *buf, size_t sz)
+{
+	const char *cp = buf;
+	/* what can actually be reported back through an ssize_t */
+	const size_t want = MIN(sz, (size_t) SSIZE_MAX);
+	size_t left = want;
+
+	while (left > 0) {
+		ssize_t ret;
+
+		ret = write(fd, cp, left);
+		if (ret == -1) {
+			return ret;
+		} else if (ret == 0) {
+			abort(); /* Can't happen */
+		}
+		cp += ret;
+		left -= ret;
+	}
+
+	/* measured against the capped request, not the raw `sz' */
+	return want - left;
+}
diff --git a/usr.bin/gzip/tests/Makefile b/usr.bin/gzip/tests/Makefile
new file mode 100644
index 000000000000..96783eec23ea
--- /dev/null
+++ b/usr.bin/gzip/tests/Makefile
@@ -0,0 +1,12 @@
+.include <bsd.own.mk>
+
+PACKAGE= tests
+
+ATF_TESTS_SH= zdiff_test
+NETBSD_ATF_TESTS_SH= gzip_test
+
+${PACKAGE}FILES+= foo.diff
+
+.include <netbsd-tests.test.mk>
+
+.include <bsd.test.mk>
diff --git a/usr.bin/gzip/tests/Makefile.depend b/usr.bin/gzip/tests/Makefile.depend
new file mode 100644
index 000000000000..11aba52f82cf
--- /dev/null
+++ b/usr.bin/gzip/tests/Makefile.depend
@@ -0,0 +1,10 @@
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/usr.bin/gzip/tests/foo.diff b/usr.bin/gzip/tests/foo.diff
new file mode 100644
index 000000000000..278d14088435
--- /dev/null
+++ b/usr.bin/gzip/tests/foo.diff
@@ -0,0 +1,2 @@
+1a2
+> bar
diff --git a/usr.bin/gzip/tests/zdiff_test.sh b/usr.bin/gzip/tests/zdiff_test.sh
new file mode 100644
index 000000000000..e260261fa291
--- /dev/null
+++ b/usr.bin/gzip/tests/zdiff_test.sh
@@ -0,0 +1,125 @@
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2022 Kyle Evans <kevans@FreeBSD.org>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+
+specials="foo'bar foo\"bar foo\$bar"
+
+prepare_files()
+{
+ compressfunc="$1"
+ compresssuffix="$2"
+
+ echo "foo" > foo
+
+ for f in $specials foo; do
+ [ "$f" == "foo" ] || cp foo "$f"
+ atf_check "$compressfunc" -k "$f"
+ atf_check tar -ckf "$f.tar" "$f"
+ atf_check -o save:"$f.$compresssuffix" \
+ "$compressfunc" -c "$f.tar"
+
+ # Regenerate $f.tar to create a diff from the .$compresssuffix
+ # file, too.
+ echo "bar" >> "$f"
+ atf_check tar -ckf "$f.tar" "$f"
+ done
+}
+
+# gzip: run zdiff on .gz, .Z and .tgz inputs, including awkward file names.
+atf_test_case gzip
+gzip_body()
+{
+    prepare_files gzip tgz
+    cp foo.gz foo.Z
+
+    expected="$(atf_get_srcdir)/foo.diff"
+
+    # Each compressed file differs from its plain sibling by the added "bar".
+    for name in foo $specials; do
+        atf_check -s exit:1 -o file:"$expected" zdiff "$name.gz"
+    done
+
+    # The same gzip data under the .Z suffix.
+    atf_check -s exit:1 -o file:"$expected" zdiff foo.Z
+
+    # With the plain file removed, only the tarballs are left to compare.
+    for name in foo $specials; do
+        rm "$name"
+        atf_check -s exit:1 -o match:"Binary files" zdiff "$name.tgz"
+    done
+}
+
+# bzip: run zdiff on .bz2, .bz and .tbz2 inputs, including awkward file names.
+atf_test_case bzip
+bzip_body()
+{
+    prepare_files bzip2 tbz2
+    cp foo.bz2 foo.bz
+
+    expected="$(atf_get_srcdir)/foo.diff"
+
+    # Each compressed file differs from its plain sibling by the added "bar".
+    for name in foo $specials; do
+        atf_check -s exit:1 -o file:"$expected" zdiff "$name.bz2"
+    done
+
+    # The same bzip2 data under the .bz suffix.
+    atf_check -s exit:1 -o file:"$expected" zdiff foo.bz
+
+    # With the plain file removed, only the tarballs are left to compare.
+    for name in foo $specials; do
+        rm "$name"
+        atf_check -s exit:1 -o match:"Binary files" zdiff "$name.tbz2"
+    done
+}
+
+# xzip: run zdiff on .xz, .lzma and .txz inputs, including awkward file names.
+atf_test_case xzip
+xzip_body()
+{
+    prepare_files xz txz
+    cp foo.xz foo.lzma
+
+    expected="$(atf_get_srcdir)/foo.diff"
+
+    # Each compressed file differs from its plain sibling by the added "bar".
+    for name in foo $specials; do
+        atf_check -s exit:1 -o file:"$expected" zdiff "$name.xz"
+    done
+
+    # The same xz data under the .lzma suffix.
+    atf_check -s exit:1 -o file:"$expected" zdiff foo.lzma
+
+    # With the plain file removed, only the tarballs are left to compare.
+    for name in foo $specials; do
+        rm "$name"
+        atf_check -s exit:1 -o match:"Binary files" zdiff "$name.txz"
+    done
+}
+
+atf_test_case unknown
+unknown_body()
+{
+ prepare_files xz fxz
+
+ for f in foo $specials; do
+ atf_check -s exit:1 -e match:"unknown suffix$" zdiff "$f.fxz"
+ done
+}
+
+# Register every test case defined in this file with ATF.
+atf_init_test_cases()
+{
+
+    for tc in gzip bzip xzip unknown; do
+        atf_add_test_case "$tc"
+    done
+}
diff --git a/usr.bin/gzip/unbzip2.c b/usr.bin/gzip/unbzip2.c
new file mode 100644
index 000000000000..3fd74f1bac57
--- /dev/null
+++ b/usr.bin/gzip/unbzip2.c
@@ -0,0 +1,143 @@
+/* $NetBSD: unbzip2.c,v 1.14 2017/08/04 07:27:08 mrg Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2006 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Simon Burge.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This file is #included by gzip.c */
+
+/*
+ * Decompress bzip2 data from descriptor `in' to descriptor `out'.
+ *
+ * pre, prelen: bytes of the stream the caller already consumed from `in';
+ *     they are handed to the decompressor before anything is read.
+ * bytes_in:    if non-NULL, receives the number of compressed bytes
+ *     consumed (prelen plus everything read here).
+ *
+ * Several bzip2 streams stored back to back are decoded one after another.
+ * When tflag is set the data is decoded but nothing is written.
+ *
+ * Returns the number of bytes written to `out', or -1 if decoding did not
+ * finish with a clean end of stream.
+ */
+static off_t
+unbzip2(int in, int out, char *pre, size_t prelen, off_t *bytes_in)
+{
+    int ret, end_ret, end_of_file, cold = 0;
+    off_t bytes_out = 0;
+    bz_stream bzs;
+    static char *inbuf, *outbuf;
+
+    /* The I/O buffers are allocated once and reused by later calls. */
+    if (inbuf == NULL)
+        inbuf = malloc(BUFLEN);
+    if (outbuf == NULL)
+        outbuf = malloc(BUFLEN);
+    if (inbuf == NULL || outbuf == NULL)
+        maybe_err("malloc");
+
+    bzs.bzalloc = NULL;
+    bzs.bzfree = NULL;
+    bzs.opaque = NULL;
+
+    end_of_file = 0;
+    ret = BZ2_bzDecompressInit(&bzs, 0, 0);
+    if (ret != BZ_OK)
+        maybe_errx("bzip2 init");
+
+    /* Prepend. */
+    bzs.avail_in = prelen;
+    bzs.next_in = pre;
+
+    if (bytes_in)
+        *bytes_in = prelen;
+
+    while (ret == BZ_OK) {
+        check_siginfo();
+        if (bzs.avail_in == 0 && !end_of_file) {
+            ssize_t n;
+
+            n = read(in, inbuf, BUFLEN);
+            if (n < 0)
+                maybe_err("read");
+            if (n == 0)
+                end_of_file = 1;
+            infile_newdata(n);
+            bzs.next_in = inbuf;
+            bzs.avail_in = n;
+            if (bytes_in)
+                *bytes_in += n;
+        }
+
+        bzs.next_out = outbuf;
+        bzs.avail_out = BUFLEN;
+        ret = BZ2_bzDecompress(&bzs);
+
+        switch (ret) {
+        case BZ_STREAM_END:
+        case BZ_OK:
+            if (ret == BZ_OK && end_of_file) {
+                /*
+                 * If we hit this after a stream end, consider
+                 * it as the end of the whole file and don't
+                 * bail out.
+                 */
+                if (cold == 1)
+                    ret = BZ_STREAM_END;
+                else
+                    maybe_errx("truncated file");
+            }
+            cold = 0;
+            if (!tflag && bzs.avail_out != BUFLEN) {
+                ssize_t n;
+
+                n = write(out, outbuf, BUFLEN - bzs.avail_out);
+                if (n < 0)
+                    maybe_err("write");
+                bytes_out += n;
+            }
+            if (ret == BZ_STREAM_END && !end_of_file) {
+                /*
+                 * More input may follow: restart the decoder
+                 * for a possible concatenated stream.  `cold'
+                 * records that nothing of it was decoded yet.
+                 */
+                if (BZ2_bzDecompressEnd(&bzs) != BZ_OK ||
+                    BZ2_bzDecompressInit(&bzs, 0, 0) != BZ_OK)
+                    maybe_errx("bzip2 re-init");
+                cold = 1;
+                ret = BZ_OK;
+            }
+            break;
+
+        case BZ_DATA_ERROR:
+            maybe_warnx("bzip2 data integrity error");
+            break;
+
+        case BZ_DATA_ERROR_MAGIC:
+            maybe_warnx("bzip2 magic number error");
+            break;
+
+        case BZ_MEM_ERROR:
+            maybe_warnx("bzip2 out of memory");
+            break;
+
+        default:
+            maybe_warnx("unknown bzip2 error: %d", ret);
+            break;
+        }
+    }
+
+    /*
+     * Release the decompressor unconditionally.  Folding this call into
+     * the condition below would skip it on every error exit from the loop
+     * and leak the state libbz2 allocated for the stream.
+     */
+    end_ret = BZ2_bzDecompressEnd(&bzs);
+    if (ret != BZ_STREAM_END || end_ret != BZ_OK)
+        return (-1);
+
+    return (bytes_out);
+}
+
diff --git a/usr.bin/gzip/unlz.c b/usr.bin/gzip/unlz.c
new file mode 100644
index 000000000000..71e752ab1112
--- /dev/null
+++ b/usr.bin/gzip/unlz.c
@@ -0,0 +1,642 @@
+/* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */
+
+/*-
+ * Copyright (c) 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Lzd - Educational decompressor for the lzip format
+ Copyright (C) 2013-2018 Antonio Diaz Diaz.
+
+ This program is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+
+#define LZ_STATES 12
+
+#define LITERAL_CONTEXT_BITS 3
+#define POS_STATE_BITS 2
+#define POS_STATES (1 << POS_STATE_BITS)
+#define POS_STATE_MASK (POS_STATES - 1)
+
+#define STATES 4
+#define DIS_SLOT_BITS 6
+
+#define DIS_MODEL_START 4
+#define DIS_MODEL_END 14
+
+#define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2))
+#define DIS_ALIGN_BITS 4
+#define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS)
+
+#define LOW_BITS 3
+#define MID_BITS 3
+#define HIGH_BITS 8
+
+#define LOW_SYMBOLS (1 << LOW_BITS)
+#define MID_SYMBOLS (1 << MID_BITS)
+#define HIGH_SYMBOLS (1 << HIGH_BITS)
+
+#define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
+
+#define MIN_MATCH_LEN 2
+
+#define BIT_MODEL_MOVE_BITS 5
+#define BIT_MODEL_TOTAL_BITS 11
+#define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS)
+#define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2)
+
+/*
+ * State the decoder moves to after emitting a literal, indexed by the
+ * current state (one entry per LZ_STATES value; read by lz_st_get_char()).
+ */
+static const int lz_st_next[] = {
+ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
+};
+
+/* Report whether st is one of the states below 7 (the "char" states). */
+static bool
+lz_st_is_char(int st)
+{
+    if (st >= 7)
+        return false;
+    return true;
+}
+
+/* Successor of state st after a literal; a plain lz_st_next[] lookup. */
+static int
+lz_st_get_char(int st)
+{
+    const int successor = lz_st_next[st];
+
+    return successor;
+}
+
+/* Successor of state st after a match: 7 from a state below 7, else 10. */
+static int
+lz_st_get_match(int st)
+{
+    if (st < 7)
+        return 7;
+    return 10;
+}
+
+/* Successor of state st after a rep match: 8 from a state below 7, else 11. */
+static int
+lz_st_get_rep(int st)
+{
+    if (st < 7)
+        return 8;
+    return 11;
+}
+
+/* Successor of state st after a short rep: 9 from a state below 7, else 11. */
+static int
+lz_st_get_short_rep(int st)
+{
+    if (st < 7)
+        return 9;
+    return 11;
+}
+
+/*
+ * Bit models used to decode a match length; consumed by lz_rd_decode_len().
+ */
+struct lz_len_model {
+ int choice1; /* bit model: a 0 bit selects the bm_low tree */
+ int choice2; /* bit model: 0 selects bm_mid, 1 selects bm_high */
+ int bm_low[POS_STATES][LOW_SYMBOLS]; /* indexed by pos_state */
+ int bm_mid[POS_STATES][MID_SYMBOLS]; /* indexed by pos_state */
+ int bm_high[HIGH_SYMBOLS];
+};
+
+/* CRC lookup table, one entry per possible byte value. */
+static uint32_t lz_crc[256];
+
+/*
+ * Fill lz_crc[] using the reflected polynomial 0xEDB88320.  Must have run
+ * before lz_crc_update() is used.
+ */
+static void
+lz_crc_init(void)
+{
+    const unsigned nentries = sizeof(lz_crc) / sizeof(lz_crc[0]);
+
+    for (unsigned byte = 0; byte < nentries; byte++) {
+        unsigned rem = byte;
+        int rounds = 8;
+
+        while (rounds-- > 0) {
+            const unsigned low_bit = rem & 1;
+
+            rem >>= 1;
+            if (low_bit)
+                rem ^= 0xEDB88320U;
+        }
+        lz_crc[byte] = rem;
+    }
+}
+
+/*
+ * Fold len bytes starting at buf into the running CRC stored in *crc,
+ * using the lz_crc[] table.
+ */
+static void
+lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
+{
+    uint32_t acc = *crc;
+    size_t idx = 0;
+
+    while (idx < len) {
+        const uint32_t slot = (acc ^ buf[idx]) & 0xFF;
+
+        acc = lz_crc[slot] ^ (acc >> 8);
+        idx++;
+    }
+    *crc = acc;
+}
+
+/* Range decoder state shared by the lz_rd_*() routines. */
+struct lz_range_decoder {
+ FILE *fp; /* stream the compressed bytes are fetched from */
+ uint32_t code; /* current code value; shifted left as bytes are pulled in */
+ uint32_t range; /* current range; renormalised once it drops to 24 bits */
+};
+
+/*
+ * Attach the range decoder to fp and prime it with the first five input
+ * bytes.  Returns -1 if the stream's error indicator is set afterwards,
+ * 0 otherwise.
+ */
+static int
+lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
+{
+    int pending = 5;
+
+    rd->fp = fp;
+    rd->range = ~0;
+    rd->code = 0;
+
+    while (pending-- > 0) {
+        const uint8_t next = (uint8_t)getc(rd->fp);
+
+        rd->code = (rd->code << 8) | next;
+    }
+
+    if (ferror(rd->fp))
+        return -1;
+    return 0;
+}
+
+/*
+ * Decode num_bits bits without a probability model (each step halves the
+ * range) and return them, most significant bit first.
+ */
+static unsigned
+lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
+{
+    unsigned result = 0;
+    int remaining = num_bits;
+
+    while (remaining > 0) {
+        unsigned bit = 0;
+
+        rd->range >>= 1;
+        if (rd->code >= rd->range) {
+            rd->code -= rd->range;
+            bit = 1;
+        }
+        result = (result << 1) | bit;
+
+        /* Renormalise: pull in one more input byte. */
+        if (rd->range <= 0x00FFFFFFU) {
+            rd->range <<= 8;
+            rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
+        }
+        remaining--;
+    }
+
+    return result;
+}
+
+/*
+ * Decode a single bit using the adaptive probability model *bm and update
+ * the model toward the bit that was seen.  Returns the bit (0 or 1).
+ */
+static unsigned
+lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
+{
+    const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
+    unsigned bit;
+
+    if (rd->code >= bound) {
+        /* Upper part of the range: a 1 bit. */
+        rd->range -= bound;
+        rd->code -= bound;
+        *bm -= *bm >> BIT_MODEL_MOVE_BITS;
+        bit = 1;
+    } else {
+        /* Lower part of the range: a 0 bit. */
+        rd->range = bound;
+        *bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
+        bit = 0;
+    }
+
+    /* Renormalise: pull in one more input byte. */
+    if (rd->range <= 0x00FFFFFFU) {
+        rd->range <<= 8;
+        rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
+    }
+    return bit;
+}
+
+/*
+ * Decode a num_bits wide symbol by walking the bit-model tree bm from its
+ * root (index 1), most significant bit first.
+ */
+static unsigned
+lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
+{
+    unsigned node = 1;
+
+    for (int left = num_bits; left > 0; left--) {
+        const unsigned bit = lz_rd_decode_bit(rd, &bm[node]);
+
+        node = (node << 1) | bit;
+    }
+
+    /* Strip the leading marker bit the walk started from. */
+    return node - (1 << num_bits);
+}
+
+/*
+ * Decode a num_bits wide symbol with lz_rd_decode_tree() and return it with
+ * the order of its num_bits bits reversed.
+ */
+static unsigned
+lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
+{
+    unsigned forward = lz_rd_decode_tree(rd, bm, num_bits);
+    unsigned mirrored = 0;
+
+    for (int left = num_bits; left > 0; left--, forward >>= 1)
+        mirrored = (mirrored << 1) | (forward & 1);
+
+    return mirrored;
+}
+
+/*
+ * Decode a literal byte with match_byte as context.  As long as the decoded
+ * bits agree with match_byte, the models offset by the match bit are used;
+ * after the first disagreement the rest of the byte is decoded from the
+ * plain tree.
+ */
+static unsigned
+lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
+{
+    unsigned node = 1;
+
+    for (int shift = 7; shift >= 0; shift--) {
+        const unsigned expect = (match_byte >> shift) & 1;
+        int *model = &bm[node + (expect << 8) + 0x100];
+        const unsigned got = lz_rd_decode_bit(rd, model);
+
+        node = (node << 1) | got;
+        if (expect == got)
+            continue;
+
+        /* Context no longer matches: finish from the plain tree. */
+        while (node < 0x100)
+            node = (node << 1) | lz_rd_decode_bit(rd, &bm[node]);
+        break;
+    }
+    return node & 0xFF;
+}
+
+/*
+ * Decode a length value with the model set lm.  Two choice bits pick the
+ * low, mid or high tree; the result is the symbol from that tree plus the
+ * number of symbols covered by the trees before it.
+ */
+static unsigned
+lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
+    int pos_state)
+{
+    int *tree;
+    int nbits;
+    unsigned base;
+
+    if (lz_rd_decode_bit(rd, &lm->choice1) == 0) {
+        tree = lm->bm_low[pos_state];
+        nbits = LOW_BITS;
+        base = 0;
+    } else if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
+        tree = lm->bm_mid[pos_state];
+        nbits = MID_BITS;
+        base = LOW_SYMBOLS;
+    } else {
+        tree = lm->bm_high;
+        nbits = HIGH_BITS;
+        base = LOW_SYMBOLS + MID_SYMBOLS;
+    }
+
+    return base + lz_rd_decode_tree(rd, tree, nbits);
+}
+
+/* Decoder state for one lzip member: streams plus the dictionary buffer. */
+struct lz_decoder {
+ FILE *fin, *fout; /* stdio streams over dup'ed descriptors (lz_create) */
+ /*
+ * pos: next write index in obuf; ppos: bytes accounted for by earlier
+ * passes through obuf; spos: start of the not yet flushed part of obuf;
+ * dict_size: size of obuf in bytes.
+ */
+ off_t pos, ppos, spos, dict_size;
+ bool wrapped; /* set by lz_flush() when obuf filled up and pos was reset */
+ uint32_t crc; /* running CRC of flushed output, starts at ~0 */
+ uint8_t *obuf; /* dictionary / output buffer */
+ struct lz_range_decoder rdec;
+};
+
+/*
+ * Write the not yet flushed part of the dictionary buffer to the output
+ * stream and fold it into the CRC.  When the buffer has been filled, rewind
+ * the write position to its start and record the wrap.
+ *
+ * Returns 0 on success, -1 if there was nothing to flush or the write came
+ * up short.
+ */
+static int
+lz_flush(struct lz_decoder *lz)
+{
+    const off_t pending = lz->pos - lz->spos;
+
+    if (pending <= 0)
+        return -1;
+
+    const uint8_t *chunk = lz->obuf + lz->spos;
+    const size_t nbytes = (size_t)pending;
+
+    lz_crc_update(&lz->crc, chunk, nbytes);
+    if (fwrite(chunk, 1, nbytes, lz->fout) != nbytes)
+        return -1;
+
+    if (lz->pos >= lz->dict_size) {
+        lz->wrapped = true;
+        lz->ppos += lz->pos;
+        lz->pos = 0;
+    } else {
+        lz->wrapped = false;
+    }
+    lz->spos = lz->pos;
+    return 0;
+}
+
+/*
+ * Close whichever streams were opened and free the dictionary buffer.
+ * The decoder structure itself is not freed.
+ */
+static void
+lz_destroy(struct lz_decoder *lz)
+{
+    if (lz->fin != NULL)
+        fclose(lz->fin);
+
+    if (lz->fout != NULL)
+        fclose(lz->fout);
+
+    free(lz->obuf);
+}
+
+/*
+ * Initialise *lz for decoding one member.
+ *
+ * fin and fdout are duplicated and wrapped in stdio streams, so the
+ * caller's descriptors stay open when the decoder is destroyed.  A
+ * dictionary buffer of dict_size bytes is allocated and the range decoder
+ * is primed from the input.
+ *
+ * Returns 0 on success.  On failure everything acquired so far is released
+ * and -1 is returned.
+ */
+static int
+lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
+{
+    int fd, saved_errno;
+
+    memset(lz, 0, sizeof(*lz));
+
+    if ((fd = dup(fin)) == -1)
+        goto out;
+    lz->fin = fdopen(fd, "r");
+    if (lz->fin == NULL) {
+        /* No stream owns the duplicate yet; close it here or it leaks. */
+        saved_errno = errno;
+        close(fd);
+        errno = saved_errno;
+        goto out;
+    }
+
+    if ((fd = dup(fdout)) == -1)
+        goto out;
+    lz->fout = fdopen(fd, "w");
+    if (lz->fout == NULL) {
+        saved_errno = errno;
+        close(fd);
+        errno = saved_errno;
+        goto out;
+    }
+
+    lz->pos = lz->ppos = lz->spos = 0;
+    lz->crc = ~0;
+    lz->dict_size = dict_size;
+    lz->wrapped = false;
+
+    lz->obuf = malloc(dict_size);
+    if (lz->obuf == NULL)
+        goto out;
+
+    if (lz_rd_create(&lz->rdec, lz->fin) == -1)
+        goto out;
+    return 0;
+out:
+    /* The memset above makes this safe for any partially built state. */
+    lz_destroy(lz);
+    return -1;
+}
+
+/*
+ * Return the byte written `ahead' + 1 positions before the current write
+ * position.  If that lies before the start of the buffer, the byte is taken
+ * from the end of the buffer when it has wrapped; otherwise 0 is returned.
+ */
+static uint8_t
+lz_peek(const struct lz_decoder *lz, unsigned ahead)
+{
+    const off_t idx = lz->pos - ahead - 1;
+
+    if (idx < 0) {
+        if (!lz->wrapped)
+            return 0;
+        return lz->obuf[lz->dict_size + idx];
+    }
+    return lz->obuf[idx];
+}
+
+/*
+ * Append byte b to the dictionary buffer, flushing the buffer as soon as
+ * it becomes full.
+ */
+static void
+lz_put(struct lz_decoder *lz, uint8_t b)
+{
+    const off_t slot = lz->pos;
+
+    lz->obuf[slot] = b;
+    lz->pos = slot + 1;
+
+    if (lz->pos == lz->dict_size)
+        lz_flush(lz);
+}
+
+/* Total number of bytes produced so far: earlier passes plus current one. */
+static off_t
+lz_get_data_position(const struct lz_decoder *lz)
+{
+    const off_t produced = lz->ppos + lz->pos;
+
+    return produced;
+}
+
+/* Final CRC value: the running CRC with all 32 bits inverted. */
+static unsigned
+lz_get_crc(const struct lz_decoder *lz)
+{
+    const uint32_t running = lz->crc;
+
+    return running ^ 0xffffffffU;
+}
+
+/* Set the l bit models starting at a to BIT_MODEL_INIT. */
+static void
+lz_bm_init(int *a, size_t l)
+{
+    size_t remaining = l;
+    int *model = a;
+
+    while (remaining-- > 0)
+        *model++ = BIT_MODEL_INIT;
+}
+
+/*
+ * Initialisation helpers for bit-model arrays.  They rely on nitems(), so
+ * the argument must be a real array, not a pointer.
+ * LZ_BM_INIT: one-dimensional array; LZ_BM_INIT2: two-dimensional array.
+ */
+#define LZ_BM_INIT(a) lz_bm_init(a, nitems(a))
+#define LZ_BM_INIT2(a) do { \
+ size_t l = nitems(a[0]); \
+ for (size_t i = 0; i < nitems(a); i++) \
+ lz_bm_init(a[i], l); \
+} while (/*CONSTCOND*/0)
+
+/* Reset every model of a struct lz_len_model (passed by name, not pointer). */
+#define LZ_MODEL_INIT(a) do { \
+ a.choice1 = BIT_MODEL_INIT; \
+ a.choice2 = BIT_MODEL_INIT; \
+ LZ_BM_INIT2(a.bm_low); \
+ LZ_BM_INIT2(a.bm_mid); \
+ LZ_BM_INIT(a.bm_high); \
+} while (/*CONSTCOND*/0)
+
+/*
+ * Decode the compressed data of one member into lz's dictionary buffer,
+ * which lz_put() flushes to the output stream.
+ *
+ * Returns true only when the end marker (distance 0xFFFFFFFF with the
+ * minimum match length) is decoded.  Returns false when a match distance
+ * is out of range or the input ends or fails before the marker.  Buffered
+ * output is flushed on every exit path.
+ */
+static bool
+lz_decode_member(struct lz_decoder *lz)
+{
+ int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
+ int bm_match[LZ_STATES][POS_STATES];
+ int bm_rep[4][LZ_STATES];
+ int bm_len[LZ_STATES][POS_STATES];
+ int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
+ int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
+ int bm_align[DIS_ALIGN_SIZE];
+
+ LZ_BM_INIT2(bm_literal);
+ LZ_BM_INIT2(bm_match);
+ LZ_BM_INIT2(bm_rep);
+ LZ_BM_INIT2(bm_len);
+ LZ_BM_INIT2(bm_dis_slot);
+ LZ_BM_INIT(bm_dis);
+ LZ_BM_INIT(bm_align);
+
+ struct lz_len_model match_len_model;
+ struct lz_len_model rep_len_model;
+
+ LZ_MODEL_INIT(match_len_model);
+ LZ_MODEL_INIT(rep_len_model);
+
+ struct lz_range_decoder *rd = &lz->rdec;
+ unsigned rep[4] = { 0 }; /* the four most recently used distances */
+
+
+ int state = 0;
+
+ while (!feof(lz->fin) && !ferror(lz->fin)) {
+ const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
+ // bit 1
+ if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
+ /* literal: the model set is chosen by the previous byte */
+ const uint8_t prev_byte = lz_peek(lz, 0);
+ const int literal_state =
+ prev_byte >> (8 - LITERAL_CONTEXT_BITS);
+ int *bm = bm_literal[literal_state];
+ if (lz_st_is_char(state))
+ lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
+ else {
+ int peek = lz_peek(lz, rep[0]);
+ lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
+ }
+ state = lz_st_get_char(state);
+ continue;
+ }
+ int len;
+ // bit 2
+ if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
+ /* repeated match: reuse one of the remembered distances */
+ // bit 3
+ if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
+ // bit 4
+ if (lz_rd_decode_bit(rd,
+ &bm_len[state][pos_state]) == 0)
+ {
+ /* short rep: copy a single byte at rep[0] */
+ state = lz_st_get_short_rep(state);
+ lz_put(lz, lz_peek(lz, rep[0]));
+ continue;
+ }
+ } else {
+ /* pick rep[1..3] and move it to the front */
+ unsigned distance;
+ // bit 4
+ if (lz_rd_decode_bit(rd, &bm_rep[2][state])
+ == 0)
+ distance = rep[1];
+ else {
+ // bit 5
+ if (lz_rd_decode_bit(rd,
+ &bm_rep[3][state]) == 0)
+ distance = rep[2];
+ else {
+ distance = rep[3];
+ rep[3] = rep[2];
+ }
+ rep[2] = rep[1];
+ }
+ rep[1] = rep[0];
+ rep[0] = distance;
+ }
+ state = lz_st_get_rep(state);
+ len = MIN_MATCH_LEN +
+ lz_rd_decode_len(rd, &rep_len_model, pos_state);
+ } else {
+ /* new match: shift the history, then decode length and distance */
+ rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
+ len = MIN_MATCH_LEN +
+ lz_rd_decode_len(rd, &match_len_model, pos_state);
+ const int len_state =
+ MIN(len - MIN_MATCH_LEN, STATES - 1);
+ rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
+ DIS_SLOT_BITS);
+ if (rep[0] >= DIS_MODEL_START) {
+ const unsigned dis_slot = rep[0];
+ const int direct_bits = (dis_slot >> 1) - 1;
+ rep[0] = (2 | (dis_slot & 1)) << direct_bits;
+ if (dis_slot < DIS_MODEL_END)
+ rep[0] += lz_rd_decode_tree_reversed(rd,
+ &bm_dis[rep[0] - dis_slot],
+ direct_bits);
+ else {
+ rep[0] += lz_rd_decode(rd, direct_bits
+ - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
+ rep[0] += lz_rd_decode_tree_reversed(rd,
+ bm_align, DIS_ALIGN_BITS);
+ /* all-ones distance is the end marker */
+ if (rep[0] == 0xFFFFFFFFU) {
+ lz_flush(lz);
+ return len == MIN_MATCH_LEN;
+ }
+ }
+ }
+ state = lz_st_get_match(state);
+ /* reject distances reaching before the data produced so far */
+ if (rep[0] >= lz->dict_size ||
+ (rep[0] >= lz->pos && !lz->wrapped)) {
+ lz_flush(lz);
+ return false;
+ }
+ }
+ /* copy len bytes from rep[0] back; source may overlap destination */
+ for (int i = 0; i < len; i++)
+ lz_put(lz, lz_peek(lz, rep[0]));
+ }
+ /* the loop only ends here when the input hit EOF or an error */
+ lz_flush(lz);
+ return false;
+}
+
+/*
+ * Member trailer layout (every field is stored least significant byte
+ * first):
+ * 0-3 CRC32 of the uncompressed data
+ * 4-11 size of the uncompressed data
+ * 12-19 member size including header and trailer
+ */
+#define TRAILER_SIZE 20
+
+
+/*
+ * Decode one lzip member whose header has already been consumed.
+ *
+ * fin, fdout: input and output descriptors (left open for the caller).
+ * dict_size:  dictionary size taken from the member header.
+ * insize:     if non-NULL, receives the member size recorded in the
+ *             trailer; only written on success.
+ *
+ * Returns the uncompressed size on success.  Returns -1 when the decoder
+ * cannot be set up, the data is corrupt or truncated, the trailer does not
+ * match the decoded data, or the output cannot be flushed.
+ */
+static off_t
+lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
+{
+    struct lz_decoder lz;
+    off_t rv = -1;
+
+    if (lz_create(&lz, fin, fdout, dict_size) == -1)
+        return -1;
+
+    if (!lz_decode_member(&lz))
+        goto out;
+
+    uint8_t trailer[TRAILER_SIZE];
+
+    for (size_t i = 0; i < sizeof(trailer); i++) {
+        const int c = getc(lz.fin);
+
+        /* A short trailer is an error, not a run of 0xFF bytes. */
+        if (c == EOF)
+            goto out;
+        trailer[i] = (uint8_t)c;
+    }
+
+    /*
+     * Assemble the fields in unsigned types: shifting a signed value
+     * into or past its sign bit is undefined.
+     */
+    uint32_t crc = 0;
+    for (int i = 3; i >= 0; --i)
+        crc = (crc << 8) | trailer[i];
+
+    uint64_t data_size = 0;
+    for (int i = 11; i >= 4; --i)
+        data_size = (data_size << 8) | trailer[i];
+
+    uint64_t member_size = 0;
+    for (int i = 19; i >= 12; --i)
+        member_size = (member_size << 8) | trailer[i];
+
+    if (crc != lz_get_crc(&lz) ||
+        data_size != (uint64_t)lz_get_data_position(&lz))
+        goto out;
+
+    /* Sizes that do not fit a signed 64-bit value cannot be genuine. */
+    if (data_size > (uint64_t)INT64_MAX ||
+        member_size > (uint64_t)INT64_MAX)
+        goto out;
+
+    /*
+     * Push buffered output out while a failure can still be reported;
+     * the fclose() in lz_destroy() would drop the error.
+     */
+    if (fflush(lz.fout) == EOF)
+        goto out;
+
+    if (insize)
+        *insize = (off_t)member_size;
+
+    /* ftello(lz.fout) is not usable here: it does not work with pipes. */
+    rv = (off_t)data_size;
+out:
+    lz_destroy(&lz);
+    return rv;
+}
+
+
+/*
+ * Member header layout:
+ * 0-3 magic
+ * 4 version
+ * 5 coded dict_size
+ */
+#define HDR_SIZE 6
+#define MIN_DICTIONARY_SIZE (1 << 12)
+#define MAX_DICTIONARY_SIZE (1 << 29)
+
+/* Magic bytes followed by the only supported version number. */
+static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
+
+/*
+ * Expand the coded dictionary size byte c from the member header.
+ *
+ * The low 5 bits are the exponent of a power-of-two base size; the top 3
+ * bits say how many sixteenths of the base to subtract (the code subtracts
+ * that many quarters of a quarter, i.e. (base >> 2) * n... see below).
+ *
+ * Returns the size in bytes, or 0 if it falls outside
+ * [MIN_DICTIONARY_SIZE, MAX_DICTIONARY_SIZE].
+ */
+static unsigned
+lz_get_dict_size(unsigned char c)
+{
+    /* 1U, not 1: the exponent can be 31 and 1 << 31 overflows int. */
+    unsigned dict_size = 1U << (c & 0x1f);
+
+    /* Subtract (c >> 5) quarters of the base size. */
+    dict_size -= (dict_size >> 2) * ((c >> 5) & 0x7);
+    if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
+        return 0;
+    return dict_size;
+}
+
+/*
+ * Decompress an lzip file from descriptor fin to descriptor fout.
+ *
+ * pre, prelen: leading bytes of the file the caller already consumed from
+ *     fin; they are taken as the start of the member header.
+ * bytes_in:    passed on to lz_decode(), which stores the member size from
+ *     the trailer there on success.
+ *
+ * Returns the uncompressed size, 0 when the input is empty and nothing was
+ * pre-read, or -1 on a read error, a short or invalid header, or a
+ * decoding failure.
+ */
+static off_t
+unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
+{
+    /*
+     * Build the CRC table on first use.  Entry 0 is 0 even in a populated
+     * table, so probe entry 1, which is only nonzero after
+     * lz_crc_init() has run.
+     */
+    if (lz_crc[1] == 0)
+        lz_crc_init();
+
+    char header[HDR_SIZE];
+
+    /* More look-ahead than a header holds cannot be handled here. */
+    if (prelen > sizeof(header))
+        return -1;
+
+    if (pre && prelen)
+        memcpy(header, pre, prelen);
+
+    const size_t missing = sizeof(header) - prelen;
+
+    /* Skip the read when the caller already supplied the whole header. */
+    if (missing > 0) {
+        ssize_t nr = read(fin, header + prelen, missing);
+
+        switch (nr) {
+        case -1:
+            return -1;
+        case 0:
+            return prelen ? -1 : 0;
+        default:
+            if ((size_t)nr != missing)
+                return -1;
+            break;
+        }
+    }
+
+    if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
+        return -1;
+
+    unsigned dict_size = lz_get_dict_size(header[5]);
+    if (dict_size == 0)
+        return -1;
+
+    return lz_decode(fin, fout, dict_size, bytes_in);
+}
diff --git a/usr.bin/gzip/unpack.c b/usr.bin/gzip/unpack.c
new file mode 100644
index 000000000000..110500ac21d6
--- /dev/null
+++ b/usr.bin/gzip/unpack.c
@@ -0,0 +1,334 @@
+/* $NetBSD: unpack.c,v 1.3 2017/08/04 07:27:08 mrg Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2009 Xin LI <delphij@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* This file is #included by gzip.c */
+
+/*
+ * pack(1) file format:
+ *
+ * The first 7 bytes is the header:
+ * 00, 01 - Signature (US, RS), we already validated it earlier.
+ * 02..05 - Uncompressed size
+ * 06 - Level for the huffman tree (<=24)
+ *
+ * pack(1) will then store symbols (leaf) nodes count in each huffman
+ * tree levels, each level would consume 1 byte (See [1]).
+ *
+ * After the symbol count table, there is the symbol table, storing
+ * symbols represented by corresponding leaf node. EOB is not being
+ * explicitly transmitted (not necessary anyway) in the symbol table.
+ *
+ * Compressed data goes after the symbol table.
+ *
+ * NOTES
+ *
+ * [1] If we count EOB into the symbols, that would mean that we will
+ * have at most 256 symbols in the huffman tree. pack(1) rejects empty
+ * file and files that just repeats one character, which means that we
+ * will have at least 2 symbols. Therefore, pack(1) would reduce the
+ * last level symbol count by 2 which makes it a number in
+ * range [0..254], so all levels' symbol count would fit into 1 byte.
+ */
+
+#define PACK_HEADER_LENGTH 7
+#define HTREE_MAXLEVEL 24
+
+/*
+ * unpack descriptor
+ *
+ * Represent the huffman tree in a similar way that pack(1) would
+ * store in a packed file. We store all symbols in a linear table,
+ * and store pointers to each level's first symbol. In addition to
+ * that, maintain two counts for each level: inner nodes count and
+ * leaf nodes count.
+ */
+typedef struct {
+ int symbol_size; /* Size of the symbol table */
+ int treelevels; /* Levels for the huffman tree */
+
+ int *symbolsin; /* Table of leaf symbols count in each
+ * level */
+ int *inodesin; /* Table of internal nodes count in
+ * each level */
+
+ char *symbol; /* The symbol table */
+ char *symbol_eob; /* Pointer to the EOB symbol */
+ char **tree; /* Decoding huffman tree (pointers to
+ * first symbol of each tree level) */
+
+ off_t uncompressed_size; /* Uncompressed size */
+ FILE *fpIn; /* Input stream */
+ FILE *fpOut; /* Output stream */
+} unpack_descriptor_t;
+
+/*
+ * Tear down an unpack descriptor: free its tables and close both streams.
+ *
+ * Every pointer in the descriptor must have been initialised by the caller
+ * (here they all refer to valid memory).  The pointers are not cleared
+ * afterwards, because nothing uses the descriptor again without setting it
+ * up from scratch.
+ */
+static void
+unpack_descriptor_fini(unpack_descriptor_t *unpackd)
+{
+
+    /* Memory first ... */
+    free(unpackd->symbolsin);
+    free(unpackd->inodesin);
+    free(unpackd->symbol);
+    free(unpackd->tree);
+
+    /* ... then the stdio streams, input before output. */
+    fclose(unpackd->fpIn);
+    fclose(unpackd->fpOut);
+}
+
+/*
+ * Fill the internal node count table from `level' down to the last level.
+ *
+ * The last level has no internal nodes by definition.  Every other level
+ * has half as many internal nodes as the next level has nodes in total
+ * (internal plus leaf), so the table is computed from the bottom up.
+ */
+static void
+unpackd_fill_inodesin(const unpack_descriptor_t *unpackd, int level)
+{
+    const int last = unpackd->treelevels;
+
+    if (level >= last) {
+        unpackd->inodesin[level] = 0;
+        return;
+    }
+
+    unpackd->inodesin[last] = 0;
+    for (int cur = last - 1; cur >= level; cur--) {
+        const int below = unpackd->inodesin[cur + 1] +
+            unpackd->symbolsin[cur + 1];
+
+        unpackd->inodesin[cur] = below / 2;
+    }
+}
+
+/*
+ * Add newbytes to the consumed-input counter, if the caller asked for one
+ * (bytes_in may be NULL).
+ */
+static void
+accepted_bytes(off_t *bytes_in, off_t newbytes)
+{
+
+    if (bytes_in == NULL)
+        return;
+    *bytes_in += newbytes;
+}
+
+/*
+ * Read file header and construct the tree. Also, prepare the buffered I/O
+ * for decode routine.
+ *
+ * in, out:     descriptors that are handed over to stdio streams stored in
+ *              the descriptor.
+ * pre, prelen: header bytes the caller already consumed from `in'.
+ * bytes_in:    optional counter of consumed input bytes (may be NULL).
+ * unpackd:     descriptor to fill in; the uncompressed size from the
+ *              header ends up in unpackd->uncompressed_size.
+ *
+ * Nothing is returned; every failure is reported through
+ * maybe_err()/maybe_errx().
+ */
+static void
+unpack_parse_header(int in, int out, char *pre, size_t prelen, off_t *bytes_in,
+    unpack_descriptor_t *unpackd)
+{
+    unsigned char hdr[PACK_HEADER_LENGTH];    /* buffer for header */
+    size_t have;            /* header bytes collected so far */
+    int i, j, thisbyte;
+
+    /* The caller's look-ahead has to fit into the fixed-size header */
+    if (prelen > PACK_HEADER_LENGTH)
+        maybe_errx("Pack header look-ahead too long");
+
+    /* Prepend the header buffer if we already read some data */
+    if (prelen != 0)
+        memcpy(hdr, pre, prelen);
+
+    /*
+     * Read in and fill the rest bytes of header.  Keep reading until the
+     * header is complete: a short read would otherwise leave part of
+     * hdr[] uninitialised.
+     */
+    have = prelen;
+    while (have < PACK_HEADER_LENGTH) {
+        ssize_t bytesread;
+
+        bytesread = read(in, hdr + have, PACK_HEADER_LENGTH - have);
+        if (bytesread < 0)
+            maybe_err("Error reading pack header");
+        if (bytesread == 0)
+            maybe_errx("File appears to be truncated");
+        have += (size_t)bytesread;
+    }
+    infile_newdata(have - prelen);
+
+    accepted_bytes(bytes_in, PACK_HEADER_LENGTH);
+
+    /* Obtain uncompressed length (bytes 2,3,4,5) */
+    unpackd->uncompressed_size = 0;
+    for (i = 2; i <= 5; i++) {
+        unpackd->uncompressed_size <<= 8;
+        unpackd->uncompressed_size |= hdr[i];
+    }
+
+    /* Get the levels of the tree */
+    unpackd->treelevels = hdr[6];
+    if (unpackd->treelevels > HTREE_MAXLEVEL || unpackd->treelevels < 1)
+        maybe_errx("Huffman tree has insane levels");
+
+    /* Let libc take care for buffering from now on */
+    if ((unpackd->fpIn = fdopen(in, "r")) == NULL)
+        maybe_err("Can not fdopen() input stream");
+    if ((unpackd->fpOut = fdopen(out, "w")) == NULL)
+        maybe_err("Can not fdopen() output stream");
+
+    /* Allocate for the tables of bounds and the tree itself */
+    unpackd->inodesin =
+        calloc(unpackd->treelevels, sizeof(*(unpackd->inodesin)));
+    unpackd->symbolsin =
+        calloc(unpackd->treelevels, sizeof(*(unpackd->symbolsin)));
+    unpackd->tree =
+        calloc(unpackd->treelevels, (sizeof(*(unpackd->tree))));
+    if (unpackd->inodesin == NULL || unpackd->symbolsin == NULL ||
+        unpackd->tree == NULL)
+        maybe_err("calloc");
+
+    /* We count from 0 so adjust to match array upper bound */
+    unpackd->treelevels--;
+
+    /* Read the levels symbol count table and calculate total */
+    unpackd->symbol_size = 1;    /* EOB */
+    for (i = 0; i <= unpackd->treelevels; i++) {
+        if ((thisbyte = fgetc(unpackd->fpIn)) == EOF)
+            maybe_err("File appears to be truncated");
+        unpackd->symbolsin[i] = (unsigned char)thisbyte;
+        unpackd->symbol_size += unpackd->symbolsin[i];
+    }
+    /*
+     * treelevels is now the index of the last level, so the loop above
+     * consumed treelevels + 1 bytes.
+     */
+    accepted_bytes(bytes_in, unpackd->treelevels + 1);
+    if (unpackd->symbol_size > 256)
+        maybe_errx("Bad symbol table");
+    infile_newdata(unpackd->treelevels + 1);
+
+    /*
+     * Allocate for the symbol table, point symbol_eob at the beginning.
+     * One extra byte is reserved: symbol_size symbols are read below and
+     * symbol_eob ends up just behind them, so without the extra byte the
+     * EOB slot would lie outside the allocation.
+     */
+    unpackd->symbol_eob = unpackd->symbol =
+        calloc(1, (size_t)unpackd->symbol_size + 1);
+    if (unpackd->symbol == NULL)
+        maybe_err("calloc");
+
+    /*
+     * Read in the symbol table, which contain [2, 256] symbols.
+     * In order to fit the count in one byte, pack(1) would offset
+     * it by reducing 2 from the actual number from the last level.
+     *
+     * We adjust the last level's symbol count by 1 here, because
+     * the EOB symbol is not being transmitted explicitly. Another
+     * adjustment would be done later afterward.
+     */
+    unpackd->symbolsin[unpackd->treelevels]++;
+    for (i = 0; i <= unpackd->treelevels; i++) {
+        unpackd->tree[i] = unpackd->symbol_eob;
+        for (j = 0; j < unpackd->symbolsin[i]; j++) {
+            if ((thisbyte = fgetc(unpackd->fpIn)) == EOF)
+                maybe_errx("Symbol table truncated");
+            *unpackd->symbol_eob++ = (char)thisbyte;
+        }
+        infile_newdata(unpackd->symbolsin[i]);
+        accepted_bytes(bytes_in, unpackd->symbolsin[i]);
+    }
+
+    /* Now, take account for the EOB symbol as well */
+    unpackd->symbolsin[unpackd->treelevels]++;
+
+    /*
+     * The symbolsin table has been constructed now.
+     * Calculate the internal nodes count table based on it.
+     */
+    unpackd_fill_inodesin(unpackd, 0);
+}
+
+/*
+ * Decode huffman stream, based on the huffman tree.
+ *
+ * unpackd:  descriptor prepared by unpack_parse_header().
+ * bytes_in: optional counter of consumed input bytes (may be NULL).
+ *
+ * Decoded bytes go to unpackd->fpOut.  Corrupt input, a size mismatch and
+ * output errors are reported through maybe_err()/maybe_errx().
+ */
+static void
+unpack_decode(const unpack_descriptor_t *unpackd, off_t *bytes_in)
+{
+    int thislevel, thiscode, thisbyte, inlevelindex;
+    int i;
+    off_t bytes_out = 0;
+    const char *thissymbol;    /* The symbol pointer decoded from stream */
+
+    /*
+     * Decode huffman.  Fetch every bytes from the file, get it
+     * into 'thiscode' bit-by-bit, then output the symbol we got
+     * when one has been found.
+     *
+     * Assumption: sizeof(int) > ((max tree levels + 1) / 8).
+     * bad things could happen if not.
+     */
+    thislevel = 0;
+    thiscode = thisbyte = 0;
+
+    while ((thisbyte = fgetc(unpackd->fpIn)) != EOF) {
+        accepted_bytes(bytes_in, 1);
+        infile_newdata(1);
+        check_siginfo();
+
+        /*
+         * Split one bit from thisbyte, from highest to lowest,
+         * feed the bit into thiscode, until we got a symbol from
+         * the tree.
+         */
+        for (i = 7; i >= 0; i--) {
+            thiscode = (thiscode << 1) | ((thisbyte >> i) & 1);
+
+            /* Did we got a symbol? (referencing leaf node) */
+            if (thiscode >= unpackd->inodesin[thislevel]) {
+                inlevelindex =
+                    thiscode - unpackd->inodesin[thislevel];
+                /*
+                 * Valid leaf indices are 0 .. symbolsin - 1;
+                 * an index equal to the count already points
+                 * past this level's symbols.
+                 */
+                if (inlevelindex >=
+                    unpackd->symbolsin[thislevel])
+                    maybe_errx("File corrupt");
+
+                thissymbol =
+                    &(unpackd->tree[thislevel][inlevelindex]);
+                if (thissymbol == unpackd->symbol_eob) {
+                    if (bytes_out ==
+                        unpackd->uncompressed_size)
+                        goto finished;
+                    /*
+                     * EOB carries no byte value, so it
+                     * must not be emitted as data.
+                     */
+                    maybe_errx("File corrupt");
+                }
+
+                fputc((*thissymbol), unpackd->fpOut);
+                bytes_out++;
+
+                /* Prepare for next input */
+                thislevel = 0; thiscode = 0;
+            } else {
+                thislevel++;
+                if (thislevel > unpackd->treelevels)
+                    maybe_errx("File corrupt");
+            }
+        }
+    }
+
+finished:
+    if (bytes_out != unpackd->uncompressed_size)
+        maybe_errx("Premature EOF");
+
+    /*
+     * Flush while a failure can still be reported; the stream is later
+     * closed without looking at the result.
+     */
+    if (fflush(unpackd->fpOut) != 0)
+        maybe_err("write");
+}
+
+/*
+ * Handler for pack(1)'ed file.
+ *
+ * Works on duplicates of `in' and `out', so the caller's descriptors stay
+ * open after the stdio streams wrapped around the duplicates are closed.
+ * Returns the uncompressed size recorded in the file header.
+ */
+static off_t
+unpack(int in, int out, char *pre, size_t prelen, off_t *bytes_in)
+{
+    unpack_descriptor_t unpackd;
+    off_t unpacked_size;
+
+    if ((in = dup(in)) == -1)
+        maybe_err("dup");
+    if ((out = dup(out)) == -1)
+        maybe_err("dup");
+
+    unpack_parse_header(in, out, pre, prelen, bytes_in, &unpackd);
+    unpack_decode(&unpackd, bytes_in);
+
+    /* Take the size before the descriptor is torn down */
+    unpacked_size = unpackd.uncompressed_size;
+    unpack_descriptor_fini(&unpackd);
+
+    /* If we reached here, the unpack was successful */
+    return (unpacked_size);
+}
diff --git a/usr.bin/gzip/unxz.c b/usr.bin/gzip/unxz.c
new file mode 100644
index 000000000000..de6683b246b8
--- /dev/null
+++ b/usr.bin/gzip/unxz.c
@@ -0,0 +1,474 @@
+/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <lzma.h>
+
+/*
+ * Decompress .xz data (the decoder is opened with LZMA_CONCATENATED) read
+ * from descriptor 'i' and write the result to descriptor 'o'.
+ *
+ * 'pre' holds 'prelen' input bytes that are placed in front of the first
+ * read() from 'i'.  If 'bytes_in' is not NULL it receives the number of
+ * compressed bytes consumed, 'prelen' included.
+ *
+ * Returns the number of bytes written to 'o'.  Failures are reported
+ * through maybe_err()/maybe_errx().
+ */
+static off_t
+unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+	static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
+	lzma_ret ret;
+	lzma_action action = LZMA_RUN;
+	off_t bytes_out, bp;
+	ssize_t nread;
+	const char *msg;
+	uint8_t ibuf[BUFSIZ];
+	uint8_t obuf[BUFSIZ];
+
+	if (bytes_in == NULL)
+		bytes_in = &bp;
+
+	/* The pre-read bytes have to fit in front of the first read(). */
+	if (prelen > sizeof(ibuf))
+		maybe_errx("Header too large");
+
+	strm.next_in = ibuf;
+	memcpy(ibuf, pre, prelen);
+	/* Keep the result signed until it has been checked for failure. */
+	nread = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
+	if (nread == -1)
+		maybe_err("read failed");
+	infile_newdata(nread);
+	strm.avail_in = (size_t)nread + prelen;
+	*bytes_in = strm.avail_in;
+
+	if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
+		maybe_errx("Can't initialize decoder (%d)", ret);
+
+	/* First pass with no output space offered: stream headers only. */
+	strm.next_out = NULL;
+	strm.avail_out = 0;
+	if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
+		maybe_errx("Can't read headers (%d)", ret);
+
+	bytes_out = 0;
+	strm.next_out = obuf;
+	strm.avail_out = sizeof(obuf);
+
+	for (;;) {
+		check_siginfo();
+		if (strm.avail_in == 0) {
+			strm.next_in = ibuf;
+			nread = read(i, ibuf, sizeof(ibuf));
+			if (nread == -1) {
+				maybe_err("read failed");
+				/*NOTREACHED*/
+			}
+			strm.avail_in = (size_t)nread;
+			if (nread == 0) {
+				/* End of input: ask the decoder to finish. */
+				action = LZMA_FINISH;
+			} else {
+				infile_newdata(strm.avail_in);
+				*bytes_in += nread;
+			}
+		}
+
+		ret = lzma_code(&strm, action);
+
+		// Write and check write error before checking decoder error.
+		// This way as much data as possible gets written to output
+		// even if decoder detected an error.
+		if (strm.avail_out == 0 || ret != LZMA_OK) {
+			const size_t write_size = sizeof(obuf) - strm.avail_out;
+
+			if (write(o, obuf, write_size) != (ssize_t)write_size)
+				maybe_err("write failed");
+
+			strm.next_out = obuf;
+			strm.avail_out = sizeof(obuf);
+			bytes_out += write_size;
+		}
+
+		if (ret == LZMA_OK)
+			continue;
+
+		if (ret == LZMA_STREAM_END) {
+			// Check that there's no trailing garbage.  A failing
+			// read() is an I/O error, not a corrupt file.
+			if (strm.avail_in == 0) {
+				nread = read(i, ibuf, 1);
+				if (nread == -1)
+					maybe_err("read failed");
+				if (nread == 0) {
+					lzma_end(&strm);
+					return bytes_out;
+				}
+			}
+			ret = LZMA_DATA_ERROR;
+		}
+
+		switch (ret) {
+		case LZMA_MEM_ERROR:
+			msg = strerror(ENOMEM);
+			break;
+
+		case LZMA_FORMAT_ERROR:
+			msg = "File format not recognized";
+			break;
+
+		case LZMA_OPTIONS_ERROR:
+			// FIXME: Better message?
+			msg = "Unsupported compression options";
+			break;
+
+		case LZMA_DATA_ERROR:
+			msg = "File is corrupt";
+			break;
+
+		case LZMA_BUF_ERROR:
+			msg = "Unexpected end of input";
+			break;
+
+		case LZMA_MEMLIMIT_ERROR:
+			msg = "Reached memory limit";
+			break;
+
+		default:
+			maybe_errx("Unknown error (%d)", ret);
+			/* Keep msg defined should that call ever return. */
+			msg = "Unknown error";
+			break;
+		}
+		maybe_errx("%s", msg);
+	}
+}
+
+#include <stdbool.h>
+
+/*
+ * Copied various bits and pieces from xz support code or brute force
+ * replacements.
+ */
+
+#define my_min(A,B) ((A)<(B)?(A):(B)) // NOTE: each argument may be evaluated twice
+
+// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
+// IO_BUFFER_SIZE must also be a multiple of 8 (sizeof(uint64_t)).
+#if BUFSIZ <= 1024
+# define IO_BUFFER_SIZE 8192
+#else
+# define IO_BUFFER_SIZE (BUFSIZ & ~7U) // BUFSIZ rounded down to a multiple of 8
+#endif
+
+/// is_sparse() accesses the buffer as uint64_t for maximum speed.
+/// Use a union to make sure that the buffer is properly aligned.
+typedef union {
+ uint8_t u8[IO_BUFFER_SIZE]; // byte view, handed to the liblzma decoders
+ uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; // 32-bit view, used to scan Stream Padding
+ uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; // NOTE(review): no user of this view appears in this chunk
+} io_buf;
+
+
+/*
+ * Seek 'fd' to absolute offset 'pos' and read exactly 'size' bytes into
+ * 'buf'.  Returns false when all 'size' bytes were read, and true on any
+ * failure: seek error, read error or short read.
+ */
+static bool
+io_pread(int fd, io_buf *buf, size_t size, off_t pos)
+{
+	// lseek() followed by read() is more portable than pread() and
+	// serves equally well here.
+	const bool seek_failed = lseek(fd, pos, SEEK_SET) != pos;
+	if (seek_failed)
+		return true;
+
+	// read()'s -1 shows up as SIZE_MAX once stored in a size_t.
+	const size_t got = read(fd, buf, size);
+	return (got == SIZE_MAX || got != size);
+}
+
+/*
+ * Most of the following is copied (mostly verbatim) from the xz
+ * distribution, from file src/xz/list.c
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file list.c
+/// \brief Listing information about .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// Information about a .xz file
+typedef struct {
+ /// Combined Index of all Streams in the file
+ lzma_index *idx; // set by parse_indexes() on success; the caller frees it with lzma_index_end()
+
+ /// Total amount of Stream Padding
+ uint64_t stream_padding; // accumulated by parse_indexes(), one addition per Stream
+
+ /// Highest memory usage so far
+ uint64_t memusage_max; // NOTE(review): never written by the code in this chunk
+
+ /// True if all Blocks so far have Compressed Size and
+ /// Uncompressed Size fields
+ bool all_have_sizes; // NOTE(review): never written by the code in this chunk
+
+ /// Oldest XZ Utils version that will decompress the file
+ uint32_t min_version; // NOTE(review): never written by the code in this chunk
+
+} xz_file_info;
+
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } // member order: idx, stream_padding, memusage_max, all_have_sizes, min_version
+
+
+/// \brief Parse the Index(es) from the given .xz file
+///
+/// \param xfi Pointer to structure where the decoded information
+/// is stored.
+/// \param src_fd Descriptor of the input file; it is fstat()ed and lseek()ed
+///
+/// \return On success, false is returned. On error, true is returned.
+///
+// TODO: This function is pretty big. liblzma should have a function that
+// takes a callback function to parse the Index(es) from a .xz file to make
+// it easy for applications.
+static bool
+parse_indexes(xz_file_info *xfi, int src_fd)
+{
+ struct stat st;
+
+ if (fstat(src_fd, &st) != 0) {
+ return true;
+ }
+
+ if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { // too small for a Stream Header plus a Stream Footer
+ return true;
+ }
+
+ io_buf buf;
+ lzma_stream_flags header_flags;
+ lzma_stream_flags footer_flags;
+ lzma_ret ret;
+
+ // lzma_stream for the Index decoder
+ lzma_stream strm = LZMA_STREAM_INIT;
+
+ // All Indexes decoded so far
+ lzma_index *combined_index = NULL;
+
+ // The Index currently being decoded
+ lzma_index *this_index = NULL;
+
+ // Current position in the file. We parse the file backwards so
+ // initialize it to point to the end of the file.
+ off_t pos = st.st_size;
+
+ // Each loop iteration decodes one Index.
+ do {
+ // Check that there is enough data left to contain at least
+ // the Stream Header and Stream Footer. This check cannot
+ // fail in the first pass of this loop.
+ if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
+ goto error;
+ }
+
+ pos -= LZMA_STREAM_HEADER_SIZE;
+ lzma_vli stream_padding = 0;
+
+ // Locate the Stream Footer. There may be Stream Padding which
+ // we must skip when reading backwards.
+ while (true) {
+ if (pos < LZMA_STREAM_HEADER_SIZE) {
+ goto error;
+ }
+
+ if (io_pread(src_fd, &buf,
+ LZMA_STREAM_HEADER_SIZE, pos))
+ goto error;
+
+ // Stream Padding is always a multiple of four bytes.
+ int i = 2; // last 32-bit word of the 12 bytes just read
+ if (buf.u32[i] != 0)
+ break;
+
+ // To avoid calling io_pread() for every four bytes
+ // of Stream Padding, take advantage that we read
+ // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
+ // check them too before calling io_pread() again.
+ do {
+ stream_padding += 4;
+ pos -= 4;
+ --i;
+ } while (i >= 0 && buf.u32[i] == 0);
+ }
+
+ // Decode the Stream Footer.
+ ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
+ if (ret != LZMA_OK) {
+ goto error;
+ }
+
+ // Check that the Stream Footer doesn't specify something
+ // that we don't support. This can only happen if the xz
+ // version is older than liblzma and liblzma supports
+ // something new.
+ //
+ // It is enough to check Stream Footer. Stream Header must
+ // match when it is compared against Stream Footer with
+ // lzma_stream_flags_compare().
+ if (footer_flags.version != 0) {
+ goto error;
+ }
+
+ // Check that the size of the Index field looks sane.
+ lzma_vli index_size = footer_flags.backward_size;
+ if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
+ goto error;
+ }
+
+ // Set pos to the beginning of the Index.
+ pos -= index_size;
+
+ // Decode the Index.
+ ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
+ if (ret != LZMA_OK) {
+ goto error;
+ }
+
+ do {
+ // Don't give the decoder more input than the
+ // Index size.
+ strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
+ if (io_pread(src_fd, &buf, strm.avail_in, pos))
+ goto error;
+
+ pos += strm.avail_in;
+ index_size -= strm.avail_in;
+
+ strm.next_in = buf.u8;
+ ret = lzma_code(&strm, LZMA_RUN);
+
+ } while (ret == LZMA_OK);
+
+ // If the decoding seems to be successful, check also that
+ // the Index decoder consumed as much input as indicated
+ // by the Backward Size field.
+ if (ret == LZMA_STREAM_END)
+ if (index_size != 0 || strm.avail_in != 0)
+ ret = LZMA_DATA_ERROR;
+
+ if (ret != LZMA_STREAM_END) {
+ // LZMA_BUF_ERROR means that the Index decoder
+ // would have liked more input than what the Index
+ // size should be according to Stream Footer.
+ // The message for LZMA_DATA_ERROR makes more
+ // sense in that case.
+ if (ret == LZMA_BUF_ERROR)
+ ret = LZMA_DATA_ERROR;
+
+ goto error;
+ }
+
+ // Decode the Stream Header and check that its Stream Flags
+ // match the Stream Footer.
+ pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
+ if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
+ goto error;
+ }
+
+ pos -= lzma_index_total_size(this_index);
+ if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
+ goto error;
+
+ ret = lzma_stream_header_decode(&header_flags, buf.u8);
+ if (ret != LZMA_OK) {
+ goto error;
+ }
+
+ ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
+ if (ret != LZMA_OK) {
+ goto error;
+ }
+
+ // Store the decoded Stream Flags into this_index. This is
+ // needed so that we can print which Check is used in each
+ // Stream.
+ ret = lzma_index_stream_flags(this_index, &footer_flags);
+ if (ret != LZMA_OK)
+ goto error;
+
+ // Store also the size of the Stream Padding field. It is
+ // needed to show the offsets of the Streams correctly.
+ ret = lzma_index_stream_padding(this_index, stream_padding);
+ if (ret != LZMA_OK)
+ goto error;
+
+ if (combined_index != NULL) {
+ // Append the earlier decoded Indexes
+ // after this_index.
+ ret = lzma_index_cat(
+ this_index, combined_index, NULL);
+ if (ret != LZMA_OK) {
+ goto error;
+ }
+ }
+
+ combined_index = this_index;
+ this_index = NULL;
+
+ xfi->stream_padding += stream_padding;
+
+ } while (pos > 0); // pos == 0: the first Stream of the file has been handled
+
+ lzma_end(&strm);
+
+ // All OK. Make combined_index available to the caller.
+ xfi->idx = combined_index;
+ return false;
+
+error:
+ // Something went wrong, free the allocated memory.
+ lzma_end(&strm);
+ lzma_index_end(combined_index, NULL);
+ lzma_index_end(this_index, NULL);
+ return true;
+}
+
+/***************** end of copy from list.c *************************/
+
+/*
+ * Report the total uncompressed length of the .xz file open on 'fd', as
+ * recorded in its Index(es).  Yields 0 when the Indexes cannot be parsed.
+ */
+off_t
+unxz_len(int fd)
+{
+	xz_file_info info = XZ_FILE_INFO_INIT;
+	off_t total;
+
+	if (parse_indexes(&info, fd))
+		return 0;
+
+	total = lzma_index_uncompressed_size(info.idx);
+	lzma_index_end(info.idx, NULL);
+	return total;
+}
+
diff --git a/usr.bin/gzip/unzstd.c b/usr.bin/gzip/unzstd.c
new file mode 100644
index 000000000000..4536f3119ace
--- /dev/null
+++ b/usr.bin/gzip/unzstd.c
@@ -0,0 +1,89 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* This file is #included by gzip.c */
+
+/*
+ * Decompress Zstandard data read from descriptor 'in' and write the
+ * result to descriptor 'out'.
+ *
+ * 'pre' holds 'prelen' input bytes that are decoded before anything is
+ * read from 'in'.  If 'bytes_in' is not NULL it receives the number of
+ * compressed bytes consumed, 'prelen' included.
+ *
+ * Returns the number of bytes written to 'out'.  Failures, including
+ * input that stops in the middle of a frame, are reported through
+ * maybe_err()/maybe_errx().
+ */
+static off_t
+unzstd(int in, int out, char *pre, size_t prelen, off_t *bytes_in)
+{
+	/* Allocated on first use and kept for later calls. */
+	static char *ibuf, *obuf;
+	ZSTD_inBuffer zib;
+	ZSTD_outBuffer zob;
+	ZSTD_DCtx *zds;
+	ssize_t res;
+	size_t zres, off, produced;
+	size_t bytes_out = 0;
+	int eof = 0;
+
+	if (ibuf == NULL)
+		ibuf = malloc(BUFLEN);
+	if (obuf == NULL)
+		obuf = malloc(BUFLEN);
+	if (ibuf == NULL || obuf == NULL)
+		maybe_err("malloc");
+
+	zds = ZSTD_createDStream();
+	if (zds == NULL)
+		maybe_errx("Can't initialize decoder");
+	zres = ZSTD_initDStream(zds);
+	if (ZSTD_isError(zres))
+		maybe_errx("%s", ZSTD_getErrorName(zres));
+
+	zib.src = pre;
+	zib.size = prelen;
+	zib.pos = 0;
+	if (bytes_in != NULL)
+		*bytes_in = prelen;
+	zob.dst = obuf;
+	zob.size = BUFLEN;
+	zob.pos = 0;
+
+	/*
+	 * From here on zres is the result of the latest
+	 * ZSTD_decompressStream() call; 0 means that a frame has been
+	 * completely decoded and flushed (or that nothing was decoded yet).
+	 */
+	zres = 0;
+	for (;;) {
+		if (!eof && zib.pos >= zib.size) {
+			res = read(in, ibuf, BUFLEN);
+			if (res < 0)
+				maybe_err("read");
+			infile_newdata(res);
+			if (res == 0) {
+				eof = 1;
+				/* Input ended on a frame boundary: done. */
+				if (zres == 0)
+					break;
+			} else {
+				zib.src = ibuf;
+				zib.size = res;
+				zib.pos = 0;
+				if (bytes_in != NULL)
+					*bytes_in += res;
+			}
+		}
+		zres = ZSTD_decompressStream(zds, &zob, &zib);
+		if (ZSTD_isError(zres)) {
+			maybe_errx("%s", ZSTD_getErrorName(zres));
+		}
+
+		/* Write everything produced; a short write() is retried. */
+		produced = zob.pos;
+		for (off = 0; off < produced; off += res) {
+			res = write(out, obuf + off, produced - off);
+			if (res <= 0)
+				maybe_err("write");
+		}
+		zob.pos = 0;
+		bytes_out += produced;
+
+		/*
+		 * After end of input only pending output is left to flush;
+		 * stop once the frame is complete or nothing more comes out.
+		 */
+		if (eof && (zres == 0 || produced == 0))
+			break;
+	}
+	ZSTD_freeDStream(zds);
+
+	/* A non-zero hint at end of input means the last frame is cut short. */
+	if (zres != 0)
+		maybe_errx("Unexpected end of input");
+	return (bytes_out);
+}
diff --git a/usr.bin/gzip/zdiff b/usr.bin/gzip/zdiff
new file mode 100644
index 000000000000..f0f8c2d64681
--- /dev/null
+++ b/usr.bin/gzip/zdiff
@@ -0,0 +1,141 @@
+#!/bin/sh -
+#
+# $NetBSD: zdiff,v 1.5 2010/04/14 20:30:28 joerg Exp $
+#
+# $OpenBSD: zdiff,v 1.2 2003/07/29 07:42:44 otto Exp $
+#
+#-
+# Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
+# Copyright (c) 2010 Joerg Sonnenberger <joerg@NetBSD.org>
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+# Sponsored in part by the Defense Advanced Research Projects
+# Agency (DARPA) and Air Force Research Laboratory, Air Force
+# Materiel Command, USAF, under agreement number F39502-99-1-0512.
+#
+
+# Set $prog based on $0
+case $0 in
+ *cmp) prog=cmp # invoked under a name ending in "cmp", e.g. zcmp
+ ;;
+ *) prog=diff # any other name, e.g. zdiff or xzdiff
+ ;;
+esac
+USAGE="usage: $0 [options] file1 [file2]"
+
+check_suffix() { # $1: file name; $2 and $3: names of the variables to assign
+ case "$1" in # each arm stores a name derived from $1 in $2 and a filter command in $3
+ *[._-][Zz])
+ eval "$2=\${1%??}"
+ eval "$3=\"gzip -cdqf\""
+ ;;
+ *[._-]bz)
+ eval "$2=\${1%???}"
+ eval "$3=\"bzip2 -cdqf\""
+ ;;
+ *[._-]gz)
+ eval "$2=\${1%???}"
+ eval "$3=\"gzip -cdqf\""
+ ;;
+ *[._-]xz)
+ eval "$2=\${1%???}"
+ eval "$3=\"xz -cdqf\""
+ ;;
+ *[._-]bz2)
+ eval "$2=\${1%????}"
+ eval "$3=\"bzip2 -cdqf\""
+ ;;
+ *[._-]lzma)
+ eval "$2=\${1%?????}"
+ eval "$3=\"xz -cdqf\""
+ ;;
+ *.t[ag]z) # the .t?z/.tbz2 arms drop the characters after ".t" and append "ar"
+ eval "$2=\${1%??}ar"
+ eval "$3=\"gzip -cdqf\""
+ ;;
+ *.tbz)
+ eval "$2=\${1%??}ar"
+ eval "$3=\"bzip2 -cdqf\""
+ ;;
+ *.tbz2)
+ eval "$2=\${1%???}ar"
+ eval "$3=\"bzip2 -cdqf\""
+ ;;
+ *.t[lx]z)
+ eval "$2=\${1%??}ar"
+ eval "$3=\"xz -cdqf\""
+ ;;
+ *) # no known suffix: name unchanged, empty filter
+ eval "$2=\$1"
+ eval "$3=\"\""
+ ;;
+ esac
+}
+
+
+# Pull out any command line flags so we can pass them to diff/cmp
+# XXX - assumes there is no optarg
+flags=
+while test $# -ne 0; do
+ case "$1" in
+ --) # explicit end of flags; the marker itself is dropped
+ shift
+ break
+ ;;
+ -) # a lone "-" ends the flags and stays in the argument list
+ break
+ ;;
+ -*) # anything else starting with "-" is collected for $prog
+ flags="$flags $1"
+ shift
+ ;;
+ *) # first non-flag argument
+ break
+ ;;
+ esac
+done
+
+if [ $# -eq 1 ]; then
+	# One file given, compare compressed to uncompressed
+	check_suffix "$1" files filt
+	if [ -z "$filt" ]; then
+		echo "z$prog: unknown suffix" 1>&2
+		exit 1
+	fi
+	$filt -- "$1" | $prog $flags -- - "$files"
+	status=$?
+elif [ $# -eq 2 ]; then
+	# Two files given, compare the two uncompressing as needed
+	check_suffix "$1" files filt
+	check_suffix "$2" files2 filt2
+	if [ -z "$filt" -a -z "$filt2" ]; then
+		# Neither needs a filter: compare them directly.
+		$prog $flags -- "$1" "$2"
+	elif [ -z "$filt" -a -n "$filt2" -a "$1" != "-" ]; then
+		# Only the second is compressed: feed it on stdin.
+		$filt2 -- "$2" | $prog $flags -- "$1" -
+	elif [ -n "$filt" -a -z "$filt2" -a "$2" != "-" ]; then
+		# Only the first is compressed: feed it on stdin.
+		$filt -- "$1" | $prog $flags -- - "$2"
+	else
+		# Remaining cases: expand the second file into a temporary
+		# file and feed the first one on stdin.
+		tmp=`mktemp -t z$prog.XXXXXXXXXX` || exit 1
+		# $tmp is quoted everywhere so that a temporary directory
+		# containing blanks or glob characters cannot split the name.
+		trap 'rm -f "$tmp"' 0 1 2 3 13 15
+		${filt2:-cat} -- "$2" > "$tmp" || exit $?
+		${filt:-cat} -- "$1" | $prog $flags -- - "$tmp"
+	fi
+	status=$?
+else
+	echo "$USAGE" 1>&2
+	exit 1
+fi
+
+exit $status
diff --git a/usr.bin/gzip/zdiff.1 b/usr.bin/gzip/zdiff.1
new file mode 100644
index 000000000000..3ce1663ae1ce
--- /dev/null
+++ b/usr.bin/gzip/zdiff.1
@@ -0,0 +1,140 @@
+.\" $NetBSD: zdiff.1,v 1.5 2010/04/14 19:52:05 wiz Exp $
+.\" $OpenBSD: zdiff.1,v 1.2 2003/07/13 17:39:14 millert Exp $
+.\"
+.\" Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
+.\" Copyright (c) 2010 Joerg Sonnenberger <joerg@NetBSD.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.\" Sponsored in part by the Defense Advanced Research Projects
+.\" Agency (DARPA) and Air Force Research Laboratory, Air Force
+.\" Materiel Command, USAF, under agreement number F39502-99-1-0512.
+.Dd May 23, 2011
+.Dt ZDIFF 1
+.Os
+.Sh NAME
+.Nm zcmp ,
+.Nm zdiff
+.Nd compare compressed files
+.Sh SYNOPSIS
+.Nm zcmp
+.Op Ar options
+.Ar file1
+.Op Ar file2
+.Nm zdiff
+.Op Ar options
+.Ar file1
+.Op Ar file2
+.Sh DESCRIPTION
+.Nm zcmp
+and
+.Nm zdiff
+are filters that invoke
+.Xr cmp 1
+or
+.Xr diff 1
+respectively to compare compressed files.
+Any
+.Ar options
+that are specified are passed to
+.Xr cmp 1
+or
+.Xr diff 1 .
+.Pp
+If only
+.Ar file1
+is specified, it is compared against a file with the same name, but
+with the extension removed.
+When both
+.Ar file1
+and
+.Ar file2
+are specified, either file may be compressed.
+.Pp
+Extensions handled by
+.Xr gzip 1 :
+.Bl -bullet -compact
+.It
+z, Z,
+.It
+gz,
+.It
+taz,
+.It
+tgz.
+.El
+.Pp
+Extensions handled by
+.Xr bzip2 1 :
+.Bl -bullet -compact
+.It
+bz,
+.It
+bz2,
+.It
+tbz,
+.It
+tbz2.
+.El
+.Pp
+Extensions handled by
+.Xr xz 1 :
+.Bl -bullet -compact
+.It
+lzma,
+.It
+xz,
+.It
+tlz,
+.It
+txz.
+.El
+.Sh ENVIRONMENT
+.Bl -tag -width "TMPDIR"
+.It Ev TMPDIR
+Directory in which to place temporary files.
+If unset,
+.Pa /tmp
+is used.
+.El
+.Sh FILES
+.Bl -tag -width "/tmp/zdiff.XXXXXXXXXX" -compact
+.It Pa /tmp/zcmp.XXXXXXXXXX
+Temporary file for
+.Nm zcmp .
+.It Pa /tmp/zdiff.XXXXXXXXXX
+Temporary file for
+.Nm zdiff .
+.El
+.Sh SEE ALSO
+.Xr bzip2 1 ,
+.Xr cmp 1 ,
+.Xr diff 1 ,
+.Xr gzip 1 ,
+.Xr xz 1
+.Sh CAVEATS
+.Nm zcmp
+and
+.Nm zdiff
+rely solely on the file extension to determine what is, or is not,
+a compressed file.
+Consequently, the following are not supported as arguments:
+.Bl -dash
+.It
+directories
+.It
+device special files
+.It
+filenames indicating the standard input
+.Pq Dq \-
+.El
diff --git a/usr.bin/gzip/zforce b/usr.bin/gzip/zforce
new file mode 100644
index 000000000000..06c897ddf709
--- /dev/null
+++ b/usr.bin/gzip/zforce
@@ -0,0 +1,54 @@
+#!/bin/sh -
+#
+# $NetBSD: zforce,v 1.2 2003/12/28 12:43:43 wiz Exp $
+# $OpenBSD: zforce,v 1.2 2003/08/05 18:22:17 deraadt Exp $
+#
+#-
+# Copyright (c) 2003 Otto Moerbeek <otto@drijf.net>
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+prog=`basename "$0"`
+USAGE="usage: $prog file ..."
+if test $# -eq 0; then
+	echo "$USAGE" 1>&2
+	exit 1
+fi
+
+ret=0
+
+# Rename every operand that file(1) reports as gzip data, and that does not
+# already end in a gzip-style suffix, to "<name>.gz".
+while test $# -ne 0; do
+	case "$1" in
+	*[._-]gz | *.t[ag]z)
+		# Already has a gzip-style suffix; nothing to do.
+		;;
+	*)
+		# "--" keeps an operand that begins with "-" from being
+		# parsed as an option by file(1) or mv(1).
+		if file -- "$1" |
+		    grep -q "gzip compressed data" 2> /dev/null
+		then
+			n="$1".gz
+			if mv -- "$1" "$n" 2> /dev/null; then
+				echo "$1" -- renamed to "$n"
+			else
+				ret=1
+				# Diagnostics go to stderr.
+				echo "$prog: cannot rename $1 to $n" 1>&2
+			fi
+		fi
+		;;
+	esac
+	shift
+done
+exit $ret
diff --git a/usr.bin/gzip/zforce.1 b/usr.bin/gzip/zforce.1
new file mode 100644
index 000000000000..10010d61cc14
--- /dev/null
+++ b/usr.bin/gzip/zforce.1
@@ -0,0 +1,51 @@
+.\" $NetBSD: zforce.1,v 1.2 2003/12/28 12:43:43 wiz Exp $
+.\" $OpenBSD: zforce.1,v 1.1 2003/07/29 11:50:09 otto Exp $
+.\"
+.\" Copyright (c) 2003 Otto Moerbeek <otto@drijf.net>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.Dd January 26, 2007
+.Dt ZFORCE 1
+.Os
+.Sh NAME
+.Nm zforce
+.Nd force gzip files to have a .gz suffix
+.Sh SYNOPSIS
+.Nm zforce
+.Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility renames
+.Xr gzip 1
+files to have a
+.Sq .gz
+suffix, so that
+.Xr gzip 1
+will not compress them twice.
+This can be useful if file names were truncated during a file transfer.
+Files that have an existing
+.Sq .gz ,
+.Sq -gz ,
+.Sq _gz ,
+.Sq .tgz
+or
+.Sq .taz
+suffix, or that have not been compressed by
+.Xr gzip 1 ,
+are ignored.
+.Sh SEE ALSO
+.Xr gzip 1
+.Sh CAVEATS
+.Nm
+overwrites existing files without warning.
diff --git a/usr.bin/gzip/zmore b/usr.bin/gzip/zmore
new file mode 100644
index 000000000000..fb195c482a99
--- /dev/null
+++ b/usr.bin/gzip/zmore
@@ -0,0 +1,81 @@
+#!/bin/sh -
+#
+# $NetBSD: zmore,v 1.5 2013/12/06 13:33:15 pettai Exp $
+#
+# $OpenBSD: zmore,v 1.6 2008/08/20 09:22:02 mpf Exp $
+#
+#-
+# Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+# Sponsored in part by the Defense Advanced Research Projects
+# Agency (DARPA) and Air Force Research Laboratory, Air Force
+# Materiel Command, USAF, under agreement number F39502-99-1-0512.
+#
+
+# Pull out any command line flags so we can pass them to more/less
+flags=
+while test $# -ne 0; do
+	case "$1" in
+	--)
+		shift
+		break
+		;;
+	-*)
+		flags="$flags $1"
+		shift
+		;;
+	*)
+		break
+		;;
+	esac
+done
+
+# Default pager: less when invoked as zless, more otherwise.  A PAGER
+# variable that is set, even to the empty string, takes precedence.
+self=`basename "$0"`
+if [ "$self" = "zless" ] ; then
+	pager=${PAGER-less}
+else
+	pager=${PAGER-more}
+fi
+
+# No files means read from stdin
+if [ $# -eq 0 ]; then
+	gzip -cdfq 2>&1 | $pager $flags
+	exit 0
+fi
+
+oterm=`stty -g 2>/dev/null`
+while test $# -ne 0; do
+	# "--" so that an operand beginning with "-" (reachable through
+	# "zmore -- -name") is not taken for a gzip option.
+	gzip -cdfq -- "$1" 2>&1 | $pager $flags
+	prev="$1"
+	shift
+	# Between files, prompt for a single key when on a terminal.
+	if tty -s && test -n "$oterm" -a $# -gt 0; then
+		echo -n "$prev (END) - Next: $1 "
+		# Restore the saved terminal settings if we are interrupted
+		# while the terminal is in cbreak/no-echo mode.
+		trap "stty $oterm 2>/dev/null" 0 1 2 3 13 15
+		stty cbreak -echo 2>/dev/null
+		REPLY=`dd bs=1 count=1 2>/dev/null`
+		stty $oterm 2>/dev/null
+		trap - 0 1 2 3 13 15
+		echo
+		case "$REPLY" in
+		s)
+			# Skip the next file.
+			shift
+			;;
+		e|q)
+			break
+			;;
+		esac
+	fi
+done
+exit 0
diff --git a/usr.bin/gzip/zmore.1 b/usr.bin/gzip/zmore.1
new file mode 100644
index 000000000000..5e0acc9b5901
--- /dev/null
+++ b/usr.bin/gzip/zmore.1
@@ -0,0 +1,108 @@
+.\" $NetBSD: zmore.1,v 1.4 2013/11/12 21:58:37 pettai Exp $
+.\" $OpenBSD: zmore.1,v 1.10 2009/08/16 09:41:08 sobrado Exp $
+.\"
+.\" Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.\" Sponsored in part by the Defense Advanced Research Projects
+.\" Agency (DARPA) and Air Force Research Laboratory, Air Force
+.\" Materiel Command, USAF, under agreement number F39502-99-1-0512.
+.Dd October 22, 2014
+.Dt ZMORE 1
+.Os
+.Sh NAME
+.Nm zmore ,
+.Nm zless
+.Nd view compressed files
+.Sh SYNOPSIS
+.Nm zmore
+.Op Ar flags
+.Op Ar
+.Nm zless
+.Op Ar flags
+.Op Ar
+.Sh DESCRIPTION
+.Nm
+is a filter that allows the viewing of files compressed with Lempel-Ziv
+encoding.
+Such files generally have a
+.Dq Z
+or
+.Dq gz
+extension (both the
+.Xr compress 1
+and
+.Xr gzip 1
+formats are supported).
+Any
+.Ar flags
+that are specified are passed to the user's preferred
+.Ev PAGER
+(which is
+.Pa /usr/bin/more
+by default).
+.Pp
+.Nm zless
+is equivalent to
+.Nm zmore
+but uses
+.Xr less 1
+as a pager instead of
+.Xr more 1 .
+.Pp
+When multiple files are specified,
+.Nm
+will pause at the end of each file and present the following prompt to the user:
+.Bd -literal -offset indent
+prev_file (END) - Next: next_file
+.Ed
+.Pp
+Where
+.Sy prev_file
+is the file that was just displayed and
+.Sy next_file
+is the next file to be displayed.
+The following keys are recognized at the prompt:
+.Bl -tag -width "e or q" -offset indent
+.It Ic e No or Ic q
+quit
+.Nm zmore .
+.It Ic s
+skip the next file (or exit if the next file is the last).
+.El
+.Pp
+If no files are specified,
+.Nm
+will read from the standard input.
+In this mode
+.Nm
+will assume
+.Xr gzip 1
+style compression since there is no suffix on which to make a decision.
+.Sh ENVIRONMENT
+.Bl -tag -width "PAGER"
+.It Ev PAGER
+Program used to display files.
+If unset,
+.Pa /usr/bin/more
+is used
+.Pq Nm zmore
+or
+.Pa /usr/bin/less
+.Pq Nm zless .
+.El
+.Sh SEE ALSO
+.Xr compress 1 ,
+.Xr less 1 ,
+.Xr more 1
diff --git a/usr.bin/gzip/znew b/usr.bin/gzip/znew
new file mode 100644
index 000000000000..498c1b70622d
--- /dev/null
+++ b/usr.bin/gzip/znew
@@ -0,0 +1,136 @@
+#!/bin/sh -
+#
+# $NetBSD: znew,v 1.3 2008/04/27 09:07:13 nakayama Exp $
+# $OpenBSD: znew,v 1.2 2003/08/05 18:22:17 deraadt Exp $
+#
+#-
+# Copyright (c) 2003 Otto Moerbeek <otto@drijf.net>
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+
+# Return 0 if the first arg file size is smaller than the second, 1 otherwise.
+smaller () {
+ a=`du -k "$1" | awk '{ print $1 }'`
+ b=`du -k "$2" | awk '{ print $1 }'`
+ test $a -lt $b
+}
+
+# Check gzip integrity if the -t flag is specified
+checkfile () {
+ if test $tflag -eq 1; then
+ gzip -qt < "$1"
+ fi
+}
+
+# Decompress a file and then gzip it
+process () {
+ prefix="${1%.Z}"
+ filez="$prefix".Z
+ filegz="$prefix".gz
+
+ if test ! -e "$filez"; then
+ echo "$prog: $filez does not exist"
+ return 1
+ fi
+ if test ! -f "$filez"; then
+ echo "$prog: $filez is not a regular file"
+ return 1
+ fi
+ if test -e "$filegz" -a $fflag -eq 0; then
+ echo "$prog: $filegz already exists"
+ return 1
+ fi
+
+ tmp=`mktemp /tmp/znewXXXXXXXXXX` || {
+ echo "$prog: cannot create tmp file"
+ return 1
+ }
+ trap 'rm -f "$tmp"; exit 1' HUP INT QUIT PIPE TERM
+
+ # Do the actual work, producing a file "$tmp"
+ if uncompress -f -c < "$filez" | gzip -f -c $gzipflags > "$tmp"; then
+ if test $kflag -eq 1 && smaller "$filez" "$tmp"; then
+ echo -n "$prog: $filez is smaller than $filegz"
+ echo "; keeping it"
+ rm -f "$tmp"
+ return 0
+ fi
+ if ! checkfile "$tmp"; then
+ echo "$prog: integrity check of $tmp failed"
+ rm -f "$tmp"
+ return 1;
+ fi
+
+ # Try to keep the mode of the original file
+ if ! cp -fp "$filez" "$filegz"; then
+ echo "$prog: warning: could not keep mode of $filez"
+ fi
+ if ! cp "$tmp" "$filegz" 2> /dev/null; then
+ echo "$prog: warning: could not keep mode of $filez"
+ if ! cp -f "$tmp" "$filegz" 2> /dev/null; then
+ echo "$prog: could not copy $tmp to $filegz"
+ rm -f "$filegz" "$tmp"
+ return 1
+ fi
+ fi
+ if ! touch -fr "$filez" "$filegz"; then
+ echo -n "$prog: warning: could not keep timestamp of "
+ echo "$filez"
+ fi
+ rm -f "$filez" "$tmp"
+ else
+ echo "$prog: failed to process $filez"
+ rm -f "$tmp"
+ return 1
+ fi
+}
+
+prog=`basename "$0"`
+usage="usage: $prog [-ftv9K] file ..."
+
+fflag=0
+tflag=0
+kflag=0
+gzipflags=
+
+# -P flag is recognized to maintain compatibility, but ignored. Pipe mode is
+# always used
+while getopts :ftv9PK i; do
+	case $i in
+	f) fflag=1;;
+	t) tflag=1;;
+	v) gzipflags="-v $gzipflags";;
+	9) gzipflags="-9 $gzipflags";;
+	P) ;;
+	K) kflag=1;;
+	\?) echo "$usage"; exit 1;;
+	esac
+done
+
+shift $((OPTIND - 1))
+
+if test $# -eq 0; then
+	echo "$usage"
+	exit 1
+fi
+
+# Convert every operand.  The exit status is that of the last operand that
+# failed, or 0 when all of them were converted.
+rc=0
+
+while test $# -ne 0; do
+	# Capture the status right at the failure.  Inside
+	# "if ! process ...; then" the value of $? is the status of the
+	# negated command, i.e. always 0, so failures would be lost.
+	process "$1" || rc=$?
+	shift
+done
+exit $rc
diff --git a/usr.bin/gzip/znew.1 b/usr.bin/gzip/znew.1
new file mode 100644
index 000000000000..0b97fe6f4a9f
--- /dev/null
+++ b/usr.bin/gzip/znew.1
@@ -0,0 +1,69 @@
+.\" $NetBSD: znew.1,v 1.2 2003/12/28 12:43:43 wiz Exp $
+.\" $OpenBSD: znew.1,v 1.1 2003/08/02 20:52:50 otto Exp $
+.\"
+.\" Copyright (c) 2003 Otto Moerbeek <otto@drijf.net>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.Dd January 26, 2007
+.Dt ZNEW 1
+.Os
+.Sh NAME
+.Nm znew
+.Nd convert compressed files to gzipped files
+.Sh SYNOPSIS
+.Nm
+.Op Fl ftv9K
+.Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility uncompresses files compressed by
+.Xr compress 1
+and recompresses them with
+.Xr gzip 1 .
+.Pp
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl f
+Overwrite existing
+.Sq .gz
+files.
+Unless this option is specified,
+.Nm
+refuses to overwrite existing files.
+.It Fl t
+Test integrity of the gzipped file before deleting the original file.
+If the integrity check fails, the original
+.Sq .Z
+file is not removed.
+.It Fl v
+Print a report specifying the achieved compression ratios.
+.It Fl 9
+Use the -9 mode of
+.Xr gzip 1 ,
+achieving better compression at the cost of slower execution.
+.It Fl K
+Keep the original
+.Sq .Z
+file if it uses fewer disk blocks than the gzipped one.
+A disk block is 1024 bytes.
+.El
+.Sh SEE ALSO
+.Xr gzip 1
+.Sh CAVEATS
+The
+.Nm
+utility tries to maintain the file mode of the original file.
+If the original file is not writable, it is not able to do that and
+.Nm
+will print a warning.
diff --git a/usr.bin/gzip/zuncompress.c b/usr.bin/gzip/zuncompress.c
new file mode 100644
index 000000000000..79f3983037f7
--- /dev/null
+++ b/usr.bin/gzip/zuncompress.c
@@ -0,0 +1,397 @@
+/* $NetBSD: zuncompress.c,v 1.11 2011/08/16 13:55:02 joerg Exp $ */
+
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1985, 1986, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis and James A. Woods, derived from original
+ * work by Spencer Thomas and Joseph Orost.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp
+ */
+
+/* This file is #included by gzip.c */
+
+static int zread(void *, char *, int);
+
+/*
+ * Decoder table accessors.  All of them expect a `struct s_zstate *zs'
+ * to be in scope at the point of use.
+ *
+ * tab_prefixof(i)  prefix code of table entry i, stored in zs_codetab.
+ * tab_suffixof(i)  suffix byte of table entry i; zs_htab is reinterpreted
+ *                  as an array of char_type for this purpose.
+ * de_stack         base of the output stack: the byte that follows the
+ *                  first 1 << BITS suffix bytes inside zs_htab.
+ */
+#define tab_prefixof(i) (zs->zs_codetab[i])
+#define tab_suffixof(i) ((char_type *)(zs->zs_htab))[i]
+#define de_stack ((char_type *)&tab_suffixof(1 << BITS))
+
+#define BITS 16 /* Default bits. */
+#define HSIZE 69001 /* 95% occupancy */ /* XXX may not need HSIZE */
+#define BIT_MASK 0x1f /* Defines for third byte of header. */
+#define BLOCK_MASK 0x80
+#define CHECK_GAP 10000 /* Ratio check interval. */
+#define BUFSIZE (64 * 1024)
+
+/*
+ * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
+ * a fourth header byte (for expansion).
+ */
+#define INIT_BITS 9 /* Initial number of bits/code. */
+
+/*
+ * the next two codes should not be changed lightly, as they must not
+ * lie within the contiguous general code space.
+ */
+#define FIRST 257 /* First free entry. */
+#define CLEAR 256 /* Table clear output code. */
+
+
+#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
+
+typedef long code_int;
+typedef long count_int;
+typedef u_char char_type;
+
+static char_type magic_header[] =
+ {'\037', '\235'}; /* 1F 9D */
+
+static char_type rmask[9] =
+ {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
+
+static off_t total_compressed_bytes;
+static size_t compressed_prelen;
+static char *compressed_pre;
+
+/*
+ * Per-stream decoder state.  zdopen() allocates one instance and hands it
+ * to funopen() as the cookie that zread() and zclose() receive.
+ */
+struct s_zstate {
+ FILE *zs_fp; /* File stream for I/O */
+ char zs_mode; /* r or w */
+ /*
+ * zread() progress: S_START until the header has been parsed, S_MIDDLE
+ * while codes are being decoded, S_EOF once getcode() reported the end
+ * of the input.
+ */
+ enum {
+ S_START, S_MIDDLE, S_EOF
+ } zs_state; /* State of computation */
+ int zs_n_bits; /* Number of bits/code. */
+ int zs_maxbits; /* User settable max # bits/code. */
+ code_int zs_maxcode; /* Maximum code, given n_bits. */
+ code_int zs_maxmaxcode; /* Should NEVER generate this code. */
+ /*
+ * The decoder does not use zs_htab as a hash table: through
+ * tab_suffixof() and de_stack it is accessed as a byte array holding
+ * the suffix characters followed by the output stack.
+ */
+ count_int zs_htab [HSIZE];
+ /* Prefix code of each table entry, accessed through tab_prefixof(). */
+ u_short zs_codetab [HSIZE];
+ code_int zs_hsize; /* For dynamic table sizing. */
+ code_int zs_free_ent; /* First unused entry. */
+ /*
+ * Block compression parameters -- after all codes are used up,
+ * and compression rate changes, start over.
+ */
+ int zs_block_compress;
+ /* Set by zread() on a CLEAR code; consumed and reset by getcode(). */
+ int zs_clear_flg;
+ long zs_ratio;
+ count_int zs_checkpoint;
+ int zs_offset;
+ long zs_in_count; /* Length of input. */
+ long zs_bytes_out; /* Length of compressed output. */
+ long zs_out_count; /* # of codes output (for debugging). */
+ char_type zs_buf[BITS];
+ union {
+ /* NOTE(review): u.w is not referenced by the code in this file. */
+ struct {
+ long zs_fcode;
+ code_int zs_ent;
+ code_int zs_hsize_reg;
+ int zs_hshift;
+ } w; /* Write parameters */
+ struct {
+ /* Top of the output stack that grows up from de_stack. */
+ char_type *zs_stackp;
+ /* First character of the most recently expanded string. */
+ int zs_finchar;
+ code_int zs_code, zs_oldcode, zs_incode;
+ /* Bit offset of the next code in zs_gbuf / usable bits in zs_gbuf. */
+ int zs_roffset, zs_size;
+ /* Raw input for getcode(), refilled zs_n_bits bytes at a time. */
+ char_type zs_gbuf[BITS];
+ } r; /* Read parameters */
+ } u;
+};
+
+static code_int getcode(struct s_zstate *zs);
+
+/*
+ * Copy everything that can be read from `in' to `out' in BUFSIZE chunks.
+ * When tflag is non-zero the data is read but not written.
+ *
+ * `pre'/`prelen' describe bytes of the compressed stream that the caller
+ * has already consumed; they are published through compressed_pre and
+ * compressed_prelen for zread()/getcode() to pick up.
+ *
+ * If `compressed_bytes' is not NULL it receives total_compressed_bytes
+ * once the copy loop has finished.
+ *
+ * Returns the number of bytes read from `in', or -1 if the buffer could
+ * not be allocated or a write came up short.
+ */
+static off_t
+zuncompress(FILE *in, FILE *out, char *pre, size_t prelen,
+    off_t *compressed_bytes)
+{
+	char *chunk;
+	off_t nread, total;
+
+	chunk = malloc(BUFSIZE);
+	if (chunk == NULL)
+		return -1;
+
+	/* XXX */
+	compressed_prelen = prelen;
+	compressed_pre = (prelen != 0) ? pre : NULL;
+
+	total = 0;
+	for (;;) {
+		nread = fread(chunk, 1, BUFSIZE, in);
+		if (nread == 0)
+			break;
+		if (tflag == 0 &&
+		    (off_t)fwrite(chunk, 1, nread, out) != nread) {
+			total = -1;
+			goto done;
+		}
+		total += nread;
+	}
+
+	if (compressed_bytes)
+		*compressed_bytes = total_compressed_bytes;
+
+done:
+	free(chunk);
+	return total;
+}
+
+/*
+ * Close hook installed by zdopen(): release the decoder state.
+ * The descriptor that was passed to zdopen() is left open; closing it
+ * remains the caller's job.  Always returns 0.
+ */
+static int
+zclose(void *zs)
+{
+	free(zs);
+	return (0);
+}
+
+/*
+ * Wrap the descriptor `fd', which must refer to compress(1) data, in a
+ * read-only stdio stream that yields the decompressed bytes.
+ *
+ * Returns the new stream, or NULL with errno set if the decoder state or
+ * either of the stdio streams could not be created.  The descriptor is
+ * never closed here, neither on failure nor when the returned stream is
+ * closed (see zclose()).
+ */
+FILE *
+zdopen(int fd)
+{
+	struct s_zstate *zs;
+	FILE *fp;
+	int serrno;
+
+	if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL)
+		return (NULL);
+
+	zs->zs_state = S_START;
+
+	/* XXX we can get rid of some of these */
+	zs->zs_hsize = HSIZE;	/* For dynamic table sizing. */
+	zs->zs_free_ent = 0;	/* First unused entry. */
+	zs->zs_block_compress = BLOCK_MASK;
+	zs->zs_clear_flg = 0;	/* XXX we calloc()'d this structure why = 0? */
+	zs->zs_ratio = 0;
+	zs->zs_checkpoint = CHECK_GAP;
+	zs->zs_in_count = 1;	/* Length of input. */
+	zs->zs_out_count = 0;	/* # of codes output (for debugging). */
+	zs->u.r.zs_roffset = 0;
+	zs->u.r.zs_size = 0;
+
+	/*
+	 * Layering compress on top of stdio in order to provide buffering,
+	 * and ensure that reads and write work with the data specified.
+	 */
+	if ((zs->zs_fp = fdopen(fd, "r")) == NULL)
+		goto fail;
+
+	/* zread() supplies the data, zclose() frees zs on fclose(). */
+	if ((fp = funopen(zs, zread, NULL, NULL, zclose)) == NULL)
+		goto fail;
+
+	return (fp);
+
+fail:
+	/*
+	 * Nobody else owns zs yet, so release it here; keep the errno of
+	 * the call that failed for the caller's diagnostics.  A stream
+	 * already created by fdopen() is deliberately not fclose()d, as
+	 * that would also close the caller's descriptor.
+	 */
+	serrno = errno;
+	free(zs);
+	errno = serrno;
+	return (NULL);
+}
+
+/*
+ * Decompress read. This routine adapts to the codes in the file building
+ * the "string" table on-the-fly; requiring no table to be stored in the
+ * compressed file. See the definitions above.
+ *
+ * This is the read function that zdopen() passes to funopen(): `cookie' is
+ * the struct s_zstate, `rbp' the destination buffer and `num' its size.
+ * Returns the number of bytes stored in `rbp' (0 once the input is
+ * exhausted), or -1 with errno set to EFTYPE when the header or a code
+ * is invalid.
+ *
+ * The function is resumable: when the caller's buffer fills up while a
+ * decoded string is still on the stack, it returns, and the next call
+ * jumps straight back to the `middle' label inside the decode loop.
+ */
+static int
+zread(void *cookie, char *rbp, int num)
+{
+ u_int count, i;
+ struct s_zstate *zs;
+ u_char *bp, header[3];
+
+ if (num == 0)
+ return (0);
+
+ zs = cookie;
+ count = num;
+ bp = (u_char *)rbp;
+ /* Continue from wherever the previous call stopped. */
+ switch (zs->zs_state) {
+ case S_START:
+ zs->zs_state = S_MIDDLE;
+ break;
+ case S_MIDDLE:
+ goto middle;
+ case S_EOF:
+ goto eof;
+ }
+
+ /*
+ * NOTE(review): the state is already S_MIDDLE if one of the header
+ * checks below fails, so a further read on the same stream would jump
+ * to `middle' with zs_stackp still unset -- confirm callers stop after
+ * the first error.
+ */
+
+ /* Check the magic number */
+ /* Header bytes the caller already consumed are taken first. */
+ for (i = 0; i < 3 && compressed_prelen; i++, compressed_prelen--)
+ header[i] = *compressed_pre++;
+
+ if (fread(header + i, 1, sizeof(header) - i, zs->zs_fp) !=
+ sizeof(header) - i ||
+ memcmp(header, magic_header, sizeof(magic_header)) != 0) {
+ errno = EFTYPE;
+ return (-1);
+ }
+ total_compressed_bytes = 0;
+ /* Third header byte: BLOCK_MASK flag plus the code width in BIT_MASK. */
+ zs->zs_maxbits = header[2]; /* Set -b from file. */
+ zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK;
+ zs->zs_maxbits &= BIT_MASK;
+ zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
+ if (zs->zs_maxbits > BITS || zs->zs_maxbits < 12) {
+ errno = EFTYPE;
+ return (-1);
+ }
+ /* As above, initialize the first 256 entries in the table. */
+ zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
+ for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) {
+ tab_prefixof(zs->u.r.zs_code) = 0;
+ tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code;
+ }
+ /* Code 256 (CLEAR) is reserved only in block-compress mode. */
+ zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256;
+
+ zs->u.r.zs_oldcode = -1;
+ zs->u.r.zs_stackp = de_stack;
+
+ while ((zs->u.r.zs_code = getcode(zs)) > -1) {
+
+ /* CLEAR: restart the table; getcode() resets the code width. */
+ if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) {
+ for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0;
+ zs->u.r.zs_code--)
+ tab_prefixof(zs->u.r.zs_code) = 0;
+ zs->zs_clear_flg = 1;
+ zs->zs_free_ent = FIRST;
+ zs->u.r.zs_oldcode = -1;
+ continue;
+ }
+ zs->u.r.zs_incode = zs->u.r.zs_code;
+
+ /* Special case for KwKwK string. */
+ if (zs->u.r.zs_code >= zs->zs_free_ent) {
+ if (zs->u.r.zs_code > zs->zs_free_ent ||
+ zs->u.r.zs_oldcode == -1) {
+ /* Bad stream. */
+ errno = EFTYPE;
+ return (-1);
+ }
+ *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar;
+ zs->u.r.zs_code = zs->u.r.zs_oldcode;
+ }
+ /*
+ * The above condition ensures that code < free_ent.
+ * The construction of tab_prefixof in turn guarantees that
+ * each iteration decreases code and therefore stack usage is
+ * bounded by (1 << BITS) - 256.
+ */
+
+ /* Generate output characters in reverse order. */
+ while (zs->u.r.zs_code >= 256) {
+ *zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code);
+ zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code);
+ }
+ *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code);
+
+ /* And put them out in forward order. */
+ /*
+ * A return from inside this loop means the caller's buffer is full;
+ * whatever is left on the stack is delivered by the next call, which
+ * re-enters here.
+ */
+middle: do {
+ if (count-- == 0)
+ return (num);
+ *bp++ = *--zs->u.r.zs_stackp;
+ } while (zs->u.r.zs_stackp > de_stack);
+
+ /* Generate the new entry. */
+ if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode &&
+ zs->u.r.zs_oldcode != -1) {
+ tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode;
+ tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar;
+ zs->zs_free_ent = zs->u.r.zs_code + 1;
+ }
+
+ /* Remember previous code. */
+ zs->u.r.zs_oldcode = zs->u.r.zs_incode;
+ }
+ /* getcode() ran out of input: report what was produced, then 0. */
+ zs->zs_state = S_EOF;
+eof: return (num - count);
+}
+
+/*-
+ * Fetch the next variable-width code from the compressed stream.
+ *
+ * Codes are zs_n_bits wide and packed starting at the low-order bit of
+ * each byte.  They are taken from zs->u.r.zs_gbuf, which is refilled
+ * zs_n_bits bytes at a time: first from whatever is left in
+ * compressed_pre, then from zs->zs_fp.  A refill is forced when the
+ * buffer is used up, when zread() requested a table clear (zs_clear_flg),
+ * or when the code width has to grow because zs_free_ent exceeds
+ * zs_maxcode.
+ *
+ * Inputs:
+ *	zs	decoder state
+ * Outputs:
+ *	code or -1 (no more input) is returned.
+ */
+static code_int
+getcode(struct s_zstate *zs)
+{
+	code_int gcode;
+	int r_off, bits, i;
+	char_type *bp;
+
+	bp = zs->u.r.zs_gbuf;
+	if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size ||
+	    zs->zs_free_ent > zs->zs_maxcode) {
+		/*
+		 * If the next entry will be too big for the current gcode
+		 * size, then we must increase the size.  This implies reading
+		 * a new buffer full, too.
+		 */
+		if (zs->zs_free_ent > zs->zs_maxcode) {
+			zs->zs_n_bits++;
+			if (zs->zs_n_bits == zs->zs_maxbits)	/* Won't get any bigger now. */
+				zs->zs_maxcode = zs->zs_maxmaxcode;
+			else
+				zs->zs_maxcode = MAXCODE(zs->zs_n_bits);
+		}
+		if (zs->zs_clear_flg > 0) {
+			zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
+			zs->zs_clear_flg = 0;
+		}
+		/* XXX */
+		for (i = 0; i < zs->zs_n_bits && compressed_prelen; i++, compressed_prelen--)
+			zs->u.r.zs_gbuf[i] = *compressed_pre++;
+		zs->u.r.zs_size = fread(zs->u.r.zs_gbuf + i, 1, zs->zs_n_bits - i, zs->zs_fp);
+		zs->u.r.zs_size += i;
+		if (zs->u.r.zs_size <= 0)	/* End of file. */
+			return (-1);
+		zs->u.r.zs_roffset = 0;
+
+		total_compressed_bytes += zs->u.r.zs_size;
+
+		/* Round size down to integral number of codes. */
+		zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1);
+	}
+	r_off = zs->u.r.zs_roffset;
+	bits = zs->zs_n_bits;
+
+	/* Get to the first byte. */
+	bp += (r_off >> 3);
+	r_off &= 7;
+
+	/* Get first part (low order bits). */
+	gcode = (*bp++ >> r_off);
+	bits -= (8 - r_off);
+	r_off = 8 - r_off;	/* Now, roffset into gcode word. */
+
+	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
+	if (bits >= 8) {
+		gcode |= *bp++ << r_off;
+		r_off += 8;
+		bits -= 8;
+	}
+
+	/*
+	 * High order bits.  When the code ended exactly on a byte boundary
+	 * there is nothing left to fetch.  Skipping the access matters for
+	 * the last code of a full buffer at zs_n_bits == BITS, where bp
+	 * points one byte past the end of zs_gbuf; the result is unchanged
+	 * because rmask[0] is 0.
+	 */
+	if (bits > 0)
+		gcode |= (*bp & rmask[bits]) << r_off;
+	zs->u.r.zs_roffset += zs->zs_n_bits;
+
+	return (gcode);
+}
+