diff options
Diffstat (limited to 'usr.bin/gzip')
-rw-r--r-- | usr.bin/gzip/Makefile | 36 | ||||
-rw-r--r-- | usr.bin/gzip/Makefile.depend | 18 | ||||
-rw-r--r-- | usr.bin/gzip/Makefile.depend.options | 7 | ||||
-rw-r--r-- | usr.bin/gzip/gzexe | 178 | ||||
-rw-r--r-- | usr.bin/gzip/gzexe.1 | 71 | ||||
-rw-r--r-- | usr.bin/gzip/gzip.1 | 246 | ||||
-rw-r--r-- | usr.bin/gzip/gzip.c | 2261 | ||||
-rw-r--r-- | usr.bin/gzip/tests/Makefile | 12 | ||||
-rw-r--r-- | usr.bin/gzip/tests/Makefile.depend | 10 | ||||
-rw-r--r-- | usr.bin/gzip/tests/foo.diff | 2 | ||||
-rw-r--r-- | usr.bin/gzip/tests/zdiff_test.sh | 125 | ||||
-rw-r--r-- | usr.bin/gzip/unbzip2.c | 143 | ||||
-rw-r--r-- | usr.bin/gzip/unlz.c | 642 | ||||
-rw-r--r-- | usr.bin/gzip/unpack.c | 334 | ||||
-rw-r--r-- | usr.bin/gzip/unxz.c | 474 | ||||
-rw-r--r-- | usr.bin/gzip/unzstd.c | 89 | ||||
-rw-r--r-- | usr.bin/gzip/zdiff | 141 | ||||
-rw-r--r-- | usr.bin/gzip/zdiff.1 | 140 | ||||
-rw-r--r-- | usr.bin/gzip/zforce | 54 | ||||
-rw-r--r-- | usr.bin/gzip/zforce.1 | 51 | ||||
-rw-r--r-- | usr.bin/gzip/zmore | 81 | ||||
-rw-r--r-- | usr.bin/gzip/zmore.1 | 108 | ||||
-rw-r--r-- | usr.bin/gzip/znew | 136 | ||||
-rw-r--r-- | usr.bin/gzip/znew.1 | 69 | ||||
-rw-r--r-- | usr.bin/gzip/zuncompress.c | 397 |
25 files changed, 5825 insertions, 0 deletions
diff --git a/usr.bin/gzip/Makefile b/usr.bin/gzip/Makefile new file mode 100644 index 000000000000..33fbdb85d78c --- /dev/null +++ b/usr.bin/gzip/Makefile @@ -0,0 +1,36 @@ +# $NetBSD: Makefile,v 1.18 2013/11/13 11:12:24 pettai Exp $ + +.include <src.opts.mk> + +PROG= gzip +MAN= gzip.1 gzexe.1 zdiff.1 zforce.1 zmore.1 znew.1 + +LIBADD= z lzma zstd + +.if ${MK_BZIP2_SUPPORT} != "no" +LIBADD+= bz2 +.else +CFLAGS+= -DNO_BZIP2_SUPPORT +.endif + +CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib + +SCRIPTS= gzexe zdiff zforce zmore znew + +MLINKS+= gzip.1 gunzip.1 \ + gzip.1 gzcat.1 \ + gzip.1 zcat.1 \ + zdiff.1 zcmp.1 \ + zdiff.1 xzdiff.1 \ + zmore.1 zless.1 + +LINKS+= ${BINDIR}/gzip ${BINDIR}/gunzip \ + ${BINDIR}/gzip ${BINDIR}/gzcat \ + ${BINDIR}/gzip ${BINDIR}/zcat \ + ${BINDIR}/zdiff ${BINDIR}/xzdiff \ + ${BINDIR}/zdiff ${BINDIR}/zcmp + +HAS_TESTS= +SUBDIR.${MK_TESTS}+= tests + +.include <bsd.prog.mk> diff --git a/usr.bin/gzip/Makefile.depend b/usr.bin/gzip/Makefile.depend new file mode 100644 index 000000000000..bc41ed39450b --- /dev/null +++ b/usr.bin/gzip/Makefile.depend @@ -0,0 +1,18 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/liblzma \ + lib/libz \ + lib/libzstd \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/usr.bin/gzip/Makefile.depend.options b/usr.bin/gzip/Makefile.depend.options new file mode 100644 index 000000000000..2370c8a69e35 --- /dev/null +++ b/usr.bin/gzip/Makefile.depend.options @@ -0,0 +1,7 @@ +# This file is not autogenerated - take care! 
+ +DIRDEPS_OPTIONS= BZIP2_SUPPORT + +DIRDEPS.BZIP2_SUPPORT.yes= lib/libbz2 + +.include <dirdeps-options.mk> diff --git a/usr.bin/gzip/gzexe b/usr.bin/gzip/gzexe new file mode 100644 index 000000000000..ca96dfc83bce --- /dev/null +++ b/usr.bin/gzip/gzexe @@ -0,0 +1,178 @@ +#!/bin/sh - +# +# $NetBSD: gzexe,v 1.3 2004/05/01 08:22:41 wiz Exp $ +# $OpenBSD: gzexe,v 1.3 2003/08/05 18:22:17 deraadt Exp $ +# +#- +# Copyright (c) 2003 Otto Moerbeek <otto@drijf.net> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# + +# The number of lines plus one in the on-the-fly decompression script +lines=19 + +# A simple string to recognize already compressed files +magic="# compressed by gzexe" + +# Write the decompression script to stdout +header () { + # first section needs variable expansion, second not + cat <<- EOF + #!/bin/sh - + $magic + lines=$lines + EOF + cat <<- 'EOF' + prog=`/usr/bin/basename "$0"` + tmp=`/usr/bin/mktemp -d /tmp/gzexeXXXXXXXXXX` || { + /bin/echo "$prog: cannot create tmp dir"; exit 1 + } + trap '/bin/rm -rf "$tmp"' 0 + if /usr/bin/tail +$lines "$0" | + /usr/bin/gzip -dc > "$tmp/$prog" 2> /dev/null; then + /bin/chmod u+x "$tmp/$prog" + "$tmp/$prog" ${1+"$@"} + ret=$? 
+ else + /bin/echo "$prog: cannot decompress $0" + ret=1 + fi + exit $ret + EOF +} + +# Test if a file is compressed by checking the magic line +compressed () { + test "X`sed -n 2p "$1" 2> /dev/null`" = "X$magic" +} + +# Decompress a file +decompress () { + tmp=`mktemp /tmp/gzexeXXXXXXXXXX` || { + echo "$prog: cannot create tmp file" + return 1 + } + if ! cp "$1" "$tmp"; then + echo "$prog: cannot copy $1 to $tmp" + rm -f "$tmp" + return 1 + fi + if ! tail +$lines "$tmp" | gzip -vdc > "$1"; then + echo "$prog: cannot decompress $1" + cp "$tmp" "$1" + rm -f "$tmp" + return 1 + fi +} + +# Perform some sanity checks on the file +check () { + if test ! -e "$1"; then + echo "$prog: cannot compress non-existing file $1" + return 1 + fi + + if test ! -f "$1"; then + echo "$prog: cannot compress non-regular file $1" + return 1 + fi + + case `basename "$1"` in + sh | mktemp | rm | echo | tail | gzip | chmod) + echo "$prog: cannot compress $1, I depend on it" + return 1 + esac + + if test ! -x "$1"; then + echo "$prog: cannot compress $1, it is not executable" + return 1 + fi + + if test -u "$1" -o -g "$1"; then + echo "$prog: cannot compress $1, it has an s bit set" + return 1 + fi +} + +# Compress a file +compress () { + tmp=`mktemp /tmp/gzexeXXXXXXXXXX` || { + echo "$prog: cannot create tmp file" + return 1 + } + if ! cp "$1" "$tmp"; then + echo "$prog: cannot copy $1 to $tmp" + rm -f "$tmp" + return 1 + fi + if ! cp "$1" "$1"~; then + echo "$prog: cannot create backup copy $1~" + rm -f "$1"~ "$tmp" + return 1 + fi + + # Use cp to overwrite the existing file preserving mode and owner + # if possible. If the file is not writable, this will produce an + # error. + + if header "$1" > "$tmp" && gzip -vc "$1" >> "$tmp"; then + if ! cp "$tmp" "$1"; then + echo "$prog: cannot copy $tmp to $1" + rm -f "$tmp" + return 1 + fi + else + echo "$prog: cannot compress $1" + rm -f "$1"~ "$tmp" + return 1 + fi +} + +# Is the -d flag specified? 
+dflag= + +# Return value +rc=0 + +if test "X$1" = X-d; then + dflag=1 + shift +fi + +prog=`basename "$0"` +USAGE="usage: $prog [-d] file ..." +if test $# -eq 0; then + echo $USAGE + exit 1 +fi + +while test $# -ne 0; do + if test $dflag; then + if ! compressed "$1"; then + echo "$prog: $1 is not compressed" + rc=1; + elif ! decompress "$1"; then + rc=1 # not $?: "!" inverted the status, so $? is 0 here + fi + else + if compressed "$1"; then + echo "$prog: $1 is already compressed" + rc=1; + elif ! check "$1" || ! compress "$1"; then + rc=1 # not $?: "!" inverted the status, so $? is 0 here + fi + fi + shift +done +exit $rc diff --git a/usr.bin/gzip/gzexe.1 b/usr.bin/gzip/gzexe.1 new file mode 100644 index 000000000000..7247714b1f05 --- /dev/null +++ b/usr.bin/gzip/gzexe.1 @@ -0,0 +1,71 @@ +.\" $NetBSD: gzexe.1,v 1.3 2003/12/28 12:49:41 wiz Exp $ +.\" $OpenBSD: gzexe.1,v 1.1 2003/07/31 07:32:47 otto Exp $ +.\" +.\" Copyright (c) 2003 Otto Moerbeek <otto@drijf.net> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.Dd January 26, 2007 +.Dt GZEXE 1 +.Os +.Sh NAME +.Nm gzexe +.Nd create auto-decompressing executables +.Sh SYNOPSIS +.Nm gzexe +.Op Fl d +.Ar +.Sh DESCRIPTION +The +.Nm +utility uses +.Xr gzip 1 +to compress executables, producing executables that decompress on-the-fly +when executed. +This saves disk space, at the cost of slower execution times. 
+The original executables are saved by copying each of them to a file with +the same name with a +.Sq ~ +suffix appended. +After verifying that the compressed executables work as expected, the backup +files can be removed. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl d +Decompress executables previously compressed by +.Nm . +.El +.Pp +The +.Nm +program refuses to compress non-regular or non-executable files, +files with a setuid or setgid bit set, files that are already +compressed using +.Nm +or programs it needs to perform on-the-fly decompression: +.Xr sh 1 , +.Xr mktemp 1 , +.Xr rm 1 , +.Xr echo 1 , +.Xr tail 1 , +.Xr gzip 1 , +and +.Xr chmod 1 . +.Sh SEE ALSO +.Xr gzip 1 +.Sh CAVEATS +The +.Nm +utility replaces files by overwriting them with the generated +compressed executable. +To be able to do this, it is required that the original files are writable. diff --git a/usr.bin/gzip/gzip.1 b/usr.bin/gzip/gzip.1 new file mode 100644 index 000000000000..86df53f59669 --- /dev/null +++ b/usr.bin/gzip/gzip.1 @@ -0,0 +1,246 @@ +.\" $NetBSD: gzip.1,v 1.31 2018/10/26 22:10:15 christos Exp $ +.\" +.\" Copyright (c) 1997, 2003, 2004, 2008, 2009, 2015, 2017 Matthew R. Green +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.Dd November 2, 2022 +.Dt GZIP 1 +.Os +.Sh NAME +.Nm gzip , +.Nm gunzip , +.Nm zcat +.Nd compression/decompression tool using Lempel-Ziv coding (LZ77) +.Sh SYNOPSIS +.Nm +.Op Fl cdfhkLlNnqrtVv +.Op Fl S Ar suffix +.Ar file +.Oo +.Ar file Oo ... +.Oc +.Oc +.Nm gunzip +.Op Fl cfhkLNqrtVv +.Op Fl S Ar suffix +.Ar file +.Oo +.Ar file Oo ... +.Oc +.Oc +.Nm zcat +.Op Fl fhV +.Ar file +.Oo +.Ar file Oo ... +.Oc +.Oc +.Sh DESCRIPTION +The +.Nm +program compresses and decompresses files using Lempel-Ziv coding +(LZ77). +If no +.Ar files +are specified, +.Nm +will compress from standard input, or decompress to standard output. +When in compression mode, each +.Ar file +will be replaced with another file with the suffix, set by the +.Fl S Ar suffix +option, added, if possible. +.Pp +In decompression mode, each +.Ar file +will be checked for existence, as will the file with the suffix +added. +Each +.Ar file +argument must contain a separate complete archive; +when multiple +.Ar files +are indicated, each is decompressed in turn. +.Pp +In the case of +.Nm gzcat +the resulting data is then concatenated in the manner of +.Xr cat 1 . +.Pp +If invoked as +.Nm gunzip +then the +.Fl d +option is enabled. +If invoked as +.Nm zcat +or +.Nm gzcat +then both the +.Fl c +and +.Fl d +options are enabled. +.Pp +This version of +.Nm +is also capable of decompressing files compressed using +.Xr compress 1 , +.Xr bzip2 1 , +.Ar lzip , +.Xr zstd 1 , +or +.Xr xz 1 . 
+.Sh OPTIONS +The following options are available: +.Bl -tag -width XXrXXXrecursiveX +.It Fl 1 , Fl Fl fast +.It Fl 2 , 3 , 4 , 5 , 6 , 7 , 8 +.It Fl 9 , Fl Fl best +These options change the compression level used, with the +.Fl 1 +option being the fastest, with less compression, and the +.Fl 9 +option being the slowest, with optimal compression. +The default compression level is 6. +.It Fl c , Fl Fl stdout , Fl Fl to-stdout +This option specifies that output will go to the standard output +stream, leaving files intact. +.It Fl d , Fl Fl decompress , Fl Fl uncompress +This option selects decompression rather than compression. +.It Fl f , Fl Fl force +This option turns on force mode. +This allows files with multiple links, symbolic links to regular files, +overwriting of pre-existing files, reading from or writing to a terminal, +and when combined with the +.Fl c +option, allowing non-compressed data to pass through unchanged. +.It Fl h , Fl Fl help +This option prints a usage summary and exits. +.It Fl k , Fl Fl keep +This option prevents +.Nm +from deleting input files after (de)compression. +.It Fl L , Fl Fl license +This option prints +.Nm +license. +.It Fl l , Fl Fl list +This option displays information about the file's compressed and +uncompressed size, ratio, uncompressed name. +With the +.Fl v +option, it also displays the compression method, CRC, date and time +embedded in the file. +.It Fl N , Fl Fl name +This option causes the stored filename in the input file to be used +as the output file. +.It Fl n , Fl Fl no-name +This option stops the filename and timestamp from being stored in +the output file. +.It Fl q , Fl Fl quiet +With this option, no warnings or errors are printed. +.It Fl r , Fl Fl recursive +This option is used to +.Nm +the files in a directory tree individually, using the +.Xr fts 3 +library. +.It Fl S Ar suffix , Fl Fl suffix Ar suffix +This option changes the default suffix from .gz to +.Ar suffix . 
+.It Fl t , Fl Fl test +This option will test compressed files for integrity. +.It Fl V , Fl Fl version +This option prints the version of the +.Nm +program. +.It Fl v , Fl Fl verbose +This option turns on verbose mode, which prints the compression +ratio for each file compressed. +.El +.Sh ENVIRONMENT +If the environment variable +.Ev GZIP +is set, it is parsed as a white-space separated list of options +handled before any options on the command line. +Options on the command line will override anything in +.Ev GZIP . +.Sh EXIT STATUS +The +.Nm +utility exits 0 on success, +1 on errors, +and 2 if a warning occurs. +.Sh SIGNALS +.Nm +responds to the following signals: +.Bl -tag -width indent +.It Dv SIGINFO +Report progress to standard error. +.El +.Sh SEE ALSO +.Xr bzip2 1 , +.Xr compress 1 , +.Xr xz 1 , +.Xr zstd 1 , +.Xr fts 3 , +.Xr zlib 3 +.Sh HISTORY +The +.Nm +program was originally written by Jean-loup Gailly, licensed under +the GNU General Public License. +Matthew R. Green wrote a simple front end for +.Nx 1.3 +distribution media, based on the freely re-distributable zlib library. +It was enhanced to be mostly feature-compatible with the original +GNU +.Nm +program for +.Nx 2.0 . +.Pp +This implementation of +.Nm +was ported based on the +.Nx +.Nm +version 20181111, +and first appeared in +.Fx 7.0 . +.Sh AUTHORS +.An -nosplit +This implementation of +.Nm +was written by +.An Matthew R. Green Aq Mt mrg@eterna.com.au +with unpack support written by +.An Xin LI Aq Mt delphij@FreeBSD.org . +.Sh BUGS +According to RFC 1952, the recorded file size is stored in a 32-bit +integer, therefore, it cannot represent files larger than 4GB. +This limitation also applies to the +.Fl l +option of the +.Nm +utility. 
diff --git a/usr.bin/gzip/gzip.c b/usr.bin/gzip/gzip.c new file mode 100644 index 000000000000..fd8026af6a81 --- /dev/null +++ b/usr.bin/gzip/gzip.c @@ -0,0 +1,2261 @@ +/* $NetBSD: gzip.c,v 1.116 2018/10/27 11:39:12 skrll Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 1997, 1998, 2003, 2004, 2006, 2008, 2009, 2010, 2011, 2015, 2017 + * Matthew R. Green + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * gzip.c -- GPL free gzip using zlib. 
+ * + * RFC 1950 covers the zlib format + * RFC 1951 covers the deflate format + * RFC 1952 covers the gzip format + * + * TODO: + * - use mmap where possible + * - make bzip2/compress -v/-t/-l support work as well as possible + */ + +#include <sys/endian.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/time.h> + +#include <inttypes.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <zlib.h> +#include <fts.h> +#include <libgen.h> +#include <stdarg.h> +#include <getopt.h> +#include <time.h> + +/* what type of file are we dealing with */ +enum filetype { + FT_GZIP, +#ifndef NO_BZIP2_SUPPORT + FT_BZIP2, +#endif +#ifndef NO_COMPRESS_SUPPORT + FT_Z, +#endif +#ifndef NO_PACK_SUPPORT + FT_PACK, +#endif +#ifndef NO_XZ_SUPPORT + FT_XZ, +#endif +#ifndef NO_LZ_SUPPORT + FT_LZ, +#endif +#ifndef NO_ZSTD_SUPPORT + FT_ZSTD, +#endif + FT_LAST, + FT_UNKNOWN +}; + +#ifndef NO_BZIP2_SUPPORT +#include <bzlib.h> + +#define BZ2_SUFFIX ".bz2" +#define BZIP2_MAGIC "BZh" +#endif + +#ifndef NO_COMPRESS_SUPPORT +#define Z_SUFFIX ".Z" +#define Z_MAGIC "\037\235" +#endif + +#ifndef NO_PACK_SUPPORT +#define PACK_MAGIC "\037\036" +#endif + +#ifndef NO_XZ_SUPPORT +#include <lzma.h> +#define XZ_SUFFIX ".xz" +#define XZ_MAGIC "\3757zXZ" +#endif + +#ifndef NO_LZ_SUPPORT +#define LZ_SUFFIX ".lz" +#define LZ_MAGIC "LZIP" +#endif + +#ifndef NO_ZSTD_SUPPORT +#include <zstd.h> +#define ZSTD_SUFFIX ".zst" +#define ZSTD_MAGIC "\050\265\057\375" +#endif + +#define GZ_SUFFIX ".gz" + +#define BUFLEN (64 * 1024) + +#define GZIP_MAGIC0 0x1F +#define GZIP_MAGIC1 0x8B +#define GZIP_OMAGIC1 0x9E + +#define GZIP_TIMESTAMP (off_t)4 +#define GZIP_ORIGNAME (off_t)10 + +#define HEAD_CRC 0x02 +#define EXTRA_FIELD 0x04 +#define ORIG_NAME 0x08 +#define COMMENT 0x10 + +#define OS_CODE 3 /* Unix */ + +typedef struct { + const char *zipped; + int ziplen; + const char *normal; /* for unzip - must not be 
longer than zipped */ +} suffixes_t; +static suffixes_t suffixes[] = { +#define SUFFIX(Z, N) {Z, sizeof Z - 1, N} + SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S .xxx */ + SUFFIX(GZ_SUFFIX, ""), + SUFFIX(".z", ""), + SUFFIX("-gz", ""), + SUFFIX("-z", ""), + SUFFIX("_z", ""), + SUFFIX(".taz", ".tar"), + SUFFIX(".tgz", ".tar"), +#ifndef NO_BZIP2_SUPPORT + SUFFIX(BZ2_SUFFIX, ""), + SUFFIX(".tbz", ".tar"), + SUFFIX(".tbz2", ".tar"), +#endif +#ifndef NO_COMPRESS_SUPPORT + SUFFIX(Z_SUFFIX, ""), +#endif +#ifndef NO_XZ_SUPPORT + SUFFIX(XZ_SUFFIX, ""), +#endif +#ifndef NO_LZ_SUPPORT + SUFFIX(LZ_SUFFIX, ""), +#endif +#ifndef NO_ZSTD_SUPPORT + SUFFIX(ZSTD_SUFFIX, ""), +#endif + SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */ +#undef SUFFIX +}; +#define NUM_SUFFIXES (nitems(suffixes)) +#define SUFFIX_MAXLEN 30 + +static const char gzip_version[] = "FreeBSD gzip 20190107"; + +static const char gzip_copyright[] = \ +" Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green\n" +" All rights reserved.\n" +"\n" +" Redistribution and use in source and binary forms, with or without\n" +" modification, are permitted provided that the following conditions\n" +" are met:\n" +" 1. Redistributions of source code must retain the above copyright\n" +" notice, this list of conditions and the following disclaimer.\n" +" 2. 
Redistributions in binary form must reproduce the above copyright\n" +" notice, this list of conditions and the following disclaimer in the\n" +" documentation and/or other materials provided with the distribution.\n" +"\n" +" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n" +" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n" +" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n" +" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n" +" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n" +" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n" +" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n" +" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n" +" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n" +" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n" +" SUCH DAMAGE."; + +static int cflag; /* stdout mode */ +static int dflag; /* decompress mode */ +static int lflag; /* list mode */ +static int numflag = 6; /* gzip -1..-9 value */ + +static const char *remove_file = NULL; /* file to be removed upon SIGINT */ + +static int fflag; /* force mode */ +static int kflag; /* don't delete input files */ +static int nflag; /* don't save name/timestamp */ +static int Nflag; /* restore stored name (-N) */ +static int qflag; /* quiet mode */ +static int rflag; /* recursive mode */ +static int tflag; /* test */ +static int vflag; /* verbose mode */ +static sig_atomic_t print_info = 0; + +static int exit_value = 0; /* exit value */ + +static const char *infile; /* name of file coming in */ + +static void maybe_err(const char *fmt, ...) __printflike(1, 2) __dead2; +#if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \ + !defined(NO_XZ_SUPPORT) || !defined(NO_ZSTD_SUPPORT) +static void maybe_errx(const char *fmt, ...) 
__printflike(1, 2) __dead2; +#endif +static void maybe_warn(const char *fmt, ...) __printflike(1, 2); +static void maybe_warnx(const char *fmt, ...) __printflike(1, 2); +static enum filetype file_gettype(u_char *); +static off_t gz_compress(int, int, off_t *, const char *, uint32_t); +static off_t gz_uncompress(int, int, char *, size_t, off_t *, const char *); +static off_t file_compress(char *, char *, size_t); +static off_t file_uncompress(char *, char *, size_t); +static void handle_pathname(char *); +static void handle_file(char *, struct stat *); +static void handle_stdin(void); +static void handle_stdout(void); +static void print_ratio(off_t, off_t, FILE *); +static void print_list(int fd, off_t, const char *, time_t); +static void usage(void) __dead2; +static void display_version(void) __dead2; +static void display_license(void); +static const suffixes_t *check_suffix(char *, int); +static ssize_t read_retry(int, void *, size_t); +static ssize_t write_retry(int, const void *, size_t); +static void print_list_out(off_t, off_t, const char*); + +static void infile_set(const char *newinfile, off_t total); + +static off_t infile_total; /* total expected to read/write */ +static off_t infile_current; /* current read/write */ + +static void check_siginfo(void); +static off_t cat_fd(unsigned char *, size_t, off_t *, int fd); +static void prepend_gzip(char *, int *, char ***); +static void handle_dir(char *); +static void print_verbage(const char *, const char *, off_t, off_t); +static void print_test(const char *, int); +static void copymodes(int fd, const struct stat *, const char *file); +static int check_outfile(const char *outfile); +static void setup_signals(void); +static void infile_newdata(size_t newdata); +static void infile_clear(void); + +#ifndef NO_BZIP2_SUPPORT +static off_t unbzip2(int, int, char *, size_t, off_t *); +#endif + +#ifndef NO_COMPRESS_SUPPORT +static FILE *zdopen(int); +static off_t zuncompress(FILE *, FILE *, char *, size_t, off_t *); 
+#endif + +#ifndef NO_PACK_SUPPORT +static off_t unpack(int, int, char *, size_t, off_t *); +#endif + +#ifndef NO_XZ_SUPPORT +static off_t unxz(int, int, char *, size_t, off_t *); +static off_t unxz_len(int); +#endif + +#ifndef NO_LZ_SUPPORT +static off_t unlz(int, int, char *, size_t, off_t *); +#endif + +#ifndef NO_ZSTD_SUPPORT +static off_t unzstd(int, int, char *, size_t, off_t *); +#endif + +static const struct option longopts[] = { + { "stdout", no_argument, 0, 'c' }, + { "to-stdout", no_argument, 0, 'c' }, + { "decompress", no_argument, 0, 'd' }, + { "uncompress", no_argument, 0, 'd' }, + { "force", no_argument, 0, 'f' }, + { "help", no_argument, 0, 'h' }, + { "keep", no_argument, 0, 'k' }, + { "list", no_argument, 0, 'l' }, + { "no-name", no_argument, 0, 'n' }, + { "name", no_argument, 0, 'N' }, + { "quiet", no_argument, 0, 'q' }, + { "recursive", no_argument, 0, 'r' }, + { "suffix", required_argument, 0, 'S' }, + { "test", no_argument, 0, 't' }, + { "verbose", no_argument, 0, 'v' }, + { "version", no_argument, 0, 'V' }, + { "fast", no_argument, 0, '1' }, + { "best", no_argument, 0, '9' }, + { "ascii", no_argument, 0, 'a' }, + { "license", no_argument, 0, 'L' }, + { NULL, no_argument, 0, 0 }, +}; + +int +main(int argc, char **argv) +{ + const char *progname = getprogname(); + char *gzip; + int len; + int ch; + + setup_signals(); + + if ((gzip = getenv("GZIP")) != NULL) + prepend_gzip(gzip, &argc, &argv); + + /* + * XXX + * handle being called `gunzip', `zcat' and `gzcat' + */ + if (strcmp(progname, "gunzip") == 0) + dflag = 1; + else if (strcmp(progname, "zcat") == 0 || + strcmp(progname, "gzcat") == 0) + dflag = cflag = 1; + +#define OPT_LIST "123456789acdfhklLNnqrS:tVv" + + while ((ch = getopt_long(argc, argv, OPT_LIST, longopts, NULL)) != -1) { + switch (ch) { + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + numflag = ch - '0'; + break; + case 'c': + cflag = 1; + break; + case 'd': + dflag = 1; + break; + 
case 'l': + lflag = 1; + dflag = 1; + break; + case 'V': + display_version(); + /* NOTREACHED */ + case 'a': + fprintf(stderr, "%s: option --ascii ignored on this system\n", progname); + break; + case 'f': + fflag = 1; + break; + case 'k': + kflag = 1; + break; + case 'L': + display_license(); + /* NOT REACHED */ + case 'N': + nflag = 0; + Nflag = 1; + break; + case 'n': + nflag = 1; + Nflag = 0; + break; + case 'q': + qflag = 1; + break; + case 'r': + rflag = 1; + break; + case 'S': + len = strlen(optarg); + if (len != 0) { + if (len > SUFFIX_MAXLEN) + errx(1, "incorrect suffix: '%s': too long", optarg); + suffixes[0].zipped = optarg; + suffixes[0].ziplen = len; + } else { + suffixes[NUM_SUFFIXES - 1].zipped = ""; + suffixes[NUM_SUFFIXES - 1].ziplen = 0; + } + break; + case 't': + cflag = 1; + tflag = 1; + dflag = 1; + break; + case 'v': + vflag = 1; + break; + default: + usage(); + /* NOTREACHED */ + } + } + argv += optind; + argc -= optind; + + if (argc == 0) { + if (dflag) /* stdin mode */ + handle_stdin(); + else /* stdout mode */ + handle_stdout(); + } else { + do { + handle_pathname(argv[0]); + } while (*++argv); + } + if (qflag == 0 && lflag && argc > 1) + print_list(-1, 0, "(totals)", 0); + exit(exit_value); +} + +/* maybe print a warning */ +void +maybe_warn(const char *fmt, ...) +{ + va_list ap; + + if (qflag == 0) { + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + } + if (exit_value == 0) + exit_value = 1; +} + +/* ... without an errno. */ +void +maybe_warnx(const char *fmt, ...) +{ + va_list ap; + + if (qflag == 0) { + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); + } + if (exit_value == 0) + exit_value = 1; +} + +/* maybe print an error */ +void +maybe_err(const char *fmt, ...) +{ + va_list ap; + + if (qflag == 0) { + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + } + exit(2); +} + +#if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \ + !defined(NO_XZ_SUPPORT) || !defined(NO_ZSTD_SUPPORT) +/* ... without an errno. 
*/ +void +maybe_errx(const char *fmt, ...) +{ + va_list ap; + + if (qflag == 0) { + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); + } + exit(2); +} +#endif + +/* split up $GZIP and prepend it to the argument list */ +static void +prepend_gzip(char *gzip, int *argc, char ***argv) +{ + char *s, **nargv, **ac; + int nenvarg = 0, i; + + /* scan how many arguments there are */ + for (s = gzip;;) { + while (*s == ' ' || *s == '\t') + s++; + if (*s == 0) + goto count_done; + nenvarg++; + while (*s != ' ' && *s != '\t') + if (*s++ == 0) + goto count_done; + } +count_done: + /* punt early */ + if (nenvarg == 0) + return; + + *argc += nenvarg; + ac = *argv; + + nargv = (char **)malloc((*argc + 1) * sizeof(char *)); + if (nargv == NULL) + maybe_err("malloc"); + + /* stash this away */ + *argv = nargv; + + /* copy the program name first */ + i = 0; + nargv[i++] = *(ac++); + + /* take a copy of $GZIP and add it to the array */ + s = strdup(gzip); + if (s == NULL) + maybe_err("strdup"); + for (;;) { + /* Skip whitespaces. */ + while (*s == ' ' || *s == '\t') + s++; + if (*s == 0) + goto copy_done; + nargv[i++] = s; + /* Find the end of this argument. */ + while (*s != ' ' && *s != '\t') + if (*s++ == 0) + /* Argument followed by NUL. */ + goto copy_done; + /* Terminate by overwriting ' ' or '\t' with NUL. */ + *s++ = 0; + } +copy_done: + + /* copy the original arguments and a NULL */ + while (*ac) + nargv[i++] = *(ac++); + nargv[i] = NULL; +} + +/* compress input to output. 
Return bytes read, -1 on error */ +static off_t +gz_compress(int in, int out, off_t *gsizep, const char *origname, uint32_t mtime) +{ + z_stream z; + char *outbufp, *inbufp; + off_t in_tot = 0, out_tot = 0; + ssize_t in_size; + int i, error; + uLong crc; + + outbufp = malloc(BUFLEN); + inbufp = malloc(BUFLEN); + if (outbufp == NULL || inbufp == NULL) { + maybe_err("malloc failed"); + goto out; + } + + memset(&z, 0, sizeof z); + z.zalloc = Z_NULL; + z.zfree = Z_NULL; + z.opaque = 0; + + if (nflag != 0) { + mtime = 0; + origname = ""; + } + + i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c%c%c%s", + GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, + *origname ? ORIG_NAME : 0, + mtime & 0xff, + (mtime >> 8) & 0xff, + (mtime >> 16) & 0xff, + (mtime >> 24) & 0xff, + numflag == 1 ? 4 : numflag == 9 ? 2 : 0, + OS_CODE, origname); + if (i >= BUFLEN) + /* this need PATH_MAX > BUFLEN ... */ + maybe_err("snprintf"); + if (*origname) + i++; + + z.next_out = (unsigned char *)outbufp + i; + z.avail_out = BUFLEN - i; + + error = deflateInit2(&z, numflag, Z_DEFLATED, + (-MAX_WBITS), 8, Z_DEFAULT_STRATEGY); + if (error != Z_OK) { + maybe_warnx("deflateInit2 failed"); + in_tot = -1; + goto out; + } + + crc = crc32(0L, Z_NULL, 0); + for (;;) { + if (z.avail_out == 0) { + if (write_retry(out, outbufp, BUFLEN) != BUFLEN) { + maybe_warn("write"); + out_tot = -1; + goto out; + } + + out_tot += BUFLEN; + z.next_out = (unsigned char *)outbufp; + z.avail_out = BUFLEN; + } + + if (z.avail_in == 0) { + in_size = read(in, inbufp, BUFLEN); + if (in_size < 0) { + maybe_warn("read"); + in_tot = -1; + goto out; + } + if (in_size == 0) + break; + infile_newdata(in_size); + + crc = crc32(crc, (const Bytef *)inbufp, (unsigned)in_size); + in_tot += in_size; + z.next_in = (unsigned char *)inbufp; + z.avail_in = in_size; + } + + error = deflate(&z, Z_NO_FLUSH); + if (error != Z_OK && error != Z_STREAM_END) { + maybe_warnx("deflate failed"); + in_tot = -1; + goto out; + } + } + + /* clean up */ + for (;;) { + 
size_t len; + ssize_t w; + + error = deflate(&z, Z_FINISH); + if (error != Z_OK && error != Z_STREAM_END) { + maybe_warnx("deflate failed"); + in_tot = -1; + goto out; + } + + len = (char *)z.next_out - outbufp; + + w = write_retry(out, outbufp, len); + if (w == -1 || (size_t)w != len) { + maybe_warn("write"); + out_tot = -1; + goto out; + } + out_tot += len; + z.next_out = (unsigned char *)outbufp; + z.avail_out = BUFLEN; + + if (error == Z_STREAM_END) + break; + } + + if (deflateEnd(&z) != Z_OK) { + maybe_warnx("deflateEnd failed"); + in_tot = -1; + goto out; + } + + i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c", + (int)crc & 0xff, + (int)(crc >> 8) & 0xff, + (int)(crc >> 16) & 0xff, + (int)(crc >> 24) & 0xff, + (int)in_tot & 0xff, + (int)(in_tot >> 8) & 0xff, + (int)(in_tot >> 16) & 0xff, + (int)(in_tot >> 24) & 0xff); + if (i != 8) + maybe_err("snprintf"); + if (write_retry(out, outbufp, i) != i) { + maybe_warn("write"); + in_tot = -1; + } else + out_tot += i; + +out: + if (inbufp != NULL) + free(inbufp); + if (outbufp != NULL) + free(outbufp); + if (gsizep) + *gsizep = out_tot; + return in_tot; +} + +/* + * uncompress input to output then close the input. return the + * uncompressed size written, and put the compressed sized read + * into `*gsizep'. 
+ */ +static off_t +gz_uncompress(int in, int out, char *pre, size_t prelen, off_t *gsizep, + const char *filename) +{ + z_stream z; + char *outbufp, *inbufp; + off_t out_tot = -1, in_tot = 0; + uint32_t out_sub_tot = 0; + enum { + GZSTATE_MAGIC0, + GZSTATE_MAGIC1, + GZSTATE_METHOD, + GZSTATE_FLAGS, + GZSTATE_SKIPPING, + GZSTATE_EXTRA, + GZSTATE_EXTRA2, + GZSTATE_EXTRA3, + GZSTATE_ORIGNAME, + GZSTATE_COMMENT, + GZSTATE_HEAD_CRC1, + GZSTATE_HEAD_CRC2, + GZSTATE_INIT, + GZSTATE_READ, + GZSTATE_CRC, + GZSTATE_LEN, + } state = GZSTATE_MAGIC0; + int flags = 0, skip_count = 0; + int error = Z_STREAM_ERROR, done_reading = 0; + uLong crc = 0; + ssize_t wr; + int needmore = 0; + +#define ADVANCE() { z.next_in++; z.avail_in--; } + + if ((outbufp = malloc(BUFLEN)) == NULL) { + maybe_err("malloc failed"); + goto out2; + } + if ((inbufp = malloc(BUFLEN)) == NULL) { + maybe_err("malloc failed"); + goto out1; + } + + memset(&z, 0, sizeof z); + z.avail_in = prelen; + z.next_in = (unsigned char *)pre; + z.avail_out = BUFLEN; + z.next_out = (unsigned char *)outbufp; + z.zalloc = NULL; + z.zfree = NULL; + z.opaque = 0; + + in_tot = prelen; + out_tot = 0; + + for (;;) { + check_siginfo(); + if ((z.avail_in == 0 || needmore) && done_reading == 0) { + ssize_t in_size; + + if (z.avail_in > 0) { + memmove(inbufp, z.next_in, z.avail_in); + } + z.next_in = (unsigned char *)inbufp; + in_size = read(in, z.next_in + z.avail_in, + BUFLEN - z.avail_in); + + if (in_size == -1) { + maybe_warn("failed to read stdin"); + goto stop_and_fail; + } else if (in_size == 0) { + done_reading = 1; + } + infile_newdata(in_size); + + z.avail_in += in_size; + needmore = 0; + + in_tot += in_size; + } + if (z.avail_in == 0) { + if (done_reading && state != GZSTATE_MAGIC0) { + maybe_warnx("%s: unexpected end of file", + filename); + goto stop_and_fail; + } + goto stop; + } + switch (state) { + case GZSTATE_MAGIC0: + if (*z.next_in != GZIP_MAGIC0) { + if (in_tot > 0) { + maybe_warnx("%s: trailing garbage " + 
"ignored", filename); + exit_value = 2; + goto stop; + } + maybe_warnx("input not gziped (MAGIC0)"); + goto stop_and_fail; + } + ADVANCE(); + state++; + out_sub_tot = 0; + crc = crc32(0L, Z_NULL, 0); + break; + + case GZSTATE_MAGIC1: + if (*z.next_in != GZIP_MAGIC1 && + *z.next_in != GZIP_OMAGIC1) { + maybe_warnx("input not gziped (MAGIC1)"); + goto stop_and_fail; + } + ADVANCE(); + state++; + break; + + case GZSTATE_METHOD: + if (*z.next_in != Z_DEFLATED) { + maybe_warnx("unknown compression method"); + goto stop_and_fail; + } + ADVANCE(); + state++; + break; + + case GZSTATE_FLAGS: + flags = *z.next_in; + ADVANCE(); + skip_count = 6; + state++; + break; + + case GZSTATE_SKIPPING: + if (skip_count > 0) { + skip_count--; + ADVANCE(); + } else + state++; + break; + + case GZSTATE_EXTRA: + if ((flags & EXTRA_FIELD) == 0) { + state = GZSTATE_ORIGNAME; + break; + } + skip_count = *z.next_in; + ADVANCE(); + state++; + break; + + case GZSTATE_EXTRA2: + skip_count |= ((*z.next_in) << 8); + ADVANCE(); + state++; + break; + + case GZSTATE_EXTRA3: + if (skip_count > 0) { + skip_count--; + ADVANCE(); + } else + state++; + break; + + case GZSTATE_ORIGNAME: + if ((flags & ORIG_NAME) == 0) { + state++; + break; + } + if (*z.next_in == 0) + state++; + ADVANCE(); + break; + + case GZSTATE_COMMENT: + if ((flags & COMMENT) == 0) { + state++; + break; + } + if (*z.next_in == 0) + state++; + ADVANCE(); + break; + + case GZSTATE_HEAD_CRC1: + if (flags & HEAD_CRC) + skip_count = 2; + else + skip_count = 0; + state++; + break; + + case GZSTATE_HEAD_CRC2: + if (skip_count > 0) { + skip_count--; + ADVANCE(); + } else + state++; + break; + + case GZSTATE_INIT: + if (inflateInit2(&z, -MAX_WBITS) != Z_OK) { + maybe_warnx("failed to inflateInit"); + goto stop_and_fail; + } + state++; + break; + + case GZSTATE_READ: + error = inflate(&z, Z_FINISH); + switch (error) { + /* Z_BUF_ERROR goes with Z_FINISH... 
*/ + case Z_BUF_ERROR: + if (z.avail_out > 0 && !done_reading) + continue; + + case Z_STREAM_END: + case Z_OK: + break; + + case Z_NEED_DICT: + maybe_warnx("Z_NEED_DICT error"); + goto stop_and_fail; + case Z_DATA_ERROR: + maybe_warnx("data stream error"); + goto stop_and_fail; + case Z_STREAM_ERROR: + maybe_warnx("internal stream error"); + goto stop_and_fail; + case Z_MEM_ERROR: + maybe_warnx("memory allocation error"); + goto stop_and_fail; + + default: + maybe_warn("unknown error from inflate(): %d", + error); + } + wr = BUFLEN - z.avail_out; + + if (wr != 0) { + crc = crc32(crc, (const Bytef *)outbufp, (unsigned)wr); + if ( + /* don't write anything with -t */ + tflag == 0 && + write_retry(out, outbufp, wr) != wr) { + maybe_warn("error writing to output"); + goto stop_and_fail; + } + + out_tot += wr; + out_sub_tot += wr; + } + + if (error == Z_STREAM_END) { + inflateEnd(&z); + state++; + } + + z.next_out = (unsigned char *)outbufp; + z.avail_out = BUFLEN; + + break; + case GZSTATE_CRC: + { + uLong origcrc; + + if (z.avail_in < 4) { + if (!done_reading) { + needmore = 1; + continue; + } + maybe_warnx("truncated input"); + goto stop_and_fail; + } + origcrc = le32dec(&z.next_in[0]); + if (origcrc != crc) { + maybe_warnx("invalid compressed" + " data--crc error"); + goto stop_and_fail; + } + } + + z.avail_in -= 4; + z.next_in += 4; + + if (!z.avail_in && done_reading) { + goto stop; + } + state++; + break; + case GZSTATE_LEN: + { + uLong origlen; + + if (z.avail_in < 4) { + if (!done_reading) { + needmore = 1; + continue; + } + maybe_warnx("truncated input"); + goto stop_and_fail; + } + origlen = le32dec(&z.next_in[0]); + + if (origlen != out_sub_tot) { + maybe_warnx("invalid compressed" + " data--length error"); + goto stop_and_fail; + } + } + + z.avail_in -= 4; + z.next_in += 4; + + if (error < 0) { + maybe_warnx("decompression error"); + goto stop_and_fail; + } + state = GZSTATE_MAGIC0; + break; + } + continue; +stop_and_fail: + out_tot = -1; +stop: + break; + 
} + if (state > GZSTATE_INIT) + inflateEnd(&z); + + free(inbufp); +out1: + free(outbufp); +out2: + if (gsizep) + *gsizep = in_tot; + return (out_tot); +} + +/* + * set the owner, mode, flags & utimes using the given file descriptor. + * file is only used in possible warning messages. + */ +static void +copymodes(int fd, const struct stat *sbp, const char *file) +{ + struct timespec times[2]; + struct stat sb; + + /* + * If we have no info on the input, give this file some + * default values and return.. + */ + if (sbp == NULL) { + mode_t mask = umask(022); + + (void)fchmod(fd, DEFFILEMODE & ~mask); + (void)umask(mask); + return; + } + sb = *sbp; + + /* if the chown fails, remove set-id bits as-per compress(1) */ + if (fchown(fd, sb.st_uid, sb.st_gid) < 0) { + if (errno != EPERM) + maybe_warn("couldn't fchown: %s", file); + sb.st_mode &= ~(S_ISUID|S_ISGID); + } + + /* we only allow set-id and the 9 normal permission bits */ + sb.st_mode &= S_ISUID | S_ISGID | S_IRWXU | S_IRWXG | S_IRWXO; + if (fchmod(fd, sb.st_mode) < 0) + maybe_warn("couldn't fchmod: %s", file); + + times[0] = sb.st_atim; + times[1] = sb.st_mtim; + if (futimens(fd, times) < 0) + maybe_warn("couldn't futimens: %s", file); + + /* only try flags if they exist already */ + if (sb.st_flags != 0 && fchflags(fd, sb.st_flags) < 0) + maybe_warn("couldn't fchflags: %s", file); +} + +/* what sort of file is this? 
*/ +static enum filetype +file_gettype(u_char *buf) +{ + + if (buf[0] == GZIP_MAGIC0 && + (buf[1] == GZIP_MAGIC1 || buf[1] == GZIP_OMAGIC1)) + return FT_GZIP; +#ifndef NO_BZIP2_SUPPORT + else if (memcmp(buf, BZIP2_MAGIC, 3) == 0 && + buf[3] >= '0' && buf[3] <= '9') + return FT_BZIP2; +#endif +#ifndef NO_COMPRESS_SUPPORT + else if (memcmp(buf, Z_MAGIC, 2) == 0) + return FT_Z; +#endif +#ifndef NO_PACK_SUPPORT + else if (memcmp(buf, PACK_MAGIC, 2) == 0) + return FT_PACK; +#endif +#ifndef NO_XZ_SUPPORT + else if (memcmp(buf, XZ_MAGIC, 4) == 0) /* XXX: We only have 4 bytes */ + return FT_XZ; +#endif +#ifndef NO_LZ_SUPPORT + else if (memcmp(buf, LZ_MAGIC, 4) == 0) + return FT_LZ; +#endif +#ifndef NO_ZSTD_SUPPORT + else if (memcmp(buf, ZSTD_MAGIC, 4) == 0) + return FT_ZSTD; +#endif + else + return FT_UNKNOWN; +} + +/* check the outfile is OK. */ +static int +check_outfile(const char *outfile) +{ + struct stat sb; + int ok = 1; + + if (lflag == 0 && stat(outfile, &sb) == 0) { + if (fflag) + unlink(outfile); + else if (isatty(STDIN_FILENO)) { + char ans[10] = { 'n', '\0' }; /* default */ + + fprintf(stderr, "%s already exists -- do you wish to " + "overwrite (y or n)? 
" , outfile); + (void)fgets(ans, sizeof(ans) - 1, stdin); + if (ans[0] != 'y' && ans[0] != 'Y') { + fprintf(stderr, "\tnot overwriting\n"); + ok = 0; + } else + unlink(outfile); + } else { + maybe_warnx("%s already exists -- skipping", outfile); + ok = 0; + } + } + return ok; +} + +static void +unlink_input(const char *file, const struct stat *sb) +{ + struct stat nsb; + + if (kflag) + return; + if (stat(file, &nsb) != 0) + /* Must be gone already */ + return; + if (nsb.st_dev != sb->st_dev || nsb.st_ino != sb->st_ino) + /* Definitely a different file */ + return; + unlink(file); +} + +static void +got_sigint(int signo __unused) +{ + + if (remove_file != NULL) + unlink(remove_file); + _exit(2); +} + +static void +got_siginfo(int signo __unused) +{ + + print_info = 1; +} + +static void +setup_signals(void) +{ + + signal(SIGINFO, got_siginfo); + signal(SIGINT, got_sigint); +} + +static void +infile_newdata(size_t newdata) +{ + + infile_current += newdata; +} + +static void +infile_set(const char *newinfile, off_t total) +{ + + if (newinfile) + infile = newinfile; + infile_total = total; +} + +static void +infile_clear(void) +{ + + infile = NULL; + infile_total = infile_current = 0; +} + +static const suffixes_t * +check_suffix(char *file, int xlate) +{ + const suffixes_t *s; + int len = strlen(file); + char *sp; + + for (s = suffixes; s != suffixes + NUM_SUFFIXES; s++) { + /* if it doesn't fit in "a.suf", don't bother */ + if (s->ziplen >= len) + continue; + sp = file + len - s->ziplen; + if (strcmp(s->zipped, sp) != 0) + continue; + if (xlate) + strcpy(sp, s->normal); + return s; + } + return NULL; +} + +/* + * compress the given file: create a corresponding .gz file and remove the + * original. 
+ */ +static off_t +file_compress(char *file, char *outfile, size_t outsize) +{ + int in; + int out; + off_t size, in_size; + struct stat isb, osb; + const suffixes_t *suff; + + in = open(file, O_RDONLY); + if (in == -1) { + maybe_warn("can't open %s", file); + return (-1); + } + + if (fstat(in, &isb) != 0) { + maybe_warn("couldn't stat: %s", file); + close(in); + return (-1); + } + + if (fstat(in, &isb) != 0) { + close(in); + maybe_warn("can't stat %s", file); + return -1; + } + infile_set(file, isb.st_size); + + if (cflag == 0) { + if (isb.st_nlink > 1 && fflag == 0) { + maybe_warnx("%s has %ju other link%s -- " + "skipping", file, + (uintmax_t)isb.st_nlink - 1, + isb.st_nlink == 1 ? "" : "s"); + close(in); + return -1; + } + + if (fflag == 0 && (suff = check_suffix(file, 0)) && + suff->zipped[0] != 0) { + maybe_warnx("%s already has %s suffix -- unchanged", + file, suff->zipped); + close(in); + return (-1); + } + + /* Add (usually) .gz to filename */ + if ((size_t)snprintf(outfile, outsize, "%s%s", + file, suffixes[0].zipped) >= outsize) + memcpy(outfile + outsize - suffixes[0].ziplen - 1, + suffixes[0].zipped, suffixes[0].ziplen + 1); + + if (check_outfile(outfile) == 0) { + close(in); + return (-1); + } + } + + if (cflag == 0) { + out = open(outfile, O_WRONLY | O_CREAT | O_EXCL, 0600); + if (out == -1) { + maybe_warn("could not create output: %s", outfile); + fclose(stdin); + return (-1); + } + remove_file = outfile; + } else + out = STDOUT_FILENO; + + in_size = gz_compress(in, out, &size, basename(file), (uint32_t)isb.st_mtime); + + (void)close(in); + + /* + * If there was an error, in_size will be -1. + * If we compressed to stdout, just return the size. + * Otherwise stat the file and check it is the correct size. + * We only blow away the file if we can stat the output and it + * has the expected size. + */ + if (cflag != 0) + return in_size == -1 ? 
-1 : size; + + if (fstat(out, &osb) != 0) { + maybe_warn("couldn't stat: %s", outfile); + goto bad_outfile; + } + + if (osb.st_size != size) { + maybe_warnx("output file: %s wrong size (%ju != %ju), deleting", + outfile, (uintmax_t)osb.st_size, (uintmax_t)size); + goto bad_outfile; + } + + copymodes(out, &isb, outfile); + remove_file = NULL; + if (close(out) == -1) + maybe_warn("couldn't close output"); + + /* output is good, ok to delete input */ + unlink_input(file, &isb); + return (size); + + bad_outfile: + if (close(out) == -1) + maybe_warn("couldn't close output"); + + maybe_warnx("leaving original %s", file); + unlink(outfile); + return (size); +} + +/* uncompress the given file and remove the original */ +static off_t +file_uncompress(char *file, char *outfile, size_t outsize) +{ + struct stat isb, osb; + off_t size; + ssize_t rbytes; + unsigned char fourbytes[4]; + enum filetype method; + int fd, ofd, zfd = -1; + int error; + size_t in_size; + ssize_t rv; + time_t timestamp = 0; + char name[PATH_MAX + 1]; + + /* gather the old name info */ + + fd = open(file, O_RDONLY); + if (fd < 0) { + maybe_warn("can't open %s", file); + goto lose; + } + if (fstat(fd, &isb) != 0) { + maybe_warn("can't stat %s", file); + goto lose; + } + if (S_ISREG(isb.st_mode)) + in_size = isb.st_size; + else + in_size = 0; + infile_set(file, in_size); + + strlcpy(outfile, file, outsize); + if (check_suffix(outfile, 1) == NULL && !(cflag || lflag)) { + maybe_warnx("%s: unknown suffix -- ignored", file); + goto lose; + } + + rbytes = read(fd, fourbytes, sizeof fourbytes); + if (rbytes != sizeof fourbytes) { + /* we don't want to fail here. 
*/ + if (fflag) + goto lose; + if (rbytes == -1) + maybe_warn("can't read %s", file); + else + goto unexpected_EOF; + goto lose; + } + infile_newdata(rbytes); + + method = file_gettype(fourbytes); + if (fflag == 0 && method == FT_UNKNOWN) { + maybe_warnx("%s: not in gzip format", file); + goto lose; + } + + + if (method == FT_GZIP && Nflag) { + unsigned char ts[4]; /* timestamp */ + + rv = pread(fd, ts, sizeof ts, GZIP_TIMESTAMP); + if (rv >= 0 && rv < (ssize_t)(sizeof ts)) + goto unexpected_EOF; + if (rv == -1) { + if (!fflag) + maybe_warn("can't read %s", file); + goto lose; + } + infile_newdata(rv); + timestamp = le32dec(&ts[0]); + + if (fourbytes[3] & ORIG_NAME) { + rbytes = pread(fd, name, sizeof(name) - 1, GZIP_ORIGNAME); + if (rbytes < 0) { + maybe_warn("can't read %s", file); + goto lose; + } + if (name[0] != '\0') { + char *dp, *nf; + + /* Make sure that name is NUL-terminated */ + name[rbytes] = '\0'; + + /* strip saved directory name */ + nf = strrchr(name, '/'); + if (nf == NULL) + nf = name; + else + nf++; + + /* preserve original directory name */ + dp = strrchr(file, '/'); + if (dp == NULL) + dp = file; + else + dp++; + snprintf(outfile, outsize, "%.*s%.*s", + (int) (dp - file), + file, (int) rbytes, nf); + } + } + } + lseek(fd, 0, SEEK_SET); + + if (cflag == 0 || lflag) { + if (isb.st_nlink > 1 && lflag == 0 && fflag == 0) { + maybe_warnx("%s has %ju other links -- skipping", + file, (uintmax_t)isb.st_nlink - 1); + goto lose; + } + if (nflag == 0 && timestamp) + isb.st_mtime = timestamp; + if (check_outfile(outfile) == 0) + goto lose; + } + + if (cflag) + zfd = STDOUT_FILENO; + else if (lflag) + zfd = -1; + else { + zfd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600); + if (zfd == STDOUT_FILENO) { + /* We won't close STDOUT_FILENO later... 
*/ + zfd = dup(zfd); + close(STDOUT_FILENO); + } + if (zfd == -1) { + maybe_warn("can't open %s", outfile); + goto lose; + } + remove_file = outfile; + } + + switch (method) { +#ifndef NO_BZIP2_SUPPORT + case FT_BZIP2: + /* XXX */ + if (lflag) { + maybe_warnx("no -l with bzip2 files"); + goto lose; + } + + size = unbzip2(fd, zfd, NULL, 0, NULL); + break; +#endif + +#ifndef NO_COMPRESS_SUPPORT + case FT_Z: { + FILE *in, *out; + + /* XXX */ + if (lflag) { + maybe_warnx("no -l with Lempel-Ziv files"); + goto lose; + } + + if ((in = zdopen(fd)) == NULL) { + maybe_warn("zdopen for read: %s", file); + goto lose; + } + + out = fdopen(dup(zfd), "w"); + if (out == NULL) { + maybe_warn("fdopen for write: %s", outfile); + fclose(in); + goto lose; + } + + size = zuncompress(in, out, NULL, 0, NULL); + /* need to fclose() if ferror() is true... */ + error = ferror(in); + if (error | fclose(in)) { + if (error) + maybe_warn("failed infile"); + else + maybe_warn("failed infile fclose"); + if (cflag == 0) + unlink(outfile); + (void)fclose(out); + goto lose; + } + if (fclose(out) != 0) { + maybe_warn("failed outfile fclose"); + if (cflag == 0) + unlink(outfile); + goto lose; + } + break; + } +#endif + +#ifndef NO_PACK_SUPPORT + case FT_PACK: + if (lflag) { + maybe_warnx("no -l with packed files"); + goto lose; + } + + size = unpack(fd, zfd, NULL, 0, NULL); + break; +#endif + +#ifndef NO_XZ_SUPPORT + case FT_XZ: + if (lflag) { + size = unxz_len(fd); + if (!tflag) { + print_list_out(in_size, size, file); + close(fd); + return -1; + } + } else + size = unxz(fd, zfd, NULL, 0, NULL); + break; +#endif + +#ifndef NO_LZ_SUPPORT + case FT_LZ: + if (lflag) { + maybe_warnx("no -l with lzip files"); + goto lose; + } + size = unlz(fd, zfd, NULL, 0, NULL); + break; +#endif + +#ifndef NO_ZSTD_SUPPORT + case FT_ZSTD: + if (lflag) { + maybe_warnx("no -l with zstd files"); + goto lose; + } + size = unzstd(fd, zfd, NULL, 0, NULL); + break; +#endif + case FT_UNKNOWN: + if (lflag) { + maybe_warnx("no -l 
for unknown filetypes"); + goto lose; + } + size = cat_fd(NULL, 0, NULL, fd); + break; + default: + if (lflag) { + print_list(fd, in_size, outfile, isb.st_mtime); + if (!tflag) { + close(fd); + return -1; /* XXX */ + } + } + + size = gz_uncompress(fd, zfd, NULL, 0, NULL, file); + break; + } + + if (close(fd) != 0) + maybe_warn("couldn't close input"); + if (zfd != STDOUT_FILENO && close(zfd) != 0) + maybe_warn("couldn't close output"); + + if (size == -1) { + if (cflag == 0) + unlink(outfile); + maybe_warnx("%s: uncompress failed", file); + return -1; + } + + /* if testing, or we uncompressed to stdout, this is all we need */ + if (tflag) + return size; + /* if we are uncompressing to stdin, don't remove the file. */ + if (cflag) + return size; + + /* + * if we create a file... + */ + /* + * if we can't stat the file don't remove the file. + */ + + ofd = open(outfile, O_RDWR, 0); + if (ofd == -1) { + maybe_warn("couldn't open (leaving original): %s", + outfile); + return -1; + } + if (fstat(ofd, &osb) != 0) { + maybe_warn("couldn't stat (leaving original): %s", + outfile); + close(ofd); + return -1; + } + if (osb.st_size != size) { + maybe_warnx("stat gave different size: %ju != %ju (leaving original)", + (uintmax_t)size, (uintmax_t)osb.st_size); + close(ofd); + unlink(outfile); + return -1; + } + copymodes(ofd, &isb, outfile); + remove_file = NULL; + close(ofd); + unlink_input(file, &isb); + return size; + + unexpected_EOF: + maybe_warnx("%s: unexpected end of file", file); + lose: + if (fd != -1) + close(fd); + if (zfd != -1 && zfd != STDOUT_FILENO) + close(zfd); + return -1; +} + +static void +check_siginfo(void) +{ + if (print_info == 0) + return; + if (infile) { + if (infile_total) { + int pcent = (int)((100.0 * infile_current) / infile_total); + + fprintf(stderr, "%s: done %llu/%llu bytes %d%%\n", + infile, (unsigned long long)infile_current, + (unsigned long long)infile_total, pcent); + } else + fprintf(stderr, "%s: done %llu bytes\n", + infile, (unsigned 
long long)infile_current); + } + print_info = 0; +} + +static off_t +cat_fd(unsigned char * prepend, size_t count, off_t *gsizep, int fd) +{ + char buf[BUFLEN]; + off_t in_tot; + ssize_t w; + + in_tot = count; + w = write_retry(STDOUT_FILENO, prepend, count); + if (w == -1 || (size_t)w != count) { + maybe_warn("write to stdout"); + return -1; + } + for (;;) { + ssize_t rv; + + rv = read(fd, buf, sizeof buf); + if (rv == 0) + break; + if (rv < 0) { + maybe_warn("read from fd %d", fd); + break; + } + infile_newdata(rv); + + if (write_retry(STDOUT_FILENO, buf, rv) != rv) { + maybe_warn("write to stdout"); + break; + } + in_tot += rv; + } + + if (gsizep) + *gsizep = in_tot; + return (in_tot); +} + +static void +handle_stdin(void) +{ + struct stat isb; + unsigned char fourbytes[4]; + size_t in_size; + off_t usize, gsize; + enum filetype method; + ssize_t bytes_read; +#ifndef NO_COMPRESS_SUPPORT + FILE *in; +#endif + + if (fflag == 0 && lflag == 0 && isatty(STDIN_FILENO)) { + maybe_warnx("standard input is a terminal -- ignoring"); + goto out; + } + + if (fstat(STDIN_FILENO, &isb) < 0) { + maybe_warn("fstat"); + goto out; + } + if (S_ISREG(isb.st_mode)) + in_size = isb.st_size; + else + in_size = 0; + infile_set("(stdin)", in_size); + + if (lflag) { + print_list(STDIN_FILENO, in_size, infile, isb.st_mtime); + goto out; + } + + bytes_read = read_retry(STDIN_FILENO, fourbytes, sizeof fourbytes); + if (bytes_read == -1) { + maybe_warn("can't read stdin"); + goto out; + } else if (bytes_read != sizeof(fourbytes)) { + maybe_warnx("(stdin): unexpected end of file"); + goto out; + } + + method = file_gettype(fourbytes); + switch (method) { + default: + if (fflag == 0) { + maybe_warnx("unknown compression format"); + goto out; + } + usize = cat_fd(fourbytes, sizeof fourbytes, &gsize, STDIN_FILENO); + break; + case FT_GZIP: + usize = gz_uncompress(STDIN_FILENO, STDOUT_FILENO, + (char *)fourbytes, sizeof fourbytes, &gsize, "(stdin)"); + break; +#ifndef NO_BZIP2_SUPPORT + case 
FT_BZIP2: + usize = unbzip2(STDIN_FILENO, STDOUT_FILENO, + (char *)fourbytes, sizeof fourbytes, &gsize); + break; +#endif +#ifndef NO_COMPRESS_SUPPORT + case FT_Z: + if ((in = zdopen(STDIN_FILENO)) == NULL) { + maybe_warnx("zopen of stdin"); + goto out; + } + + usize = zuncompress(in, stdout, (char *)fourbytes, + sizeof fourbytes, &gsize); + fclose(in); + break; +#endif +#ifndef NO_PACK_SUPPORT + case FT_PACK: + usize = unpack(STDIN_FILENO, STDOUT_FILENO, + (char *)fourbytes, sizeof fourbytes, &gsize); + break; +#endif +#ifndef NO_XZ_SUPPORT + case FT_XZ: + usize = unxz(STDIN_FILENO, STDOUT_FILENO, + (char *)fourbytes, sizeof fourbytes, &gsize); + break; +#endif +#ifndef NO_LZ_SUPPORT + case FT_LZ: + usize = unlz(STDIN_FILENO, STDOUT_FILENO, + (char *)fourbytes, sizeof fourbytes, &gsize); + break; +#endif +#ifndef NO_ZSTD_SUPPORT + case FT_ZSTD: + usize = unzstd(STDIN_FILENO, STDOUT_FILENO, + (char *)fourbytes, sizeof fourbytes, &gsize); + break; +#endif + } + + if (vflag && !tflag && usize != -1 && gsize != -1) + print_verbage(NULL, NULL, usize, gsize); + if (vflag && tflag) + print_test("(stdin)", usize != -1); + +out: + infile_clear(); +} + +static void +handle_stdout(void) +{ + off_t gsize; + off_t usize; + struct stat sb; + time_t systime; + uint32_t mtime; + int ret; + + infile_set("(stdout)", 0); + + if (fflag == 0 && isatty(STDOUT_FILENO)) { + maybe_warnx("standard output is a terminal -- ignoring"); + return; + } + + /* If stdin is a file use its mtime, otherwise use current time */ + ret = fstat(STDIN_FILENO, &sb); + if (ret < 0) { + maybe_warn("Can't stat stdin"); + return; + } + + if (S_ISREG(sb.st_mode)) { + infile_set("(stdout)", sb.st_size); + mtime = (uint32_t)sb.st_mtime; + } else { + systime = time(NULL); + if (systime == -1) { + maybe_warn("time"); + return; + } + mtime = (uint32_t)systime; + } + + usize = + gz_compress(STDIN_FILENO, STDOUT_FILENO, &gsize, "", mtime); + if (vflag && !tflag && usize != -1 && gsize != -1) + print_verbage(NULL, 
NULL, usize, gsize); +} + +/* do what is asked for, for the path name */ +static void +handle_pathname(char *path) +{ + char *opath = path, *s = NULL; + ssize_t len; + int slen; + struct stat sb; + + /* check for stdout/stdin */ + if (path[0] == '-' && path[1] == '\0') { + if (dflag) + handle_stdin(); + else + handle_stdout(); + return; + } + +retry: + if (stat(path, &sb) != 0 || (fflag == 0 && cflag == 0 && + lstat(path, &sb) != 0)) { + /* lets try <path>.gz if we're decompressing */ + if (dflag && s == NULL && errno == ENOENT) { + len = strlen(path); + slen = suffixes[0].ziplen; + s = malloc(len + slen + 1); + if (s == NULL) + maybe_err("malloc"); + memcpy(s, path, len); + memcpy(s + len, suffixes[0].zipped, slen + 1); + path = s; + goto retry; + } + maybe_warn("can't stat: %s", opath); + goto out; + } + + if (S_ISDIR(sb.st_mode)) { + if (rflag) + handle_dir(path); + else + maybe_warnx("%s is a directory", path); + goto out; + } + + if (S_ISREG(sb.st_mode)) + handle_file(path, &sb); + else + maybe_warnx("%s is not a regular file", path); + +out: + if (s) + free(s); +} + +/* compress/decompress a file */ +static void +handle_file(char *file, struct stat *sbp) +{ + off_t usize, gsize; + char outfile[PATH_MAX]; + + infile_set(file, sbp->st_size); + if (dflag) { + usize = file_uncompress(file, outfile, sizeof(outfile)); + if (vflag && tflag) + print_test(file, usize != -1); + if (usize == -1) + return; + gsize = sbp->st_size; + } else { + gsize = file_compress(file, outfile, sizeof(outfile)); + if (gsize == -1) + return; + usize = sbp->st_size; + } + infile_clear(); + + if (vflag && !tflag) + print_verbage(file, (cflag) ? 
NULL : outfile, usize, gsize); +} + +/* this is used with -r to recursively descend directories */ +static void +handle_dir(char *dir) +{ + char *path_argv[2]; + FTS *fts; + FTSENT *entry; + + path_argv[0] = dir; + path_argv[1] = 0; + fts = fts_open(path_argv, FTS_PHYSICAL | FTS_NOCHDIR, NULL); + if (fts == NULL) { + warn("couldn't fts_open %s", dir); + return; + } + + while (errno = 0, (entry = fts_read(fts))) { + switch(entry->fts_info) { + case FTS_D: + case FTS_DP: + continue; + + case FTS_DNR: + case FTS_ERR: + case FTS_NS: + maybe_warn("%s", entry->fts_path); + continue; + case FTS_F: + handle_file(entry->fts_path, entry->fts_statp); + } + } + if (errno != 0) + warn("error with fts_read %s", dir); + (void)fts_close(fts); +} + +/* print a ratio - size reduction as a fraction of uncompressed size */ +static void +print_ratio(off_t in, off_t out, FILE *where) +{ + int percent10; /* 10 * percent */ + off_t diff; + char buff[8]; + int len; + + diff = in - out/2; + if (in == 0 && out == 0) + percent10 = 0; + else if (diff < 0) + /* + * Output is more than double size of input! print -99.9% + * Quite possibly we've failed to get the original size. + */ + percent10 = -999; + else { + /* + * We only need 12 bits of result from the final division, + * so reduce the values until a 32bit division will suffice. + */ + while (in > 0x100000) { + diff >>= 1; + in >>= 1; + } + if (in != 0) + percent10 = ((u_int)diff * 2000) / (u_int)in - 1000; + else + percent10 = 0; + } + + len = snprintf(buff, sizeof buff, "%2.2d.", percent10); + /* Move the '.' to before the last digit */ + buff[len - 1] = buff[len - 2]; + buff[len - 2] = '.'; + fprintf(where, "%5s%%", buff); +} + +/* print compression statistics, and the new name (if there is one!) */ +static void +print_verbage(const char *file, const char *nfile, off_t usize, off_t gsize) +{ + if (file) + fprintf(stderr, "%s:%s ", file, + strlen(file) < 7 ? 
"\t\t" : "\t"); + print_ratio(usize, gsize, stderr); + if (nfile) + fprintf(stderr, " -- replaced with %s", nfile); + fprintf(stderr, "\n"); + fflush(stderr); +} + +/* print test results */ +static void +print_test(const char *file, int ok) +{ + + if (exit_value == 0 && ok == 0) + exit_value = 1; + fprintf(stderr, "%s:%s %s\n", file, + strlen(file) < 7 ? "\t\t" : "\t", ok ? "OK" : "NOT OK"); + fflush(stderr); +} + +/* print a file's info ala --list */ +/* eg: + compressed uncompressed ratio uncompressed_name + 354841 1679360 78.8% /usr/pkgsrc/distfiles/libglade-2.0.1.tar +*/ +static void +print_list(int fd, off_t out, const char *outfile, time_t ts) +{ + static int first = 1; + static off_t in_tot, out_tot; + uint32_t crc = 0; + off_t in = 0, rv; + + if (first) { + if (vflag) + printf("method crc date time "); + if (qflag == 0) + printf(" compressed uncompressed " + "ratio uncompressed_name\n"); + } + first = 0; + + /* print totals? */ + if (fd == -1) { + in = in_tot; + out = out_tot; + } else + { + /* read the last 4 bytes - this is the uncompressed size */ + rv = lseek(fd, (off_t)(-8), SEEK_END); + if (rv != -1) { + unsigned char buf[8]; + uint32_t usize; + + rv = read(fd, (char *)buf, sizeof(buf)); + if (rv == -1) + maybe_warn("read of uncompressed size"); + else if (rv != sizeof(buf)) + maybe_warnx("read of uncompressed size"); + + else { + usize = le32dec(&buf[4]); + in = (off_t)usize; + crc = le32dec(&buf[0]); + } + } + } + + if (vflag && fd == -1) + printf(" "); + else if (vflag) { + char *date = ctime(&ts); + + /* skip the day, 1/100th second, and year */ + date += 4; + date[12] = 0; + printf("%5s %08x %11s ", "defla"/*XXX*/, crc, date); + } + in_tot += in; + out_tot += out; + print_list_out(out, in, outfile); +} + +static void +print_list_out(off_t out, off_t in, const char *outfile) +{ + printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in); + print_ratio(in, out, stdout); + printf(" %s\n", outfile); +} + +/* display the usage of 
/*
 * Read up to sz bytes from fd into buf, calling read(2) again after short
 * reads until the request is satisfied or end-of-file is reached.
 * Requests larger than SSIZE_MAX are clamped so that the byte count always
 * fits the return type.
 *
 * Returns the number of bytes stored in buf (smaller than the request only
 * at EOF), or -1 if read(2) failed.
 */
static ssize_t
read_retry(int fd, void *buf, size_t sz)
{
	char *cp = buf;
	const size_t want = MIN(sz, (size_t) SSIZE_MAX);
	size_t left = want;

	while (left > 0) {
		ssize_t ret;

		ret = read(fd, cp, left);
		if (ret == -1) {
			return ret;
		} else if (ret == 0) {
			break; /* EOF */
		}
		cp += ret;
		left -= ret;
	}

	/* Measure against the clamped request, not sz, so this fits ssize_t */
	return want - left;
}

/*
 * Write sz bytes from buf to fd, calling write(2) again after short writes
 * until everything has been written.  Requests larger than SSIZE_MAX are
 * clamped so that the byte count always fits the return type.
 *
 * Returns the number of bytes written, or -1 if write(2) failed.
 */
static ssize_t
write_retry(int fd, const void *buf, size_t sz)
{
	const char *cp = buf;
	const size_t want = MIN(sz, (size_t) SSIZE_MAX);
	size_t left = want;

	while (left > 0) {
		ssize_t ret;

		ret = write(fd, cp, left);
		if (ret == -1) {
			return ret;
		} else if (ret == 0) {
			abort(); /* Can't happen */
		}
		cp += ret;
		left -= ret;
	}

	/* Measure against the clamped request, not sz, so this fits ssize_t */
	return want - left;
}
# prepare_files <compressfunc> <compresssuffix>
#
# Creates the fixtures used by the test bodies.  For "foo" and every name in
# $specials it:
#   - compresses the file with <compressfunc> -k (keeping the original),
#   - writes a tar archive of the file, compressed to "$f.<compresssuffix>",
#   - appends "bar" to the plain file and regenerates "$f.tar", so both the
#     plain file and the tarball differ from their compressed counterparts.
prepare_files()
{
	compressfunc="$1"
	compresssuffix="$2"

	echo "foo" > foo

	for f in $specials foo; do
		# "=" is the POSIX string-equality operator for test(1)
		[ "$f" = "foo" ] || cp foo "$f"
		atf_check "$compressfunc" -k "$f"
		atf_check tar -ckf "$f.tar" "$f"
		atf_check -o save:"$f.$compresssuffix" \
		    "$compressfunc" -c "$f.tar"

		# Regenerate $f.tar to create a diff from the .$compresssuffix
		# file, too.
		echo "bar" >> "$f"
		atf_check tar -ckf "$f.tar" "$f"
	done
}
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* This file is #included by gzip.c */ + +static off_t +unbzip2(int in, int out, char *pre, size_t prelen, off_t *bytes_in) +{ + int ret, end_of_file, cold = 0; + off_t bytes_out = 0; + bz_stream bzs; + static char *inbuf, *outbuf; + + if (inbuf == NULL) + inbuf = malloc(BUFLEN); + if (outbuf == NULL) + outbuf = malloc(BUFLEN); + if (inbuf == NULL || outbuf == NULL) + maybe_err("malloc"); + + bzs.bzalloc = NULL; + bzs.bzfree = NULL; + bzs.opaque = NULL; + + end_of_file = 0; + ret = BZ2_bzDecompressInit(&bzs, 0, 0); + if (ret != BZ_OK) + maybe_errx("bzip2 init"); + + /* Prepend. 
*/ + bzs.avail_in = prelen; + bzs.next_in = pre; + + if (bytes_in) + *bytes_in = prelen; + + while (ret == BZ_OK) { + check_siginfo(); + if (bzs.avail_in == 0 && !end_of_file) { + ssize_t n; + + n = read(in, inbuf, BUFLEN); + if (n < 0) + maybe_err("read"); + if (n == 0) + end_of_file = 1; + infile_newdata(n); + bzs.next_in = inbuf; + bzs.avail_in = n; + if (bytes_in) + *bytes_in += n; + } + + bzs.next_out = outbuf; + bzs.avail_out = BUFLEN; + ret = BZ2_bzDecompress(&bzs); + + switch (ret) { + case BZ_STREAM_END: + case BZ_OK: + if (ret == BZ_OK && end_of_file) { + /* + * If we hit this after a stream end, consider + * it as the end of the whole file and don't + * bail out. + */ + if (cold == 1) + ret = BZ_STREAM_END; + else + maybe_errx("truncated file"); + } + cold = 0; + if (!tflag && bzs.avail_out != BUFLEN) { + ssize_t n; + + n = write(out, outbuf, BUFLEN - bzs.avail_out); + if (n < 0) + maybe_err("write"); + bytes_out += n; + } + if (ret == BZ_STREAM_END && !end_of_file) { + if (BZ2_bzDecompressEnd(&bzs) != BZ_OK || + BZ2_bzDecompressInit(&bzs, 0, 0) != BZ_OK) + maybe_errx("bzip2 re-init"); + cold = 1; + ret = BZ_OK; + } + break; + + case BZ_DATA_ERROR: + maybe_warnx("bzip2 data integrity error"); + break; + + case BZ_DATA_ERROR_MAGIC: + maybe_warnx("bzip2 magic number error"); + break; + + case BZ_MEM_ERROR: + maybe_warnx("bzip2 out of memory"); + break; + + default: + maybe_warnx("unknown bzip2 error: %d", ret); + break; + } + } + + if (ret != BZ_STREAM_END || BZ2_bzDecompressEnd(&bzs) != BZ_OK) + return (-1); + + return (bytes_out); +} + diff --git a/usr.bin/gzip/unlz.c b/usr.bin/gzip/unlz.c new file mode 100644 index 000000000000..71e752ab1112 --- /dev/null +++ b/usr.bin/gzip/unlz.c @@ -0,0 +1,642 @@ +/* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */ + +/*- + * Copyright (c) 2018 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christos Zoulas. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* Lzd - Educational decompressor for the lzip format + Copyright (C) 2013-2018 Antonio Diaz Diaz. + + This program is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
/*
 * LZMA state machine helpers.  There are LZ_STATES (12) states; states
 * 0..6 are the ones reached after a literal, 7..11 after a match or rep.
 */

/* Successor state after decoding a literal, indexed by current state. */
static const int lz_st_next[] = {
	0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
};

/* True when the previous symbol was a literal (state below 7). */
static bool
lz_st_is_char(int st) {
	const bool after_literal = (st <= 6);

	return after_literal;
}

/* State to enter after a literal. */
static int
lz_st_get_char(int st) {
	const int next = lz_st_next[st];

	return next;
}

/* State to enter after a match. */
static int
lz_st_get_match(int st) {
	if (st < 7)
		return 7;
	return 10;
}

/* State to enter after a repeated match. */
static int
lz_st_get_rep(int st) {
	if (st < 7)
		return 8;
	return 11;
}
/* Range decoder state: input stream plus the current code and range. */
struct lz_range_decoder {
	FILE *fp;
	uint32_t code;
	uint32_t range;
};

/*
 * Initialise the range decoder on fp and prime `code' by shifting in the
 * first five bytes of the stream (the first one falls off the top of the
 * 32-bit value).
 *
 * Returns 0 on success, or -1 if the stream reported an error or ended
 * before all five bytes were available.  getc()'s EOF would otherwise be
 * folded into the code as 0xFF and decoding would start from garbage.
 */
static int
lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
{
	rd->fp = fp;
	rd->code = 0;
	rd->range = 0xFFFFFFFFU;
	for (int i = 0; i < 5; i++)
		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
	return (ferror(rd->fp) || feof(rd->fp)) ? -1 : 0;
}
num_bits; i++) + symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]); + + return symbol - (1 << num_bits); +} + +static unsigned +lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits) +{ + unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits); + unsigned reversed_symbol = 0; + + for (int i = 0; i < num_bits; i++) { + reversed_symbol = (reversed_symbol << 1) | (symbol & 1); + symbol >>= 1; + } + + return reversed_symbol; +} + +static unsigned +lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte) +{ + unsigned symbol = 1; + + for (int i = 7; i >= 0; i--) { + const unsigned match_bit = (match_byte >> i) & 1; + const unsigned bit = lz_rd_decode_bit(rd, + &bm[symbol + (match_bit << 8) + 0x100]); + symbol = (symbol << 1) | bit; + if (match_bit != bit) { + while (symbol < 0x100) { + symbol = (symbol << 1) | + lz_rd_decode_bit(rd, &bm[symbol]); + } + break; + } + } + return symbol & 0xFF; +} + +static unsigned +lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm, + int pos_state) +{ + if (lz_rd_decode_bit(rd, &lm->choice1) == 0) + return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS); + + if (lz_rd_decode_bit(rd, &lm->choice2) == 0) { + return LOW_SYMBOLS + + lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS); + } + + return LOW_SYMBOLS + MID_SYMBOLS + + lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS); +} + +struct lz_decoder { + FILE *fin, *fout; + off_t pos, ppos, spos, dict_size; + bool wrapped; + uint32_t crc; + uint8_t *obuf; + struct lz_range_decoder rdec; +}; + +static int +lz_flush(struct lz_decoder *lz) +{ + off_t offs = lz->pos - lz->spos; + if (offs <= 0) + return -1; + + size_t size = (size_t)offs; + lz_crc_update(&lz->crc, lz->obuf + lz->spos, size); + if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size) + return -1; + + lz->wrapped = lz->pos >= lz->dict_size; + if (lz->wrapped) { + lz->ppos += lz->pos; + lz->pos = 0; + } + lz->spos = lz->pos; + return 0; +} + +static void 
+lz_destroy(struct lz_decoder *lz) +{ + if (lz->fin) + fclose(lz->fin); + if (lz->fout) + fclose(lz->fout); + free(lz->obuf); +} + +static int +lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size) +{ + memset(lz, 0, sizeof(*lz)); + + lz->fin = fdopen(dup(fin), "r"); + if (lz->fin == NULL) + goto out; + + lz->fout = fdopen(dup(fdout), "w"); + if (lz->fout == NULL) + goto out; + + lz->pos = lz->ppos = lz->spos = 0; + lz->crc = ~0; + lz->dict_size = dict_size; + lz->wrapped = false; + + lz->obuf = malloc(dict_size); + if (lz->obuf == NULL) + goto out; + + if (lz_rd_create(&lz->rdec, lz->fin) == -1) + goto out; + return 0; +out: + lz_destroy(lz); + return -1; +} + +static uint8_t +lz_peek(const struct lz_decoder *lz, unsigned ahead) +{ + off_t diff = lz->pos - ahead - 1; + + if (diff >= 0) + return lz->obuf[diff]; + + if (lz->wrapped) + return lz->obuf[lz->dict_size + diff]; + + return 0; +} + +static void +lz_put(struct lz_decoder *lz, uint8_t b) +{ + lz->obuf[lz->pos++] = b; + if (lz->dict_size == lz->pos) + lz_flush(lz); +} + +static off_t +lz_get_data_position(const struct lz_decoder *lz) +{ + return lz->ppos + lz->pos; +} + +static unsigned +lz_get_crc(const struct lz_decoder *lz) +{ + return lz->crc ^ 0xffffffffU; +} + +static void +lz_bm_init(int *a, size_t l) +{ + for (size_t i = 0; i < l; i++) + a[i] = BIT_MODEL_INIT; +} + +#define LZ_BM_INIT(a) lz_bm_init(a, nitems(a)) +#define LZ_BM_INIT2(a) do { \ + size_t l = nitems(a[0]); \ + for (size_t i = 0; i < nitems(a); i++) \ + lz_bm_init(a[i], l); \ +} while (/*CONSTCOND*/0) + +#define LZ_MODEL_INIT(a) do { \ + a.choice1 = BIT_MODEL_INIT; \ + a.choice2 = BIT_MODEL_INIT; \ + LZ_BM_INIT2(a.bm_low); \ + LZ_BM_INIT2(a.bm_mid); \ + LZ_BM_INIT(a.bm_high); \ +} while (/*CONSTCOND*/0) + +static bool +lz_decode_member(struct lz_decoder *lz) +{ + int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300]; + int bm_match[LZ_STATES][POS_STATES]; + int bm_rep[4][LZ_STATES]; + int bm_len[LZ_STATES][POS_STATES]; + int 
bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS]; + int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1]; + int bm_align[DIS_ALIGN_SIZE]; + + LZ_BM_INIT2(bm_literal); + LZ_BM_INIT2(bm_match); + LZ_BM_INIT2(bm_rep); + LZ_BM_INIT2(bm_len); + LZ_BM_INIT2(bm_dis_slot); + LZ_BM_INIT(bm_dis); + LZ_BM_INIT(bm_align); + + struct lz_len_model match_len_model; + struct lz_len_model rep_len_model; + + LZ_MODEL_INIT(match_len_model); + LZ_MODEL_INIT(rep_len_model); + + struct lz_range_decoder *rd = &lz->rdec; + unsigned rep[4] = { 0 }; + + + int state = 0; + + while (!feof(lz->fin) && !ferror(lz->fin)) { + const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK; + // bit 1 + if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) { + const uint8_t prev_byte = lz_peek(lz, 0); + const int literal_state = + prev_byte >> (8 - LITERAL_CONTEXT_BITS); + int *bm = bm_literal[literal_state]; + if (lz_st_is_char(state)) + lz_put(lz, lz_rd_decode_tree(rd, bm, 8)); + else { + int peek = lz_peek(lz, rep[0]); + lz_put(lz, lz_rd_decode_matched(rd, bm, peek)); + } + state = lz_st_get_char(state); + continue; + } + int len; + // bit 2 + if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) { + // bit 3 + if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) { + // bit 4 + if (lz_rd_decode_bit(rd, + &bm_len[state][pos_state]) == 0) + { + state = lz_st_get_short_rep(state); + lz_put(lz, lz_peek(lz, rep[0])); + continue; + } + } else { + unsigned distance; + // bit 4 + if (lz_rd_decode_bit(rd, &bm_rep[2][state]) + == 0) + distance = rep[1]; + else { + // bit 5 + if (lz_rd_decode_bit(rd, + &bm_rep[3][state]) == 0) + distance = rep[2]; + else { + distance = rep[3]; + rep[3] = rep[2]; + } + rep[2] = rep[1]; + } + rep[1] = rep[0]; + rep[0] = distance; + } + state = lz_st_get_rep(state); + len = MIN_MATCH_LEN + + lz_rd_decode_len(rd, &rep_len_model, pos_state); + } else { + rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0]; + len = MIN_MATCH_LEN + + lz_rd_decode_len(rd, &match_len_model, pos_state); + 
const int len_state = + MIN(len - MIN_MATCH_LEN, STATES - 1); + rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state], + DIS_SLOT_BITS); + if (rep[0] >= DIS_MODEL_START) { + const unsigned dis_slot = rep[0]; + const int direct_bits = (dis_slot >> 1) - 1; + rep[0] = (2 | (dis_slot & 1)) << direct_bits; + if (dis_slot < DIS_MODEL_END) + rep[0] += lz_rd_decode_tree_reversed(rd, + &bm_dis[rep[0] - dis_slot], + direct_bits); + else { + rep[0] += lz_rd_decode(rd, direct_bits + - DIS_ALIGN_BITS) << DIS_ALIGN_BITS; + rep[0] += lz_rd_decode_tree_reversed(rd, + bm_align, DIS_ALIGN_BITS); + if (rep[0] == 0xFFFFFFFFU) { + lz_flush(lz); + return len == MIN_MATCH_LEN; + } + } + } + state = lz_st_get_match(state); + if (rep[0] >= lz->dict_size || + (rep[0] >= lz->pos && !lz->wrapped)) { + lz_flush(lz); + return false; + } + } + for (int i = 0; i < len; i++) + lz_put(lz, lz_peek(lz, rep[0])); + } + lz_flush(lz); + return false; +} + +/* + * 0-3 CRC32 of the uncompressed data + * 4-11 size of the uncompressed data + * 12-19 member size including header and trailer + */ +#define TRAILER_SIZE 20 + + +static off_t +lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize) +{ + struct lz_decoder lz; + off_t rv = -1; + + if (lz_create(&lz, fin, fdout, dict_size) == -1) + return -1; + + if (!lz_decode_member(&lz)) + goto out; + + uint8_t trailer[TRAILER_SIZE]; + + for(size_t i = 0; i < nitems(trailer); i++) + trailer[i] = (uint8_t)getc(lz.fin); + + unsigned crc = 0; + for (int i = 3; i >= 0; --i) { + crc <<= 8; + crc += trailer[i]; + } + + int64_t data_size = 0; + for (int i = 11; i >= 4; --i) { + data_size <<= 8; + data_size += trailer[i]; + } + + if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz)) + goto out; + + rv = 0; + for (int i = 19; i >= 12; --i) { + rv <<= 8; + rv += trailer[i]; + } + if (insize) + *insize = rv; +#if 0 + /* Does not work with pipes */ + rv = ftello(lz.fout); +#else + rv = data_size; +#endif +out: + lz_destroy(&lz); + return rv; +} + + 
/*
 * 0-3 magic
 * 4 version
 * 5 coded dict_size
 */
#define HDR_SIZE 6
#define MIN_DICTIONARY_SIZE (1 << 12)
#define MAX_DICTIONARY_SIZE (1 << 29)

static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };

/*
 * Decode the dictionary-size byte of an lzip header.
 *
 * Bits 4-0 hold the base-2 logarithm of a base size; bits 7-5 hold how
 * many sixteenths of that base size to subtract from it.  For example
 * 0xD3 is 2^19 - 6 * 2^15 = 320 KiB.
 *
 * Returns the dictionary size in bytes, or 0 if it falls outside
 * [MIN_DICTIONARY_SIZE, MAX_DICTIONARY_SIZE].
 */
static unsigned
lz_get_dict_size(unsigned char c)
{
	/* Unsigned shift: the exponent can be 31, which overflows a signed int */
	unsigned dict_size = 1U << (c & 0x1f);

	/* The fraction is counted in sixteenths of the base size */
	dict_size -= (dict_size >> 4) * ((c >> 5) & 0x7);
	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
		return 0;
	return dict_size;
}
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* This file is #included by gzip.c */ + +/* + * pack(1) file format: + * + * The first 7 bytes is the header: + * 00, 01 - Signature (US, RS), we already validated it earlier. + * 02..05 - Uncompressed size + * 06 - Level for the huffman tree (<=24) + * + * pack(1) will then store symbols (leaf) nodes count in each huffman + * tree levels, each level would consume 1 byte (See [1]). + * + * After the symbol count table, there is the symbol table, storing + * symbols represented by corresponding leaf node. EOB is not being + * explicitly transmitted (not necessary anyway) in the symbol table. + * + * Compressed data goes after the symbol table. + * + * NOTES + * + * [1] If we count EOB into the symbols, that would mean that we will + * have at most 256 symbols in the huffman tree. pack(1) rejects empty + * file and files that just repeats one character, which means that we + * will have at least 2 symbols. Therefore, pack(1) would reduce the + * last level symbol count by 2 which makes it a number in + * range [0..254], so all levels' symbol count would fit into 1 byte. 
+ */ + +#define PACK_HEADER_LENGTH 7 +#define HTREE_MAXLEVEL 24 + +/* + * unpack descriptor + * + * Represent the huffman tree in a similar way that pack(1) would + * store in a packed file. We store all symbols in a linear table, + * and store pointers to each level's first symbol. In addition to + * that, maintain two counts for each level: inner nodes count and + * leaf nodes count. + */ +typedef struct { + int symbol_size; /* Size of the symbol table */ + int treelevels; /* Levels for the huffman tree */ + + int *symbolsin; /* Table of leaf symbols count in each + * level */ + int *inodesin; /* Table of internal nodes count in + * each level */ + + char *symbol; /* The symbol table */ + char *symbol_eob; /* Pointer to the EOB symbol */ + char **tree; /* Decoding huffman tree (pointers to + * first symbol of each tree level */ + + off_t uncompressed_size; /* Uncompressed size */ + FILE *fpIn; /* Input stream */ + FILE *fpOut; /* Output stream */ +} unpack_descriptor_t; + +/* + * Release resource allocated to an unpack descriptor. + * + * Caller is responsible to make sure that all of these pointers are + * initialized (in our case, they all point to valid memory block). + * We don't zero out pointers here because nobody else would ever + * reference the memory block without scrubbing them. + */ +static void +unpack_descriptor_fini(unpack_descriptor_t *unpackd) +{ + + free(unpackd->symbolsin); + free(unpackd->inodesin); + free(unpackd->symbol); + free(unpackd->tree); + + fclose(unpackd->fpIn); + fclose(unpackd->fpOut); +} + +/* + * Recursively fill the internal node count table + */ +static void +unpackd_fill_inodesin(const unpack_descriptor_t *unpackd, int level) +{ + + /* + * The internal nodes would be 1/2 of total internal nodes and + * leaf nodes in the next level. For the last level there + * would be no internal node by definition. 
/*
 * Add newbytes to the consumed-input counter.  A NULL counter means the
 * caller is not keeping track, in which case nothing happens.
 */
static void
accepted_bytes(off_t *bytes_in, off_t newbytes)
{
	if (bytes_in == NULL)
		return;

	*bytes_in = *bytes_in + newbytes;
}
calloc(unpackd->treelevels, sizeof(*(unpackd->symbolsin))); + unpackd->tree = + calloc(unpackd->treelevels, (sizeof(*(unpackd->tree)))); + if (unpackd->inodesin == NULL || unpackd->symbolsin == NULL || + unpackd->tree == NULL) + maybe_err("calloc"); + + /* We count from 0 so adjust to match array upper bound */ + unpackd->treelevels--; + + /* Read the levels symbol count table and calculate total */ + unpackd->symbol_size = 1; /* EOB */ + for (i = 0; i <= unpackd->treelevels; i++) { + if ((thisbyte = fgetc(unpackd->fpIn)) == EOF) + maybe_err("File appears to be truncated"); + unpackd->symbolsin[i] = (unsigned char)thisbyte; + unpackd->symbol_size += unpackd->symbolsin[i]; + } + accepted_bytes(bytes_in, unpackd->treelevels); + if (unpackd->symbol_size > 256) + maybe_errx("Bad symbol table"); + infile_newdata(unpackd->treelevels); + + /* Allocate for the symbol table, point symbol_eob at the beginning */ + unpackd->symbol_eob = unpackd->symbol = calloc(1, unpackd->symbol_size); + if (unpackd->symbol == NULL) + maybe_err("calloc"); + + /* + * Read in the symbol table, which contain [2, 256] symbols. + * In order to fit the count in one byte, pack(1) would offset + * it by reducing 2 from the actual number from the last level. + * + * We adjust the last level's symbol count by 1 here, because + * the EOB symbol is not being transmitted explicitly. Another + * adjustment would be done later afterward. + */ + unpackd->symbolsin[unpackd->treelevels]++; + for (i = 0; i <= unpackd->treelevels; i++) { + unpackd->tree[i] = unpackd->symbol_eob; + for (j = 0; j < unpackd->symbolsin[i]; j++) { + if ((thisbyte = fgetc(unpackd->fpIn)) == EOF) + maybe_errx("Symbol table truncated"); + *unpackd->symbol_eob++ = (char)thisbyte; + } + infile_newdata(unpackd->symbolsin[i]); + accepted_bytes(bytes_in, unpackd->symbolsin[i]); + } + + /* Now, take account for the EOB symbol as well */ + unpackd->symbolsin[unpackd->treelevels]++; + + /* + * The symbolsin table has been constructed now. 
+ * Calculate the internal nodes count table based on it. + */ + unpackd_fill_inodesin(unpackd, 0); +} + +/* + * Decode huffman stream, based on the huffman tree. + */ +static void +unpack_decode(const unpack_descriptor_t *unpackd, off_t *bytes_in) +{ + int thislevel, thiscode, thisbyte, inlevelindex; + int i; + off_t bytes_out = 0; + const char *thissymbol; /* The symbol pointer decoded from stream */ + + /* + * Decode huffman. Fetch every bytes from the file, get it + * into 'thiscode' bit-by-bit, then output the symbol we got + * when one has been found. + * + * Assumption: sizeof(int) > ((max tree levels + 1) / 8). + * bad things could happen if not. + */ + thislevel = 0; + thiscode = thisbyte = 0; + + while ((thisbyte = fgetc(unpackd->fpIn)) != EOF) { + accepted_bytes(bytes_in, 1); + infile_newdata(1); + check_siginfo(); + + /* + * Split one bit from thisbyte, from highest to lowest, + * feed the bit into thiscode, until we got a symbol from + * the tree. + */ + for (i = 7; i >= 0; i--) { + thiscode = (thiscode << 1) | ((thisbyte >> i) & 1); + + /* Did we got a symbol? 
(referencing leaf node) */ + if (thiscode >= unpackd->inodesin[thislevel]) { + inlevelindex = + thiscode - unpackd->inodesin[thislevel]; + if (inlevelindex > unpackd->symbolsin[thislevel]) + maybe_errx("File corrupt"); + + thissymbol = + &(unpackd->tree[thislevel][inlevelindex]); + if ((thissymbol == unpackd->symbol_eob) && + (bytes_out == unpackd->uncompressed_size)) + goto finished; + + fputc((*thissymbol), unpackd->fpOut); + bytes_out++; + + /* Prepare for next input */ + thislevel = 0; thiscode = 0; + } else { + thislevel++; + if (thislevel > unpackd->treelevels) + maybe_errx("File corrupt"); + } + } + } + +finished: + if (bytes_out != unpackd->uncompressed_size) + maybe_errx("Premature EOF"); +} + +/* Handler for pack(1)'ed file */ +static off_t +unpack(int in, int out, char *pre, size_t prelen, off_t *bytes_in) +{ + unpack_descriptor_t unpackd; + + in = dup(in); + if (in == -1) + maybe_err("dup"); + out = dup(out); + if (out == -1) + maybe_err("dup"); + + unpack_parse_header(in, out, pre, prelen, bytes_in, &unpackd); + unpack_decode(&unpackd, bytes_in); + unpack_descriptor_fini(&unpackd); + + /* If we reached here, the unpack was successful */ + return (unpackd.uncompressed_size); +} diff --git a/usr.bin/gzip/unxz.c b/usr.bin/gzip/unxz.c new file mode 100644 index 000000000000..de6683b246b8 --- /dev/null +++ b/usr.bin/gzip/unxz.c @@ -0,0 +1,474 @@ +/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christos Zoulas. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <sys/cdefs.h> +#include <stdarg.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <lzma.h> + +static off_t +unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) +{ + lzma_stream strm = LZMA_STREAM_INIT; + static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; + lzma_ret ret; + lzma_action action = LZMA_RUN; + off_t bytes_out, bp; + uint8_t ibuf[BUFSIZ]; + uint8_t obuf[BUFSIZ]; + + if (bytes_in == NULL) + bytes_in = &bp; + + strm.next_in = ibuf; + memcpy(ibuf, pre, prelen); + strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); + if (strm.avail_in == (size_t)-1) + maybe_err("read failed"); + infile_newdata(strm.avail_in); + strm.avail_in += prelen; + *bytes_in = strm.avail_in; + + if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) + maybe_errx("Can't initialize decoder (%d)", ret); + + strm.next_out = NULL; + strm.avail_out = 0; + if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) + maybe_errx("Can't read headers (%d)", ret); + + bytes_out = 0; + strm.next_out = obuf; + strm.avail_out = sizeof(obuf); + + for (;;) { + check_siginfo(); + if (strm.avail_in == 0) { + strm.next_in = ibuf; + strm.avail_in = read(i, ibuf, sizeof(ibuf)); + switch (strm.avail_in) { + case (size_t)-1: + maybe_err("read failed"); + /*NOTREACHED*/ + case 0: + action = LZMA_FINISH; + break; + default: + infile_newdata(strm.avail_in); + *bytes_in += strm.avail_in; + break; + } + } + + ret = lzma_code(&strm, action); + + // Write and check write error before checking decoder error. + // This way as much data as possible gets written to output + // even if decoder detected an error. 
+ if (strm.avail_out == 0 || ret != LZMA_OK) { + const size_t write_size = sizeof(obuf) - strm.avail_out; + + if (write(o, obuf, write_size) != (ssize_t)write_size) + maybe_err("write failed"); + + strm.next_out = obuf; + strm.avail_out = sizeof(obuf); + bytes_out += write_size; + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) { + // Check that there's no trailing garbage. + if (strm.avail_in != 0 || read(i, ibuf, 1)) + ret = LZMA_DATA_ERROR; + else { + lzma_end(&strm); + return bytes_out; + } + } + + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = strerror(ENOMEM); + break; + + case LZMA_FORMAT_ERROR: + msg = "File format not recognized"; + break; + + case LZMA_OPTIONS_ERROR: + // FIXME: Better message? + msg = "Unsupported compression options"; + break; + + case LZMA_DATA_ERROR: + msg = "File is corrupt"; + break; + + case LZMA_BUF_ERROR: + msg = "Unexpected end of input"; + break; + + case LZMA_MEMLIMIT_ERROR: + msg = "Reached memory limit"; + break; + + default: + maybe_errx("Unknown error (%d)", ret); + break; + } + maybe_errx("%s", msg); + + } + } +} + +#include <stdbool.h> + +/* + * Copied various bits and pieces from xz support code or brute force + * replacements. + */ + +#define my_min(A,B) ((A)<(B)?(A):(B)) + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. +// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) +#if BUFSIZ <= 1024 +# define IO_BUFFER_SIZE 8192 +#else +# define IO_BUFFER_SIZE (BUFSIZ & ~7U) +#endif + +/// is_sparse() accesses the buffer as uint64_t for maximum speed. +/// Use an union to make sure that the buffer is properly aligned. 
+typedef union { + uint8_t u8[IO_BUFFER_SIZE]; + uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; + uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; +} io_buf; + + +static bool +io_pread(int fd, io_buf *buf, size_t size, off_t pos) +{ + // Using lseek() and read() is more portable than pread() and + // for us it is as good as real pread(). + if (lseek(fd, pos, SEEK_SET) != pos) { + return true; + } + + const size_t amount = read(fd, buf, size); + if (amount == SIZE_MAX) + return true; + + if (amount != size) { + return true; + } + + return false; +} + +/* + * Most of the following is copied (mostly verbatim) from the xz + * distribution, from file src/xz/list.c + */ + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + + +/// Information about a .xz file +typedef struct { + /// Combined Index of all Streams in the file + lzma_index *idx; + + /// Total amount of Stream Padding + uint64_t stream_padding; + + /// Highest memory usage so far + uint64_t memusage_max; + + /// True if all Blocks so far have Compressed Size and + /// Uncompressed Size fields + bool all_have_sizes; + + /// Oldest XZ Utils version that will decompress the file + uint32_t min_version; + +} xz_file_info; + +#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } + + +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param xfi Pointer to structure where the decoded information +/// is stored. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +// TODO: This function is pretty big. 
liblzma should have a function that +// takes a callback function to parse the Index(es) from a .xz file to make +// it easy for applications. +static bool +parse_indexes(xz_file_info *xfi, int src_fd) +{ + struct stat st; + + if (fstat(src_fd, &st) != 0) { + return true; + } + + if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + return true; + } + + io_buf buf; + lzma_stream_flags header_flags; + lzma_stream_flags footer_flags; + lzma_ret ret; + + // lzma_stream for the Index decoder + lzma_stream strm = LZMA_STREAM_INIT; + + // All Indexes decoded so far + lzma_index *combined_index = NULL; + + // The Index currently being decoded + lzma_index *this_index = NULL; + + // Current position in the file. We parse the file backwards so + // initialize it to point to the end of the file. + off_t pos = st.st_size; + + // Each loop iteration decodes one Index. + do { + // Check that there is enough data left to contain at least + // the Stream Header and Stream Footer. This check cannot + // fail in the first pass of this loop. + if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { + goto error; + } + + pos -= LZMA_STREAM_HEADER_SIZE; + lzma_vli stream_padding = 0; + + // Locate the Stream Footer. There may be Stream Padding which + // we must skip when reading backwards. + while (true) { + if (pos < LZMA_STREAM_HEADER_SIZE) { + goto error; + } + + if (io_pread(src_fd, &buf, + LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + // Stream Padding is always a multiple of four bytes. + int i = 2; + if (buf.u32[i] != 0) + break; + + // To avoid calling io_pread() for every four bytes + // of Stream Padding, take advantage that we read + // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and + // check them too before calling io_pread() again. + do { + stream_padding += 4; + pos -= 4; + --i; + } while (i >= 0 && buf.u32[i] == 0); + } + + // Decode the Stream Footer. 
+ ret = lzma_stream_footer_decode(&footer_flags, buf.u8); + if (ret != LZMA_OK) { + goto error; + } + + // Check that the Stream Footer doesn't specify something + // that we don't support. This can only happen if the xz + // version is older than liblzma and liblzma supports + // something new. + // + // It is enough to check Stream Footer. Stream Header must + // match when it is compared against Stream Footer with + // lzma_stream_flags_compare(). + if (footer_flags.version != 0) { + goto error; + } + + // Check that the size of the Index field looks sane. + lzma_vli index_size = footer_flags.backward_size; + if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { + goto error; + } + + // Set pos to the beginning of the Index. + pos -= index_size; + + // Decode the Index. + ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); + if (ret != LZMA_OK) { + goto error; + } + + do { + // Don't give the decoder more input than the + // Index size. + strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); + if (io_pread(src_fd, &buf, strm.avail_in, pos)) + goto error; + + pos += strm.avail_in; + index_size -= strm.avail_in; + + strm.next_in = buf.u8; + ret = lzma_code(&strm, LZMA_RUN); + + } while (ret == LZMA_OK); + + // If the decoding seems to be successful, check also that + // the Index decoder consumed as much input as indicated + // by the Backward Size field. + if (ret == LZMA_STREAM_END) + if (index_size != 0 || strm.avail_in != 0) + ret = LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) { + // LZMA_BUFFER_ERROR means that the Index decoder + // would have liked more input than what the Index + // size should be according to Stream Footer. + // The message for LZMA_DATA_ERROR makes more + // sense in that case. + if (ret == LZMA_BUF_ERROR) + ret = LZMA_DATA_ERROR; + + goto error; + } + + // Decode the Stream Header and check that its Stream Flags + // match the Stream Footer. 
+ pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; + if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { + goto error; + } + + pos -= lzma_index_total_size(this_index); + if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + ret = lzma_stream_header_decode(&header_flags, buf.u8); + if (ret != LZMA_OK) { + goto error; + } + + ret = lzma_stream_flags_compare(&header_flags, &footer_flags); + if (ret != LZMA_OK) { + goto error; + } + + // Store the decoded Stream Flags into this_index. This is + // needed so that we can print which Check is used in each + // Stream. + ret = lzma_index_stream_flags(this_index, &footer_flags); + if (ret != LZMA_OK) + goto error; + + // Store also the size of the Stream Padding field. It is + // needed to show the offsets of the Streams correctly. + ret = lzma_index_stream_padding(this_index, stream_padding); + if (ret != LZMA_OK) + goto error; + + if (combined_index != NULL) { + // Append the earlier decoded Indexes + // after this_index. + ret = lzma_index_cat( + this_index, combined_index, NULL); + if (ret != LZMA_OK) { + goto error; + } + } + + combined_index = this_index; + this_index = NULL; + + xfi->stream_padding += stream_padding; + + } while (pos > 0); + + lzma_end(&strm); + + // All OK. Make combined_index available to the caller. + xfi->idx = combined_index; + return false; + +error: + // Something went wrong, free the allocated memory. 
+ lzma_end(&strm); + lzma_index_end(combined_index, NULL); + lzma_index_end(this_index, NULL); + return true; +} + +/***************** end of copy from list.c *************************/ + +/* + * Small wrapper to extract total length of a file + */ +off_t +unxz_len(int fd) +{ + xz_file_info xfi = XZ_FILE_INFO_INIT; + if (!parse_indexes(&xfi, fd)) { + off_t res = lzma_index_uncompressed_size(xfi.idx); + lzma_index_end(xfi.idx, NULL); + return res; + } + return 0; +} + diff --git a/usr.bin/gzip/unzstd.c b/usr.bin/gzip/unzstd.c new file mode 100644 index 000000000000..4536f3119ace --- /dev/null +++ b/usr.bin/gzip/unzstd.c @@ -0,0 +1,89 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 Klara, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* This file is #included by gzip.c */ + +static off_t +unzstd(int in, int out, char *pre, size_t prelen, off_t *bytes_in) +{ + static char *ibuf, *obuf; + ZSTD_inBuffer zib; + ZSTD_outBuffer zob; + ZSTD_DCtx *zds; + ssize_t res; + size_t zres; + size_t bytes_out = 0; + int eof = 0; + + if (ibuf == NULL) + ibuf = malloc(BUFLEN); + if (obuf == NULL) + obuf = malloc(BUFLEN); + if (ibuf == NULL || obuf == NULL) + maybe_err("malloc"); + + zds = ZSTD_createDStream(); + ZSTD_initDStream(zds); + + zib.src = pre; + zib.size = prelen; + zib.pos = 0; + if (bytes_in != NULL) + *bytes_in = prelen; + zob.dst = obuf; + zob.size = BUFLEN; + zob.pos = 0; + + while (!eof) { + if (zib.pos >= zib.size) { + res = read(in, ibuf, BUFLEN); + if (res < 0) + maybe_err("read"); + if (res == 0) + eof = 1; + infile_newdata(res); + zib.src = ibuf; + zib.size = res; + zib.pos = 0; + if (bytes_in != NULL) + *bytes_in += res; + } + zres = ZSTD_decompressStream(zds, &zob, &zib); + if (ZSTD_isError(zres)) { + maybe_errx("%s", ZSTD_getErrorName(zres)); + } + if (zob.pos > 0) { + res = write(out, obuf, zob.pos); + if (res < 0) + maybe_err("write"); + zob.pos = 0; + bytes_out += res; + } + } + ZSTD_freeDStream(zds); + return (bytes_out); +} diff --git a/usr.bin/gzip/zdiff b/usr.bin/gzip/zdiff new file mode 100644 index 000000000000..f0f8c2d64681 --- /dev/null +++ b/usr.bin/gzip/zdiff @@ -0,0 +1,141 @@ +#!/bin/sh - +# +# $NetBSD: zdiff,v 1.5 2010/04/14 20:30:28 joerg Exp 
$ +# +# $OpenBSD: zdiff,v 1.2 2003/07/29 07:42:44 otto Exp $ +# +#- +# Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> +# Copyright (c) 2010 Joerg Sonnenberger <joerg@NetBSD.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# +# Sponsored in part by the Defense Advanced Research Projects +# Agency (DARPA) and Air Force Research Laboratory, Air Force +# Materiel Command, USAF, under agreement number F39502-99-1-0512. 
+# + +# Set $prog based on $0 +case $0 in + *cmp) prog=cmp + ;; + *) prog=diff + ;; +esac +USAGE="usage: $0 [options] file1 [file2]" + +check_suffix() { + case "$1" in + *[._-][Zz]) + eval "$2=\${1%??}" + eval "$3=\"gzip -cdqf\"" + ;; + *[._-]bz) + eval "$2=\${1%???}" + eval "$3=\"bzip2 -cdqf\"" + ;; + *[._-]gz) + eval "$2=\${1%???}" + eval "$3=\"gzip -cdqf\"" + ;; + *[._-]xz) + eval "$2=\${1%???}" + eval "$3=\"xz -cdqf\"" + ;; + *[._-]bz2) + eval "$2=\${1%????}" + eval "$3=\"bzip2 -cdqf\"" + ;; + *[._-]lzma) + eval "$2=\${1%?????}" + eval "$3=\"xz -cdqf\"" + ;; + *.t[ag]z) + eval "$2=\${1%??}ar" + eval "$3=\"gzip -cdqf\"" + ;; + *.tbz) + eval "$2=\${1%??}ar" + eval "$3=\"bzip2 -cdqf\"" + ;; + *.tbz2) + eval "$2=\${1%???}ar" + eval "$3=\"bzip2 -cdqf\"" + ;; + *.t[lx]z) + eval "$2=\${1%??}ar" + eval "$3=\"xz -cdqf\"" + ;; + *) + eval "$2=\$1" + eval "$3=\"\"" + ;; + esac +} + + +# Pull out any command line flags so we can pass them to diff/cmp +# XXX - assumes there is no optarg +flags= +while test $# -ne 0; do + case "$1" in + --) + shift + break + ;; + -) + break + ;; + -*) + flags="$flags $1" + shift + ;; + *) + break + ;; + esac +done + +if [ $# -eq 1 ]; then + # One file given, compare compressed to uncompressed + files="$1" + check_suffix "$1" files filt + if [ -z "$filt" ]; then + echo "z$prog: unknown suffix" 1>&2 + exit 1 + fi + $filt -- "$1" | $prog $flags -- - "$files" + status=$? +elif [ $# -eq 2 ]; then + # Two files given, compare the two uncompressing as needed + check_suffix "$1" files filt + check_suffix "$2" files2 filt2 + if [ -z "$filt" -a -z "$filt2" ]; then + $prog $flags -- "$1" "$2" + elif [ -z "$filt" -a -n "$filt2" -a "$1" != "-" ]; then + $filt2 -- "$2" | $prog $flags -- "$1" - + elif [ -n "$filt" -a -z "$filt2" -a "$2" != "-" ]; then + $filt -- "$1" | $prog $flags -- - "$2" + else + tmp=`mktemp -t z$prog.XXXXXXXXXX` || exit 1 + trap "rm -f $tmp" 0 1 2 3 13 15 + ${filt2:-cat} -- "$2" > $tmp || exit $? 
+ ${filt:-cat} -- "$1" | $prog $flags -- - "$tmp" + fi + status=$? +else + echo "$USAGE" 1>&2 + exit 1 +fi + +exit $status diff --git a/usr.bin/gzip/zdiff.1 b/usr.bin/gzip/zdiff.1 new file mode 100644 index 000000000000..3ce1663ae1ce --- /dev/null +++ b/usr.bin/gzip/zdiff.1 @@ -0,0 +1,140 @@ +.\" $NetBSD: zdiff.1,v 1.5 2010/04/14 19:52:05 wiz Exp $ +.\" $OpenBSD: zdiff.1,v 1.2 2003/07/13 17:39:14 millert Exp $ +.\" +.\" Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> +.\" Copyright (c) 2010 Joerg Sonnenberger <joerg@NetBSD.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.\" Sponsored in part by the Defense Advanced Research Projects +.\" Agency (DARPA) and Air Force Research Laboratory, Air Force +.\" Materiel Command, USAF, under agreement number F39502-99-1-0512. +.Dd May 23, 2011 +.Dt ZDIFF 1 +.Os +.Sh NAME +.Nm zcmp , +.Nm zdiff +.Nd compare compressed files +.Sh SYNOPSIS +.Nm zcmp +.Op Ar options +.Ar file +.Op Ar file2 +.Nm zdiff +.Op Ar options +.Ar file +.Op Ar file2 +.Sh DESCRIPTION +.Nm zcmp +and +.Nm zdiff +are filters that invoke +.Xr cmp 1 +or +.Xr diff 1 +respectively to compare compressed files. +Any +.Ar options +that are specified are passed to +.Xr cmp 1 +or +.Xr diff 1 . 
+.Pp +If only +.Ar file1 +is specified, it is compared against a file with the same name, but +with the extension removed. +When both +.Ar file1 +and +.Ar file2 +are specified, either file may be compressed. +.Pp +Extensions handled by +.Xr gzip 1 : +.Bl -bullet -compact +.It +z, Z, +.It +gz, +.It +taz, +.It +tgz. +.El +.Pp +Extensions handled by +.Xr bzip2 1 : +.Bl -bullet -compact +.It +bz, +.It +bz2, +.It +tbz, +.It +tbz2. +.El +.Pp +Extensions handled by +.Xr xz 1 : +.Bl -bullet -compact +.It +lzma, +.It +xz, +.It +tlz, +.It +txz. +.El +.Sh ENVIRONMENT +.Bl -tag -width "TMPDIR" +.It Ev TMPDIR +Directory in which to place temporary files. +If unset, +.Pa /tmp +is used. +.El +.Sh FILES +.Bl -tag -width "/tmp/zdiff.XXXXXXXXXX" -compact +.It Pa /tmp/zcmp.XXXXXXXXXX +Temporary file for +.Nm zcmp . +.It Pa /tmp/zdiff.XXXXXXXXXX +Temporary file for +.Nm zdiff . +.El +.Sh SEE ALSO +.Xr bzip2 1 , +.Xr cmp 1 , +.Xr diff 1 , +.Xr gzip 1 , +.Xr xz 1 +.Sh CAVEATS +.Nm zcmp +and +.Nm zdiff +rely solely on the file extension to determine what is, or is not, +a compressed file. +Consequently, the following are not supported as arguments: +.Bl -dash +.It +directories +.It +device special files +.It +filenames indicating the standard input +.Pq Dq \- +.El diff --git a/usr.bin/gzip/zforce b/usr.bin/gzip/zforce new file mode 100644 index 000000000000..06c897ddf709 --- /dev/null +++ b/usr.bin/gzip/zforce @@ -0,0 +1,54 @@ +#!/bin/sh - +# +# $NetBSD: zforce,v 1.2 2003/12/28 12:43:43 wiz Exp $ +# $OpenBSD: zforce,v 1.2 2003/08/05 18:22:17 deraadt Exp $ +# +#- +# Copyright (c) 2003 Otto Moerbeek <otto@drijf.net> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. 
+# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# +prog=`basename $0` +USAGE="usage: $prog file ..." +if test $# -eq 0; then + echo $USAGE + exit 1 +fi + +ret=0 + +while test $# -ne 0; do + case "$1" in + *[._-]gz) + shift + ;; + *.t[ag]z) + shift + ;; + *) + if file "$1" | + grep -q "gzip compressed data" 2> /dev/null + then + n="$1".gz + if mv "$1" "$n" 2> /dev/null; then + echo "$1" -- renamed to "$n" + else + ret=1 + echo $prog: cannot rename "$1" to "$n" + fi + fi + shift + ;; + esac +done +exit $ret diff --git a/usr.bin/gzip/zforce.1 b/usr.bin/gzip/zforce.1 new file mode 100644 index 000000000000..10010d61cc14 --- /dev/null +++ b/usr.bin/gzip/zforce.1 @@ -0,0 +1,51 @@ +.\" $NetBSD: zforce.1,v 1.2 2003/12/28 12:43:43 wiz Exp $ +.\" $OpenBSD: zforce.1,v 1.1 2003/07/29 11:50:09 otto Exp $ +.\" +.\" Copyright (c) 2003 Otto Moerbeek <otto@drijf.net> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.Dd January 26, 2007 +.Dt ZFORCE 1 +.Os +.Sh NAME +.Nm zforce +.Nd force gzip files to have a .gz suffix +.Sh SYNOPSIS +.Nm zforce +.Ar +.Sh DESCRIPTION +The +.Nm +utility renames +.Xr gzip 1 +files to have a +.Sq .gz +suffix, so that +.Xr gzip 1 +will not compress them twice. +This can be useful if file names were truncated during a file transfer. +Files that have an existing +.Sq .gz , +.Sq -gz , +.Sq _gz , +.Sq .tgz +or +.Sq .taz +suffix, or that have not been compressed by +.Xr gzip 1 , +are ignored. +.Sh SEE ALSO +.Xr gzip 1 +.Sh CAVEATS +.Nm +overwrites existing files without warning. diff --git a/usr.bin/gzip/zmore b/usr.bin/gzip/zmore new file mode 100644 index 000000000000..fb195c482a99 --- /dev/null +++ b/usr.bin/gzip/zmore @@ -0,0 +1,81 @@ +#!/bin/sh - +# +# $NetBSD: zmore,v 1.5 2013/12/06 13:33:15 pettai Exp $ +# +# $OpenBSD: zmore,v 1.6 2008/08/20 09:22:02 mpf Exp $ +# +#- +# Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# +# Sponsored in part by the Defense Advanced Research Projects +# Agency (DARPA) and Air Force Research Laboratory, Air Force +# Materiel Command, USAF, under agreement number F39502-99-1-0512. +# + +# Pull out any command line flags so we can pass them to more/less +flags= +while test $# -ne 0; do + case "$1" in + --) + shift + break + ;; + -*) + flags="$flags $1" + shift + ;; + *) + break + ;; + esac +done + +if [ `basename $0` = "zless" ] ; then + pager=${PAGER-less} +else + pager=${PAGER-more} +fi + +# No files means read from stdin +if [ $# -eq 0 ]; then + gzip -cdfq 2>&1 | $pager $flags + exit 0 +fi + +oterm=`stty -g 2>/dev/null` +while test $# -ne 0; do + gzip -cdfq "$1" 2>&1 | $pager $flags + prev="$1" + shift + if tty -s && test -n "$oterm" -a $# -gt 0; then + #echo -n "--More--(Next file: $1)" + echo -n "$prev (END) - Next: $1 " + trap "stty $oterm 2>/dev/null" 0 1 2 3 13 15 + stty cbreak -echo 2>/dev/null + REPLY=`dd bs=1 count=1 2>/dev/null` + stty $oterm 2>/dev/null + trap - 0 1 2 3 13 15 + echo + case "$REPLY" in + s) + shift + ;; + e|q) + break + ;; + esac + fi +done +exit 0 diff --git a/usr.bin/gzip/zmore.1 b/usr.bin/gzip/zmore.1 new file mode 100644 index 000000000000..5e0acc9b5901 --- /dev/null +++ b/usr.bin/gzip/zmore.1 @@ -0,0 +1,108 @@ +.\" $NetBSD: zmore.1,v 1.4 2013/11/12 21:58:37 pettai Exp $ +.\" $OpenBSD: zmore.1,v 1.10 2009/08/16 09:41:08 sobrado Exp $ +.\" +.\" Copyright (c) 2003 Todd C. 
Miller <Todd.Miller@courtesan.com> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.\" Sponsored in part by the Defense Advanced Research Projects +.\" Agency (DARPA) and Air Force Research Laboratory, Air Force +.\" Materiel Command, USAF, under agreement number F39502-99-1-0512. +.Dd October 22, 2014 +.Dt ZMORE 1 +.Os +.Sh NAME +.Nm zmore , +.Nm zless +.Nd view compressed files +.Sh SYNOPSIS +.Nm zmore +.Op Ar flags +.Op Ar +.Nm zless +.Op Ar flags +.Op Ar +.Sh DESCRIPTION +.Nm +is a filter that allows the viewing of files compressed with Lempel-Ziv +encoding. +Such files generally have a +.Dq Z +or +.Dq gz +extension (both the +.Xr compress 1 +and +.Xr gzip 1 +formats are supported). +Any +.Ar flags +that are specified are passed to the user's preferred +.Ev PAGER +(which is +.Pa /usr/bin/more +by default). +.Pp +.Nm zless +is equivalent to +.Nm zmore +but uses +.Xr less 1 +as a pager instead of +.Xr more 1 . +.Pp +When multiple files are specified, +.Nm +will pause at the end of each file and present the following prompt to the user: +.Bd -literal -offset indent +prev_file (END) - Next: next_file +.Ed +.Pp +Where +.Sy prev_file +is the file that was just displayed and +.Sy next_file +is the next file to be displayed. 
+The following keys are recognized at the prompt: +.Bl -tag -width "e or q" -offset indent +.It Ic e No or Ic q +quit +.Nm zmore . +.It Ic s +skip the next file (or exit if the next file is the last). +.El +.Pp +If no files are specified, +.Nm +will read from the standard input. +In this mode +.Nm +will assume +.Xr gzip 1 +style compression since there is no suffix on which to make a decision. +.Sh ENVIRONMENT +.Bl -tag -width "PAGER" +.It Ev PAGER +Program used to display files. +If unset, +.Pa /usr/bin/more +is used +.Pq Nm zmore +or +.Pa /usr/bin/less +.Pq Nm zless . +.El +.Sh SEE ALSO +.Xr compress 1 , +.Xr less 1 , +.Xr more 1 diff --git a/usr.bin/gzip/znew b/usr.bin/gzip/znew new file mode 100644 index 000000000000..498c1b70622d --- /dev/null +++ b/usr.bin/gzip/znew @@ -0,0 +1,136 @@ +#!/bin/sh - +# +# $NetBSD: znew,v 1.3 2008/04/27 09:07:13 nakayama Exp $ +# $OpenBSD: znew,v 1.2 2003/08/05 18:22:17 deraadt Exp $ +# +#- +# Copyright (c) 2003 Otto Moerbeek <otto@drijf.net> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# + +# Return 0 if the first arg file size is smaller than the second, 1 otherwise. 
smaller () {
	a=$(du -k "$1" | awk '{ print $1 }')
	b=$(du -k "$2" | awk '{ print $1 }')
	[ $a -lt $b ]
}

# Run "gzip -t" on the file given as $1, but only when -t was requested.
# Succeeds without doing anything otherwise.
checkfile () {
	[ $tflag -eq 1 ] || return 0
	gzip -qt < "$1"
}

# Convert one compress(1) file to gzip(1) format.
# $1 is the file name, with or without the .Z suffix.
# Returns non-zero when the file could not be converted.
process () {
	prefix="${1%.Z}"
	filez="${prefix}.Z"
	filegz="${prefix}.gz"

	# Sanity checks on the input and output names.
	if [ ! -e "$filez" ]; then
		echo "$prog: $filez does not exist"
		return 1
	fi
	if [ ! -f "$filez" ]; then
		echo "$prog: $filez is not a regular file"
		return 1
	fi
	if [ -e "$filegz" ] && [ $fflag -eq 0 ]; then
		echo "$prog: $filegz already exists"
		return 1
	fi

	if ! tmp=$(mktemp /tmp/znewXXXXXXXXXX); then
		echo "$prog: cannot create tmp file"
		return 1
	fi
	# Do not leave the temporary file behind when interrupted.
	trap 'rm -f "$tmp"; exit 1' HUP INT QUIT PIPE TERM

	# Recompress into "$tmp"; give up on this file if that fails.
	if ! uncompress -f -c < "$filez" | gzip -f -c $gzipflags > "$tmp"; then
		echo "$prog: failed to process $filez"
		rm -f "$tmp"
		return 1
	fi

	# -K: keep the .Z file when it occupies fewer blocks than the result.
	if [ $kflag -eq 1 ] && smaller "$filez" "$tmp"; then
		echo -n "$prog: $filez is smaller than $filegz"
		echo "; keeping it"
		rm -f "$tmp"
		return 0
	fi

	if ! checkfile "$tmp"; then
		echo "$prog: integrity check of $tmp failed"
		rm -f "$tmp"
		return 1
	fi

	# Copy the original first so the .gz inherits its mode, then
	# overwrite the contents with the recompressed data.
	if ! cp -fp "$filez" "$filegz"; then
		echo "$prog: warning: could not keep mode of $filez"
	fi
	if ! cp "$tmp" "$filegz" 2> /dev/null; then
		echo "$prog: warning: could not keep mode of $filez"
		if ! cp -f "$tmp" "$filegz" 2> /dev/null; then
			echo "$prog: could not copy $tmp to $filegz"
			rm -f "$filegz" "$tmp"
			return 1
		fi
	fi

	# Carry the timestamp of the original over as well.
	if ! touch -fr "$filez" "$filegz"; then
		echo -n "$prog: warning: could not keep timestamp of "
		echo "$filez"
	fi
	rm -f "$filez" "$tmp"
}

prog=$(basename "$0")
usage="usage: $prog [-ftv9K] file ..."

fflag=0
tflag=0
kflag=0
gzipflags=

# -P flag is recognized to maintain compatibility, but ignored.
Pipe mode is +# always used +while getopts :ftv9PK i; do + case $i in + f) fflag=1;; + t) tflag=1;; + v) gzipflags="-v $gzipflags";; + 9) gzipflags="-9 $gzipflags";; + P) ;; + K) kflag=1;; + \?) echo "$usage"; exit 1;; + esac +done + +shift $((OPTIND - 1)) + +if test $# -eq 0; then + echo "$usage" + exit 1 +fi + +rc=0 + +while test $# -ne 0; do + if ! process "$1"; then + rc=$? + fi + shift +done +exit $rc diff --git a/usr.bin/gzip/znew.1 b/usr.bin/gzip/znew.1 new file mode 100644 index 000000000000..0b97fe6f4a9f --- /dev/null +++ b/usr.bin/gzip/znew.1 @@ -0,0 +1,69 @@ +.\" $NetBSD: znew.1,v 1.2 2003/12/28 12:43:43 wiz Exp $ +.\" $OpenBSD: znew.1,v 1.1 2003/08/02 20:52:50 otto Exp $ +.\" +.\" Copyright (c) 2003 Otto Moerbeek <otto@drijf.net> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.Dd January 26, 2007 +.Dt ZNEW 1 +.Os +.Sh NAME +.Nm znew +.Nd convert compressed files to gzipped files +.Sh SYNOPSIS +.Nm +.Op Fl ftv9K +.Ar +.Sh DESCRIPTION +The +.Nm +utility uncompresses files compressed by +.Xr compress 1 +and recompresses them with +.Xr gzip 1 . +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl f +Overwrite existing +.Sq .gz +files. +Unless this option is specified, +.Nm +refuses to overwrite existing files. 
+.It Fl t +Test integrity of the gzipped file before deleting the original file. +If the integrity check fails, the original +.Sq .Z +file is not removed. +.It Fl v +Print a report specifying the achieved compression ratios. +.It Fl 9 +Use the -9 mode of +.Xr gzip 1 , +achieving better compression at the cost of slower execution. +.It Fl K +Keep the original +.Sq .Z +file if it uses less disk blocks than the gzipped one. +A disk block is 1024 bytes. +.El +.Sh SEE ALSO +.Xr gzip 1 +.Sh CAVEATS +The +.Nm +utility tries to maintain the file mode of the original file. +If the original file is not writable, it is not able to do that and +.Nm +will print a warning. diff --git a/usr.bin/gzip/zuncompress.c b/usr.bin/gzip/zuncompress.c new file mode 100644 index 000000000000..79f3983037f7 --- /dev/null +++ b/usr.bin/gzip/zuncompress.c @@ -0,0 +1,397 @@ +/* $NetBSD: zuncompress.c,v 1.11 2011/08/16 13:55:02 joerg Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1985, 1986, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis and James A. Woods, derived from original + * work by Spencer Thomas and Joseph Orost. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: NetBSD: zopen.c,v 1.8 2003/08/07 11:13:29 agc Exp + */ + +/* This file is #included by gzip.c */ + +static int zread(void *, char *, int); + +#define tab_prefixof(i) (zs->zs_codetab[i]) +#define tab_suffixof(i) ((char_type *)(zs->zs_htab))[i] +#define de_stack ((char_type *)&tab_suffixof(1 << BITS)) + +#define BITS 16 /* Default bits. */ +#define HSIZE 69001 /* 95% occupancy */ /* XXX may not need HSIZE */ +#define BIT_MASK 0x1f /* Defines for third byte of header. */ +#define BLOCK_MASK 0x80 +#define CHECK_GAP 10000 /* Ratio check interval. */ +#define BUFSIZE (64 * 1024) + +/* + * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is + * a fourth header byte (for expansion). + */ +#define INIT_BITS 9 /* Initial number of bits/code. */ + +/* + * the next two codes should not be changed lightly, as they must not + * lie within the contiguous general code space. + */ +#define FIRST 257 /* First free entry. */ +#define CLEAR 256 /* Table clear output code. 
*/ + + +#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) + +typedef long code_int; +typedef long count_int; +typedef u_char char_type; + +static char_type magic_header[] = + {'\037', '\235'}; /* 1F 9D */ + +static char_type rmask[9] = + {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; + +static off_t total_compressed_bytes; +static size_t compressed_prelen; +static char *compressed_pre; + +struct s_zstate { + FILE *zs_fp; /* File stream for I/O */ + char zs_mode; /* r or w */ + enum { + S_START, S_MIDDLE, S_EOF + } zs_state; /* State of computation */ + int zs_n_bits; /* Number of bits/code. */ + int zs_maxbits; /* User settable max # bits/code. */ + code_int zs_maxcode; /* Maximum code, given n_bits. */ + code_int zs_maxmaxcode; /* Should NEVER generate this code. */ + count_int zs_htab [HSIZE]; + u_short zs_codetab [HSIZE]; + code_int zs_hsize; /* For dynamic table sizing. */ + code_int zs_free_ent; /* First unused entry. */ + /* + * Block compression parameters -- after all codes are used up, + * and compression rate changes, start over. + */ + int zs_block_compress; + int zs_clear_flg; + long zs_ratio; + count_int zs_checkpoint; + int zs_offset; + long zs_in_count; /* Length of input. */ + long zs_bytes_out; /* Length of compressed output. */ + long zs_out_count; /* # of codes output (for debugging). 
*/ + char_type zs_buf[BITS]; + union { + struct { + long zs_fcode; + code_int zs_ent; + code_int zs_hsize_reg; + int zs_hshift; + } w; /* Write parameters */ + struct { + char_type *zs_stackp; + int zs_finchar; + code_int zs_code, zs_oldcode, zs_incode; + int zs_roffset, zs_size; + char_type zs_gbuf[BITS]; + } r; /* Read parameters */ + } u; +}; + +static code_int getcode(struct s_zstate *zs); + +static off_t +zuncompress(FILE *in, FILE *out, char *pre, size_t prelen, + off_t *compressed_bytes) +{ + off_t bin, bout = 0; + char *buf; + + buf = malloc(BUFSIZE); + if (buf == NULL) + return -1; + + /* XXX */ + compressed_prelen = prelen; + if (prelen != 0) + compressed_pre = pre; + else + compressed_pre = NULL; + + while ((bin = fread(buf, 1, BUFSIZE, in)) != 0) { + if (tflag == 0 && (off_t)fwrite(buf, 1, bin, out) != bin) { + free(buf); + return -1; + } + bout += bin; + } + + if (compressed_bytes) + *compressed_bytes = total_compressed_bytes; + + free(buf); + return bout; +} + +static int +zclose(void *zs) +{ + free(zs); + /* We leave the caller to close the fd passed to zdopen() */ + return 0; +} + +FILE * +zdopen(int fd) +{ + struct s_zstate *zs; + + if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) + return (NULL); + + zs->zs_state = S_START; + + /* XXX we can get rid of some of these */ + zs->zs_hsize = HSIZE; /* For dynamic table sizing. */ + zs->zs_free_ent = 0; /* First unused entry. */ + zs->zs_block_compress = BLOCK_MASK; + zs->zs_clear_flg = 0; /* XXX we calloc()'d this structure why = 0? */ + zs->zs_ratio = 0; + zs->zs_checkpoint = CHECK_GAP; + zs->zs_in_count = 1; /* Length of input. */ + zs->zs_out_count = 0; /* # of codes output (for debugging). */ + zs->u.r.zs_roffset = 0; + zs->u.r.zs_size = 0; + + /* + * Layering compress on top of stdio in order to provide buffering, + * and ensure that reads and write work with the data specified. 
+ */ + if ((zs->zs_fp = fdopen(fd, "r")) == NULL) { + free(zs); + return NULL; + } + + return funopen(zs, zread, NULL, NULL, zclose); +} + +/* + * Decompress read. This routine adapts to the codes in the file building + * the "string" table on-the-fly; requiring no table to be stored in the + * compressed file. The tables used herein are shared with those of the + * compress() routine. See the definitions above. + */ +static int +zread(void *cookie, char *rbp, int num) +{ + u_int count, i; + struct s_zstate *zs; + u_char *bp, header[3]; + + if (num == 0) + return (0); + + zs = cookie; + count = num; + bp = (u_char *)rbp; + switch (zs->zs_state) { + case S_START: + zs->zs_state = S_MIDDLE; + break; + case S_MIDDLE: + goto middle; + case S_EOF: + goto eof; + } + + /* Check the magic number */ + for (i = 0; i < 3 && compressed_prelen; i++, compressed_prelen--) + header[i] = *compressed_pre++; + + if (fread(header + i, 1, sizeof(header) - i, zs->zs_fp) != + sizeof(header) - i || + memcmp(header, magic_header, sizeof(magic_header)) != 0) { + errno = EFTYPE; + return (-1); + } + total_compressed_bytes = 0; + zs->zs_maxbits = header[2]; /* Set -b from file. */ + zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK; + zs->zs_maxbits &= BIT_MASK; + zs->zs_maxmaxcode = 1L << zs->zs_maxbits; + if (zs->zs_maxbits > BITS || zs->zs_maxbits < 12) { + errno = EFTYPE; + return (-1); + } + /* As above, initialize the first 256 entries in the table. */ + zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS); + for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; zs->u.r.zs_code--) { + tab_prefixof(zs->u.r.zs_code) = 0; + tab_suffixof(zs->u.r.zs_code) = (char_type) zs->u.r.zs_code; + } + zs->zs_free_ent = zs->zs_block_compress ? 
FIRST : 256; + + zs->u.r.zs_oldcode = -1; + zs->u.r.zs_stackp = de_stack; + + while ((zs->u.r.zs_code = getcode(zs)) > -1) { + + if ((zs->u.r.zs_code == CLEAR) && zs->zs_block_compress) { + for (zs->u.r.zs_code = 255; zs->u.r.zs_code >= 0; + zs->u.r.zs_code--) + tab_prefixof(zs->u.r.zs_code) = 0; + zs->zs_clear_flg = 1; + zs->zs_free_ent = FIRST; + zs->u.r.zs_oldcode = -1; + continue; + } + zs->u.r.zs_incode = zs->u.r.zs_code; + + /* Special case for KwKwK string. */ + if (zs->u.r.zs_code >= zs->zs_free_ent) { + if (zs->u.r.zs_code > zs->zs_free_ent || + zs->u.r.zs_oldcode == -1) { + /* Bad stream. */ + errno = EFTYPE; + return (-1); + } + *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar; + zs->u.r.zs_code = zs->u.r.zs_oldcode; + } + /* + * The above condition ensures that code < free_ent. + * The construction of tab_prefixof in turn guarantees that + * each iteration decreases code and therefore stack usage is + * bound by 1 << BITS - 256. + */ + + /* Generate output characters in reverse order. */ + while (zs->u.r.zs_code >= 256) { + *zs->u.r.zs_stackp++ = tab_suffixof(zs->u.r.zs_code); + zs->u.r.zs_code = tab_prefixof(zs->u.r.zs_code); + } + *zs->u.r.zs_stackp++ = zs->u.r.zs_finchar = tab_suffixof(zs->u.r.zs_code); + + /* And put them out in forward order. */ +middle: do { + if (count-- == 0) + return (num); + *bp++ = *--zs->u.r.zs_stackp; + } while (zs->u.r.zs_stackp > de_stack); + + /* Generate the new entry. */ + if ((zs->u.r.zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode && + zs->u.r.zs_oldcode != -1) { + tab_prefixof(zs->u.r.zs_code) = (u_short) zs->u.r.zs_oldcode; + tab_suffixof(zs->u.r.zs_code) = zs->u.r.zs_finchar; + zs->zs_free_ent = zs->u.r.zs_code + 1; + } + + /* Remember previous code. */ + zs->u.r.zs_oldcode = zs->u.r.zs_incode; + } + zs->zs_state = S_EOF; +eof: return (num - count); +} + +/*- + * Read one code from the standard input. If EOF, return -1. + * Inputs: + * stdin + * Outputs: + * code or -1 is returned. 
+ */ +static code_int +getcode(struct s_zstate *zs) +{ + code_int gcode; + int r_off, bits, i; + char_type *bp; + + bp = zs->u.r.zs_gbuf; + if (zs->zs_clear_flg > 0 || zs->u.r.zs_roffset >= zs->u.r.zs_size || + zs->zs_free_ent > zs->zs_maxcode) { + /* + * If the next entry will be too big for the current gcode + * size, then we must increase the size. This implies reading + * a new buffer full, too. + */ + if (zs->zs_free_ent > zs->zs_maxcode) { + zs->zs_n_bits++; + if (zs->zs_n_bits == zs->zs_maxbits) /* Won't get any bigger now. */ + zs->zs_maxcode = zs->zs_maxmaxcode; + else + zs->zs_maxcode = MAXCODE(zs->zs_n_bits); + } + if (zs->zs_clear_flg > 0) { + zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS); + zs->zs_clear_flg = 0; + } + /* XXX */ + for (i = 0; i < zs->zs_n_bits && compressed_prelen; i++, compressed_prelen--) + zs->u.r.zs_gbuf[i] = *compressed_pre++; + zs->u.r.zs_size = fread(zs->u.r.zs_gbuf + i, 1, zs->zs_n_bits - i, zs->zs_fp); + zs->u.r.zs_size += i; + if (zs->u.r.zs_size <= 0) /* End of file. */ + return (-1); + zs->u.r.zs_roffset = 0; + + total_compressed_bytes += zs->u.r.zs_size; + + /* Round size down to integral number of codes. */ + zs->u.r.zs_size = (zs->u.r.zs_size << 3) - (zs->zs_n_bits - 1); + } + r_off = zs->u.r.zs_roffset; + bits = zs->zs_n_bits; + + /* Get to the first byte. */ + bp += (r_off >> 3); + r_off &= 7; + + /* Get first part (low order bits). */ + gcode = (*bp++ >> r_off); + bits -= (8 - r_off); + r_off = 8 - r_off; /* Now, roffset into gcode word. */ + + /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ + if (bits >= 8) { + gcode |= *bp++ << r_off; + r_off += 8; + bits -= 8; + } + + /* High order bits. */ + gcode |= (*bp & rmask[bits]) << r_off; + zs->u.r.zs_roffset += zs->zs_n_bits; + + return (gcode); +} + |