src - FreeBSD source tree

diff options


context:
space:
mode:

author	Rodney W. Grimes <rgrimes@FreeBSD.org>	1994-05-27 12:33:43 +0000
committer	Rodney W. Grimes <rgrimes@FreeBSD.org>	1994-05-27 12:33:43 +0000
commit	9b50d9027575220cb6dd09b3e62f03f511e908b8 (patch)
tree	defc987843071f19a5891a97145437d43cba2af8 /usr.bin/compress
parent	efd31c5952bb9bb2485c109382100127424f8611 (diff)
download	src-9b50d9027575220cb6dd09b3e62f03f511e908b8.tar.gz src-9b50d9027575220cb6dd09b3e62f03f511e908b8.zip

Notes

Diffstat (limited to 'usr.bin/compress')

-rw-r--r--

usr.bin/compress/Makefile

-rw-r--r--

usr.bin/compress/compress.1

172

-rw-r--r--

usr.bin/compress/compress.c

444

-rw-r--r--

usr.bin/compress/doc/NOTES

139

-rw-r--r--

usr.bin/compress/doc/README

283

-rw-r--r--

usr.bin/compress/doc/revision.log

116

-rw-r--r--

usr.bin/compress/zcat.sh

-rw-r--r--

usr.bin/compress/zopen.3

139

-rw-r--r--

usr.bin/compress/zopen.c

740

9 files changed, 2082 insertions, 0 deletions

diff --git a/usr.bin/compress/Makefile b/usr.bin/compress/Makefile
new file mode 100644
index 000000000000..6a0683018cb9
--- /dev/null
+++ b/usr.bin/compress/Makefile

@@ -0,0 +1,12 @@

+# @(#)Makefile 8.2 (Berkeley) 4/17/94
+
+# compress(1): one binary, also hard-linked as uncompress; the program
+# chooses its mode from the name it is invoked under.
+PROG=	compress
+SRCS=	compress.c zopen.c
+LINKS=	${BINDIR}/compress ${BINDIR}/uncompress
+# A single manual page documents compress, uncompress and zcat.
+MLINKS=	compress.1 uncompress.1 compress.1 zcat.1
+
+# zcat is shipped as a shell script.  Install it under ${BINDIR} rather
+# than a literal /usr/bin so it always lands next to the compress binary
+# and its uncompress link, even when BINDIR is overridden.
+afterinstall:
+	install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
+	    ${.CURDIR}/zcat.sh ${DESTDIR}${BINDIR}/zcat
+
+.include <bsd.prog.mk>

diff --git a/usr.bin/compress/compress.1 b/usr.bin/compress/compress.1
new file mode 100644
index 000000000000..74b7348cf928
--- /dev/null
+++ b/usr.bin/compress/compress.1

@@ -0,0 +1,172 @@

+.\"

+.\" This code is derived from software contributed to Berkeley by

+.\" James A. Woods, derived from original work by Spencer Thomas

+.\" and Joseph Orost.

+.\"

+.\" Redistribution and use in source and binary forms, with or without

+.\" modification, are permitted provided that the following conditions

+.\" are met:

+.\" 1. Redistributions of source code must retain the above copyright

+.\" notice, this list of conditions and the following disclaimer.

+.\" 2. Redistributions in binary form must reproduce the above copyright

+.\" notice, this list of conditions and the following disclaimer in the

+.\" documentation and/or other materials provided with the distribution.

+.\" 3. All advertising materials mentioning features or use of this software

+.\" must display the following acknowledgement:

+.\" This product includes software developed by the University of

+.\" California, Berkeley and its contributors.

+.\" 4. Neither the name of the University nor the names of its contributors

+.\" may be used to endorse or promote products derived from this software

+.\" without specific prior written permission.

+.\"

+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+.\" SUCH DAMAGE.

+.\"

+.\" @(#)compress.1 8.2 (Berkeley) 4/18/94

+.\"

+.Dd April 18, 1994

+.Dt COMPRESS 1

+.Os BSD 4.3

+.Sh NAME

+.Nm compress ,

+.Nm uncompress ,

+.Nm zcat

+.Nd compress and expand data

+.Sh SYNOPSIS

+.Nm compress

+.Op Fl cfv

+.Op Fl b Ar bits

+.Op Ar

+.Nm uncompress

+.Op Fl cfv

+.Op Ar

+.Nm zcat

+.Op Ar

+.Sh DESCRIPTION

+.Nm Compress

+reduces the size of the named files using adaptive Lempel-Ziv coding.

+Each

+.Ar file

+is renamed to the same name plus the extension

+.Dq .Z .

+As many of the modification time, access time, file flags, file mode,

+user ID, and group ID as allowed by permissions are retained in the

+new file.

+If compression would not reduce the size of a

+.Ar file ,

+the file is ignored.

+.Pp

+.Nm Uncompress

+restores the compressed files to their original form, renaming the

+files by deleting the

+.Dq .Z

+extension.

+.Pp

+.Nm Zcat

+is an alias for

+.Dq "uncompress -c" .

+.Pp

+If renaming the files would cause files to be overwritten and the standard

+input device is a terminal, the user is prompted (on the standard error

+output) for confirmation.

+If prompting is not possible or confirmation is not received, the files

+are not overwritten.

+.Pp

+If no files are specified, the standard input is compressed or uncompressed

+to the standard output.

+If either the input or the output file is not a regular file, the checks for

+reduction in size and file overwriting are not performed, the input file is

+not removed, and the attributes of the input file are not retained.

+.Pp

+The options are as follows:

+.Bl -tag -width Ds

+.It Fl b

+Specify the

+.Ar bits

+code limit (see below).

+.It Fl c

+Compressed or uncompressed output is written to the standard output.

+No files are modified.

+.It Fl f

+Force compression of

+.Ar file ,

+even if it is not actually reduced in size.

+Additionally, files are overwritten without prompting for confirmation.

+.It Fl v

+Print the percentage reduction of each file.

+.El

+.Pp

+.Nm Compress

+uses a modified Lempel-Ziv algorithm.

+Common substrings in the file are first replaced by 9-bit codes 257 and up.

+When code 512 is reached, the algorithm switches to 10-bit codes and

+continues to use more bits until the

+limit specified by the

+.Fl b

+flag is reached (the default is 16).

+.Ar Bits

+must be between 9 and 16.

+.Pp

+After the

+.Ar bits

+limit is reached,

+.Nm compress

+periodically checks the compression ratio.

+If it is increasing,

+.Nm compress

+continues to use the existing code dictionary.

+However, if the compression ratio decreases,

+.Nm compress

+discards the table of substrings and rebuilds it from scratch. This allows

+the algorithm to adapt to the next "block" of the file.

+.Pp

+The

+.Fl b

+flag is omitted for

+.Nm uncompress

+since the

+.Ar bits

+parameter specified during compression

+is encoded within the output, along with

+a magic number to ensure that neither decompression of random data nor

+recompression of compressed data is attempted.

+.Pp

+.ne 8

+The amount of compression obtained depends on the size of the

+input, the number of

+.Ar bits

+per code, and the distribution of common substrings.

+Typically, text such as source code or English is reduced by 50\-60%.

+Compression is generally much better than that achieved by Huffman

+coding (as used in the historical command pack), or adaptive Huffman

+coding (as used in the historical command compact), and takes less

+time to compute.

+.Pp

+The

+.Nm compress

+utility exits 0 on success, and >0 if an error occurs.

+.Sh SEE ALSO

+.Rs

+.%A Welch, Terry A.

+.%D June, 1984

+.%T "A Technique for High Performance Data Compression"

+.%J "IEEE Computer"

+.%V 17:6

+.%P pp. 8-19

+.Re

+.Sh HISTORY

+The

+.Nm

+command appeared in

+.Bx 4.3 .

diff --git a/usr.bin/compress/compress.c b/usr.bin/compress/compress.c
new file mode 100644
index 000000000000..d66d22433dcb
--- /dev/null
+++ b/usr.bin/compress/compress.c

@@ -0,0 +1,444 @@

+/*-

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ * 3. All advertising materials mentioning features or use of this software

+ * must display the following acknowledgement:

+ * This product includes software developed by the University of

+ * California, Berkeley and its contributors.

+ * 4. Neither the name of the University nor the names of its contributors

+ * may be used to endorse or promote products derived from this software

+ * without specific prior written permission.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+ * SUCH DAMAGE.

+ */

+#ifndef lint

+static char copyright[] =

+#endif /* not lint */

+#ifndef lint

+static char sccsid[] = "@(#)compress.c 8.2 (Berkeley) 1/7/94";

+#endif /* not lint */

+#include <sys/param.h>

+#include <sys/time.h>

+#include <sys/stat.h>

+#include <err.h>

+#include <errno.h>

+#include <stdio.h>

+#include <stdlib.h>

+#include <string.h>

+#include <unistd.h>

+#ifdef __STDC__

+#include <stdarg.h>

+#else

+#include <varargs.h>

+#endif

+void compress __P((char *, char *, int));

+void cwarn __P((const char *, ...));

+void cwarnx __P((const char *, ...));

+void decompress __P((char *, char *, int));

+int permission __P((char *));

+void setfile __P((char *, struct stat *));

+void usage __P((int));

+int eval, force, verbose;

+int

+main(argc, argv) /* choose mode from argv[0], parse options, then process stdin or each file operand */

+ int argc;

+ char *argv[];

+ enum {COMPRESS, DECOMPRESS} style;

+ size_t len;

+ int bits, cat, ch;

+ char *p, newname[MAXPATHLEN];

+ if ((p = rindex(argv[0], '/')) == NULL) /* p = last path component of argv[0] */

+ p = argv[0];

+ else

+ ++p;

+ if (!strcmp(p, "uncompress"))

+ style = DECOMPRESS;

+ else if (!strcmp(p, "compress"))

+ style = COMPRESS;

+ else

+ errx(1, "unknown program name"); /* only these two invocation names are accepted */

+ bits = cat = 0; /* bits stays 0 unless -b is given */

+ while ((ch = getopt(argc, argv, "b:cdfv")) != EOF)

+ switch(ch) {

+ case 'b':

+ bits = strtol(optarg, &p, 10);

+ if (*p) /* reject trailing non-numeric characters */

+ errx(1, "illegal bit count -- %s", optarg);

+ break;

+ case 'c':

+ cat = 1;

+ break;

+ case 'd': /* Backward compatible. */

+ style = DECOMPRESS;

+ break;

+ case 'f':

+ force = 1;

+ break;

+ case 'v':

+ verbose = 1;

+ break;

+ case '?':

+ default:

+ usage(style == COMPRESS); /* argument selects which usage text is printed */

+ }

+ argc -= optind;

+ argv += optind;

+ if (argc == 0) { /* no file operands: filter /dev/stdin to /dev/stdout */

+ switch(style) {

+ case COMPRESS:

+ (void)compress("/dev/stdin", "/dev/stdout", bits);

+ break;

+ case DECOMPRESS:

+ (void)decompress("/dev/stdin", "/dev/stdout", bits);

+ break;

+ }

+ exit (eval); /* eval is the global exit status shared with the warning helpers */

+ }

+ if (cat == 1 && argc > 1)

+ errx(1, "the -c option permits only a single file argument");

+ for (; *argv; ++argv)

+ switch(style) {

+ case COMPRESS:

+ if (cat) { /* -c: write to stdout, no output name is built */

+ compress(*argv, "/dev/stdout", bits);

+ break;

+ }

+ if ((p = rindex(*argv, '.')) != NULL && /* skip operands already named *.Z */

+ !strcmp(p, ".Z")) {

+ cwarnx("%s: name already has trailing .Z",

+ *argv);

+ break;

+ }

+ len = strlen(*argv);

+ if (len > sizeof(newname) - 3) { /* need room for the name, ".Z" and the NUL */

+ cwarnx("%s: name too long", *argv);

+ break;

+ }

+ memmove(newname, *argv, len); /* newname = operand + ".Z" */

+ newname[len] = '.';

+ newname[len + 1] = 'Z';

+ newname[len + 2] = '\0';

+ compress(*argv, newname, bits);

+ break;

+ case DECOMPRESS:

+ len = strlen(*argv);

+ if ((p = rindex(*argv, '.')) == NULL || /* operand lacks .Z: read operand.Z, write operand */

+ strcmp(p, ".Z")) {

+ if (len > sizeof(newname) - 3) {

+ cwarnx("%s: name too long", *argv);

+ break;

+ }

+ memmove(newname, *argv, len);

+ newname[len] = '.';

+ newname[len + 1] = 'Z';

+ newname[len + 2] = '\0';

+ decompress(newname,

+ cat ? "/dev/stdout" : *argv, bits);

+ } else { /* operand ends in .Z: output name is the operand minus that suffix */

+ if (len - 2 > sizeof(newname) - 1) {

+ cwarnx("%s: name too long", *argv);

+ break;

+ }

+ memmove(newname, *argv, len - 2);

+ newname[len - 2] = '\0';

+ decompress(*argv,

+ cat ? "/dev/stdout" : newname, bits);

+ }

+ break;

+ }

+ exit (eval);

+/*
+ * compress --
+ *	Copy the file `in' through a zopen(3) write stream to `out', using
+ *	`bits' as the code-size argument to zopen().
+ *
+ *	If `out' already exists as a regular file and -f was not given, the
+ *	user is asked via permission() and nothing is done on refusal.
+ *	When both files are regular (or `out' did not exist), the result is
+ *	discarded unless it is smaller than the input or -f was given;
+ *	otherwise the input's attributes are copied with setfile() and the
+ *	input is removed.  Failures are reported through cwarn().
+ */
+void
+compress(in, out, bits)
+	char *in, *out;
+	int bits;
+{
+	register int nr;
+	struct stat isb, sb;
+	FILE *ifp, *ofp;
+	int exists, isreg, oreg;
+	u_char buf[1024];
+
+	exists = !stat(out, &sb);
+	if (!force && exists && S_ISREG(sb.st_mode) && !permission(out))
+		return;
+	/* oreg: the output may be unlinked on error; isreg: do size/attribute work. */
+	isreg = oreg = !exists || S_ISREG(sb.st_mode);
+
+	ifp = ofp = NULL;
+	if ((ifp = fopen(in, "r")) == NULL) {
+		cwarn("%s", in);
+		return;
+	}
+	if (stat(in, &isb)) {		/* DON'T FSTAT! */
+		cwarn("%s", in);
+		goto err;
+	}
+	if (!S_ISREG(isb.st_mode))
+		isreg = 0;
+
+	if ((ofp = zopen(out, "w", bits)) == NULL) {
+		cwarn("%s", out);
+		goto err;
+	}
+	while ((nr = fread(buf, 1, sizeof(buf), ifp)) != 0)
+		if (fwrite(buf, 1, nr, ofp) != nr) {
+			cwarn("%s", out);
+			goto err;
+		}
+
+	if (ferror(ifp)) {
+		cwarn("%s", in);
+		goto err;
+	}
+	/*
+	 * A stream must not be used again after fclose(), even when the
+	 * call fails, so forget the pointer before jumping to the cleanup
+	 * code; otherwise it would be closed a second time.
+	 */
+	if (fclose(ifp)) {
+		cwarn("%s", in);
+		ifp = NULL;
+		goto err;
+	}
+	ifp = NULL;
+
+	if (fclose(ofp)) {
+		cwarn("%s", out);
+		ofp = NULL;
+		/* Same cleanup the error path performs for an open output. */
+		if (oreg)
+			(void)unlink(out);
+		goto err;
+	}
+	ofp = NULL;
+
+	if (isreg) {
+		if (stat(out, &sb)) {
+			cwarn("%s", out);
+			goto err;
+		}
+
+		if (!force && sb.st_size >= isb.st_size) {
+			if (verbose)
+				(void)printf("%s: file would grow; left unmodified\n", in);
+			if (unlink(out))
+				cwarn("%s", out);
+			goto err;
+		}
+
+		setfile(out, &isb);
+
+		if (unlink(in))
+			cwarn("%s", in);
+
+		/*
+		 * NOTE(review): the figure printed is the new size as a
+		 * percentage of the old size (or the inverse on expansion),
+		 * not the amount saved -- confirm this is what is wanted.
+		 */
+		if (verbose) {
+			(void)printf("%s: ", out);
+			if (isb.st_size > sb.st_size)
+				(void)printf("%.0f%% compression\n",
+				    ((float)sb.st_size / isb.st_size) * 100.0);
+			else
+				(void)printf("%.0f%% expansion\n",
+				    ((float)isb.st_size / sb.st_size) * 100.0);
+		}
+	}
+	return;
+
+	/* Only streams that are still open reach here with a non-NULL pointer. */
+err:	if (ofp) {
+		if (oreg)
+			(void)unlink(out);
+		(void)fclose(ofp);
+	}
+	if (ifp)
+		(void)fclose(ifp);
+}

+/*
+ * decompress --
+ *	Read the file `in' through a zopen(3) read stream and write the
+ *	data to `out'.  `bits' is passed through to zopen().
+ *
+ *	If `out' already exists as a regular file and -f was not given, the
+ *	user is asked via permission() and nothing is done on refusal.
+ *	When both files are regular (or `out' did not exist), the input's
+ *	attributes are copied to the output with setfile() and the input is
+ *	removed.  Failures are reported through cwarn().
+ */
+void
+decompress(in, out, bits)
+	char *in, *out;
+	int bits;
+{
+	register int nr;
+	struct stat sb;
+	FILE *ifp, *ofp;
+	int exists, isreg, oreg;
+	u_char buf[1024];
+
+	exists = !stat(out, &sb);
+	if (!force && exists && S_ISREG(sb.st_mode) && !permission(out))
+		return;
+	/* oreg: the output may be unlinked on error; isreg: do attribute work. */
+	isreg = oreg = !exists || S_ISREG(sb.st_mode);
+
+	ifp = ofp = NULL;
+	/*
+	 * Open and stat the input before touching the output.  Opening the
+	 * output with "w" truncates it, and the error path unlinks it, so
+	 * doing that first would destroy an existing output file whenever
+	 * the input turned out to be missing or unreadable.
+	 */
+	if ((ifp = zopen(in, "r", bits)) == NULL) {
+		cwarn("%s", in);
+		return;
+	}
+	if (stat(in, &sb)) {		/* sb now describes the input */
+		cwarn("%s", in);
+		goto err;
+	}
+	if (!S_ISREG(sb.st_mode))
+		isreg = 0;
+
+	if ((ofp = fopen(out, "w")) == NULL) {
+		cwarn("%s", out);
+		goto err;
+	}
+
+	while ((nr = fread(buf, 1, sizeof(buf), ifp)) != 0)
+		if (fwrite(buf, 1, nr, ofp) != nr) {
+			cwarn("%s", out);
+			goto err;
+		}
+
+	if (ferror(ifp)) {
+		cwarn("%s", in);
+		goto err;
+	}
+	/*
+	 * A stream must not be used again after fclose(), even when the
+	 * call fails, so forget the pointer before jumping to the cleanup
+	 * code; otherwise it would be closed a second time.
+	 */
+	if (fclose(ifp)) {
+		cwarn("%s", in);
+		ifp = NULL;
+		goto err;
+	}
+	ifp = NULL;
+
+	if (fclose(ofp)) {
+		cwarn("%s", out);
+		ofp = NULL;
+		/* Same cleanup the error path performs for an open output. */
+		if (oreg)
+			(void)unlink(out);
+		goto err;
+	}
+	ofp = NULL;
+
+	if (isreg) {
+		setfile(out, &sb);
+
+		if (unlink(in))
+			cwarn("%s", in);
+	}
+	return;
+
+	/* Only streams that are still open reach here with a non-NULL pointer. */
+err:	if (ofp) {
+		if (oreg)
+			(void)unlink(out);
+		(void)fclose(ofp);
+	}
+	if (ifp)
+		(void)fclose(ifp);
+}

+/*
+ * setfile --
+ *	Apply the access/modification times, owner, group, permission bits
+ *	and file flags recorded in `fs' to the file `name'.
+ *
+ *	Each step is attempted even if an earlier one fails; failures are
+ *	reported through cwarn().  Note that fs->st_mode is modified in
+ *	place (masked to the permission/setid bits, and stripped of the
+ *	setuid/setgid bits if chown fails).
+ */
+void
+setfile(name, fs)
+	char *name;
+	register struct stat *fs;
+{
+	static struct timeval tv[2];
+
+	fs->st_mode &= S_ISUID|S_ISGID|S_IRWXU|S_IRWXG|S_IRWXO;
+
+	TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atimespec);
+	TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtimespec);
+	if (utimes(name, tv))
+		cwarn("utimes: %s", name);
+
+	/*
+	 * Changing the ownership probably won't succeed, unless we're root
+	 * or POSIX_CHOWN_RESTRICTED is not set.  Set uid/gid before setting
+	 * the mode; current BSD behavior is to remove all setuid bits on
+	 * chown.  If chown fails, lose setuid/setgid bits.
+	 */
+	if (chown(name, fs->st_uid, fs->st_gid)) {
+		/* EPERM is the expected failure for non-root; stay quiet. */
+		if (errno != EPERM)
+			cwarn("chown: %s", name);
+		fs->st_mode &= ~(S_ISUID|S_ISGID);
+	}
+	/* Report the call that actually failed (this used to say "chown"). */
+	if (chmod(name, fs->st_mode))
+		cwarn("chmod: %s", name);
+
+	if (chflags(name, fs->st_flags))
+		cwarn("chflags: %s", name);
+}

+/*
+ * permission --
+ *	Ask on standard error whether `fname' may be overwritten and read
+ *	one line of reply from standard input.  Returns 1 only when the
+ *	first character of the reply is 'y'; returns 0 otherwise, and also
+ *	when standard error is not a terminal (no prompt is issued then).
+ */
+int
+permission(fname)
+	char *fname;
+{
+	int answer, c;
+
+	if (isatty(fileno(stderr)) == 0)
+		return (0);
+	(void)fprintf(stderr, "overwrite %s? ", fname);
+
+	/* Remember the first character, then discard the rest of the line. */
+	answer = getchar();
+	for (c = answer; c != EOF && c != '\n'; c = getchar())
+		continue;
+	return (answer == 'y' ? 1 : 0);
+}

+/*
+ * usage --
+ *	Print the synopsis for compress (iscompress != 0) or uncompress
+ *	(iscompress == 0) on standard error and exit with status 1.
+ */
+void
+usage(iscompress)
+	int iscompress;
+{
+	if (iscompress)
+		(void)fprintf(stderr,
+		    "usage: compress [-cfv] [-b bits] [file ...]\n");
+	else
+		/* -f and -v are accepted in both modes, so list them here too. */
+		(void)fprintf(stderr,
+		    "usage: uncompress [-cfv] [-b bits] [file ...]\n");
+	exit(1);
+}

+/*
+ * cwarnx --
+ *	Print a formatted warning with vwarnx() (no errno text) and record
+ *	the failure by setting the global exit status `eval' to 1.
+ *
+ *	The prototype/varargs selection uses #ifdef __STDC__, the same test
+ *	that chooses between <stdarg.h> and <varargs.h> at the top of the
+ *	file.  The pre-ANSI branch must not declare a parameter named
+ *	`eval': it is not in the identifier list and would hide the global.
+ */
+void
+#ifdef __STDC__
+cwarnx(const char *fmt, ...)
+#else
+cwarnx(fmt, va_alist)
+	const char *fmt;
+	va_dcl
+#endif
+{
+	va_list ap;
+#ifdef __STDC__
+	va_start(ap, fmt);
+#else
+	va_start(ap);
+#endif
+	vwarnx(fmt, ap);
+	va_end(ap);
+	eval = 1;
+}

+/*
+ * cwarn --
+ *	Print a formatted warning with vwarn() (which appends the errno
+ *	text) and record the failure by setting the global exit status
+ *	`eval' to 1.
+ *
+ *	The prototype/varargs selection uses #ifdef __STDC__, the same test
+ *	that chooses between <stdarg.h> and <varargs.h> at the top of the
+ *	file.  The pre-ANSI branch must not declare a parameter named
+ *	`eval': it is not in the identifier list and would hide the global.
+ */
+void
+#ifdef __STDC__
+cwarn(const char *fmt, ...)
+#else
+cwarn(fmt, va_alist)
+	const char *fmt;
+	va_dcl
+#endif
+{
+	va_list ap;
+#ifdef __STDC__
+	va_start(ap, fmt);
+#else
+	va_start(ap);
+#endif
+	vwarn(fmt, ap);
+	va_end(ap);
+	eval = 1;
+}

diff --git a/usr.bin/compress/doc/NOTES b/usr.bin/compress/doc/NOTES
new file mode 100644
index 000000000000..7c28c9c22b4f
--- /dev/null
+++ b/usr.bin/compress/doc/NOTES

@@ -0,0 +1,139 @@

+From: James A. Woods <jaw@eos.arc.nasa.gov>

+>From vn Fri Dec 2 18:05:27 1988

+Subject: Re: Looking for C source for RSA

+Newsgroups: sci.crypt

+# Illegitimi noncarborundum

+Patents are a tar pit.

+A good case can be made that most are just a license to sue, and nothing

+is illegal until a patent is upheld in court.

+For example, if you receive netnews by means other than 'nntp',

+these very words are being modulated by 'compress',

+a variation on the patented Lempel-Ziv-Welch algorithm.

+Original Ziv-Lempel is patent number 4,464,650, and the more powerful

+LZW method is #4,558,302. Yet despite any similarities between 'compress'

+and LZW (the public-domain 'compress' code was designed and given to the

+world before the ink on the Welch patent was dry), no attorneys from Sperry

+(the assignee) have asked you to unplug your Usenet connection.

+Why? I can't speak for them, but it is possible the claims are too broad,

+or, just as bad, not broad enough. ('compress' does things not mentioned

+in the Welch patent.) Maybe they realize that they can commercialize

+LZW better by selling hardware implementations rather than by licensing

+software. Again, the LZW software delineated in the patent is *not*

+the same as that of 'compress'.

+At any rate, court-tested software patents are a different animal;

+corporate patents in a portfolio are usually traded like baseball cards

+to shut out small fry rather than actually be defended before

+non-technical juries. Perhaps RSA will undergo this test successfully,

+although the grant to "exclude others from making, using, or selling"

+the invention would then only apply to the U.S. (witness the

+Genentech patent of the TPA molecule in the U.S. but struck down

+in Great Britain as too broad.)

+The concept is still exotic for those who learned in school the rule of thumb

+that one may patent "apparatus" but not an "idea".

+Apparently this all changed in Diamond v. Diehr (1981) when the U. S. Supreme

+Court reversed itself.

+Scholars should consult the excellent article in the Washington and Lee

+Law Review (fall 1984, vol. 41, no. 4) by Anthony and Colwell for a

+comprehensive survey of an area which will remain murky for some time.

+Until the dust clears, how you approach ideas which are patented depends

+on how paranoid you are of a legal onslaught. Arbitrary? Yes. But

+the patent bar and the CCPA (Court of Customs and Patent Appeals)

+thanks you for any uncertainty as they, at least, stand to gain

+from any trouble.

+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

+From: James A. Woods <jaw@eos.arc.nasa.gov>

+Subject: Re: Looking for C source for RSA (actually 'compress' patents)

+ In article <2042@eos.UUCP> you write:

+ >The concept is still exotic for those who learned in school the rule of thumb

+ >that one may patent "apparatus" but not an "idea".

+A rule of thumb that has never been completely valid, as any chemical

+engineer can tell you. (Chemical processes were among the earliest patents,

+as I recall.)

+ ah yes -- i date myself when relaying out-of-date advice from elderly

+ attorneys who don't even specialize in patents. one other interesting

+ class of patents include the output of optical lens design programs,

+ which yield formulae which can then fairly directly be molded

+ into glass. although there are restrictions on patenting equations,

+ the "embedded systems" seem to fly past the legal gauntlets.

+ anyway, i'm still learning about intellectual property law after

+ several conversations from a unisys (nee sperry) lawyer re 'compress'.

+ it's more complicated than this, but they're letting (oral

+ communication only) software versions of 'compress' slide

+ as far as licensing fees go. this includes 'arc', 'stuffit',

+ and other commercial wrappers for 'compress'. yet they are

+ signing up licensees for hardware chips. hewlett-packard

+ supposedly has an active vlsi project, and unisys has

+ board-level lzw-based tape controllers. (to build lzw into

+ a disk controller would be strange, as you'd have to build

+ in a filesystem too!)

+ it's byzantine

+ that unisys is in a tiff with hp regarding the patents,

+ after discovering some sort of "compress" button on some

+ hp terminal product. why? well, professor abraham lempel jumped

+ from being department chairman of computer science at technion in

+ israel to sperry (where he got the first patent), but then to work

+ at hewlett-packard on sabbatical. the second welch patent

+ is only weakly derivative of the first, so they want chip

+ licenses and hp relented. however, everyone agrees something

+ like the current unix implementation is the way to go with

+ software, so hp (and ucb) long ago asked spencer thomas and i to sign

+ off on copyright permission (although they didn't need to, it being pd).

+ lempel, hp, and unisys grumbles they can't make money off the

+ software since a good free implementation (not the best --

+ i have more ideas!) escaped via usenet. (lempel's own pascal

+ code was apparently horribly slow.)

+ i don't follow the ibm 'arc' legal bickering; my impression

+ is that the pc folks are making money off the archiver/wrapper

+ look/feel of the thing [if ms-dos can be said to have a look and feel].

+ now where is telebit with the compress firmware? in a limbo

+ netherworld, probably, with sperry still welcoming outfits

+ to sign patent licenses, a common tactic to bring other small fry

+ into the fold. the guy who crammed 12-bit compress into the modem

+ there left. also what is transpiring with 'compress' and sys 5 rel 4?

+ beats me, but if sperry got a hold of them on these issues,

+ at&t would likely re-implement another algorithm if they

+ thought 'compress' infringes. needful to say, i don't think

+ it does after the abovementioned legal conversation.

+ my own beliefs on whether algorithms should be patentable at all

+ change with the weather. if the courts finally nail down

+ patent protection for algorithms, academic publication in

+ textbooks will be somewhat at odds with the engineering world,

+ where the textbook codes will simply be a big tease to get

+ money into the patent holder coffers...

+ oh, if you implement lzw from the patent, you won't get

+ good rates because it doesn't mention adaptive table reset,

+ lack thereof being *the* serious deficiency of thomas' first version.

+ now i know that patent law generally protects against independent

+ re-invention (like the 'xor' hash function pleasantly mentioned

+ in the patent [but not the paper]).

+ but the upshot is that if anyone ever wanted to sue us,

+ we're partially covered with

+ independently-developed twists, plus the fact that some of us work

+ in a bureaucratic morass (as contractor to a public agency in my case).

+ quite a mess, huh? i've wanted to tell someone this stuff

+ for a long time, for posterity if nothing else.

+james

diff --git a/usr.bin/compress/doc/README b/usr.bin/compress/doc/README
new file mode 100644
index 000000000000..6803287c207a
--- /dev/null
+++ b/usr.bin/compress/doc/README

@@ -0,0 +1,283 @@

+ @(#)README 8.1 (Berkeley) 6/9/93

+Compress version 4.0 improvements over 3.0:

+ o compress() speedup (10-50%) by changing division hash to xor

+ o decompress() speedup (5-10%)

+ o Memory requirements reduced (3-30%)

+ o Stack requirements reduced to less than 4kb

+ o Removed 'Big+Fast' compress code (FBITS) because of compress speedup

+ o Portability mods for Z8000 and PC/XT (but not zeus 3.2)

+ o Default to 'quiet' mode

+ o Unification of 'force' flags

+ o Manual page overhaul

+ o Portability enhancement for M_XENIX

+ o Removed text on #else and #endif

+ o Added "-V" switch to print version and options

+ o Added #defines for SIGNED_COMPARE_SLOW

+ o Added Makefile and "usermem" program

+ o Removed all floating point computations

+ o New programs: [deleted]

+The "usermem" script attempts to determine the maximum process size. Some

+editing of the script may be necessary (see the comments). [It should work

+fine on 4.3 bsd.] If you can't get it to work at all, just create file

+"USERMEM" containing the maximum process size in decimal.

+The following preprocessor symbols control the compilation of "compress.c":

+ o USERMEM Maximum process memory on the system

+ o SACREDMEM Amount to reserve for other processes

+ o SIGNED_COMPARE_SLOW Unsigned compare instructions are faster

+ o NO_UCHAR Don't use "unsigned char" types

+ o BITS Overrules default set by USERMEM-SACREDMEM

+ o vax Generate inline assembler

+ o interdata Defines SIGNED_COMPARE_SLOW

+ o M_XENIX Makes arrays < 65536 bytes each

+ o pdp11 BITS=12, NO_UCHAR

+ o z8000 BITS=12

+ o pcxt BITS=12

+ o BSD4_2 Allow long filenames ( > 14 characters) &

+ Call setlinebuf(stderr)

+The difference "usermem-sacredmem" determines the maximum BITS that can be

+specified with the "-b" flag.

+memory: at least BITS

+------ -- ----- ----

+ 433,484 16

+ 229,600 15

+ 127,536 14

+ 73,464 13

+ 0 12

+The default is BITS=16.

+The maximum bits can be overruled by specifying "-DBITS=bits" at

+compilation time.

+WARNING: files compressed on a large machine with more bits than allowed by

+a version of compress on a smaller machine cannot be decompressed! Use the

+"-b12" flag to generate a file on a large machine that can be uncompressed

+on a 16-bit machine.

+The output of compress 4.0 is fully compatible with that of compress 3.0.

+In other words, the output of compress 4.0 may be fed into uncompress 3.0 or

+the output of compress 3.0 may be fed into uncompress 4.0.

+The output of compress 4.0 is not compatible with that of

+compress 2.0. However, compress 4.0 still accepts the output of

+compress 2.0. To generate output that is compatible with compress

+2.0, use the undocumented "-C" flag.

+ -from mod.sources, submitted by vax135!petsd!joe (Joe Orost), 8/1/85

+--------------------------------

+Enclosed is compress version 3.0 with the following changes:

+1. "Block" compression is performed. After the BITS run out, the

+ compression ratio is checked every so often. If it is decreasing,

+ the table is cleared and a new set of substrings are generated.

+ This makes the output of compress 3.0 not compatible with that of

+ compress 2.0. However, compress 3.0 still accepts the output of

+ compress 2.0. To generate output that is compatible with compress

+ 2.0, use the undocumented "-C" flag.

+2. A quiet "-q" flag has been added for use by the news system.

+3. The character chaining has been deleted and the program now uses

+ hashing. This improves the speed of the program, especially

+ during decompression. Other speed improvements have been made,

+ such as using putc() instead of fwrite().

+4. A large table is used on large machines when a relatively small

+ number of bits is specified. This saves much time when compressing

+ for a 16-bit machine on a 32-bit virtual machine. Note that the

+ speed improvement only occurs when the input file is > 30000

+ characters, and the -b BITS is less than or equal to the cutoff

+ described below.

+Most of these changes were made by James A. Woods (ames!jaw). Thank you

+James!

+To compile compress:

+ cc -O -DUSERMEM=usermem -o compress compress.c

+Where "usermem" is the amount of physical user memory available (in bytes).

+If any physical memory is to be reserved for other processes, put in

+"-DSACREDMEM=sacredmem", where "sacredmem" is the amount to be reserved.

+The difference "usermem-sacredmem" determines the maximum BITS that can be

+specified, and the cutoff bits where the large+fast table is used.

+memory: at least BITS cutoff

+------ -- ----- ---- ------

+ 4,718,592 16 13

+ 2,621,440 16 12

+ 1,572,864 16 11

+ 1,048,576 16 10

+ 631,808 16 --

+ 329,728 15 --

+ 178,176 14 --

+ 99,328 13 --

+ 0 12 --

+The default memory size is 750,000 which gives a maximum BITS=16 and no

+large+fast table.

+The maximum bits can be overruled by specifying "-DBITS=bits" at

+compilation time.

+If your machine doesn't support unsigned characters, define "NO_UCHAR"

+when compiling.

+If your machine has "int" as 16-bits, define "SHORT_INT" when compiling.

+After compilation, move "compress" to a standard executable location, such

+as /usr/local. Then:

+ cd /usr/local

+ ln compress uncompress

+ ln compress zcat

+On machines that have a fixed stack size (such as Perkin-Elmer), set the

+stack to at least 12kb. ("setstack compress 12" on Perkin-Elmer).

+Next, install the manual (compress.l).

+ cp compress.l /usr/man/manl

+ cd /usr/man/manl

+ ln compress.l uncompress.l

+ ln compress.l zcat.l

+ - or -

+ cp compress.l /usr/man/man1/compress.1

+ cd /usr/man/man1

+ ln compress.1 uncompress.1

+ ln compress.1 zcat.1

+ regards,

+ petsd!joe

+Here is a note from the net:

+>From hplabs!pesnta!amd!turtlevax!ken Sat Jan 5 03:35:20 1985

+Path: ames!hplabs!pesnta!amd!turtlevax!ken

+From: ken@turtlevax.UUCP (Ken Turkowski)

+Newsgroups: net.sources

+Subject: Re: Compress release 3.0 : sample Makefile

+Organization: CADLINC, Inc. @ Menlo Park, CA

+In the compress 3.0 source recently posted to mod.sources, there is a

+#define variable which can be set for optimum performance on a machine

+with a large amount of memory. A program (usermem) to calculate the

+useable amount of physical user memory is enclosed, as well as a sample

+4.2bsd Vax Makefile for compress.

+Here is the README file from the previous version of compress (2.0):

+>Enclosed is compress.c version 2.0 with the following bugs fixed:

+>1. The packed files produced by compress are different on different

+> machines and dependent on the vax sysgen option.

+> The bug was in the different byte/bit ordering on the

+> various machines. This has been fixed.

+> This version is NOT compatible with the original vax posting

+> unless the '-DCOMPATIBLE' option is specified to the C

+> compiler. The original posting has a bug which I fixed,

+> causing incompatible files. I recommend you NOT to use this

+> option unless you already have a lot of packed files from

+> the original posting by thomas.

+>2. The exit status is not well defined (on some machines) causing the

+> scripts to fail.

+> The exit status is now 0,1 or 2 and is documented in

+> compress.l.

+>3. The function getopt() is not available in all C libraries.

+> The function getopt() is no longer referenced by the

+> program.

+>4. Error status is not being checked on the fwrite() and fflush() calls.

+> Fixed.

+>The following enhancements have been made:

+>1. Added facilities of "compact" into the compress program. "Pack",

+> "Unpack", and "Pcat" are no longer required (no longer supplied).

+>2. Installed work around for C compiler bug with "-O".

+>3. Added a magic number header (\037\235). Put the bits specified

+> in the file.

+>4. Added "-f" flag to force overwrite of output file.

+>5. Added "-c" flag and "zcat" program. 'ln compress zcat' after you

+> compile.

+>6. The 'uncompress' script has been deleted; simply

+> 'ln compress uncompress' after you compile and it will work.

+>7. Removed extra bit masking for machines that support unsigned

+> characters. If your machine doesn't support unsigned characters,

+> define "NO_UCHAR" when compiling.

+>Compile "compress.c" with "-O -o compress" flags. Move "compress" to a

+>standard executable location, such as /usr/local. Then:

+> cd /usr/local

+> ln compress uncompress

+> ln compress zcat

+>On machines that have a fixed stack size (such as Perkin-Elmer), set the

+>stack to at least 12kb. ("setstack compress 12" on Perkin-Elmer).

+>Next, install the manual (compress.l).

+> cp compress.l /usr/man/manl - or -

+> cp compress.l /usr/man/man1/compress.1

+>Here is the README that I sent with my first posting:

+>>Enclosed is a modified version of compress.c, along with scripts to make it

+>>run identically to pack(1), unpack(1), and pcat(1). Here is what I

+>>(petsd!joe) and a colleague (petsd!peora!srd) did:

+>>

+>>1. Removed VAX dependencies.

+>>2. Changed the struct to separate arrays; saves mucho memory.

+>>3. Did comparisons in unsigned, where possible. (Faster on Perkin-Elmer.)

+>>4. Sorted the character next chain and changed the search to stop

+>>prematurely. This saves a lot on the execution time when compressing.

+>>

+>>This version is totally compatible with the original version. Even though

+>>lint(1) -p has no complaints about compress.c, it won't run on a 16-bit

+>>machine, due to the size of the arrays.

+>>

+>>Here is the README file from the original author:

+>>

+>>>Well, with all this discussion about file compression (for news batching

+>>>in particular) going around, I decided to implement the text compression

+>>>algorithm described in the June Computer magazine. The author claimed

+>>>blinding speed and good compression ratios. It's certainly faster than

+>>>compact (but, then, what wouldn't be), but it's also the same speed as

+>>>pack, and gets better compression than both of them. On 350K bytes of

+>>>unix-wizards, compact took about 8 minutes of CPU, pack took about 80

+>>>seconds, and compress (herein) also took 80 seconds. But, compact and

+>>>pack got about 30% compression, whereas compress got over 50%. So, I

+>>>decided I had something, and that others might be interested, too.

+>>>

+>>>As is probably true of compact and pack (although I haven't checked),

+>>>the byte order within a word is probably relevant here, but as long as

+>>>you stay on a single machine type, you should be ok. (Can anybody

+>>>elucidate on this?) There are a couple of asm's in the code (extv and

+>>>insv instructions), so anyone porting it to another machine will have to

+>>>deal with this anyway (and could probably make it compatible with Vax

+>>>byte order at the same time). Anyway, I've linted the code (both with

+>>>and without -p), so it should run elsewhere. Note the longs in the

+>>>code, you can take these out if you reduce BITS to <= 15.

+>>>

+>>>Have fun, and as always, if you make good enhancements, or bug fixes,

+>>>I'd like to see them.

+>>>

+>>>=Spencer (thomas@utah-20, {harpo,hplabs,arizona}!utah-cs!thomas)

+>>

+>> regards,

+>> joe

+>>

+>>--

+>>Full-Name: Joseph M. Orost

+>>UUCP: ..!{decvax,ucbvax,ihnp4}!vax135!petsd!joe

+>>US Mail: MS 313; Perkin-Elmer; 106 Apple St; Tinton Falls, NJ 07724

+>>Phone: (201) 870-5844

diff --git a/usr.bin/compress/doc/revision.log b/usr.bin/compress/doc/revision.log
new file mode 100644
index 000000000000..b1d8b24cc4f0
--- /dev/null
+++ b/usr.bin/compress/doc/revision.log

@@ -0,0 +1,116 @@

+/*

+ * $Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $

+ * $Log: compress.c,v $

+ * Revision 4.0 85/07/30 12:50:00 joe

+ * Removed ferror() calls in output routine on every output except first.

+ * Prepared for release to the world.

+ *

+ * Revision 3.6 85/07/04 01:22:21 joe

+ * Remove much wasted storage by overlaying hash table with the tables

+ * used by decompress: tab_suffix[1<<BITS], stack[8000]. Updated USERMEM

+ * computations. Fixed dump_tab() DEBUG routine.

+ *

+ * Revision 3.5 85/06/30 20:47:21 jaw

+ * Change hash function to use exclusive-or. Rip out hash cache. These

+ * speedups render the megamemory version defunct, for now. Make decoder

+ * stack global. Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply.

+ *

+ * Revision 3.4 85/06/27 12:00:00 ken

+ * Get rid of all floating-point calculations by doing all compression ratio

+ * calculations in fixed point.

+ *

+ * Revision 3.3 85/06/24 21:53:24 joe

+ * Incorporate portability suggestion for M_XENIX. Got rid of text on #else

+ * and #endif lines. Cleaned up #ifdefs for vax and interdata.

+ *

+ * Revision 3.2 85/06/06 21:53:24 jaw

+ * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.

+ * Default to "quiet" output (no compression statistics).

+ *

+ * Revision 3.1 85/05/12 18:56:13 jaw

+ * Integrate decompress() stack speedups (from early pointer mods by McKie).

+ * Repair multi-file USERMEM gaffe. Unify 'force' flags to mimic semantics

+ * of SVR2 'pack'. Streamline block-compress table clear logic. Increase

+ * output byte count by magic number size.

+ *

+ * Revision 3.0 84/11/27 11:50:00 petsd!joe

+ * Set HSIZE depending on BITS. Set BITS depending on USERMEM. Unrolled

+ * loops in clear routines. Added "-C" flag for 2.0 compatibility. Used

+ * unsigned compares on Perkin-Elmer. Fixed foreground check.

+ *

+ * Revision 2.7 84/11/16 19:35:39 ames!jaw

+ * Cache common hash codes based on input statistics; this improves

+ * performance for low-density raster images. Pass on #ifdef bundle

+ * from Turkowski.

+ *

+ * Revision 2.6 84/11/05 19:18:21 ames!jaw

+ * Vary size of hash tables to reduce time for small files.

+ * Tune PDP-11 hash function.

+ *

+ * Revision 2.5 84/10/30 20:15:14 ames!jaw

+ * Junk chaining; replace with the simpler (and, on the VAX, faster)

+ * double hashing, discussed within. Make block compression standard.

+ *

+ * Revision 2.4 84/10/16 11:11:11 ames!jaw

+ * Introduce adaptive reset for block compression, to boost the rate

+ * another several percent. (See mailing list notes.)

+ *

+ * Revision 2.3 84/09/22 22:00:00 petsd!joe

+ * Implemented "-B" block compress. Implemented REVERSE sorting of tab_next.

+ * Bug fix for last bits. Changed fwrite to putchar loop everywhere.

+ *

+ * Revision 2.2 84/09/18 14:12:21 ames!jaw

+ * Fold in news changes, small machine typedef from thomas,

+ * #ifdef interdata from joe.

+ *

+ * Revision 2.1 84/09/10 12:34:56 ames!jaw

+ * Configured fast table lookup for 32-bit machines.

+ * This cuts user time in half for b <= FBITS, and is useful for news batching

+ * from VAX to PDP sites. Also sped up decompress() [fwrite->putc] and

+ * added signal catcher [plus beef in writeerr()] to delete effluvia.

+ *

+ * Revision 2.0 84/08/28 22:00:00 petsd!joe

+ * Add check for foreground before prompting user. Insert maxbits into

+ * compressed file. Force file being uncompressed to end with ".Z".

+ * Added "-c" flag and "zcat". Prepared for release.

+ *

+ * Revision 1.10 84/08/24 18:28:00 turtlevax!ken

+ * Will only compress regular files (no directories), added a magic number

+ * header (plus an undocumented -n flag to handle old files without headers),

+ * added -f flag to force overwriting of possibly existing destination file,

+ * otherwise the user is prompted for a response. Will tack on a .Z to a

+ * filename if it doesn't have one when decompressing. Will only replace

+ * file if it was compressed.

+ *

+ * Revision 1.9 84/08/16 17:28:00 turtlevax!ken

+ * Removed scanargs(), getopt(), added .Z extension and unlimited number of

+ * filenames to compress. Flags may be clustered (-Ddvb12) or separated

+ * (-D -d -v -b 12), or combination thereof. Modes and other status is

+ * copied with copystat(). -O bug for 4.2 seems to have disappeared with

+ * 1.8.

+ *

+ * Revision 1.8 84/08/09 23:15:00 joe

+ * Made it compatible with vax version, installed jim's fixes/enhancements

+ *

+ * Revision 1.6 84/08/01 22:08:00 joe

+ * Sped up algorithm significantly by sorting the compress chain.

+ *

+ * Revision 1.5 84/07/13 13:11:00 srd

+ * Added C version of vax asm routines. Changed structure to arrays to

+ * save much memory. Do unsigned compares where possible (faster on

+ * Perkin-Elmer)

+ *

+ * Revision 1.4 84/07/05 03:11:11 thomas

+ * Clean up the code a little and lint it. (Lint complains about all

+ * the regs used in the asm, but I'm not going to "fix" this.)

+ *

+ * Revision 1.3 84/07/05 02:06:54 thomas

+ * Minor fixes.

+ *

+ * Revision 1.2 84/07/05 00:27:27 thomas

+ * Add variable bit length output.

+ *

+ */

+static char rcs_ident[] =

+ "$Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $";

diff --git a/usr.bin/compress/zcat.sh b/usr.bin/compress/zcat.sh
new file mode 100644
index 000000000000..c4931e486bb0
--- /dev/null
+++ b/usr.bin/compress/zcat.sh

@@ -0,0 +1,37 @@

+#!/bin/sh -

+# Redistribution and use in source and binary forms, with or without

+# modification, are permitted provided that the following conditions

+# are met:

+# 1. Redistributions of source code must retain the above copyright

+# notice, this list of conditions and the following disclaimer.

+# 2. Redistributions in binary form must reproduce the above copyright

+# notice, this list of conditions and the following disclaimer in the

+# documentation and/or other materials provided with the distribution.

+# 3. All advertising materials mentioning features or use of this software

+# must display the following acknowledgement:

+# This product includes software developed by the University of

+# California, Berkeley and its contributors.

+# 4. Neither the name of the University nor the names of its contributors

+# may be used to endorse or promote products derived from this software

+# without specific prior written permission.

+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+# SUCH DAMAGE.

+# @(#)zcat.sh 8.1 (Berkeley) 6/6/93

+uncompress -c "$@"	# "$@" forwards every argument as one word; unquoted $* re-split names with spaces or glob characters

diff --git a/usr.bin/compress/zopen.3 b/usr.bin/compress/zopen.3
new file mode 100644
index 000000000000..853462f5057a
--- /dev/null
+++ b/usr.bin/compress/zopen.3