1 files changed, 474 insertions, 0 deletions
diff --git a/usr.bin/gzip/unxz.c b/usr.bin/gzip/unxz.c
new file mode 100644
index 000000000000..de6683b246b8
--- /dev/null
+++ b/usr.bin/gzip/unxz.c
@@ -0,0 +1,474 @@
+/*	$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $	*/
+
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <lzma.h>
+
+/*
+ * Decode the .xz data read from i and write it to o.  The prelen bytes at
+ * pre are input the caller already consumed: they are decoded first and must
+ * fit in ibuf.  If bytes_in is not NULL it gets the input byte count, prelen
+ * included.  Returns bytes written; errors go to maybe_err()/maybe_errx().
+ */
+static off_t
+unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+	static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
+	lzma_ret ret;
+	lzma_action action = LZMA_RUN;
+	off_t bytes_out = 0, bp;
+	uint8_t ibuf[BUFSIZ], obuf[BUFSIZ];
+
+	if (bytes_in == NULL)
+		bytes_in = &bp;
+
+	strm.next_in = ibuf;
+	memcpy(ibuf, pre, prelen);
+	strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
+	if (strm.avail_in == (size_t)-1)
+		maybe_err("read failed");
+	infile_newdata(strm.avail_in);
+	strm.avail_in += prelen;
+	*bytes_in = strm.avail_in;
+
+	if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
+		maybe_errx("Can't initialize decoder (%d)", ret);
+
+	strm.next_out = NULL;
+	strm.avail_out = 0;
+	if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
+		maybe_errx("Can't read headers (%d)", ret);
+
+	strm.next_out = obuf;
+	strm.avail_out = sizeof(obuf);
+	for (;;) {
+		check_siginfo();
+		if (strm.avail_in == 0) {
+			strm.next_in = ibuf;
+			strm.avail_in = read(i, ibuf, sizeof(ibuf));
+			switch (strm.avail_in) {
+			case (size_t)-1:
+				maybe_err("read failed");
+				/*NOTREACHED*/
+			case 0:
+				action = LZMA_FINISH;
+				break;
+			default:
+				infile_newdata(strm.avail_in);
+				*bytes_in += strm.avail_in;
+				break;
+			}
+		}
+		ret = lzma_code(&strm, action);
+
+		// Write before checking the decoder status so that as much
+		// data as possible reaches the output even on a decode error.
+		// A short write(2) is not a failure: loop until obuf is out.
+		if (strm.avail_out == 0 || ret != LZMA_OK) {
+			const size_t write_size = sizeof(obuf) - strm.avail_out;
+			size_t done = 0;
+
+			while (done < write_size) {
+				const ssize_t w = write(o, obuf + done,
+				    write_size - done);
+				if (w <= 0)
+					maybe_err("write failed");
+				done += (size_t)w;
+			}
+			strm.next_out = obuf;
+			strm.avail_out = sizeof(obuf);
+			bytes_out += write_size;
+		}
+		if (ret == LZMA_OK)
+			continue;
+
+		if (ret == LZMA_STREAM_END) {
+			// Check that there's no trailing garbage.
+			if (strm.avail_in != 0 || read(i, ibuf, 1))
+				ret = LZMA_DATA_ERROR;
+			else {
+				lzma_end(&strm);
+				return bytes_out;
+			}
+		}
+
+		const char *msg = "Unknown error";
+		switch (ret) {
+		case LZMA_MEM_ERROR:
+			msg = strerror(ENOMEM);
+			break;
+		case LZMA_FORMAT_ERROR:
+			msg = "File format not recognized";
+			break;
+		case LZMA_OPTIONS_ERROR:
+			// FIXME: Better message?
+			msg = "Unsupported compression options";
+			break;
+		case LZMA_DATA_ERROR:
+			msg = "File is corrupt";
+			break;
+		case LZMA_BUF_ERROR:
+			msg = "Unexpected end of input";
+			break;
+		case LZMA_MEMLIMIT_ERROR:
+			msg = "Reached memory limit";
+			break;
+		default:
+			maybe_errx("Unknown error (%d)", ret);
+			break;
+		}
+		maybe_errx("%s", msg);
+	}
+}
+
+#include <stdbool.h>
+
+/*
+ * Bits and pieces copied from the xz support code, or brute-force
+ * replacements for them.
+ */
+// my_min() is a macro: each argument may be evaluated more than once.
+#define	my_min(A,B)	((A)<(B)?(A):(B))
+
+// Some systems have a small BUFSIZ (<= 1024); use 8192 bytes on them.
+// Otherwise round BUFSIZ down to a multiple of 8 (sizeof(uint64_t)).
+#if BUFSIZ <= 1024
+#       define IO_BUFFER_SIZE 8192
+#else
+#       define IO_BUFFER_SIZE (BUFSIZ & ~7U)
+#endif
+
+/// The union aligns the byte buffer for word access. Here parse_indexes()
+/// reads it via u8 and u32; u64 (for upstream's is_sparse()) goes unused.
+typedef union {
+        uint8_t u8[IO_BUFFER_SIZE];
+        uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
+        uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
+} io_buf;
+
+
+/*
+ * Read exactly size bytes from offset pos of fd into buf.
+ * Returns false on success and true on a seek failure, a read error,
+ * or a short read.
+ */
+static bool
+io_pread(int fd, io_buf *buf, size_t size, off_t pos)
+{
+	ssize_t got;
+
+	// lseek() followed by read() is used instead of pread() for
+	// portability; the file offset is therefore left modified.
+	if (lseek(fd, pos, SEEK_SET) != pos)
+		return true;
+
+	// Anything other than a complete read is a failure for the caller.
+	got = read(fd, buf, size);
+	return got < 0 || (size_t)got != size;
+}
+
+/*
+ * Most of the following is copied (mostly verbatim) from the xz
+ * distribution, from file src/xz/list.c
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       list.c
+/// \brief      Listing information about .xz files
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// Information about a .xz file, as filled in by parse_indexes()
+typedef struct {
+	/// Combined Index of all Streams in the file (set on success)
+	lzma_index *idx;
+
+	/// Total amount of Stream Padding, in bytes
+	uint64_t stream_padding;
+
+	/// Highest memory usage so far (not updated by the code in view)
+	uint64_t memusage_max;
+
+	/// True if all Blocks so far have Compressed Size and
+	/// Uncompressed Size fields (not updated by the code in view)
+	bool all_have_sizes;
+
+	/// Oldest XZ Utils version that will decompress the file
+	uint32_t min_version;
+
+} xz_file_info;
+/// Positional initializer in field order; 50000002 seeds min_version.
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
+
+
+/// \brief      Parse the Index(es) from the given .xz file
+///
+/// \param      xfi     Receives the result: idx is set on success and
+///                     stream_padding grows with each Stream parsed.
+/// \param      src_fd  Input file descriptor; accessed with fstat() and
+///                     io_pread(), so its file offset gets moved.
+/// \return     On success, false is returned. On error, true is returned
+///             and xfi->idx is left untouched.
+// TODO: This function is pretty big. liblzma should have a function that
+// takes a callback function to parse the Index(es) from a .xz file to make
+// it easy for applications.
+static bool
+parse_indexes(xz_file_info *xfi, int src_fd)
+{
+	struct stat st;
+
+	if (fstat(src_fd, &st) != 0) {
+		return true;
+	}
+
+	if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
+		return true;
+	}
+
+	io_buf buf;
+	lzma_stream_flags header_flags;
+	lzma_stream_flags footer_flags;
+	lzma_ret ret;
+
+	// lzma_stream for the Index decoder
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	// All Indexes decoded so far
+	lzma_index *combined_index = NULL;
+
+	// The Index currently being decoded
+	lzma_index *this_index = NULL;
+
+	// Current position in the file. We parse the file backwards so
+	// initialize it to point to the end of the file.
+	off_t pos = st.st_size;
+
+	// Each loop iteration decodes one Index.
+	do {
+		// Check that there is enough data left to contain at least
+		// the Stream Header and Stream Footer. This check cannot
+		// fail in the first pass of this loop.
+		if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
+			goto error;
+		}
+
+		pos -= LZMA_STREAM_HEADER_SIZE;
+		lzma_vli stream_padding = 0;
+
+		// Locate the Stream Footer. There may be Stream Padding which
+		// we must skip when reading backwards.
+		while (true) {
+			if (pos < LZMA_STREAM_HEADER_SIZE) {
+				goto error;
+			}
+
+			if (io_pread(src_fd, &buf,
+					LZMA_STREAM_HEADER_SIZE, pos))
+				goto error;
+
+			// Padding is a multiple of 4 bytes; u32[2] is the last word read.
+			int i = 2;
+			if (buf.u32[i] != 0)
+				break;
+
+			// To avoid calling io_pread() for every four bytes
+			// of Stream Padding, take advantage that we read
+			// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
+			// check them too before calling io_pread() again.
+			do {
+				stream_padding += 4;
+				pos -= 4;
+				--i;
+			} while (i >= 0 && buf.u32[i] == 0);
+		}
+
+		// Decode the Stream Footer.
+		ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
+		if (ret != LZMA_OK) {
+			goto error;
+		}
+
+		// Check that the Stream Footer doesn't specify something
+		// that we don't support. This can only happen if the xz
+		// version is older than liblzma and liblzma supports
+		// something new.
+		//
+		// It is enough to check Stream Footer. Stream Header must
+		// match when it is compared against Stream Footer with
+		// lzma_stream_flags_compare().
+		if (footer_flags.version != 0) {
+			goto error;
+		}
+
+		// Check that the size of the Index field looks sane.
+		lzma_vli index_size = footer_flags.backward_size;
+		if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
+			goto error;
+		}
+
+		// Set pos to the beginning of the Index.
+		pos -= index_size;
+
+		// Decode the Index.
+		ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
+		if (ret != LZMA_OK) {
+			goto error;
+		}
+
+		do {
+			// Don't give the decoder more input than the
+			// Index size.
+			strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
+			if (io_pread(src_fd, &buf, strm.avail_in, pos))
+				goto error;
+
+			pos += strm.avail_in;
+			index_size -= strm.avail_in;
+
+			strm.next_in = buf.u8;
+			ret = lzma_code(&strm, LZMA_RUN);
+
+		} while (ret == LZMA_OK);
+
+		// If the decoding seems to be successful, check also that
+		// the Index decoder consumed as much input as indicated
+		// by the Backward Size field.
+		if (ret == LZMA_STREAM_END)
+			if (index_size != 0 || strm.avail_in != 0)
+				ret = LZMA_DATA_ERROR;
+
+		if (ret != LZMA_STREAM_END) {
+			// LZMA_BUF_ERROR means that the Index decoder
+			// would have liked more input than what the Index
+			// size should be according to Stream Footer.
+			// Upstream treats that as LZMA_DATA_ERROR; here
+			// ret is not examined again after the goto.
+			if (ret == LZMA_BUF_ERROR)
+				ret = LZMA_DATA_ERROR;
+
+			goto error;
+		}
+
+		// Move pos to the Stream Header (before the Blocks), decode it
+		// and check that its Stream Flags match the Stream Footer.
+		pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
+		if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
+			goto error;
+		}
+
+		pos -= lzma_index_total_size(this_index);
+		if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
+			goto error;
+
+		ret = lzma_stream_header_decode(&header_flags, buf.u8);
+		if (ret != LZMA_OK) {
+			goto error;
+		}
+
+		ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
+		if (ret != LZMA_OK) {
+			goto error;
+		}
+
+		// Store the decoded Stream Flags into this_index. Upstream
+		// needs this to print which Check is used in each
+		// Stream.
+		ret = lzma_index_stream_flags(this_index, &footer_flags);
+		if (ret != LZMA_OK)
+			goto error;
+
+		// Store also the size of the Stream Padding field. It is
+		// needed to show the offsets of the Streams correctly.
+		ret = lzma_index_stream_padding(this_index, stream_padding);
+		if (ret != LZMA_OK)
+			goto error;
+
+		if (combined_index != NULL) {
+			// Append the earlier decoded Indexes
+			// after this_index.
+			ret = lzma_index_cat(
+					this_index, combined_index, NULL);
+			if (ret != LZMA_OK) {
+				goto error;
+			}
+		}
+
+		combined_index = this_index;
+		this_index = NULL;
+
+		xfi->stream_padding += stream_padding;
+
+	} while (pos > 0);
+
+	lzma_end(&strm);
+
+	// All OK. Make combined_index available to the caller.
+	xfi->idx = combined_index;
+	return false;
+
+error:
+	// Something went wrong, free the allocated memory.
+	lzma_end(&strm);
+	lzma_index_end(combined_index, NULL);
+	lzma_index_end(this_index, NULL);
+	return true;
+}
+
+/***************** end of copy from list.c *************************/
+
+/*
+ * Uncompressed length of the .xz file on fd per its Index(es); 0 on failure.
+ */
+off_t
+unxz_len(int fd)
+{
+	xz_file_info info = XZ_FILE_INFO_INIT;
+	off_t total = 0;
+	if (parse_indexes(&info, fd) == false) {
+		total = lzma_index_uncompressed_size(info.idx);
+		lzma_index_end(info.idx, NULL);
+	}
+	return total;
+}
+