diff options
Diffstat (limited to 'usr.bin/gzip/unxz.c')
-rw-r--r-- | usr.bin/gzip/unxz.c | 474 |
1 files changed, 474 insertions, 0 deletions
diff --git a/usr.bin/gzip/unxz.c b/usr.bin/gzip/unxz.c new file mode 100644 index 000000000000..de6683b246b8 --- /dev/null +++ b/usr.bin/gzip/unxz.c @@ -0,0 +1,474 @@ +/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christos Zoulas. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include <sys/cdefs.h> +#include <stdarg.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <lzma.h> + +static off_t +unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) +{ + lzma_stream strm = LZMA_STREAM_INIT; + static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; + lzma_ret ret; + lzma_action action = LZMA_RUN; + off_t bytes_out, bp; + uint8_t ibuf[BUFSIZ]; + uint8_t obuf[BUFSIZ]; + + if (bytes_in == NULL) + bytes_in = &bp; + + strm.next_in = ibuf; + memcpy(ibuf, pre, prelen); + strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); + if (strm.avail_in == (size_t)-1) + maybe_err("read failed"); + infile_newdata(strm.avail_in); + strm.avail_in += prelen; + *bytes_in = strm.avail_in; + + if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) + maybe_errx("Can't initialize decoder (%d)", ret); + + strm.next_out = NULL; + strm.avail_out = 0; + if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) + maybe_errx("Can't read headers (%d)", ret); + + bytes_out = 0; + strm.next_out = obuf; + strm.avail_out = sizeof(obuf); + + for (;;) { + check_siginfo(); + if (strm.avail_in == 0) { + strm.next_in = ibuf; + strm.avail_in = read(i, ibuf, sizeof(ibuf)); + switch (strm.avail_in) { + case (size_t)-1: + maybe_err("read failed"); + /*NOTREACHED*/ + case 0: + action = LZMA_FINISH; + break; + default: + infile_newdata(strm.avail_in); + *bytes_in += strm.avail_in; + break; + } + } + + ret = lzma_code(&strm, action); + + // Write and check write error before checking decoder error. + // This way as much data as possible gets written to output + // even if decoder detected an error. + if (strm.avail_out == 0 || ret != LZMA_OK) { + const size_t write_size = sizeof(obuf) - strm.avail_out; + + if (write(o, obuf, write_size) != (ssize_t)write_size) + maybe_err("write failed"); + + strm.next_out = obuf; + strm.avail_out = sizeof(obuf); + bytes_out += write_size; + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) { + // Check that there's no trailing garbage. + if (strm.avail_in != 0 || read(i, ibuf, 1)) + ret = LZMA_DATA_ERROR; + else { + lzma_end(&strm); + return bytes_out; + } + } + + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = strerror(ENOMEM); + break; + + case LZMA_FORMAT_ERROR: + msg = "File format not recognized"; + break; + + case LZMA_OPTIONS_ERROR: + // FIXME: Better message? + msg = "Unsupported compression options"; + break; + + case LZMA_DATA_ERROR: + msg = "File is corrupt"; + break; + + case LZMA_BUF_ERROR: + msg = "Unexpected end of input"; + break; + + case LZMA_MEMLIMIT_ERROR: + msg = "Reached memory limit"; + break; + + default: + maybe_errx("Unknown error (%d)", ret); + break; + } + maybe_errx("%s", msg); + + } + } +} + +#include <stdbool.h> + +/* + * Copied various bits and pieces from xz support code or brute force + * replacements. + */ + +#define my_min(A,B) ((A)<(B)?(A):(B)) + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. +// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) +#if BUFSIZ <= 1024 +# define IO_BUFFER_SIZE 8192 +#else +# define IO_BUFFER_SIZE (BUFSIZ & ~7U) +#endif + +/// is_sparse() accesses the buffer as uint64_t for maximum speed. +/// Use an union to make sure that the buffer is properly aligned. +typedef union { + uint8_t u8[IO_BUFFER_SIZE]; + uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; + uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; +} io_buf; + + +static bool +io_pread(int fd, io_buf *buf, size_t size, off_t pos) +{ + // Using lseek() and read() is more portable than pread() and + // for us it is as good as real pread(). + if (lseek(fd, pos, SEEK_SET) != pos) { + return true; + } + + const size_t amount = read(fd, buf, size); + if (amount == SIZE_MAX) + return true; + + if (amount != size) { + return true; + } + + return false; +} + +/* + * Most of the following is copied (mostly verbatim) from the xz + * distribution, from file src/xz/list.c + */ + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + + +/// Information about a .xz file +typedef struct { + /// Combined Index of all Streams in the file + lzma_index *idx; + + /// Total amount of Stream Padding + uint64_t stream_padding; + + /// Highest memory usage so far + uint64_t memusage_max; + + /// True if all Blocks so far have Compressed Size and + /// Uncompressed Size fields + bool all_have_sizes; + + /// Oldest XZ Utils version that will decompress the file + uint32_t min_version; + +} xz_file_info; + +#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } + + +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param xfi Pointer to structure where the decoded information +/// is stored. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +// TODO: This function is pretty big. liblzma should have a function that +// takes a callback function to parse the Index(es) from a .xz file to make +// it easy for applications. +static bool +parse_indexes(xz_file_info *xfi, int src_fd) +{ + struct stat st; + + if (fstat(src_fd, &st) != 0) { + return true; + } + + if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + return true; + } + + io_buf buf; + lzma_stream_flags header_flags; + lzma_stream_flags footer_flags; + lzma_ret ret; + + // lzma_stream for the Index decoder + lzma_stream strm = LZMA_STREAM_INIT; + + // All Indexes decoded so far + lzma_index *combined_index = NULL; + + // The Index currently being decoded + lzma_index *this_index = NULL; + + // Current position in the file. We parse the file backwards so + // initialize it to point to the end of the file. + off_t pos = st.st_size; + + // Each loop iteration decodes one Index. + do { + // Check that there is enough data left to contain at least + // the Stream Header and Stream Footer. This check cannot + // fail in the first pass of this loop. + if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { + goto error; + } + + pos -= LZMA_STREAM_HEADER_SIZE; + lzma_vli stream_padding = 0; + + // Locate the Stream Footer. There may be Stream Padding which + // we must skip when reading backwards. + while (true) { + if (pos < LZMA_STREAM_HEADER_SIZE) { + goto error; + } + + if (io_pread(src_fd, &buf, + LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + // Stream Padding is always a multiple of four bytes. + int i = 2; + if (buf.u32[i] != 0) + break; + + // To avoid calling io_pread() for every four bytes + // of Stream Padding, take advantage that we read + // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and + // check them too before calling io_pread() again. + do { + stream_padding += 4; + pos -= 4; + --i; + } while (i >= 0 && buf.u32[i] == 0); + } + + // Decode the Stream Footer. + ret = lzma_stream_footer_decode(&footer_flags, buf.u8); + if (ret != LZMA_OK) { + goto error; + } + + // Check that the Stream Footer doesn't specify something + // that we don't support. This can only happen if the xz + // version is older than liblzma and liblzma supports + // something new. + // + // It is enough to check Stream Footer. Stream Header must + // match when it is compared against Stream Footer with + // lzma_stream_flags_compare(). + if (footer_flags.version != 0) { + goto error; + } + + // Check that the size of the Index field looks sane. + lzma_vli index_size = footer_flags.backward_size; + if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { + goto error; + } + + // Set pos to the beginning of the Index. + pos -= index_size; + + // Decode the Index. + ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); + if (ret != LZMA_OK) { + goto error; + } + + do { + // Don't give the decoder more input than the + // Index size. + strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); + if (io_pread(src_fd, &buf, strm.avail_in, pos)) + goto error; + + pos += strm.avail_in; + index_size -= strm.avail_in; + + strm.next_in = buf.u8; + ret = lzma_code(&strm, LZMA_RUN); + + } while (ret == LZMA_OK); + + // If the decoding seems to be successful, check also that + // the Index decoder consumed as much input as indicated + // by the Backward Size field. + if (ret == LZMA_STREAM_END) + if (index_size != 0 || strm.avail_in != 0) + ret = LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) { + // LZMA_BUFFER_ERROR means that the Index decoder + // would have liked more input than what the Index + // size should be according to Stream Footer. + // The message for LZMA_DATA_ERROR makes more + // sense in that case. + if (ret == LZMA_BUF_ERROR) + ret = LZMA_DATA_ERROR; + + goto error; + } + + // Decode the Stream Header and check that its Stream Flags + // match the Stream Footer. + pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; + if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { + goto error; + } + + pos -= lzma_index_total_size(this_index); + if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + ret = lzma_stream_header_decode(&header_flags, buf.u8); + if (ret != LZMA_OK) { + goto error; + } + + ret = lzma_stream_flags_compare(&header_flags, &footer_flags); + if (ret != LZMA_OK) { + goto error; + } + + // Store the decoded Stream Flags into this_index. This is + // needed so that we can print which Check is used in each + // Stream. + ret = lzma_index_stream_flags(this_index, &footer_flags); + if (ret != LZMA_OK) + goto error; + + // Store also the size of the Stream Padding field. It is + // needed to show the offsets of the Streams correctly. + ret = lzma_index_stream_padding(this_index, stream_padding); + if (ret != LZMA_OK) + goto error; + + if (combined_index != NULL) { + // Append the earlier decoded Indexes + // after this_index. + ret = lzma_index_cat( + this_index, combined_index, NULL); + if (ret != LZMA_OK) { + goto error; + } + } + + combined_index = this_index; + this_index = NULL; + + xfi->stream_padding += stream_padding; + + } while (pos > 0); + + lzma_end(&strm); + + // All OK. Make combined_index available to the caller. + xfi->idx = combined_index; + return false; + +error: + // Something went wrong, free the allocated memory. + lzma_end(&strm); + lzma_index_end(combined_index, NULL); + lzma_index_end(this_index, NULL); + return true; +} + +/***************** end of copy form list.c *************************/ + +/* + * Small wrapper to extract total length of a file + */ +off_t +unxz_len(int fd) +{ + xz_file_info xfi = XZ_FILE_INFO_INIT; + if (!parse_indexes(&xfi, fd)) { + off_t res = lzma_index_uncompressed_size(xfi.idx); + lzma_index_end(xfi.idx, NULL); + return res; + } + return 0; +} + |