diff options
Diffstat (limited to 'src/xz/list.c')
| -rw-r--r-- | src/xz/list.c | 742 |
1 files changed, 742 insertions, 0 deletions
diff --git a/src/xz/list.c b/src/xz/list.c new file mode 100644 index 000000000000..91707b918b47 --- /dev/null +++ b/src/xz/list.c @@ -0,0 +1,742 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_integer.h" + + +/// Totals that are displayed if there was more than one file. +/// The "files" counter is also used in print_info_adv() to show +/// the file number. +static struct { + uint64_t files; + uint64_t streams; + uint64_t blocks; + uint64_t compressed_size; + uint64_t uncompressed_size; + uint32_t checks; +} totals = { 0, 0, 0, 0, 0, 0 }; + + +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param idx If decoding is successful, *idx will be set to point +/// to lzma_index containing the decoded information. +/// On error, *idx is not modified. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +// TODO: This function is pretty big. liblzma should have a function that +// takes a callback function to parse the Index(es) from a .xz file to make +// it easy for applications. +static bool +parse_indexes(lzma_index **idx, file_pair *pair) +{ + if (pair->src_st.st_size <= 0) { + message_error(_("%s: File is empty"), pair->src_name); + return true; + } + + if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error(_("%s: Too small to be a valid .xz file"), + pair->src_name); + return true; + } + + io_buf buf; + lzma_stream_flags header_flags; + lzma_stream_flags footer_flags; + lzma_ret ret; + + // lzma_stream for the Index decoder + lzma_stream strm = LZMA_STREAM_INIT; + + // All Indexes decoded so far + lzma_index *combined_index = NULL; + + // The Index currently being decoded + lzma_index *this_index = NULL; + + // Current position in the file. We parse the file backwards so + // initialize it to point to the end of the file. + off_t pos = pair->src_st.st_size; + + // Each loop iteration decodes one Index. + do { + // Check that there is enough data left to contain at least + // the Stream Header and Stream Footer. This check cannot + // fail in the first pass of this loop. + if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + pos -= LZMA_STREAM_HEADER_SIZE; + lzma_vli stream_padding = 0; + + // Locate the Stream Footer. There may be Stream Padding which + // we must skip when reading backwards. + while (true) { + if (pos < LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm( + LZMA_DATA_ERROR)); + goto error; + } + + if (io_pread(pair, &buf, + LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + // Stream Padding is always a multiple of four bytes. + int i = 2; + if (buf.u32[i] != 0) + break; + + // To avoid calling io_pread() for every four bytes + // of Stream Padding, take advantage that we read + // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and + // check them too before calling io_pread() again. + do { + stream_padding += 4; + pos -= 4; + --i; + } while (i >= 0 && buf.u32[i] == 0); + } + + // Decode the Stream Footer. + ret = lzma_stream_footer_decode(&footer_flags, buf.u8); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + // Check that the size of the Index field looks sane. + lzma_vli index_size = footer_flags.backward_size; + if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + // Set pos to the beginning of the Index. + pos -= index_size; + + // See how much memory we can use for decoding this Index. + uint64_t memlimit = hardware_memlimit_get(); + uint64_t memused = 0; + if (combined_index != NULL) { + memused = lzma_index_memused(combined_index); + if (memused > memlimit) + message_bug(); + + memlimit -= memused; + } + + // Decode the Index. + ret = lzma_index_decoder(&strm, &this_index, memlimit); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + do { + // Don't give the decoder more input than the + // Index size. + strm.avail_in = MIN(IO_BUFFER_SIZE, index_size); + if (io_pread(pair, &buf, strm.avail_in, pos)) + goto error; + + pos += strm.avail_in; + index_size -= strm.avail_in; + + strm.next_in = buf.u8; + ret = lzma_code(&strm, LZMA_RUN); + + } while (ret == LZMA_OK); + + // If the decoding seems to be successful, check also that + // the Index decoder consumed as much input as indicated + // by the Backward Size field. + if (ret == LZMA_STREAM_END) + if (index_size != 0 || strm.avail_in != 0) + ret = LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) { + // LZMA_BUFFER_ERROR means that the Index decoder + // would have liked more input than what the Index + // size should be according to Stream Footer. + // The message for LZMA_DATA_ERROR makes more + // sense in that case. + if (ret == LZMA_BUF_ERROR) + ret = LZMA_DATA_ERROR; + + message_error("%s: %s", pair->src_name, + message_strm(ret)); + + // If the error was too low memory usage limit, + // show also how much memory would have been needed. + if (ret == LZMA_MEMLIMIT_ERROR) { + uint64_t needed = lzma_memusage(&strm); + if (UINT64_MAX - needed < memused) + needed = UINT64_MAX; + else + needed += memused; + + message_mem_needed(V_ERROR, needed); + } + + goto error; + } + + // Decode the Stream Header and check that its Stream Flags + // match the Stream Footer. + pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; + if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + pos -= lzma_index_total_size(this_index); + if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + ret = lzma_stream_header_decode(&header_flags, buf.u8); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + ret = lzma_stream_flags_compare(&header_flags, &footer_flags); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + // Store the decoded Stream Flags into this_index. This is + // needed so that we can print which Check is used in each + // Stream. + ret = lzma_index_stream_flags(this_index, &footer_flags); + if (ret != LZMA_OK) + message_bug(); + + // Store also the size of the Stream Padding field. It is + // needed to show the offsets of the Streams correctly. + ret = lzma_index_stream_padding(this_index, stream_padding); + if (ret != LZMA_OK) + message_bug(); + + if (combined_index != NULL) { + // Append the earlier decoded Indexes + // after this_index. + ret = lzma_index_cat( + this_index, combined_index, NULL); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + } + + combined_index = this_index; + this_index = NULL; + + } while (pos > 0); + + lzma_end(&strm); + + // All OK. Make combined_index available to the caller. + *idx = combined_index; + return false; + +error: + // Something went wrong, free the allocated memory. + lzma_end(&strm); + lzma_index_end(combined_index, NULL); + lzma_index_end(this_index, NULL); + return true; +} + + +/// \brief Get the compression ratio +/// +/// This has slightly different format than that is used by in message.c. +static const char * +get_ratio(uint64_t compressed_size, uint64_t uncompressed_size) +{ + if (uncompressed_size == 0) + return "---"; + + const double ratio = (double)(compressed_size) + / (double)(uncompressed_size); + if (ratio > 9.999) + return "---"; + + static char buf[6]; + snprintf(buf, sizeof(buf), "%.3f", ratio); + return buf; +} + + +static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = { + "None", + "CRC32", + "Unknown-2", + "Unknown-3", + "CRC64", + "Unknown-5", + "Unknown-6", + "Unknown-7", + "Unknown-8", + "Unknown-9", + "SHA-256", + "Unknown-11", + "Unknown-12", + "Unknown-13", + "Unknown-14", + "Unknown-15", +}; + + +/// \brief Get a comma-separated list of Check names +/// +/// \param checks Bit mask of Checks to print +/// \param space_after_comma +/// It's better to not use spaces in table-like listings, +/// but in more verbose formats a space after a comma +/// is good for readability. +static const char * +get_check_names(uint32_t checks, bool space_after_comma) +{ + assert(checks != 0); + + static char buf[sizeof(check_names)]; + char *pos = buf; + size_t left = sizeof(buf); + + const char *sep = space_after_comma ? ", " : ","; + bool comma = false; + + for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) { + if (checks & (UINT32_C(1) << i)) { + my_snprintf(&pos, &left, "%s%s", + comma ? sep : "", check_names[i]); + comma = true; + } + } + + return buf; +} + + +/// \brief Read the Check value from the .xz file and print it +/// +/// Since this requires a seek, listing all Check values for all Blocks can +/// be slow. +/// +/// \param pair Input file +/// \param iter Location of the Block whose Check value should +/// be printed. +/// +/// \return False on success, true on I/O error. +static bool +print_check_value(file_pair *pair, const lzma_index_iter *iter) +{ + // Don't read anything from the file if there is no integrity Check. + if (iter->stream.flags->check == LZMA_CHECK_NONE) { + printf("---"); + return false; + } + + // Locate and read the Check field. + const uint32_t size = lzma_check_size(iter->stream.flags->check); + const off_t offset = iter->block.compressed_file_offset + + iter->block.total_size - size; + io_buf buf; + if (io_pread(pair, &buf, size, offset)) + return true; + + // CRC32 and CRC64 are in little endian. Guess that all the future + // 32-bit and 64-bit Check values are little endian too. It shouldn't + // be a too big problem if this guess is wrong. + if (size == 4) { + printf("%08" PRIx32, conv32le(buf.u32[0])); + } else if (size == 8) { + printf("%016" PRIx64, conv64le(buf.u64[0])); + } else { + for (size_t i = 0; i < size; ++i) + printf("%02x", buf.u8[i]); + } + + return false; +} + + +static void +print_info_basic(const lzma_index *idx, file_pair *pair) +{ + static bool headings_displayed = false; + if (!headings_displayed) { + headings_displayed = true; + // TRANSLATORS: These are column titles. From Strms (Streams) + // to Ratio, the columns are right aligned. Check and Filename + // are left aligned. If you need longer words, it's OK to + // use two lines here. Test with xz --list. + puts(_("Strms Blocks Compressed Uncompressed Ratio " + "Check Filename")); + } + + printf("%5s %7s %11s %11s %5s %-7s %s\n", + uint64_to_str(lzma_index_stream_count(idx), 0), + uint64_to_str(lzma_index_block_count(idx), 1), + uint64_to_nicestr(lzma_index_file_size(idx), + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(lzma_index_uncompressed_size(idx), + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx)), + get_check_names(lzma_index_checks(idx), false), + pair->src_name); + + return; +} + + +static void +print_adv_helper(uint64_t stream_count, uint64_t block_count, + uint64_t compressed_size, uint64_t uncompressed_size, + uint32_t checks) +{ + printf(_(" Stream count: %s\n"), + uint64_to_str(stream_count, 0)); + printf(_(" Block count: %s\n"), + uint64_to_str(block_count, 0)); + printf(_(" Compressed size: %s\n"), + uint64_to_nicestr(compressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(_(" Uncompressed size: %s\n"), + uint64_to_nicestr(uncompressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(_(" Ratio: %s\n"), + get_ratio(compressed_size, uncompressed_size)); + printf(_(" Check: %s\n"), + get_check_names(checks, true)); + return; +} + + +static void +print_info_adv(const lzma_index *idx, file_pair *pair) +{ + // Print the overall information. + print_adv_helper(lzma_index_stream_count(idx), + lzma_index_block_count(idx), + lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx), + lzma_index_checks(idx)); + + // TODO: The rest of this function needs some work. Currently + // the offsets are not printed, which could be useful even when + // printed in a less accurate format. On the other hand, maybe + // this should print the information with exact byte values, + // or maybe there should be at least an option to do that. + // + // We could also display some other info. E.g. it could be useful + // to quickly see how big is the biggest Block (uncompressed size) + // and if all Blocks have Compressed Size and Uncompressed Size + // fields present, which can be used e.g. for multithreaded + // decompression. + + // Avoid printing Stream and Block lists when they wouldn't be useful. + bool show_blocks = false; + if (lzma_index_stream_count(idx) > 1) { + puts(_(" Streams:")); + puts(_(" Number Blocks Compressed " + "Uncompressed Ratio Check")); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) { + if (iter.stream.block_count > 1) + show_blocks = true; + + printf(" %8s %10s %11s %11s %5s %s\n", + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.stream.block_count, 1), + uint64_to_nicestr( + iter.stream.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr( + iter.stream.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + check_names[iter.stream.flags->check]); + } + } + + if (show_blocks || lzma_index_block_count(idx) + > lzma_index_stream_count(idx) + || message_verbosity_get() >= V_DEBUG) { + puts(_(" Blocks:")); + // FIXME: Number in Stream/file, which one is better? + puts(_(" Stream Number Compressed " + "Uncompressed Ratio Check")); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf(" %8s %10s %11s %11s %5s %-7s", + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.block.number_in_stream, 1), + uint64_to_nicestr(iter.block.total_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr( + iter.block.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) + if (print_check_value(pair, &iter)) + return; + + putchar('\n'); + } + } +} + + +static void +print_info_robot(const lzma_index *idx, file_pair *pair) +{ + printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%s\n", + lzma_index_stream_count(idx), + lzma_index_block_count(idx), + lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx), + get_ratio(lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx)), + get_check_names(lzma_index_checks(idx), false), + pair->src_name); + + if (message_verbosity_get() >= V_VERBOSE) { + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) + printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%" PRIu64 "\t%s\n", + iter.stream.number, + iter.stream.compressed_offset, + iter.stream.uncompressed_offset, + iter.stream.compressed_size, + iter.stream.uncompressed_size, + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + iter.stream.padding, + check_names[iter.stream.flags->check]); + + lzma_index_iter_rewind(&iter); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s", + iter.stream.number, + iter.block.number_in_stream, + iter.block.number_in_file, + iter.block.compressed_file_offset, + iter.block.uncompressed_file_offset, + iter.block.total_size, + iter.block.uncompressed_size, + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) { + putchar('\t'); + if (print_check_value(pair, &iter)) + return; + } + + putchar('\n'); + } + } + + return; +} + + +static void +update_totals(const lzma_index *idx) +{ + // TODO: Integer overflow checks + ++totals.files; + totals.streams += lzma_index_stream_count(idx); + totals.blocks += lzma_index_block_count(idx); + totals.compressed_size += lzma_index_file_size(idx); + totals.uncompressed_size += lzma_index_uncompressed_size(idx); + totals.checks |= lzma_index_checks(idx); + return; +} + + +static void +print_totals_basic(void) +{ + // Print a separator line. + char line[80]; + memset(line, '-', sizeof(line)); + line[sizeof(line) - 1] = '\0'; + puts(line); + + // Print the totals except the file count, which needs + // special handling. + printf("%5s %7s %11s %11s %5s %-7s ", + uint64_to_str(totals.streams, 0), + uint64_to_str(totals.blocks, 1), + uint64_to_nicestr(totals.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(totals.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(totals.compressed_size, + totals.uncompressed_size), + get_check_names(totals.checks, false)); + + // Since we print totals only when there are at least two files, + // the English message will always use "%s files". But some other + // languages need different forms for different plurals so we + // have to translate this string still. + // + // TRANSLATORS: This simply indicates the number of files shown + // by --list even though the format string uses %s. + printf(N_("%s file", "%s files\n", + totals.files <= ULONG_MAX ? totals.files + : (totals.files % 1000000) + 1000000), + uint64_to_str(totals.files, 0)); + + return; +} + + +static void +print_totals_adv(void) +{ + putchar('\n'); + puts(_("Totals:")); + printf(_(" Number of files: %s\n"), + uint64_to_str(totals.files, 0)); + print_adv_helper(totals.streams, totals.blocks, + totals.compressed_size, totals.uncompressed_size, + totals.checks); + + return; +} + + +static void +print_totals_robot(void) +{ + printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\n", + totals.streams, + totals.blocks, + totals.compressed_size, + totals.uncompressed_size, + get_ratio(totals.compressed_size, + totals.uncompressed_size), + get_check_names(totals.checks, false), + totals.files); + + return; +} + + +extern void +list_totals(void) +{ + if (opt_robot) { + // Always print totals in --robot mode. It can be convenient + // in some cases and doesn't complicate usage of the + // single-file case much. + print_totals_robot(); + + } else if (totals.files > 1) { + // For non-robot mode, totals are printed only if there + // is more than one file. + if (message_verbosity_get() <= V_WARNING) + print_totals_basic(); + else + print_totals_adv(); + } + + return; +} + + +extern void +list_file(const char *filename) +{ + if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO) + message_fatal(_("--list works only on .xz files " + "(--format=xz or --format=auto)")); + + message_filename(filename); + + if (filename == stdin_filename) { + message_error(_("--list does not support reading from " + "standard input")); + return; + } + + // Unset opt_stdout so that io_open_src() won't accept special files. + // Set opt_force so that io_open_src() will follow symlinks. + opt_stdout = false; + opt_force = true; + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + lzma_index *idx; + if (!parse_indexes(&idx, pair)) { + // Update the totals that are displayed after all + // the individual files have been listed. + update_totals(idx); + + // We have three main modes: + // - --robot, which has submodes if --verbose is specified + // once or twice + // - Normal --list without --verbose + // - --list with one or two --verbose + if (opt_robot) + print_info_robot(idx, pair); + else if (message_verbosity_get() <= V_WARNING) + print_info_basic(idx, pair); + else + print_info_adv(idx, pair); + + lzma_index_end(idx, NULL); + } + + io_close(pair, false); + return; +} |
