diff options
Diffstat (limited to 'src/xz')
-rw-r--r-- | src/xz/args.c | 110 | ||||
-rw-r--r-- | src/xz/args.h | 2 | ||||
-rw-r--r-- | src/xz/coder.c | 432 | ||||
-rw-r--r-- | src/xz/coder.h | 15 | ||||
-rw-r--r-- | src/xz/file_io.c | 355 | ||||
-rw-r--r-- | src/xz/file_io.h | 21 | ||||
-rw-r--r-- | src/xz/hardware.c | 32 | ||||
-rw-r--r-- | src/xz/hardware.h | 9 | ||||
-rw-r--r-- | src/xz/list.c | 63 | ||||
-rw-r--r-- | src/xz/main.c | 5 | ||||
-rw-r--r-- | src/xz/message.c | 105 | ||||
-rw-r--r-- | src/xz/mytime.c | 89 | ||||
-rw-r--r-- | src/xz/mytime.h | 47 | ||||
-rw-r--r-- | src/xz/options.c | 4 | ||||
-rw-r--r-- | src/xz/private.h | 2 | ||||
-rw-r--r-- | src/xz/signals.c | 5 | ||||
-rw-r--r-- | src/xz/suffix.c | 176 | ||||
-rw-r--r-- | src/xz/xz.1 | 211 |
18 files changed, 1425 insertions, 258 deletions
diff --git a/src/xz/args.c b/src/xz/args.c index 34761d45d67e2..041c80073e6de 100644 --- a/src/xz/args.c +++ b/src/xz/args.c @@ -22,6 +22,7 @@ bool opt_stdout = false; bool opt_force = false; bool opt_keep_original = false; bool opt_robot = false; +bool opt_ignore_check = false; // We don't modify or free() this, but we need to assign it in some // non-const pointers. @@ -55,6 +56,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str, static void +parse_block_list(char *str) +{ + // It must be non-empty and not begin with a comma. + if (str[0] == '\0' || str[0] == ',') + message_fatal(_("%s: Invalid argument to --block-list"), str); + + // Count the number of comma-separated strings. + size_t count = 1; + for (size_t i = 0; str[i] != '\0'; ++i) + if (str[i] == ',') + ++count; + + // Prevent an unlikely integer overflow. + if (count > SIZE_MAX / sizeof(uint64_t) - 1) + message_fatal(_("%s: Too many arguments to --block-list"), + str); + + // Allocate memory to hold all the sizes specified. + // If --block-list was specified already, its value is forgotten. + free(opt_block_list); + opt_block_list = xmalloc((count + 1) * sizeof(uint64_t)); + + for (size_t i = 0; i < count; ++i) { + // Locate the next comma and replace it with \0. + char *p = strchr(str, ','); + if (p != NULL) + *p = '\0'; + + if (str[0] == '\0') { + // There is no string, that is, a comma follows + // another comma. Use the previous value. + // + // NOTE: We checked earler that the first char + // of the whole list cannot be a comma. + assert(i > 0); + opt_block_list[i] = opt_block_list[i - 1]; + } else { + opt_block_list[i] = str_to_uint64("block-list", str, + 0, UINT64_MAX); + + // Zero indicates no more new Blocks. + if (opt_block_list[i] == 0) { + if (i + 1 != count) + message_fatal(_("0 can only be used " + "as the last element " + "in --block-list")); + + opt_block_list[i] = UINT64_MAX; + } + } + + str = p + 1; + } + + // Terminate the array. + opt_block_list[count] = 0; + return; +} + + +static void parse_real(args_info *args, int argc, char **argv) { enum { @@ -68,14 +130,19 @@ parse_real(args_info *args, int argc, char **argv) OPT_LZMA1, OPT_LZMA2, + OPT_SINGLE_STREAM, OPT_NO_SPARSE, OPT_FILES, OPT_FILES0, + OPT_BLOCK_SIZE, + OPT_BLOCK_LIST, OPT_MEM_COMPRESS, OPT_MEM_DECOMPRESS, OPT_NO_ADJUST, OPT_INFO_MEMORY, OPT_ROBOT, + OPT_FLUSH_TIMEOUT, + OPT_IGNORE_CHECK, }; static const char short_opts[] @@ -94,6 +161,7 @@ parse_real(args_info *args, int argc, char **argv) { "force", no_argument, NULL, 'f' }, { "stdout", no_argument, NULL, 'c' }, { "to-stdout", no_argument, NULL, 'c' }, + { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, { "suffix", required_argument, NULL, 'S' }, // { "recursive", no_argument, NULL, 'r' }, // TODO @@ -103,12 +171,16 @@ parse_real(args_info *args, int argc, char **argv) // Basic compression settings { "format", required_argument, NULL, 'F' }, { "check", required_argument, NULL, 'C' }, + { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK }, + { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, + { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, { "memlimit", required_argument, NULL, 'M' }, { "memory", required_argument, NULL, 'M' }, // Old alias { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, { "threads", required_argument, NULL, 'T' }, + { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT }, { "extreme", no_argument, NULL, 'e' }, { "fast", no_argument, NULL, '0' }, @@ -175,8 +247,9 @@ parse_real(args_info *args, int argc, char **argv) break; case 'T': - hardware_threadlimit_set(str_to_uint64( - "threads", optarg, 0, UINT32_MAX)); + // The max is from src/liblzma/common/common.h. + hardware_threads_set(str_to_uint64("threads", + optarg, 0, 16384)); break; // --version @@ -368,6 +441,24 @@ parse_real(args_info *args, int argc, char **argv) break; } + case OPT_IGNORE_CHECK: + opt_ignore_check = true; + break; + + case OPT_BLOCK_SIZE: + opt_block_size = str_to_uint64("block-size", optarg, + 0, LZMA_VLI_MAX); + break; + + case OPT_BLOCK_LIST: { + parse_block_list(optarg); + break; + } + + case OPT_SINGLE_STREAM: + opt_single_stream = true; + break; + case OPT_NO_SPARSE: io_no_sparse(); break; @@ -401,6 +492,11 @@ parse_real(args_info *args, int argc, char **argv) opt_auto_adjust = false; break; + case OPT_FLUSH_TIMEOUT: + opt_flush_timeout = str_to_uint64("flush-timeout", + optarg, 0, UINT64_MAX); + break; + default: message_try_help(); tuklib_exit(E_ERROR, E_ERROR, false); @@ -576,3 +672,13 @@ args_parse(args_info *args, int argc, char **argv) return; } + + +#ifndef NDEBUG +extern void +args_free(void) +{ + free(opt_block_list); + return; +} +#endif diff --git a/src/xz/args.h b/src/xz/args.h index b23f4ef12c923..1defad12e0dd5 100644 --- a/src/xz/args.h +++ b/src/xz/args.h @@ -36,7 +36,9 @@ extern bool opt_force; extern bool opt_keep_original; // extern bool opt_recursive; extern bool opt_robot; +extern bool opt_ignore_check; extern const char stdin_filename[]; extern void args_parse(args_info *args, int argc, char **argv); +extern void args_free(void); diff --git a/src/xz/coder.c b/src/xz/coder.c index 017e04127e952..a94bdb83266fa 100644 --- a/src/xz/coder.c +++ b/src/xz/coder.c @@ -24,6 +24,9 @@ enum coder_init_ret { enum operation_mode opt_mode = MODE_COMPRESS; enum format_type opt_format = FORMAT_AUTO; bool opt_auto_adjust = true; +bool opt_single_stream = false; +uint64_t opt_block_size = 0; +uint64_t *opt_block_list = NULL; /// Stream used to communicate with liblzma @@ -48,6 +51,14 @@ static lzma_check check; /// This becomes false if the --check=CHECK option is used. static bool check_default = true; +#ifdef MYTHREAD_ENABLED +static lzma_mt mt_options = { + .flags = 0, + .timeout = 300, + .filters = filters, +}; +#endif + extern void coder_set_check(lzma_check new_check) @@ -125,6 +136,15 @@ memlimit_too_small(uint64_t memory_usage) extern void coder_set_compression_settings(void) { + // The default check type is CRC64, but fallback to CRC32 + // if CRC64 isn't supported by the copy of liblzma we are + // using. CRC32 is always supported. + if (check_default) { + check = LZMA_CHECK_CRC64; + if (!lzma_check_is_supported(check)) + check = LZMA_CHECK_CRC32; + } + // Options for LZMA1 or LZMA2 in case we are using a preset. static lzma_options_lzma opt_lzma; @@ -175,15 +195,53 @@ coder_set_compression_settings(void) // Print the selected filter chain. message_filters_show(V_DEBUG, filters); - // If using --format=raw, we can be decoding. The memusage function - // also validates the filter chain and the options used for the - // filters. + // The --flush-timeout option requires LZMA_SYNC_FLUSH support + // from the filter chain. Currently threaded encoder doesn't support + // LZMA_SYNC_FLUSH so single-threaded mode must be used. + if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) { + for (size_t i = 0; i < filters_count; ++i) { + switch (filters[i].id) { + case LZMA_FILTER_LZMA2: + case LZMA_FILTER_DELTA: + break; + + default: + message_fatal(_("The filter chain is " + "incompatible with --flush-timeout")); + } + } + + if (hardware_threads_get() > 1) { + message(V_WARNING, _("Switching to single-threaded " + "mode due to --flush-timeout")); + hardware_threads_set(1); + } + } + + // Get the memory usage. Note that if --format=raw was used, + // we can be decompressing. const uint64_t memory_limit = hardware_memlimit_get(opt_mode); uint64_t memory_usage; - if (opt_mode == MODE_COMPRESS) - memory_usage = lzma_raw_encoder_memusage(filters); - else + if (opt_mode == MODE_COMPRESS) { +#ifdef MYTHREAD_ENABLED + if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) { + mt_options.threads = hardware_threads_get(); + mt_options.block_size = opt_block_size; + mt_options.check = check; + memory_usage = lzma_stream_encoder_mt_memusage( + &mt_options); + if (memory_usage != UINT64_MAX) + message(V_DEBUG, _("Using up to %" PRIu32 + " threads."), + mt_options.threads); + } else +#endif + { + memory_usage = lzma_raw_encoder_memusage(filters); + } + } else { memory_usage = lzma_raw_decoder_memusage(filters); + } if (memory_usage == UINT64_MAX) message_fatal(_("Unsupported filter chain or filter options")); @@ -199,90 +257,99 @@ coder_set_compression_settings(void) round_up_to_mib(decmem), 0)); } - if (memory_usage > memory_limit) { - // If --no-adjust was used or we didn't find LZMA1 or - // LZMA2 as the last filter, give an error immediately. - // --format=raw implies --no-adjust. - if (!opt_auto_adjust || opt_format == FORMAT_RAW) - memlimit_too_small(memory_usage); - - assert(opt_mode == MODE_COMPRESS); - - // Look for the last filter if it is LZMA2 or LZMA1, so - // we can make it use less RAM. With other filters we don't - // know what to do. - size_t i = 0; - while (filters[i].id != LZMA_FILTER_LZMA2 - && filters[i].id != LZMA_FILTER_LZMA1) { - if (filters[i].id == LZMA_VLI_UNKNOWN) - memlimit_too_small(memory_usage); - - ++i; - } + if (memory_usage <= memory_limit) + return; - // Decrease the dictionary size until we meet the memory - // usage limit. First round down to full mebibytes. - lzma_options_lzma *opt = filters[i].options; - const uint32_t orig_dict_size = opt->dict_size; - opt->dict_size &= ~((UINT32_C(1) << 20) - 1); - while (true) { - // If it is below 1 MiB, auto-adjusting failed. We - // could be more sophisticated and scale it down even - // more, but let's see if many complain about this - // version. - // - // FIXME: Displays the scaled memory usage instead - // of the original. - if (opt->dict_size < (UINT32_C(1) << 20)) + // If --no-adjust was used or we didn't find LZMA1 or + // LZMA2 as the last filter, give an error immediately. + // --format=raw implies --no-adjust. + if (!opt_auto_adjust || opt_format == FORMAT_RAW) + memlimit_too_small(memory_usage); + + assert(opt_mode == MODE_COMPRESS); + +#ifdef MYTHREAD_ENABLED + if (opt_format == FORMAT_XZ && mt_options.threads > 1) { + // Try to reduce the number of threads before + // adjusting the compression settings down. + do { + // FIXME? The real single-threaded mode has + // lower memory usage, but it's not comparable + // because it doesn't write the size info + // into Block Headers. + if (--mt_options.threads == 0) memlimit_too_small(memory_usage); - memory_usage = lzma_raw_encoder_memusage(filters); + memory_usage = lzma_stream_encoder_mt_memusage( + &mt_options); if (memory_usage == UINT64_MAX) message_bug(); - // Accept it if it is low enough. - if (memory_usage <= memory_limit) - break; + } while (memory_usage > memory_limit); - // Otherwise 1 MiB down and try again. I hope this - // isn't too slow method for cases where the original - // dict_size is very big. - opt->dict_size -= UINT32_C(1) << 20; - } + message(V_WARNING, _("Adjusted the number of threads " + "from %s to %s to not exceed " + "the memory usage limit of %s MiB"), + uint64_to_str(hardware_threads_get(), 0), + uint64_to_str(mt_options.threads, 1), + uint64_to_str(round_up_to_mib( + memory_limit), 2)); + } +#endif + + if (memory_usage <= memory_limit) + return; - // Tell the user that we decreased the dictionary size. - message(V_WARNING, _("Adjusted LZMA%c dictionary size " - "from %s MiB to %s MiB to not exceed " - "the memory usage limit of %s MiB"), - filters[i].id == LZMA_FILTER_LZMA2 - ? '2' : '1', - uint64_to_str(orig_dict_size >> 20, 0), - uint64_to_str(opt->dict_size >> 20, 1), - uint64_to_str(round_up_to_mib( - memory_limit), 2)); + // Look for the last filter if it is LZMA2 or LZMA1, so we can make + // it use less RAM. With other filters we don't know what to do. + size_t i = 0; + while (filters[i].id != LZMA_FILTER_LZMA2 + && filters[i].id != LZMA_FILTER_LZMA1) { + if (filters[i].id == LZMA_VLI_UNKNOWN) + memlimit_too_small(memory_usage); + + ++i; } -/* - // Limit the number of worker threads so that memory usage - // limit isn't exceeded. - assert(memory_usage > 0); - size_t thread_limit = memory_limit / memory_usage; - if (thread_limit == 0) - thread_limit = 1; + // Decrease the dictionary size until we meet the memory + // usage limit. First round down to full mebibytes. + lzma_options_lzma *opt = filters[i].options; + const uint32_t orig_dict_size = opt->dict_size; + opt->dict_size &= ~((UINT32_C(1) << 20) - 1); + while (true) { + // If it is below 1 MiB, auto-adjusting failed. We could be + // more sophisticated and scale it down even more, but let's + // see if many complain about this version. + // + // FIXME: Displays the scaled memory usage instead + // of the original. + if (opt->dict_size < (UINT32_C(1) << 20)) + memlimit_too_small(memory_usage); - if (opt_threads > thread_limit) - opt_threads = thread_limit; -*/ + memory_usage = lzma_raw_encoder_memusage(filters); + if (memory_usage == UINT64_MAX) + message_bug(); - if (check_default) { - // The default check type is CRC64, but fallback to CRC32 - // if CRC64 isn't supported by the copy of liblzma we are - // using. CRC32 is always supported. - check = LZMA_CHECK_CRC64; - if (!lzma_check_is_supported(check)) - check = LZMA_CHECK_CRC32; + // Accept it if it is low enough. + if (memory_usage <= memory_limit) + break; + + // Otherwise 1 MiB down and try again. I hope this + // isn't too slow method for cases where the original + // dict_size is very big. + opt->dict_size -= UINT32_C(1) << 20; } + // Tell the user that we decreased the dictionary size. + message(V_WARNING, _("Adjusted LZMA%c dictionary size " + "from %s MiB to %s MiB to not exceed " + "the memory usage limit of %s MiB"), + filters[i].id == LZMA_FILTER_LZMA2 + ? '2' : '1', + uint64_to_str(orig_dict_size >> 20, 0), + uint64_to_str(opt->dict_size >> 20, 1), + uint64_to_str(round_up_to_mib(memory_limit), 2)); + return; } @@ -364,7 +431,14 @@ coder_init(file_pair *pair) break; case FORMAT_XZ: - ret = lzma_stream_encoder(&strm, filters, check); +#ifdef MYTHREAD_ENABLED + if (hardware_threads_get() > 1) + ret = lzma_stream_encoder_mt( + &strm, &mt_options); + else +#endif + ret = lzma_stream_encoder( + &strm, filters, check); break; case FORMAT_LZMA: @@ -376,8 +450,17 @@ coder_init(file_pair *pair) break; } } else { - const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK - | LZMA_CONCATENATED; + uint32_t flags = 0; + + // It seems silly to warn about unsupported check if the + // check won't be verified anyway due to --ignore-check. + if (opt_ignore_check) + flags |= LZMA_IGNORE_CHECK; + else + flags |= LZMA_TELL_UNSUPPORTED_CHECK; + + if (!opt_single_stream) + flags |= LZMA_CONCATENATED; // We abuse FORMAT_AUTO to indicate unknown file format, // for which we may consider passthru mode. @@ -408,7 +491,7 @@ coder_init(file_pair *pair) switch (init_format) { case FORMAT_AUTO: - // Uknown file format. If --decompress --stdout + // Unknown file format. If --decompress --stdout // --force have been given, then we copy the input // as is to stdout. Checking for MODE_DECOMPRESS // is needed, because we don't want to do use @@ -462,6 +545,56 @@ coder_init(file_pair *pair) } +/// Resolve conflicts between opt_block_size and opt_block_list in single +/// threaded mode. We want to default to opt_block_list, except when it is +/// larger than opt_block_size. If this is the case for the current Block +/// at *list_pos, then we break into smaller Blocks. Otherwise advance +/// to the next Block in opt_block_list, and break apart if needed. +static void +split_block(uint64_t *block_remaining, + uint64_t *next_block_remaining, + size_t *list_pos) +{ + if (*next_block_remaining > 0) { + // The Block at *list_pos has previously been split up. + assert(hardware_threads_get() == 1); + assert(opt_block_size > 0); + assert(opt_block_list != NULL); + + if (*next_block_remaining > opt_block_size) { + // We have to split the current Block at *list_pos + // into another opt_block_size length Block. + *block_remaining = opt_block_size; + } else { + // This is the last remaining split Block for the + // Block at *list_pos. + *block_remaining = *next_block_remaining; + } + + *next_block_remaining -= *block_remaining; + + } else { + // The Block at *list_pos has been finished. Go to the next + // entry in the list. If the end of the list has been reached, + // reuse the size of the last Block. + if (opt_block_list[*list_pos + 1] != 0) + ++*list_pos; + + *block_remaining = opt_block_list[*list_pos]; + + // If in single-threaded mode, split up the Block if needed. + // This is not needed in multi-threaded mode because liblzma + // will do this due to how threaded encoding works. + if (hardware_threads_get() == 1 && opt_block_size > 0 + && *block_remaining > opt_block_size) { + *next_block_remaining + = *block_remaining - opt_block_size; + *block_remaining = opt_block_size; + } + } +} + + /// Compress or decompress using liblzma. static bool coder_normal(file_pair *pair) @@ -469,8 +602,8 @@ coder_normal(file_pair *pair) // Encoder needs to know when we have given all the input to it. // The decoders need to know it too when we are using // LZMA_CONCATENATED. We need to check for src_eof here, because - // the first input chunk has been already read, and that may - // have been the only chunk we will read. + // the first input chunk has been already read if decompressing, + // and that may have been the only chunk we will read. lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; lzma_ret ret; @@ -478,22 +611,77 @@ coder_normal(file_pair *pair) // Assume that something goes wrong. bool success = false; + // block_remaining indicates how many input bytes to encode before + // finishing the current .xz Block. The Block size is set with + // --block-size=SIZE and --block-list. They have an effect only when + // compressing to the .xz format. If block_remaining == UINT64_MAX, + // only a single block is created. + uint64_t block_remaining = UINT64_MAX; + + // next_block_remining for when we are in single-threaded mode and + // the Block in --block-list is larger than the --block-size=SIZE. + uint64_t next_block_remaining = 0; + + // Position in opt_block_list. Unused if --block-list wasn't used. + size_t list_pos = 0; + + // Handle --block-size for single-threaded mode and the first step + // of --block-list. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) { + // --block-size doesn't do anything here in threaded mode, + // because the threaded encoder will take care of splitting + // to fixed-sized Blocks. + if (hardware_threads_get() == 1 && opt_block_size > 0) + block_remaining = opt_block_size; + + // If --block-list was used, start with the first size. + // + // For threaded case, --block-size specifies how big Blocks + // the encoder needs to be prepared to create at maximum + // and --block-list will simultaneously cause new Blocks + // to be started at specified intervals. To keep things + // logical, the same is done in single-threaded mode. The + // output is still not identical because in single-threaded + // mode the size info isn't written into Block Headers. + if (opt_block_list != NULL) { + if (block_remaining < opt_block_list[list_pos]) { + assert(hardware_threads_get() == 1); + next_block_remaining = opt_block_list[list_pos] + - block_remaining; + } else { + block_remaining = opt_block_list[list_pos]; + } + } + } + strm.next_out = out_buf.u8; strm.avail_out = IO_BUFFER_SIZE; while (!user_abort) { - // Fill the input buffer if it is empty and we haven't reached - // end of file yet. - if (strm.avail_in == 0 && !pair->src_eof) { + // Fill the input buffer if it is empty and we aren't + // flushing or finishing. + if (strm.avail_in == 0 && action == LZMA_RUN) { strm.next_in = in_buf.u8; - strm.avail_in = io_read( - pair, &in_buf, IO_BUFFER_SIZE); + strm.avail_in = io_read(pair, &in_buf, + my_min(block_remaining, + IO_BUFFER_SIZE)); if (strm.avail_in == SIZE_MAX) break; - if (pair->src_eof) + if (pair->src_eof) { action = LZMA_FINISH; + + } else if (block_remaining != UINT64_MAX) { + // Start a new Block after every + // opt_block_size bytes of input. + block_remaining -= strm.avail_in; + if (block_remaining == 0) + action = LZMA_FULL_BARRIER; + } + + if (action == LZMA_RUN && flush_needed) + action = LZMA_SYNC_FLUSH; } // Let liblzma do the actual work. @@ -509,7 +697,39 @@ coder_normal(file_pair *pair) strm.avail_out = IO_BUFFER_SIZE; } - if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH + || action == LZMA_FULL_BARRIER)) { + if (action == LZMA_SYNC_FLUSH) { + // Flushing completed. Write the pending data + // out immediatelly so that the reading side + // can decompress everything compressed so far. + if (io_write(pair, &out_buf, IO_BUFFER_SIZE + - strm.avail_out)) + break; + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + + // Set the time of the most recent flushing. + mytime_set_flush_time(); + } else { + // Start a new Block after LZMA_FULL_BARRIER. + if (opt_block_list == NULL) { + assert(hardware_threads_get() == 1); + assert(opt_block_size > 0); + block_remaining = opt_block_size; + } else { + split_block(&block_remaining, + &next_block_remaining, + &list_pos); + } + } + + // Start a new Block after LZMA_FULL_FLUSH or continue + // the same block after LZMA_SYNC_FLUSH. + action = LZMA_RUN; + + } else if (ret != LZMA_OK) { // Determine if the return value indicates that we // won't continue coding. const bool stop = ret != LZMA_NO_CHECK @@ -528,6 +748,12 @@ coder_normal(file_pair *pair) } if (ret == LZMA_STREAM_END) { + if (opt_single_stream) { + io_fix_src_pos(pair, strm.avail_in); + success = true; + break; + } + // Check that there is no trailing garbage. // This is needed for LZMA_Alone and raw // streams. @@ -630,10 +856,15 @@ coder_run(const char *filename) // Assume that something goes wrong. bool success = false; - // Read the first chunk of input data. This is needed to detect - // the input file type (for now, only for decompression). - strm.next_in = in_buf.u8; - strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + if (opt_mode == MODE_COMPRESS) { + strm.next_in = NULL; + strm.avail_in = 0; + } else { + // Read the first chunk of input data. This is needed + // to detect the input file type. + strm.next_in = in_buf.u8; + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + } if (strm.avail_in != SIZE_MAX) { // Initialize the coder. This will detect the file format @@ -648,6 +879,11 @@ coder_run(const char *filename) // Don't open the destination file when --test // is used. if (opt_mode == MODE_TEST || !io_open_dest(pair)) { + // Remember the current time. It is needed + // for progress indicator and for timed + // flushing. + mytime_set_start_time(); + // Initialize the progress indicator. const uint64_t in_size = pair->src_st.st_size <= 0 @@ -671,3 +907,13 @@ coder_run(const char *filename) return; } + + +#ifndef NDEBUG +extern void +coder_free(void) +{ + lzma_end(&strm); + return; +} +#endif diff --git a/src/xz/coder.h b/src/xz/coder.h index 2d3add9727451..583da8f68d50a 100644 --- a/src/xz/coder.h +++ b/src/xz/coder.h @@ -41,6 +41,16 @@ extern enum format_type opt_format; /// they exceed the memory usage limit. extern bool opt_auto_adjust; +/// If true, stop after decoding the first stream. +extern bool opt_single_stream; + +/// If non-zero, start a new .xz Block after every opt_block_size bytes +/// of input. This has an effect only when compressing to the .xz format. +extern uint64_t opt_block_size; + +/// This is non-NULL if --block-list was used. This contains the Block sizes +/// as an array that is terminated with 0. +extern uint64_t *opt_block_list; /// Set the integrity check type used when compressing extern void coder_set_check(lzma_check check); @@ -59,3 +69,8 @@ extern void coder_set_compression_settings(void); /// Compress or decompress the given file extern void coder_run(const char *filename); + +#ifndef NDEBUG +/// Free the memory allocated for the coder and kill the worker threads. +extern void coder_free(void); +#endif diff --git a/src/xz/file_io.c b/src/xz/file_io.c index 871a099b61939..f135cf7cb6bd8 100644 --- a/src/xz/file_io.c +++ b/src/xz/file_io.c @@ -17,6 +17,7 @@ #ifdef TUKLIB_DOSLIKE # include <io.h> #else +# include <poll.h> static bool warn_fchown; #endif @@ -37,14 +38,30 @@ static bool warn_fchown; #endif +typedef enum { + IO_WAIT_MORE, // Reading or writing is possible. + IO_WAIT_ERROR, // Error or user_abort + IO_WAIT_TIMEOUT, // poll() timed out +} io_wait_ret; + + /// If true, try to create sparse files when decompressing. static bool try_sparse = true; #ifndef TUKLIB_DOSLIKE +/// File status flags of standard input. This is used by io_open_src() +/// and io_close_src(). +static int stdin_flags; +static bool restore_stdin_flags = false; + /// Original file status flags of standard output. This is used by /// io_open_dest() and io_close_dest() to save and restore the flags. static int stdout_flags; static bool restore_stdout_flags = false; + +/// Self-pipe used together with the user_abort variable to avoid +/// race conditions with signal handling. +static int user_abort_pipe[2]; #endif @@ -64,19 +81,43 @@ io_init(void) // If fchown() fails setting the owner, we warn about it only if // we are root. warn_fchown = geteuid() == 0; + + if (pipe(user_abort_pipe) + || fcntl(user_abort_pipe[0], F_SETFL, O_NONBLOCK) + == -1 + || fcntl(user_abort_pipe[1], F_SETFL, O_NONBLOCK) + == -1) + message_fatal(_("Error creating a pipe: %s"), + strerror(errno)); #endif #ifdef __DJGPP__ // Avoid doing useless things when statting files. // This isn't important but doesn't hurt. - _djstat_flags = _STAT_INODE | _STAT_EXEC_EXT - | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; + _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; #endif return; } +#ifndef TUKLIB_DOSLIKE +extern void +io_write_to_user_abort_pipe(void) +{ + // If the write() fails, it's probably due to the pipe being full. + // Failing in that case is fine. If the reason is something else, + // there's not much we can do since this is called in a signal + // handler. So ignore the errors and try to avoid warnings with + // GCC and glibc when _FORTIFY_SOURCE=2 is used. + uint8_t b = '\0'; + const int ret = write(user_abort_pipe[1], &b, 1); + (void)ret; + return; +} +#endif + + extern void io_no_sparse(void) { @@ -85,6 +126,63 @@ io_no_sparse(void) } +#ifndef TUKLIB_DOSLIKE +/// \brief Waits for input or output to become available or for a signal +/// +/// This uses the self-pipe trick to avoid a race condition that can occur +/// if a signal is caught after user_abort has been checked but before e.g. +/// read() has been called. In that situation read() could block unless +/// non-blocking I/O is used. With non-blocking I/O something like select() +/// or poll() is needed to avoid a busy-wait loop, and the same race condition +/// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in +/// POSIX) but neither is portable enough in 2013. The self-pipe trick is +/// old and very portable. +static io_wait_ret +io_wait(file_pair *pair, int timeout, bool is_reading) +{ + struct pollfd pfd[2]; + + if (is_reading) { + pfd[0].fd = pair->src_fd; + pfd[0].events = POLLIN; + } else { + pfd[0].fd = pair->dest_fd; + pfd[0].events = POLLOUT; + } + + pfd[1].fd = user_abort_pipe[0]; + pfd[1].events = POLLIN; + + while (true) { + const int ret = poll(pfd, 2, timeout); + + if (user_abort) + return IO_WAIT_ERROR; + + if (ret == -1) { + if (errno == EINTR || errno == EAGAIN) + continue; + + message_error(_("%s: poll() failed: %s"), + is_reading ? pair->src_name + : pair->dest_name, + strerror(errno)); + return IO_WAIT_ERROR; + } + + if (ret == 0) { + assert(opt_flush_timeout != 0); + flush_needed = true; + return IO_WAIT_TIMEOUT; + } + + if (pfd[0].revents != 0) + return IO_WAIT_MORE; + } +} +#endif + + /// \brief Unlink a file /// /// This tries to verify that the file being unlinked really is the file that @@ -294,6 +392,31 @@ io_open_src_real(file_pair *pair) pair->src_fd = STDIN_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDIN_FILENO, O_BINARY); +#else + // Enable O_NONBLOCK for stdin. + stdin_flags = fcntl(STDIN_FILENO, F_GETFL); + if (stdin_flags == -1) { + message_error(_("Error getting the file status flags " + "from standard input: %s"), + strerror(errno)); + return true; + } + + if ((stdin_flags & O_NONBLOCK) == 0) { + if (fcntl(STDIN_FILENO, F_SETFL, + stdin_flags | O_NONBLOCK) == -1) { + message_error(_("Error setting O_NONBLOCK " + "on standard input: %s"), + strerror(errno)); + return true; + } + + restore_stdin_flags = true; + } +#endif +#ifdef HAVE_POSIX_FADVISE + // It will fail if stdin is a pipe and that's fine. + (void)posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL); #endif return false; } @@ -311,13 +434,12 @@ io_open_src_real(file_pair *pair) int flags = O_RDONLY | O_BINARY | O_NOCTTY; #ifndef TUKLIB_DOSLIKE - // If we accept only regular files, we need to be careful to avoid - // problems with special files like devices and FIFOs. O_NONBLOCK - // prevents blocking when opening such files. When we want to accept - // special files, we must not use O_NONBLOCK, or otherwise we won't - // block waiting e.g. FIFOs to become readable. - if (reg_files_only) - flags |= O_NONBLOCK; + // Use non-blocking I/O: + // - It prevents blocking when opening FIFOs and some other + // special files, which is good if we want to accept only + // regular files. + // - It can help avoiding some race conditions with signal handling. + flags |= O_NONBLOCK; #endif #if defined(O_NOFOLLOW) @@ -345,30 +467,13 @@ io_open_src_real(file_pair *pair) (void)follow_symlinks; #endif - // Try to open the file. If we are accepting non-regular files, - // unblock the caught signals so that open() can be interrupted - // if it blocks e.g. due to a FIFO file. - if (!reg_files_only) - signals_unblock(); - - // Maybe this wouldn't need a loop, since all the signal handlers for - // which we don't use SA_RESTART set user_abort to true. But it - // doesn't hurt to have it just in case. - do { - pair->src_fd = open(pair->src_name, flags); - } while (pair->src_fd == -1 && errno == EINTR && !user_abort); - - if (!reg_files_only) - signals_block(); + // Try to open the file. Signals have been blocked so EINTR shouldn't + // be possible. + pair->src_fd = open(pair->src_name, flags); if (pair->src_fd == -1) { - // If we were interrupted, don't display any error message. - if (errno == EINTR) { - // All the signals that don't have SA_RESTART - // set user_abort. - assert(user_abort); - return true; - } + // Signals (that have a signal handler) have been blocked. + assert(errno != EINTR); #ifdef O_NOFOLLOW // Give an understandable error message if the reason @@ -427,26 +532,20 @@ io_open_src_real(file_pair *pair) return true; } -#ifndef TUKLIB_DOSLIKE - // Drop O_NONBLOCK, which is used only when we are accepting only - // regular files. After the open() call, we want things to block - // instead of giving EAGAIN. - if (reg_files_only) { - flags = fcntl(pair->src_fd, F_GETFL); - if (flags == -1) - goto error_msg; - - flags &= ~O_NONBLOCK; - - if (fcntl(pair->src_fd, F_SETFL, flags) == -1) - goto error_msg; - } -#endif - // Stat the source file. We need the result also when we copy // the permissions, and when unlinking. + // + // NOTE: Use stat() instead of fstat() with DJGPP, because + // then we have a better chance to get st_ino value that can + // be used in io_open_dest_real() to prevent overwriting the + // source file. +#ifdef __DJGPP__ + if (stat(pair->src_name, &pair->src_st)) + goto error_msg; +#else if (fstat(pair->src_fd, &pair->src_st)) goto error_msg; +#endif if (S_ISDIR(pair->src_st.st_mode)) { message_warning(_("%s: Is a directory, skipping"), @@ -492,6 +591,23 @@ io_open_src_real(file_pair *pair) goto error; } } + + // If it is something else than a regular file, wait until + // there is input available. This way reading from FIFOs + // will work when open() is used with O_NONBLOCK. + if (!S_ISREG(pair->src_st.st_mode)) { + signals_unblock(); + const io_wait_ret ret = io_wait(pair, -1, true); + signals_block(); + + if (ret != IO_WAIT_MORE) + goto error; + } +#endif + +#ifdef HAVE_POSIX_FADVISE + // It will fail with some special files like FIFOs but that is fine. + (void)posix_fadvise(pair->src_fd, 0, 0, POSIX_FADV_SEQUENTIAL); #endif return false; @@ -542,6 +658,19 @@ io_open_src(const char *src_name) static void io_close_src(file_pair *pair, bool success) { +#ifndef TUKLIB_DOSLIKE + if (restore_stdin_flags) { + assert(pair->src_fd == STDIN_FILENO); + + restore_stdin_flags = false; + + if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) + message_error(_("Error restoring the status flags " + "to standard input: %s"), + strerror(errno)); + } +#endif + if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { #ifdef TUKLIB_DOSLIKE (void)close(pair->src_fd); @@ -575,12 +704,58 @@ io_open_dest_real(file_pair *pair) pair->dest_fd = STDOUT_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDOUT_FILENO, O_BINARY); +#else + // Set O_NONBLOCK if it isn't already set. + // + // NOTE: O_APPEND may be unset later in this function + // and it relies on stdout_flags being set here. + stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); + if (stdout_flags == -1) { + message_error(_("Error getting the file status flags " + "from standard output: %s"), + strerror(errno)); + return true; + } + + if ((stdout_flags & O_NONBLOCK) == 0) { + if (fcntl(STDOUT_FILENO, F_SETFL, + stdout_flags | O_NONBLOCK) == -1) { + message_error(_("Error setting O_NONBLOCK " + "on standard output: %s"), + strerror(errno)); + return true; + } + + restore_stdout_flags = true; + } #endif } else { pair->dest_name = suffix_get_dest_name(pair->src_name); if (pair->dest_name == NULL) return true; +#ifdef __DJGPP__ + struct stat st; + if (stat(pair->dest_name, &st) == 0) { + // Check that it isn't a special file like "prn". + if (st.st_dev == -1) { + message_error("%s: Refusing to write to " + "a DOS special file", + pair->dest_name); + return true; + } + + // Check that we aren't overwriting the source file. + if (st.st_dev == pair->src_st.st_dev + && st.st_ino == pair->src_st.st_ino) { + message_error("%s: Output file is the same " + "as the input file", + pair->dest_name); + return true; + } + } +#endif + // If --force was used, unlink the target file first. if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { message_error(_("%s: Cannot remove: %s"), @@ -590,8 +765,11 @@ io_open_dest_real(file_pair *pair) } // Open the file. - const int flags = O_WRONLY | O_BINARY | O_NOCTTY + int flags = O_WRONLY | O_BINARY | O_NOCTTY | O_CREAT | O_EXCL; +#ifndef TUKLIB_DOSLIKE + flags |= O_NONBLOCK; +#endif const mode_t mode = S_IRUSR | S_IWUSR; pair->dest_fd = open(pair->dest_name, flags, mode); @@ -603,17 +781,19 @@ io_open_dest_real(file_pair *pair) } } - // If this really fails... well, we have a safe fallback. +#ifndef TUKLIB_DOSLIKE + // dest_st isn't used on DOS-like systems except as a dummy + // argument to io_unlink(), so don't fstat() on such systems. if (fstat(pair->dest_fd, &pair->dest_st)) { -#if defined(__VMS) + // If fstat() really fails, we have a safe fallback here. +# if defined(__VMS) pair->dest_st.st_ino[0] = 0; pair->dest_st.st_ino[1] = 0; pair->dest_st.st_ino[2] = 0; -#elif !defined(TUKLIB_DOSLIKE) +# else pair->dest_st.st_dev = 0; pair->dest_st.st_ino = 0; -#endif -#ifndef TUKLIB_DOSLIKE +# endif } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { // When writing to standard output, we need to be extra // careful: @@ -631,10 +811,6 @@ io_open_dest_real(file_pair *pair) if (!S_ISREG(pair->dest_st.st_mode)) return false; - stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); - if (stdout_flags == -1) - return false; - if (stdout_flags & O_APPEND) { // Creating a sparse file is not possible // when O_APPEND is active (it's used by @@ -653,14 +829,23 @@ io_open_dest_real(file_pair *pair) if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) return false; + // O_NONBLOCK was set earlier in this function + // so it must be kept here too. If this + // fcntl() call fails, we continue but won't + // try to create sparse output. The original + // flags will still be restored if needed (to + // unset O_NONBLOCK) when the file is finished. if (fcntl(STDOUT_FILENO, F_SETFL, - stdout_flags & ~O_APPEND) - == -1) + (stdout_flags | O_NONBLOCK) + & ~O_APPEND) == -1) return false; // Disabling O_APPEND succeeded. Mark // that the flags should be restored - // in io_close_dest(). + // in io_close_dest(). This quite likely was + // already set when enabling O_NONBLOCK but + // just in case O_NONBLOCK was already set, + // set this again here. restore_stdout_flags = true; } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) @@ -673,8 +858,8 @@ io_open_dest_real(file_pair *pair) } pair->dest_try_sparse = true; -#endif } +#endif return false; } @@ -790,6 +975,21 @@ io_close(file_pair *pair, bool success) } +extern void +io_fix_src_pos(file_pair *pair, size_t rewind_size) +{ + assert(rewind_size <= IO_BUFFER_SIZE); + + if (rewind_size > 0) { + // This doesn't need to work on unseekable file descriptors, + // so just ignore possible errors. + (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); + } + + return; +} + + extern size_t io_read(file_pair *pair, io_buf *buf_union, size_t size) { @@ -815,12 +1015,30 @@ io_read(file_pair *pair, io_buf *buf_union, size_t size) continue; } +#ifndef TUKLIB_DOSLIKE + if (errno == EAGAIN || errno == EWOULDBLOCK) { + const io_wait_ret ret = io_wait(pair, + mytime_get_flush_timeout(), + true); + switch (ret) { + case IO_WAIT_MORE: + continue; + + case IO_WAIT_ERROR: + return SIZE_MAX; + + case IO_WAIT_TIMEOUT: + return size - left; + + default: + message_bug(); + } + } +#endif + message_error(_("%s: Read error: %s"), pair->src_name, strerror(errno)); - // FIXME Is this needed? - pair->src_eof = true; - return SIZE_MAX; } @@ -885,6 +1103,15 @@ io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) continue; } +#ifndef TUKLIB_DOSLIKE + if (errno == EAGAIN || errno == EWOULDBLOCK) { + if (io_wait(pair, -1, false) == IO_WAIT_MORE) + continue; + + return true; + } +#endif + // Handle broken pipe specially. gzip and bzip2 // don't print anything on SIGPIPE. In addition, // gzip --quiet uses exit status 2 (warning) on diff --git a/src/xz/file_io.h b/src/xz/file_io.h index 967da868b0797..2de3379238d65 100644 --- a/src/xz/file_io.h +++ b/src/xz/file_io.h @@ -68,6 +68,14 @@ typedef struct { extern void io_init(void); +#ifndef TUKLIB_DOSLIKE +/// \brief Write a byte to user_abort_pipe[1] +/// +/// This is called from a signal handler. +extern void io_write_to_user_abort_pipe(void); +#endif + + /// \brief Disable creation of sparse files when decompressing extern void io_no_sparse(void); @@ -102,6 +110,19 @@ extern void io_close(file_pair *pair, bool success); extern size_t io_read(file_pair *pair, io_buf *buf, size_t size); +/// \brief Fix the position in src_fd +/// +/// This is used when --single-thream has been specified and decompression +/// is successful. If the input file descriptor supports seeking, this +/// function fixes the input position to point to the next byte after the +/// decompressed stream. +/// +/// \param pair File pair having the source file open for reading +/// \param rewind_size How many bytes of extra have been read i.e. +/// how much to seek backwards. +extern void io_fix_src_pos(file_pair *pair, size_t rewind_size); + + /// \brief Read from source file from given offset to a buffer /// /// This is remotely similar to standard pread(). This uses lseek() though, diff --git a/src/xz/hardware.c b/src/xz/hardware.c index a4733c27e1180..ff32f6d301484 100644 --- a/src/xz/hardware.c +++ b/src/xz/hardware.c @@ -11,12 +11,11 @@ /////////////////////////////////////////////////////////////////////////////// #include "private.h" -#include "tuklib_cpucores.h" -/// Maximum number of free *coder* threads. This can be set with +/// Maximum number of worker threads. This can be set with /// the --threads=NUM command line option. -static uint32_t threadlimit; +static uint32_t threads_max = 1; /// Memory usage limit for compression static uint64_t memlimit_compress; @@ -29,15 +28,23 @@ static uint64_t total_ram; extern void -hardware_threadlimit_set(uint32_t new_threadlimit) +hardware_threads_set(uint32_t n) { - if (new_threadlimit == 0) { - // The default is the number of available CPU cores. - threadlimit = tuklib_cpucores(); - if (threadlimit == 0) - threadlimit = 1; + if (n == 0) { + // Automatic number of threads was requested. + // If threading support was enabled at build time, + // use the number of available CPU cores. Otherwise + // use one thread since disabling threading support + // omits lzma_cputhreads() from liblzma. +#ifdef MYTHREAD_ENABLED + threads_max = lzma_cputhreads(); + if (threads_max == 0) + threads_max = 1; +#else + threads_max = 1; +#endif } else { - threadlimit = new_threadlimit; + threads_max = n; } return; @@ -45,9 +52,9 @@ hardware_threadlimit_set(uint32_t new_threadlimit) extern uint32_t -hardware_threadlimit_get(void) +hardware_threads_get(void) { - return threadlimit; + return threads_max; } @@ -139,6 +146,5 @@ hardware_init(void) // Set the defaults. hardware_memlimit_set(0, true, true, false); - hardware_threadlimit_set(0); return; } diff --git a/src/xz/hardware.h b/src/xz/hardware.h index ad526f260bc17..4fae61815656f 100644 --- a/src/xz/hardware.h +++ b/src/xz/hardware.h @@ -15,12 +15,11 @@ extern void hardware_init(void); -/// Set custom value for maximum number of coder threads. -extern void hardware_threadlimit_set(uint32_t threadlimit); +/// Set the maximum number of worker threads. +extern void hardware_threads_set(uint32_t threadlimit); -/// Get the maximum number of coder threads. Some additional helper threads -/// are allowed on top of this). -extern uint32_t hardware_threadlimit_get(void); +/// Get the maximum number of worker threads. +extern uint32_t hardware_threads_get(void); /// Set the memory usage limit. There are separate limits for compression diff --git a/src/xz/list.c b/src/xz/list.c index 0e73d519ea4a1..449c2bc4e02fd 100644 --- a/src/xz/list.c +++ b/src/xz/list.c @@ -29,9 +29,12 @@ typedef struct { /// Uncompressed Size fields bool all_have_sizes; + /// Oldest XZ Utils version that will decompress the file + uint32_t min_version; + } xz_file_info; -#define XZ_FILE_INFO_INIT { NULL, 0, 0, true } +#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } /// Information about a .xz Block @@ -104,8 +107,32 @@ static struct { uint64_t stream_padding; uint64_t memusage_max; uint32_t checks; + uint32_t min_version; bool all_have_sizes; -} totals = { 0, 0, 0, 0, 0, 0, 0, 0, true }; +} totals = { 0, 0, 0, 0, 0, 0, 0, 0, 0, true }; + + +/// Convert XZ Utils version number to a string. +static const char * +xz_ver_to_str(uint32_t ver) +{ + static char buf[32]; + + unsigned int major = ver / 10000000U; + ver -= major * 10000000U; + + unsigned int minor = ver / 10000U; + ver -= minor * 10000U; + + unsigned int patch = ver / 10U; + ver -= patch * 10U; + + const char *stability = ver == 0 ? "alpha" : ver == 1 ? "beta" : ""; + + snprintf(buf, sizeof(buf), "%u.%u.%u%s", + major, minor, patch, stability); + return buf; +} /// \brief Parse the Index(es) from the given .xz file @@ -478,6 +505,21 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter, if (xfi->memusage_max < bhi->memusage) xfi->memusage_max = bhi->memusage; + // Determine the minimum XZ Utils version that supports this Block. + // + // Currently the only thing that 5.0.0 doesn't support is empty + // LZMA2 Block. This decoder bug was fixed in 5.0.2. + { + size_t i = 0; + while (filters[i + 1].id != LZMA_VLI_UNKNOWN) + ++i; + + if (filters[i].id == LZMA_FILTER_LZMA2 + && iter->block.uncompressed_size == 0 + && xfi->min_version < 50000022U) + xfi->min_version = 50000022U; + } + // Convert the filter chain to human readable form. message_filters_to_str(bhi->filter_chain, filters, false); @@ -856,6 +898,8 @@ print_info_adv(xz_file_info *xfi, file_pair *pair) round_up_to_mib(xfi->memusage_max), 0)); printf(_(" Sizes in headers: %s\n"), xfi->all_have_sizes ? _("Yes") : _("No")); + printf(_(" Minimum XZ Utils version: %s\n"), + xz_ver_to_str(xfi->min_version)); } return false; @@ -938,9 +982,10 @@ print_info_robot(xz_file_info *xfi, file_pair *pair) } if (message_verbosity_get() >= V_DEBUG) - printf("summary\t%" PRIu64 "\t%s\n", + printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n", xfi->memusage_max, - xfi->all_have_sizes ? "yes" : "no"); + xfi->all_have_sizes ? "yes" : "no", + xfi->min_version); return false; } @@ -961,6 +1006,9 @@ update_totals(const xz_file_info *xfi) if (totals.memusage_max < xfi->memusage_max) totals.memusage_max = xfi->memusage_max; + if (totals.min_version < xfi->min_version) + totals.min_version = xfi->min_version; + totals.all_have_sizes &= xfi->all_have_sizes; return; @@ -1025,6 +1073,8 @@ print_totals_adv(void) round_up_to_mib(totals.memusage_max), 0)); printf(_(" Sizes in headers: %s\n"), totals.all_have_sizes ? _("Yes") : _("No")); + printf(_(" Minimum XZ Utils version: %s\n"), + xz_ver_to_str(totals.min_version)); } return; @@ -1050,9 +1100,10 @@ print_totals_robot(void) totals.files); if (message_verbosity_get() >= V_DEBUG) - printf("\t%" PRIu64 "\t%s", + printf("\t%" PRIu64 "\t%s\t%" PRIu32, totals.memusage_max, - totals.all_have_sizes ? "yes" : "no"); + totals.all_have_sizes ? "yes" : "no", + totals.min_version); putchar('\n'); diff --git a/src/xz/main.c b/src/xz/main.c index 8196c6e7e7746..a8f0683a47bd4 100644 --- a/src/xz/main.c +++ b/src/xz/main.c @@ -275,6 +275,11 @@ main(int argc, char **argv) list_totals(); } +#ifndef NDEBUG + coder_free(); + args_free(); +#endif + // If we have got a signal, raise it to kill the program instead // of calling tuklib_exit(). signals_exit(); diff --git a/src/xz/message.c b/src/xz/message.c index 0a7a522f7afa7..8a31b00ed89c5 100644 --- a/src/xz/message.c +++ b/src/xz/message.c @@ -12,10 +12,6 @@ #include "private.h" -#ifdef HAVE_SYS_TIME_H -# include <sys/time.h> -#endif - #include <stdarg.h> @@ -64,9 +60,6 @@ static lzma_stream *progress_strm; /// and estimate remaining time. static uint64_t expected_in_size; -/// Time when we started processing the file -static uint64_t start_time; - // Use alarm() and SIGALRM when they are supported. This has two minor // advantages over the alternative of polling gettimeofday(): @@ -112,16 +105,6 @@ static uint64_t progress_next_update; #endif -/// Get the current time as microseconds since epoch -static uint64_t -my_time(void) -{ - struct timeval tv; - gettimeofday(&tv, NULL); - return (uint64_t)(tv.tv_sec) * UINT64_C(1000000) + tv.tv_usec; -} - - extern void message_init(void) { @@ -264,11 +247,10 @@ message_progress_start(lzma_stream *strm, uint64_t in_size) // It is needed to find out the position in the stream. progress_strm = strm; - // Store the processing start time of the file and its expected size. - // If we aren't printing any statistics, then these are unused. But - // since it is possible that the user sends us a signal to show - // statistics, we need to have these available anyway. - start_time = my_time(); + // Store the expected size of the file. If we aren't printing any + // statistics, then is will be unused. But since it is possible + // that the user sends us a signal to show statistics, we need + // to have it available anyway. expected_in_size = in_size; // Indicate that progress info may need to be printed before @@ -290,7 +272,7 @@ message_progress_start(lzma_stream *strm, uint64_t in_size) alarm(1); #else progress_needs_updating = true; - progress_next_update = 1000000; + progress_next_update = 1000; #endif } @@ -364,7 +346,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) { // Don't print the speed immediately, since the early values look // somewhat random. - if (elapsed < 3000000) + if (elapsed < 3000) return ""; static const char unit[][8] = { @@ -377,7 +359,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) // Calculate the speed as KiB/s. double speed = (double)(uncompressed_pos) - / ((double)(elapsed) * (1024.0 / 1e6)); + / ((double)(elapsed) * (1024.0 / 1000.0)); // Adjust the unit of the speed if needed. while (speed > 999.0) { @@ -402,12 +384,12 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) /// Make a string indicating elapsed or remaining time. The format is either /// M:SS or H:MM:SS depending on if the time is an hour or more. static const char * -progress_time(uint64_t useconds) +progress_time(uint64_t mseconds) { // 9999 hours = 416 days static char buf[sizeof("9999:59:59")]; - uint32_t seconds = useconds / 1000000; + uint32_t seconds = mseconds / 1000; // Don't show anything if the time is zero or ridiculously big. if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59) @@ -445,14 +427,14 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed) // - Only a few seconds has passed since we started (de)compressing, // so estimate would be too inaccurate. if (expected_in_size == 0 || in_pos > expected_in_size - || in_pos < (UINT64_C(1) << 19) || elapsed < 8000000) + || in_pos < (UINT64_C(1) << 19) || elapsed < 8000) return ""; // Calculate the estimate. Don't give an estimate of zero seconds, // since it is possible that all the input has been already passed // to the library, but there is still quite a bit of output pending. uint32_t remaining = (double)(expected_in_size - in_pos) - * ((double)(elapsed) / 1e6) / (double)(in_pos); + * ((double)(elapsed) / 1000.0) / (double)(in_pos); if (remaining < 1) remaining = 1; @@ -518,28 +500,26 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed) } -/// Calculate the elapsed time as microseconds. -static uint64_t -progress_elapsed(void) -{ - return my_time() - start_time; -} - - -/// Get information about position in the stream. This is currently simple, -/// but it will become more complicated once we have multithreading support. +/// Get how much uncompressed and compressed data has been processed. static void progress_pos(uint64_t *in_pos, uint64_t *compressed_pos, uint64_t *uncompressed_pos) { - *in_pos = progress_strm->total_in; + uint64_t out_pos; + lzma_get_progress(progress_strm, in_pos, &out_pos); + + // It cannot have processed more input than it has been given. + assert(*in_pos <= progress_strm->total_in); + + // It cannot have produced more output than it claims to have ready. + assert(out_pos >= progress_strm->total_out); if (opt_mode == MODE_COMPRESS) { - *compressed_pos = progress_strm->total_out; - *uncompressed_pos = progress_strm->total_in; + *compressed_pos = out_pos; + *uncompressed_pos = *in_pos; } else { - *compressed_pos = progress_strm->total_in; - *uncompressed_pos = progress_strm->total_out; + *compressed_pos = *in_pos; + *uncompressed_pos = out_pos; } return; @@ -553,13 +533,13 @@ message_progress_update(void) return; // Calculate how long we have been processing this file. - const uint64_t elapsed = progress_elapsed(); + const uint64_t elapsed = mytime_get_elapsed(); #ifndef SIGALRM if (progress_next_update > elapsed) return; - progress_next_update = elapsed + 1000000; + progress_next_update = elapsed + 1000; #endif // Get our current position in the stream. @@ -652,7 +632,7 @@ progress_flush(bool finished) progress_active = false; - const uint64_t elapsed = progress_elapsed(); + const uint64_t elapsed = mytime_get_elapsed(); signals_block(); @@ -1122,7 +1102,10 @@ message_help(bool long_help) " -f, --force force overwrite of output file and (de)compress links\n" " -c, --stdout write to standard output and don't delete input files")); - if (long_help) + if (long_help) { + puts(_( +" --single-stream decompress only the first stream, and silently\n" +" ignore possible remaining input data")); puts(_( " --no-sparse do not create sparse files when decompressing\n" " -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" @@ -1130,6 +1113,7 @@ message_help(bool long_help) " omitted, filenames are read from the standard input;\n" " filenames must be terminated with the newline character\n" " --files0[=FILE] like --files but use the null character as terminator")); + } if (long_help) { puts(_("\n Basic file format and compression options:\n")); @@ -1138,6 +1122,8 @@ message_help(bool long_help) " `auto' (default), `xz', `lzma', and `raw'\n" " -C, --check=CHECK integrity check type: `none' (use with caution),\n" " `crc32', `crc64' (default), or `sha256'")); + puts(_( +" --ignore-check don't verify the integrity check when decompressing")); } puts(_( @@ -1148,7 +1134,25 @@ message_help(bool long_help) " -e, --extreme try to improve compression ratio by using more CPU time;\n" " does not affect decompressor memory requirements")); + puts(_( +" -T, --threads=NUM use at most NUM threads; the default is 1; set to 0\n" +" to use as many threads as there are processor cores")); + if (long_help) { + puts(_( +" --block-size=SIZE\n" +" start a new .xz block after every SIZE bytes of input;\n" +" use this to set the block size for threaded compression")); + puts(_( +" --block-list=SIZES\n" +" start a new .xz block after the given comma-separated\n" +" intervals of uncompressed data")); + puts(_( +" --flush-timeout=TIMEOUT\n" +" when compressing, if more than TIMEOUT milliseconds has\n" +" passed since the previous flush and reading more input\n" +" would block, all pending data is flushed out" + )); puts(_( // xgettext:no-c-format " --memlimit-compress=LIMIT\n" " --memlimit-decompress=LIMIT\n" @@ -1244,5 +1248,10 @@ message_help(bool long_help) PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); +#if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE + puts(_( +"THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE.")); +#endif + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); } diff --git a/src/xz/mytime.c b/src/xz/mytime.c new file mode 100644 index 0000000000000..4be184fd19daf --- /dev/null +++ b/src/xz/mytime.c @@ -0,0 +1,89 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mytime.c +/// \brief Time handling functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#if !(defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC) +# include <sys/time.h> +#endif + +uint64_t opt_flush_timeout = 0; +bool flush_needed; + +static uint64_t start_time; +static uint64_t next_flush; + + +/// \brief Get the current time as milliseconds +/// +/// It's relative to some point but not necessarily to the UNIX Epoch. +static uint64_t +mytime_now(void) +{ + // NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1. +#if defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC + // If CLOCK_MONOTONIC was available at compile time but for some + // reason isn't at runtime, fallback to CLOCK_REALTIME which + // according to POSIX is mandatory for all implementations. + static clockid_t clk_id = CLOCK_MONOTONIC; + struct timespec tv; + while (clock_gettime(clk_id, &tv)) + clk_id = CLOCK_REALTIME; + + return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_nsec / 1000000; +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_usec / 1000; +#endif +} + + +extern void +mytime_set_start_time(void) +{ + start_time = mytime_now(); + next_flush = start_time + opt_flush_timeout; + flush_needed = false; + return; +} + + +extern uint64_t +mytime_get_elapsed(void) +{ + return mytime_now() - start_time; +} + + +extern void +mytime_set_flush_time(void) +{ + next_flush = mytime_now() + opt_flush_timeout; + flush_needed = false; + return; +} + + +extern int +mytime_get_flush_timeout(void) +{ + if (opt_flush_timeout == 0 || opt_mode != MODE_COMPRESS) + return -1; + + const uint64_t now = mytime_now(); + if (now >= next_flush) + return 0; + + const uint64_t remaining = next_flush - now; + return remaining > INT_MAX ? INT_MAX : (int)remaining; +} diff --git a/src/xz/mytime.h b/src/xz/mytime.h new file mode 100644 index 0000000000000..ea291eed81c7a --- /dev/null +++ b/src/xz/mytime.h @@ -0,0 +1,47 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mytime.h +/// \brief Time handling functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + + +/// \brief Number of milliseconds to between LZMA_SYNC_FLUSHes +/// +/// If 0, timed flushing is disabled. Otherwise if no more input is available +/// and not at the end of the file and at least opt_flush_timeout milliseconds +/// has elapsed since the start of compression or the previous flushing +/// (LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH), set LZMA_SYNC_FLUSH to flush +/// the pending data. +extern uint64_t opt_flush_timeout; + + +/// \brief True when flushing is needed due to expired timeout +extern bool flush_needed; + + +/// \brief Store the time when (de)compression was started +/// +/// The start time is also stored as the time of the first flush. +extern void mytime_set_start_time(void); + + +/// \brief Get the number of milliseconds since the operation started +extern uint64_t mytime_get_elapsed(void); + + +/// \brief Store the time of when compressor was flushed +extern void mytime_set_flush_time(void); + + +/// \brief Get the number of milliseconds until the next flush +/// +/// This returns -1 if no timed flushing is used. +/// +/// The return value is inteded for use with poll(). +extern int mytime_get_flush_timeout(void); diff --git a/src/xz/options.c b/src/xz/options.c index f21a0ba510651..f9c7ab9e8535b 100644 --- a/src/xz/options.c +++ b/src/xz/options.c @@ -31,8 +31,8 @@ typedef struct { } option_map; -/// Parses option=value pairs that are separated with colons, semicolons, -/// or commas: opt=val:opt=val;opt=val,opt=val +/// Parses option=value pairs that are separated with commas: +/// opt=val,opt=val,opt=val /// /// Each option is a string, that is converted to an integer using the /// index where the option string is in the array. diff --git a/src/xz/private.h b/src/xz/private.h index 6b01e51354e1c..4acfa8dc45583 100644 --- a/src/xz/private.h +++ b/src/xz/private.h @@ -12,6 +12,7 @@ #include "sysdefs.h" #include "mythread.h" + #include "lzma.h" #include <sys/types.h> @@ -45,6 +46,7 @@ #endif #include "main.h" +#include "mytime.h" #include "coder.h" #include "message.h" #include "args.h" diff --git a/src/xz/signals.c b/src/xz/signals.c index 322811f472b5a..5387c424e1a5a 100644 --- a/src/xz/signals.c +++ b/src/xz/signals.c @@ -41,6 +41,11 @@ signal_handler(int sig) { exit_signal = sig; user_abort = true; + +#ifndef TUKLIB_DOSLIKE + io_write_to_user_abort_pipe(); +#endif + return; } diff --git a/src/xz/suffix.c b/src/xz/suffix.c index 8e331a7022a3c..9d4fcd139b8f7 100644 --- a/src/xz/suffix.c +++ b/src/xz/suffix.c @@ -12,6 +12,10 @@ #include "private.h" +#ifdef __DJGPP__ +# include <fcntl.h> +#endif + // For case-insensitive filename suffix on case-insensitive systems #if defined(TUKLIB_DOSLIKE) || defined(__VMS) # define strcmp strcasecmp @@ -45,6 +49,31 @@ has_dir_sep(const char *str) } +#ifdef __DJGPP__ +/// \brief Test for special suffix used for 8.3 short filenames (SFN) +/// +/// \return If str matches *.?- or *.??-, true is returned. Otherwise +/// false is returned. +static bool +has_sfn_suffix(const char *str, size_t len) +{ + if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.' + && !is_dir_sep(str[len - 2])) { + // *.?- + if (str[len - 3] == '.') + return !is_dir_sep(str[len - 4]); + + // *.??- + if (len >= 5 && !is_dir_sep(str[len - 3]) + && str[len - 4] == '.') + return !is_dir_sep(str[len - 5]); + } + + return false; +} +#endif + + /// \brief Checks if src_name has given compressed_suffix /// /// \param suffix Filename suffix to look for @@ -87,6 +116,9 @@ uncompressed_name(const char *src_name, const size_t src_len) { ".xz", "" }, { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare. { ".lzma", "" }, +#ifdef __DJGPP__ + { ".lzm", "" }, +#endif { ".tlz", ".tar" }, // { ".gz", "" }, // { ".tgz", ".tar" }, @@ -112,6 +144,17 @@ uncompressed_name(const char *src_name, const size_t src_len) break; } } + +#ifdef __DJGPP__ + // Support also *.?- -> *.? and *.??- -> *.?? on DOS. + // This is done also when long filenames are available + // to keep it easy to decompress files created when + // long filename support wasn't available. + if (new_len == 0 && has_sfn_suffix(src_name, src_len)) { + new_suffix = ""; + new_len = src_len - 1; + } +#endif } if (new_len == 0 && custom_suffix != NULL) @@ -134,21 +177,35 @@ uncompressed_name(const char *src_name, const size_t src_len) } +/// This message is needed in multiple places in compressed_name(), +/// so the message has been put into its own function. +static void +msg_suffix(const char *src_name, const char *suffix) +{ + message_warning(_("%s: File already has `%s' suffix, skipping"), + src_name, suffix); + return; +} + + /// \brief Appends suffix to src_name /// /// In contrast to uncompressed_name(), we check only suffixes that are valid /// for the specified file format. static char * -compressed_name(const char *src_name, const size_t src_len) +compressed_name(const char *src_name, size_t src_len) { // The order of these must match the order in args.h. - static const char *const all_suffixes[][3] = { + static const char *const all_suffixes[][4] = { { ".xz", ".txz", NULL }, { ".lzma", +#ifdef __DJGPP__ + ".lzm", +#endif ".tlz", NULL /* @@ -170,20 +227,27 @@ compressed_name(const char *src_name, const size_t src_len) const size_t format = opt_format - 1; const char *const *suffixes = all_suffixes[format]; + // Look for known filename suffixes and refuse to compress them. for (size_t i = 0; suffixes[i] != NULL; ++i) { if (test_suffix(suffixes[i], src_name, src_len) != 0) { - message_warning(_("%s: File already has `%s' " - "suffix, skipping"), src_name, - suffixes[i]); + msg_suffix(src_name, suffixes[i]); return NULL; } } +#ifdef __DJGPP__ + // Recognize also the special suffix that is used when long + // filename (LFN) support isn't available. This suffix is + // recognized on LFN systems too. + if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) { + msg_suffix(src_name, "-"); + return NULL; + } +#endif + if (custom_suffix != NULL) { if (test_suffix(custom_suffix, src_name, src_len) != 0) { - message_warning(_("%s: File already has `%s' " - "suffix, skipping"), src_name, - custom_suffix); + msg_suffix(src_name, custom_suffix); return NULL; } } @@ -199,7 +263,101 @@ compressed_name(const char *src_name, const size_t src_len) const char *suffix = custom_suffix != NULL ? custom_suffix : suffixes[0]; - const size_t suffix_len = strlen(suffix); + size_t suffix_len = strlen(suffix); + +#ifdef __DJGPP__ + if (!_use_lfn(src_name)) { + // Long filename (LFN) support isn't available and we are + // limited to 8.3 short filenames (SFN). + // + // Look for suffix separator from the filename, and make sure + // that it is in the filename, not in a directory name. + const char *sufsep = strrchr(src_name, '.'); + if (sufsep == NULL || sufsep[1] == '\0' + || has_dir_sep(sufsep)) { + // src_name has no filename extension. + // + // Examples: + // xz foo -> foo.xz + // xz -F lzma foo -> foo.lzm + // xz -S x foo -> foox + // xz -S x foo. -> foo.x + // xz -S x.y foo -> foox.y + // xz -S .x foo -> foo.x + // xz -S .x foo. -> foo.x + // + // Avoid double dots: + if (sufsep != NULL && sufsep[1] == '\0' + && suffix[0] == '.') + --src_len; + + } else if (custom_suffix == NULL + && strcasecmp(sufsep, ".tar") == 0) { + // ".tar" is handled specially. + // + // Examples: + // xz foo.tar -> foo.txz + // xz -F lzma foo.tar -> foo.tlz + static const char *const tar_suffixes[] = { + ".txz", + ".tlz", + // ".tgz", + }; + suffix = tar_suffixes[format]; + suffix_len = 4; + src_len -= 4; + + } else { + if (custom_suffix == NULL && opt_format == FORMAT_XZ) { + // Instead of the .xz suffix, use a single + // character at the end of the filename + // extension. This is to minimize name + // conflicts when compressing multiple files + // with the same basename. E.g. foo.txt and + // foo.exe become foo.tx- and foo.ex-. Dash + // is rare as the last character of the + // filename extension, so it seems to be + // quite safe choice and it stands out better + // in directory listings than e.g. x. For + // comparison, gzip uses z. + suffix = "-"; + suffix_len = 1; + } + + if (suffix[0] == '.') { + // The first character of the suffix is a dot. + // Throw away the original filename extension + // and replace it with the new suffix. + // + // Examples: + // xz -F lzma foo.txt -> foo.lzm + // xz -S .x foo.txt -> foo.x + src_len = sufsep - src_name; + + } else { + // The first character of the suffix is not + // a dot. Preserve the first 0-2 characters + // of the original filename extension. + // + // Examples: + // xz foo.txt -> foo.tx- + // xz -S x foo.c -> foo.cx + // xz -S ab foo.c -> foo.cab + // xz -S ab foo.txt -> foo.tab + // xz -S abc foo.txt -> foo.abc + // + // Truncate the suffix to three chars: + if (suffix_len > 3) + suffix_len = 3; + + // If needed, overwrite 1-3 characters. + if (strlen(sufsep) > 4 - suffix_len) + src_len = sufsep - src_name + + 4 - suffix_len; + } + } + } +#endif char *dest_name = xmalloc(src_len + suffix_len + 1); diff --git a/src/xz/xz.1 b/src/xz/xz.1 index 363b90cb4e551..75aead3d04393 100644 --- a/src/xz/xz.1 +++ b/src/xz/xz.1 @@ -5,7 +5,7 @@ .\" This file has been put into the public domain. .\" You can do whatever you want with this file. .\" -.TH XZ 1 "2013-06-21" "Tukaani" "XZ Utils" +.TH XZ 1 "2014-12-16" "Tukaani" "XZ Utils" . .SH NAME xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files @@ -435,6 +435,29 @@ standard output instead of a file. This implies .BR \-\-keep . .TP +.B \-\-single\-stream +Decompress only the first +.B .xz +stream, and +silently ignore possible remaining input data following the stream. +Normally such trailing garbage makes +.B xz +display an error. +.IP "" +.B xz +never decompresses more than one stream from +.B .lzma +files or raw streams, but this option still makes +.B xz +ignore the possible trailing data after the +.B .lzma +file or raw stream. +.IP "" +This option has no effect if the operation mode is not +.B \-\-decompress +or +.BR \-\-test . +.TP .B \-\-no\-sparse Disable creation of sparse files. By default, if decompressing into a regular file, @@ -586,6 +609,25 @@ Integrity of the headers is always verified with CRC32. It is not possible to change or disable it. .TP +.B \-\-ignore\-check +Don't verify the integrity check of the compressed data when decompressing. +The CRC32 values in the +.B .xz +headers will still be verified normally. +.IP "" +.B "Do not use this option unless you know what you are doing." +Possible reasons to use this option: +.RS +.IP \(bu 3 +Trying to recover data from a corrupt .xz file. +.IP \(bu 3 +Speeding up decompression. +This matters mostly with SHA-256 or +with files that have compressed extremely well. +It's recommended to not use this option for this purpose +unless the file integrity is verified externally in some other way. +.RE +.TP .BR \-0 " ... " \-9 Select a compression preset level. The default is @@ -778,6 +820,124 @@ These are provided only for backwards compatibility with LZMA Utils. Avoid using these options. .TP +.BI \-\-block\-size= size +When compressing to the +.B .xz +format, split the input data into blocks of +.I size +bytes. +The blocks are compressed independently from each other, +which helps with multi-threading and +makes limited random-access decompression possible. +This option is typically used to override the default +block size in multi-threaded mode, +but this option can be used in single-threaded mode too. +.IP "" +In multi-threaded mode about three times +.I size +bytes will be allocated in each thread for buffering input and output. +The default +.I size +is three times the LZMA2 dictionary size or 1 MiB, +whichever is more. +Typically a good value is 2\-4 times +the size of the LZMA2 dictionary or at least 1 MiB. +Using +.I size +less than the LZMA2 dictionary size is waste of RAM +because then the LZMA2 dictionary buffer will never get fully used. +The sizes of the blocks are stored in the block headers, +which a future version of +.B xz +will use for multi-threaded decompression. +.IP "" +In single-threaded mode no block splitting is done by default. +Setting this option doesn't affect memory usage. +No size information is stored in block headers, +thus files created in single-threaded mode +won't be identical to files created in multi-threaded mode. +The lack of size information also means that a future version of +.B xz +won't be able decompress the files in multi-threaded mode. +.TP +.BI \-\-block\-list= sizes +When compressing to the +.B .xz +format, start a new block after +the given intervals of uncompressed data. +.IP "" +The uncompressed +.I sizes +of the blocks are specified as a comma-separated list. +Omitting a size (two or more consecutive commas) is a shorthand +to use the size of the previous block. +.IP "" +If the input file is bigger than the sum of +.IR sizes , +the last value in +.I sizes +is repeated until the end of the file. +A special value of +.B 0 +may be used as the last value to indicate that +the rest of the file should be encoded as a single block. +.IP "" +If one specifies +.I sizes +that exceed the encoder's block size +(either the default value in threaded mode or +the value specified with \fB\-\-block\-size=\fIsize\fR), +the encoder will create additional blocks while +keeping the boundaries specified in +.IR sizes . +For example, if one specifies +.B \-\-block\-size=10MiB +.B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB +and the input file is 80 MiB, +one will get 11 blocks: +5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB. +.IP "" +In multi-threaded mode the sizes of the blocks +are stored in the block headers. +This isn't done in single-threaded mode, +so the encoded output won't be +identical to that of the multi-threaded mode. +.TP +.BI \-\-flush\-timeout= timeout +When compressing, if more than +.I timeout +milliseconds (a positive integer) has passed since the previous flush and +reading more input would block, +all the pending input data is flushed from the encoder and +made available in the output stream. +This can be useful if +.B xz +is used to compress data that is streamed over a network. +Small +.I timeout +values make the data available at the receiving end +with a small delay, but large +.I timeout +values give better compression ratio. +.IP "" +This feature is disabled by default. +If this option is specified more than once, the last one takes effect. +The special +.I timeout +value of +.B 0 +can be used to explicitly disable this feature. +.IP "" +This feature is not available on non-POSIX systems. +.IP "" +.\" FIXME +.B "This feature is still experimental." +Currently +.B xz +is unsuitable for decompressing the stream in real time due to how +.B xz +does buffering. +.TP .BI \-\-memlimit\-compress= limit Set a memory usage limit for compression. If this option is specified multiple times, @@ -876,24 +1036,25 @@ Automatic adjusting is always disabled when creating raw streams .TP \fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads Specify the number of worker threads to use. +Setting +.I threads +to a special value +.B 0 +makes +.B xz +use as many threads as there are CPU cores on the system. The actual number of threads can be less than .I threads +if the input file is not big enough +for threading with the given settings or if using more threads would exceed the memory usage limit. .IP "" -.B "Multithreaded compression and decompression are not" -.B "implemented yet, so this option has no effect for now." -.IP "" -.B "As of writing (2010-09-27), it hasn't been decided" -.B "if threads will be used by default on multicore systems" -.B "once support for threading has been implemented." -.B "Comments are welcome." -The complicating factor is that using many threads -will increase the memory usage dramatically. -Note that if multithreading will be the default, -it will probably be done so that single-threaded and -multithreaded modes produce the same output, -so compression ratio won't be significantly affected -if threading will be enabled by default. +Currently the only threading method is to split the input into +blocks and compress them independently from each other. +The default block size depends on the compression level and +can be overriden with the +.BI \-\-block\-size= size +option. . .SS "Custom compressor filter chains" A custom filter chain allows specifying @@ -1863,6 +2024,14 @@ or .B no indicating if all block headers have both compressed size and uncompressed size stored in them +.PP +.I Since +.B xz +.I 5.1.2alpha: +.IP 4. 4 +Minimum +.B xz +version required to decompress the file .RE .PD .PP @@ -1913,6 +2082,14 @@ or .B no indicating if all block headers have both compressed size and uncompressed size stored in them +.PP +.I Since +.B xz +.I 5.1.2alpha: +.IP 12. 4 +Minimum +.B xz +version required to decompress the file .RE .PD .PP @@ -2173,7 +2350,9 @@ If there is data left after the first .B .lzma stream, .B xz -considers the file to be corrupt. +considers the file to be corrupt unless +.B \-\-single\-stream +was used. This may break obscure scripts which have assumed that trailing garbage is ignored. . |