summaryrefslogtreecommitdiff
path: root/src/xz
diff options
context:
space:
mode:
Diffstat (limited to 'src/xz')
-rw-r--r--src/xz/args.c110
-rw-r--r--src/xz/args.h2
-rw-r--r--src/xz/coder.c432
-rw-r--r--src/xz/coder.h15
-rw-r--r--src/xz/file_io.c355
-rw-r--r--src/xz/file_io.h21
-rw-r--r--src/xz/hardware.c32
-rw-r--r--src/xz/hardware.h9
-rw-r--r--src/xz/list.c63
-rw-r--r--src/xz/main.c5
-rw-r--r--src/xz/message.c105
-rw-r--r--src/xz/mytime.c89
-rw-r--r--src/xz/mytime.h47
-rw-r--r--src/xz/options.c4
-rw-r--r--src/xz/private.h2
-rw-r--r--src/xz/signals.c5
-rw-r--r--src/xz/suffix.c176
-rw-r--r--src/xz/xz.1211
18 files changed, 1425 insertions, 258 deletions
diff --git a/src/xz/args.c b/src/xz/args.c
index 34761d45d67e2..041c80073e6de 100644
--- a/src/xz/args.c
+++ b/src/xz/args.c
@@ -22,6 +22,7 @@ bool opt_stdout = false;
bool opt_force = false;
bool opt_keep_original = false;
bool opt_robot = false;
+bool opt_ignore_check = false;
// We don't modify or free() this, but we need to assign it in some
// non-const pointers.
@@ -55,6 +56,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str,
static void
+parse_block_list(char *str)
+{
+ // It must be non-empty and not begin with a comma.
+ if (str[0] == '\0' || str[0] == ',')
+ message_fatal(_("%s: Invalid argument to --block-list"), str);
+
+ // Count the number of comma-separated strings.
+ size_t count = 1;
+ for (size_t i = 0; str[i] != '\0'; ++i)
+ if (str[i] == ',')
+ ++count;
+
+ // Prevent an unlikely integer overflow.
+ if (count > SIZE_MAX / sizeof(uint64_t) - 1)
+ message_fatal(_("%s: Too many arguments to --block-list"),
+ str);
+
+ // Allocate memory to hold all the sizes specified.
+ // If --block-list was specified already, its value is forgotten.
+ free(opt_block_list);
+ opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
+
+ for (size_t i = 0; i < count; ++i) {
+ // Locate the next comma and replace it with \0.
+ char *p = strchr(str, ',');
+ if (p != NULL)
+ *p = '\0';
+
+ if (str[0] == '\0') {
+ // There is no string, that is, a comma follows
+ // another comma. Use the previous value.
+ //
+ // NOTE: We checked earler that the first char
+ // of the whole list cannot be a comma.
+ assert(i > 0);
+ opt_block_list[i] = opt_block_list[i - 1];
+ } else {
+ opt_block_list[i] = str_to_uint64("block-list", str,
+ 0, UINT64_MAX);
+
+ // Zero indicates no more new Blocks.
+ if (opt_block_list[i] == 0) {
+ if (i + 1 != count)
+ message_fatal(_("0 can only be used "
+ "as the last element "
+ "in --block-list"));
+
+ opt_block_list[i] = UINT64_MAX;
+ }
+ }
+
+ str = p + 1;
+ }
+
+ // Terminate the array.
+ opt_block_list[count] = 0;
+ return;
+}
+
+
+static void
parse_real(args_info *args, int argc, char **argv)
{
enum {
@@ -68,14 +130,19 @@ parse_real(args_info *args, int argc, char **argv)
OPT_LZMA1,
OPT_LZMA2,
+ OPT_SINGLE_STREAM,
OPT_NO_SPARSE,
OPT_FILES,
OPT_FILES0,
+ OPT_BLOCK_SIZE,
+ OPT_BLOCK_LIST,
OPT_MEM_COMPRESS,
OPT_MEM_DECOMPRESS,
OPT_NO_ADJUST,
OPT_INFO_MEMORY,
OPT_ROBOT,
+ OPT_FLUSH_TIMEOUT,
+ OPT_IGNORE_CHECK,
};
static const char short_opts[]
@@ -94,6 +161,7 @@ parse_real(args_info *args, int argc, char **argv)
{ "force", no_argument, NULL, 'f' },
{ "stdout", no_argument, NULL, 'c' },
{ "to-stdout", no_argument, NULL, 'c' },
+ { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM },
{ "no-sparse", no_argument, NULL, OPT_NO_SPARSE },
{ "suffix", required_argument, NULL, 'S' },
// { "recursive", no_argument, NULL, 'r' }, // TODO
@@ -103,12 +171,16 @@ parse_real(args_info *args, int argc, char **argv)
// Basic compression settings
{ "format", required_argument, NULL, 'F' },
{ "check", required_argument, NULL, 'C' },
+ { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK },
+ { "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
+ { "block-list", required_argument, NULL, OPT_BLOCK_LIST },
{ "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
{ "memlimit", required_argument, NULL, 'M' },
{ "memory", required_argument, NULL, 'M' }, // Old alias
{ "no-adjust", no_argument, NULL, OPT_NO_ADJUST },
{ "threads", required_argument, NULL, 'T' },
+ { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
{ "extreme", no_argument, NULL, 'e' },
{ "fast", no_argument, NULL, '0' },
@@ -175,8 +247,9 @@ parse_real(args_info *args, int argc, char **argv)
break;
case 'T':
- hardware_threadlimit_set(str_to_uint64(
- "threads", optarg, 0, UINT32_MAX));
+ // The max is from src/liblzma/common/common.h.
+ hardware_threads_set(str_to_uint64("threads",
+ optarg, 0, 16384));
break;
// --version
@@ -368,6 +441,24 @@ parse_real(args_info *args, int argc, char **argv)
break;
}
+ case OPT_IGNORE_CHECK:
+ opt_ignore_check = true;
+ break;
+
+ case OPT_BLOCK_SIZE:
+ opt_block_size = str_to_uint64("block-size", optarg,
+ 0, LZMA_VLI_MAX);
+ break;
+
+ case OPT_BLOCK_LIST: {
+ parse_block_list(optarg);
+ break;
+ }
+
+ case OPT_SINGLE_STREAM:
+ opt_single_stream = true;
+ break;
+
case OPT_NO_SPARSE:
io_no_sparse();
break;
@@ -401,6 +492,11 @@ parse_real(args_info *args, int argc, char **argv)
opt_auto_adjust = false;
break;
+ case OPT_FLUSH_TIMEOUT:
+ opt_flush_timeout = str_to_uint64("flush-timeout",
+ optarg, 0, UINT64_MAX);
+ break;
+
default:
message_try_help();
tuklib_exit(E_ERROR, E_ERROR, false);
@@ -576,3 +672,13 @@ args_parse(args_info *args, int argc, char **argv)
return;
}
+
+
+#ifndef NDEBUG
+extern void
+args_free(void)
+{
+ free(opt_block_list);
+ return;
+}
+#endif
diff --git a/src/xz/args.h b/src/xz/args.h
index b23f4ef12c923..1defad12e0dd5 100644
--- a/src/xz/args.h
+++ b/src/xz/args.h
@@ -36,7 +36,9 @@ extern bool opt_force;
extern bool opt_keep_original;
// extern bool opt_recursive;
extern bool opt_robot;
+extern bool opt_ignore_check;
extern const char stdin_filename[];
extern void args_parse(args_info *args, int argc, char **argv);
+extern void args_free(void);
diff --git a/src/xz/coder.c b/src/xz/coder.c
index 017e04127e952..a94bdb83266fa 100644
--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@@ -24,6 +24,9 @@ enum coder_init_ret {
enum operation_mode opt_mode = MODE_COMPRESS;
enum format_type opt_format = FORMAT_AUTO;
bool opt_auto_adjust = true;
+bool opt_single_stream = false;
+uint64_t opt_block_size = 0;
+uint64_t *opt_block_list = NULL;
/// Stream used to communicate with liblzma
@@ -48,6 +51,14 @@ static lzma_check check;
/// This becomes false if the --check=CHECK option is used.
static bool check_default = true;
+#ifdef MYTHREAD_ENABLED
+static lzma_mt mt_options = {
+ .flags = 0,
+ .timeout = 300,
+ .filters = filters,
+};
+#endif
+
extern void
coder_set_check(lzma_check new_check)
@@ -125,6 +136,15 @@ memlimit_too_small(uint64_t memory_usage)
extern void
coder_set_compression_settings(void)
{
+ // The default check type is CRC64, but fallback to CRC32
+ // if CRC64 isn't supported by the copy of liblzma we are
+ // using. CRC32 is always supported.
+ if (check_default) {
+ check = LZMA_CHECK_CRC64;
+ if (!lzma_check_is_supported(check))
+ check = LZMA_CHECK_CRC32;
+ }
+
// Options for LZMA1 or LZMA2 in case we are using a preset.
static lzma_options_lzma opt_lzma;
@@ -175,15 +195,53 @@ coder_set_compression_settings(void)
// Print the selected filter chain.
message_filters_show(V_DEBUG, filters);
- // If using --format=raw, we can be decoding. The memusage function
- // also validates the filter chain and the options used for the
- // filters.
+ // The --flush-timeout option requires LZMA_SYNC_FLUSH support
+ // from the filter chain. Currently threaded encoder doesn't support
+ // LZMA_SYNC_FLUSH so single-threaded mode must be used.
+ if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
+ for (size_t i = 0; i < filters_count; ++i) {
+ switch (filters[i].id) {
+ case LZMA_FILTER_LZMA2:
+ case LZMA_FILTER_DELTA:
+ break;
+
+ default:
+ message_fatal(_("The filter chain is "
+ "incompatible with --flush-timeout"));
+ }
+ }
+
+ if (hardware_threads_get() > 1) {
+ message(V_WARNING, _("Switching to single-threaded "
+ "mode due to --flush-timeout"));
+ hardware_threads_set(1);
+ }
+ }
+
+ // Get the memory usage. Note that if --format=raw was used,
+ // we can be decompressing.
const uint64_t memory_limit = hardware_memlimit_get(opt_mode);
uint64_t memory_usage;
- if (opt_mode == MODE_COMPRESS)
- memory_usage = lzma_raw_encoder_memusage(filters);
- else
+ if (opt_mode == MODE_COMPRESS) {
+#ifdef MYTHREAD_ENABLED
+ if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) {
+ mt_options.threads = hardware_threads_get();
+ mt_options.block_size = opt_block_size;
+ mt_options.check = check;
+ memory_usage = lzma_stream_encoder_mt_memusage(
+ &mt_options);
+ if (memory_usage != UINT64_MAX)
+ message(V_DEBUG, _("Using up to %" PRIu32
+ " threads."),
+ mt_options.threads);
+ } else
+#endif
+ {
+ memory_usage = lzma_raw_encoder_memusage(filters);
+ }
+ } else {
memory_usage = lzma_raw_decoder_memusage(filters);
+ }
if (memory_usage == UINT64_MAX)
message_fatal(_("Unsupported filter chain or filter options"));
@@ -199,90 +257,99 @@ coder_set_compression_settings(void)
round_up_to_mib(decmem), 0));
}
- if (memory_usage > memory_limit) {
- // If --no-adjust was used or we didn't find LZMA1 or
- // LZMA2 as the last filter, give an error immediately.
- // --format=raw implies --no-adjust.
- if (!opt_auto_adjust || opt_format == FORMAT_RAW)
- memlimit_too_small(memory_usage);
-
- assert(opt_mode == MODE_COMPRESS);
-
- // Look for the last filter if it is LZMA2 or LZMA1, so
- // we can make it use less RAM. With other filters we don't
- // know what to do.
- size_t i = 0;
- while (filters[i].id != LZMA_FILTER_LZMA2
- && filters[i].id != LZMA_FILTER_LZMA1) {
- if (filters[i].id == LZMA_VLI_UNKNOWN)
- memlimit_too_small(memory_usage);
-
- ++i;
- }
+ if (memory_usage <= memory_limit)
+ return;
- // Decrease the dictionary size until we meet the memory
- // usage limit. First round down to full mebibytes.
- lzma_options_lzma *opt = filters[i].options;
- const uint32_t orig_dict_size = opt->dict_size;
- opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
- while (true) {
- // If it is below 1 MiB, auto-adjusting failed. We
- // could be more sophisticated and scale it down even
- // more, but let's see if many complain about this
- // version.
- //
- // FIXME: Displays the scaled memory usage instead
- // of the original.
- if (opt->dict_size < (UINT32_C(1) << 20))
+ // If --no-adjust was used or we didn't find LZMA1 or
+ // LZMA2 as the last filter, give an error immediately.
+ // --format=raw implies --no-adjust.
+ if (!opt_auto_adjust || opt_format == FORMAT_RAW)
+ memlimit_too_small(memory_usage);
+
+ assert(opt_mode == MODE_COMPRESS);
+
+#ifdef MYTHREAD_ENABLED
+ if (opt_format == FORMAT_XZ && mt_options.threads > 1) {
+ // Try to reduce the number of threads before
+ // adjusting the compression settings down.
+ do {
+ // FIXME? The real single-threaded mode has
+ // lower memory usage, but it's not comparable
+ // because it doesn't write the size info
+ // into Block Headers.
+ if (--mt_options.threads == 0)
memlimit_too_small(memory_usage);
- memory_usage = lzma_raw_encoder_memusage(filters);
+ memory_usage = lzma_stream_encoder_mt_memusage(
+ &mt_options);
if (memory_usage == UINT64_MAX)
message_bug();
- // Accept it if it is low enough.
- if (memory_usage <= memory_limit)
- break;
+ } while (memory_usage > memory_limit);
- // Otherwise 1 MiB down and try again. I hope this
- // isn't too slow method for cases where the original
- // dict_size is very big.
- opt->dict_size -= UINT32_C(1) << 20;
- }
+ message(V_WARNING, _("Adjusted the number of threads "
+ "from %s to %s to not exceed "
+ "the memory usage limit of %s MiB"),
+ uint64_to_str(hardware_threads_get(), 0),
+ uint64_to_str(mt_options.threads, 1),
+ uint64_to_str(round_up_to_mib(
+ memory_limit), 2));
+ }
+#endif
+
+ if (memory_usage <= memory_limit)
+ return;
- // Tell the user that we decreased the dictionary size.
- message(V_WARNING, _("Adjusted LZMA%c dictionary size "
- "from %s MiB to %s MiB to not exceed "
- "the memory usage limit of %s MiB"),
- filters[i].id == LZMA_FILTER_LZMA2
- ? '2' : '1',
- uint64_to_str(orig_dict_size >> 20, 0),
- uint64_to_str(opt->dict_size >> 20, 1),
- uint64_to_str(round_up_to_mib(
- memory_limit), 2));
+ // Look for the last filter if it is LZMA2 or LZMA1, so we can make
+ // it use less RAM. With other filters we don't know what to do.
+ size_t i = 0;
+ while (filters[i].id != LZMA_FILTER_LZMA2
+ && filters[i].id != LZMA_FILTER_LZMA1) {
+ if (filters[i].id == LZMA_VLI_UNKNOWN)
+ memlimit_too_small(memory_usage);
+
+ ++i;
}
-/*
- // Limit the number of worker threads so that memory usage
- // limit isn't exceeded.
- assert(memory_usage > 0);
- size_t thread_limit = memory_limit / memory_usage;
- if (thread_limit == 0)
- thread_limit = 1;
+ // Decrease the dictionary size until we meet the memory
+ // usage limit. First round down to full mebibytes.
+ lzma_options_lzma *opt = filters[i].options;
+ const uint32_t orig_dict_size = opt->dict_size;
+ opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
+ while (true) {
+ // If it is below 1 MiB, auto-adjusting failed. We could be
+ // more sophisticated and scale it down even more, but let's
+ // see if many complain about this version.
+ //
+ // FIXME: Displays the scaled memory usage instead
+ // of the original.
+ if (opt->dict_size < (UINT32_C(1) << 20))
+ memlimit_too_small(memory_usage);
- if (opt_threads > thread_limit)
- opt_threads = thread_limit;
-*/
+ memory_usage = lzma_raw_encoder_memusage(filters);
+ if (memory_usage == UINT64_MAX)
+ message_bug();
- if (check_default) {
- // The default check type is CRC64, but fallback to CRC32
- // if CRC64 isn't supported by the copy of liblzma we are
- // using. CRC32 is always supported.
- check = LZMA_CHECK_CRC64;
- if (!lzma_check_is_supported(check))
- check = LZMA_CHECK_CRC32;
+ // Accept it if it is low enough.
+ if (memory_usage <= memory_limit)
+ break;
+
+ // Otherwise 1 MiB down and try again. I hope this
+ // isn't too slow method for cases where the original
+ // dict_size is very big.
+ opt->dict_size -= UINT32_C(1) << 20;
}
+ // Tell the user that we decreased the dictionary size.
+ message(V_WARNING, _("Adjusted LZMA%c dictionary size "
+ "from %s MiB to %s MiB to not exceed "
+ "the memory usage limit of %s MiB"),
+ filters[i].id == LZMA_FILTER_LZMA2
+ ? '2' : '1',
+ uint64_to_str(orig_dict_size >> 20, 0),
+ uint64_to_str(opt->dict_size >> 20, 1),
+ uint64_to_str(round_up_to_mib(memory_limit), 2));
+
return;
}
@@ -364,7 +431,14 @@ coder_init(file_pair *pair)
break;
case FORMAT_XZ:
- ret = lzma_stream_encoder(&strm, filters, check);
+#ifdef MYTHREAD_ENABLED
+ if (hardware_threads_get() > 1)
+ ret = lzma_stream_encoder_mt(
+ &strm, &mt_options);
+ else
+#endif
+ ret = lzma_stream_encoder(
+ &strm, filters, check);
break;
case FORMAT_LZMA:
@@ -376,8 +450,17 @@ coder_init(file_pair *pair)
break;
}
} else {
- const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK
- | LZMA_CONCATENATED;
+ uint32_t flags = 0;
+
+ // It seems silly to warn about unsupported check if the
+ // check won't be verified anyway due to --ignore-check.
+ if (opt_ignore_check)
+ flags |= LZMA_IGNORE_CHECK;
+ else
+ flags |= LZMA_TELL_UNSUPPORTED_CHECK;
+
+ if (!opt_single_stream)
+ flags |= LZMA_CONCATENATED;
// We abuse FORMAT_AUTO to indicate unknown file format,
// for which we may consider passthru mode.
@@ -408,7 +491,7 @@ coder_init(file_pair *pair)
switch (init_format) {
case FORMAT_AUTO:
- // Uknown file format. If --decompress --stdout
+ // Unknown file format. If --decompress --stdout
// --force have been given, then we copy the input
// as is to stdout. Checking for MODE_DECOMPRESS
// is needed, because we don't want to do use
@@ -462,6 +545,56 @@ coder_init(file_pair *pair)
}
+/// Resolve conflicts between opt_block_size and opt_block_list in single
+/// threaded mode. We want to default to opt_block_list, except when it is
+/// larger than opt_block_size. If this is the case for the current Block
+/// at *list_pos, then we break into smaller Blocks. Otherwise advance
+/// to the next Block in opt_block_list, and break apart if needed.
+static void
+split_block(uint64_t *block_remaining,
+ uint64_t *next_block_remaining,
+ size_t *list_pos)
+{
+ if (*next_block_remaining > 0) {
+ // The Block at *list_pos has previously been split up.
+ assert(hardware_threads_get() == 1);
+ assert(opt_block_size > 0);
+ assert(opt_block_list != NULL);
+
+ if (*next_block_remaining > opt_block_size) {
+ // We have to split the current Block at *list_pos
+ // into another opt_block_size length Block.
+ *block_remaining = opt_block_size;
+ } else {
+ // This is the last remaining split Block for the
+ // Block at *list_pos.
+ *block_remaining = *next_block_remaining;
+ }
+
+ *next_block_remaining -= *block_remaining;
+
+ } else {
+ // The Block at *list_pos has been finished. Go to the next
+ // entry in the list. If the end of the list has been reached,
+ // reuse the size of the last Block.
+ if (opt_block_list[*list_pos + 1] != 0)
+ ++*list_pos;
+
+ *block_remaining = opt_block_list[*list_pos];
+
+ // If in single-threaded mode, split up the Block if needed.
+ // This is not needed in multi-threaded mode because liblzma
+ // will do this due to how threaded encoding works.
+ if (hardware_threads_get() == 1 && opt_block_size > 0
+ && *block_remaining > opt_block_size) {
+ *next_block_remaining
+ = *block_remaining - opt_block_size;
+ *block_remaining = opt_block_size;
+ }
+ }
+}
+
+
/// Compress or decompress using liblzma.
static bool
coder_normal(file_pair *pair)
@@ -469,8 +602,8 @@ coder_normal(file_pair *pair)
// Encoder needs to know when we have given all the input to it.
// The decoders need to know it too when we are using
// LZMA_CONCATENATED. We need to check for src_eof here, because
- // the first input chunk has been already read, and that may
- // have been the only chunk we will read.
+ // the first input chunk has been already read if decompressing,
+ // and that may have been the only chunk we will read.
lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;
lzma_ret ret;
@@ -478,22 +611,77 @@ coder_normal(file_pair *pair)
// Assume that something goes wrong.
bool success = false;
+ // block_remaining indicates how many input bytes to encode before
+ // finishing the current .xz Block. The Block size is set with
+ // --block-size=SIZE and --block-list. They have an effect only when
+ // compressing to the .xz format. If block_remaining == UINT64_MAX,
+ // only a single block is created.
+ uint64_t block_remaining = UINT64_MAX;
+
+ // next_block_remining for when we are in single-threaded mode and
+ // the Block in --block-list is larger than the --block-size=SIZE.
+ uint64_t next_block_remaining = 0;
+
+ // Position in opt_block_list. Unused if --block-list wasn't used.
+ size_t list_pos = 0;
+
+ // Handle --block-size for single-threaded mode and the first step
+ // of --block-list.
+ if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
+ // --block-size doesn't do anything here in threaded mode,
+ // because the threaded encoder will take care of splitting
+ // to fixed-sized Blocks.
+ if (hardware_threads_get() == 1 && opt_block_size > 0)
+ block_remaining = opt_block_size;
+
+ // If --block-list was used, start with the first size.
+ //
+ // For threaded case, --block-size specifies how big Blocks
+ // the encoder needs to be prepared to create at maximum
+ // and --block-list will simultaneously cause new Blocks
+ // to be started at specified intervals. To keep things
+ // logical, the same is done in single-threaded mode. The
+ // output is still not identical because in single-threaded
+ // mode the size info isn't written into Block Headers.
+ if (opt_block_list != NULL) {
+ if (block_remaining < opt_block_list[list_pos]) {
+ assert(hardware_threads_get() == 1);
+ next_block_remaining = opt_block_list[list_pos]
+ - block_remaining;
+ } else {
+ block_remaining = opt_block_list[list_pos];
+ }
+ }
+ }
+
strm.next_out = out_buf.u8;
strm.avail_out = IO_BUFFER_SIZE;
while (!user_abort) {
- // Fill the input buffer if it is empty and we haven't reached
- // end of file yet.
- if (strm.avail_in == 0 && !pair->src_eof) {
+ // Fill the input buffer if it is empty and we aren't
+ // flushing or finishing.
+ if (strm.avail_in == 0 && action == LZMA_RUN) {
strm.next_in = in_buf.u8;
- strm.avail_in = io_read(
- pair, &in_buf, IO_BUFFER_SIZE);
+ strm.avail_in = io_read(pair, &in_buf,
+ my_min(block_remaining,
+ IO_BUFFER_SIZE));
if (strm.avail_in == SIZE_MAX)
break;
- if (pair->src_eof)
+ if (pair->src_eof) {
action = LZMA_FINISH;
+
+ } else if (block_remaining != UINT64_MAX) {
+ // Start a new Block after every
+ // opt_block_size bytes of input.
+ block_remaining -= strm.avail_in;
+ if (block_remaining == 0)
+ action = LZMA_FULL_BARRIER;
+ }
+
+ if (action == LZMA_RUN && flush_needed)
+ action = LZMA_SYNC_FLUSH;
}
// Let liblzma do the actual work.
@@ -509,7 +697,39 @@ coder_normal(file_pair *pair)
strm.avail_out = IO_BUFFER_SIZE;
}
- if (ret != LZMA_OK) {
+ if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
+ || action == LZMA_FULL_BARRIER)) {
+ if (action == LZMA_SYNC_FLUSH) {
+ // Flushing completed. Write the pending data
+ // out immediatelly so that the reading side
+ // can decompress everything compressed so far.
+ if (io_write(pair, &out_buf, IO_BUFFER_SIZE
+ - strm.avail_out))
+ break;
+
+ strm.next_out = out_buf.u8;
+ strm.avail_out = IO_BUFFER_SIZE;
+
+ // Set the time of the most recent flushing.
+ mytime_set_flush_time();
+ } else {
+ // Start a new Block after LZMA_FULL_BARRIER.
+ if (opt_block_list == NULL) {
+ assert(hardware_threads_get() == 1);
+ assert(opt_block_size > 0);
+ block_remaining = opt_block_size;
+ } else {
+ split_block(&block_remaining,
+ &next_block_remaining,
+ &list_pos);
+ }
+ }
+
+ // Start a new Block after LZMA_FULL_FLUSH or continue
+ // the same block after LZMA_SYNC_FLUSH.
+ action = LZMA_RUN;
+
+ } else if (ret != LZMA_OK) {
// Determine if the return value indicates that we
// won't continue coding.
const bool stop = ret != LZMA_NO_CHECK
@@ -528,6 +748,12 @@ coder_normal(file_pair *pair)
}
if (ret == LZMA_STREAM_END) {
+ if (opt_single_stream) {
+ io_fix_src_pos(pair, strm.avail_in);
+ success = true;
+ break;
+ }
+
// Check that there is no trailing garbage.
// This is needed for LZMA_Alone and raw
// streams.
@@ -630,10 +856,15 @@ coder_run(const char *filename)
// Assume that something goes wrong.
bool success = false;
- // Read the first chunk of input data. This is needed to detect
- // the input file type (for now, only for decompression).
- strm.next_in = in_buf.u8;
- strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
+ if (opt_mode == MODE_COMPRESS) {
+ strm.next_in = NULL;
+ strm.avail_in = 0;
+ } else {
+ // Read the first chunk of input data. This is needed
+ // to detect the input file type.
+ strm.next_in = in_buf.u8;
+ strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
+ }
if (strm.avail_in != SIZE_MAX) {
// Initialize the coder. This will detect the file format
@@ -648,6 +879,11 @@ coder_run(const char *filename)
// Don't open the destination file when --test
// is used.
if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
+ // Remember the current time. It is needed
+ // for progress indicator and for timed
+ // flushing.
+ mytime_set_start_time();
+
// Initialize the progress indicator.
const uint64_t in_size
= pair->src_st.st_size <= 0
@@ -671,3 +907,13 @@ coder_run(const char *filename)
return;
}
+
+
+#ifndef NDEBUG
+extern void
+coder_free(void)
+{
+ lzma_end(&strm);
+ return;
+}
+#endif
diff --git a/src/xz/coder.h b/src/xz/coder.h
index 2d3add9727451..583da8f68d50a 100644
--- a/src/xz/coder.h
+++ b/src/xz/coder.h
@@ -41,6 +41,16 @@ extern enum format_type opt_format;
/// they exceed the memory usage limit.
extern bool opt_auto_adjust;
+/// If true, stop after decoding the first stream.
+extern bool opt_single_stream;
+
+/// If non-zero, start a new .xz Block after every opt_block_size bytes
+/// of input. This has an effect only when compressing to the .xz format.
+extern uint64_t opt_block_size;
+
+/// This is non-NULL if --block-list was used. This contains the Block sizes
+/// as an array that is terminated with 0.
+extern uint64_t *opt_block_list;
/// Set the integrity check type used when compressing
extern void coder_set_check(lzma_check check);
@@ -59,3 +69,8 @@ extern void coder_set_compression_settings(void);
/// Compress or decompress the given file
extern void coder_run(const char *filename);
+
+#ifndef NDEBUG
+/// Free the memory allocated for the coder and kill the worker threads.
+extern void coder_free(void);
+#endif
diff --git a/src/xz/file_io.c b/src/xz/file_io.c
index 871a099b61939..f135cf7cb6bd8 100644
--- a/src/xz/file_io.c
+++ b/src/xz/file_io.c
@@ -17,6 +17,7 @@
#ifdef TUKLIB_DOSLIKE
# include <io.h>
#else
+# include <poll.h>
static bool warn_fchown;
#endif
@@ -37,14 +38,30 @@ static bool warn_fchown;
#endif
+typedef enum {
+ IO_WAIT_MORE, // Reading or writing is possible.
+ IO_WAIT_ERROR, // Error or user_abort
+ IO_WAIT_TIMEOUT, // poll() timed out
+} io_wait_ret;
+
+
/// If true, try to create sparse files when decompressing.
static bool try_sparse = true;
#ifndef TUKLIB_DOSLIKE
+/// File status flags of standard input. This is used by io_open_src()
+/// and io_close_src().
+static int stdin_flags;
+static bool restore_stdin_flags = false;
+
/// Original file status flags of standard output. This is used by
/// io_open_dest() and io_close_dest() to save and restore the flags.
static int stdout_flags;
static bool restore_stdout_flags = false;
+
+/// Self-pipe used together with the user_abort variable to avoid
+/// race conditions with signal handling.
+static int user_abort_pipe[2];
#endif
@@ -64,19 +81,43 @@ io_init(void)
// If fchown() fails setting the owner, we warn about it only if
// we are root.
warn_fchown = geteuid() == 0;
+
+ if (pipe(user_abort_pipe)
+ || fcntl(user_abort_pipe[0], F_SETFL, O_NONBLOCK)
+ == -1
+ || fcntl(user_abort_pipe[1], F_SETFL, O_NONBLOCK)
+ == -1)
+ message_fatal(_("Error creating a pipe: %s"),
+ strerror(errno));
#endif
#ifdef __DJGPP__
// Avoid doing useless things when statting files.
// This isn't important but doesn't hurt.
- _djstat_flags = _STAT_INODE | _STAT_EXEC_EXT
- | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
+ _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
#endif
return;
}
+#ifndef TUKLIB_DOSLIKE
+extern void
+io_write_to_user_abort_pipe(void)
+{
+ // If the write() fails, it's probably due to the pipe being full.
+ // Failing in that case is fine. If the reason is something else,
+ // there's not much we can do since this is called in a signal
+ // handler. So ignore the errors and try to avoid warnings with
+ // GCC and glibc when _FORTIFY_SOURCE=2 is used.
+ uint8_t b = '\0';
+ const int ret = write(user_abort_pipe[1], &b, 1);
+ (void)ret;
+ return;
+}
+#endif
+
+
extern void
io_no_sparse(void)
{
@@ -85,6 +126,63 @@ io_no_sparse(void)
}
+#ifndef TUKLIB_DOSLIKE
+/// \brief Waits for input or output to become available or for a signal
+///
+/// This uses the self-pipe trick to avoid a race condition that can occur
+/// if a signal is caught after user_abort has been checked but before e.g.
+/// read() has been called. In that situation read() could block unless
+/// non-blocking I/O is used. With non-blocking I/O something like select()
+/// or poll() is needed to avoid a busy-wait loop, and the same race condition
+/// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
+/// POSIX) but neither is portable enough in 2013. The self-pipe trick is
+/// old and very portable.
+static io_wait_ret
+io_wait(file_pair *pair, int timeout, bool is_reading)
+{
+ struct pollfd pfd[2];
+
+ if (is_reading) {
+ pfd[0].fd = pair->src_fd;
+ pfd[0].events = POLLIN;
+ } else {
+ pfd[0].fd = pair->dest_fd;
+ pfd[0].events = POLLOUT;
+ }
+
+ pfd[1].fd = user_abort_pipe[0];
+ pfd[1].events = POLLIN;
+
+ while (true) {
+ const int ret = poll(pfd, 2, timeout);
+
+ if (user_abort)
+ return IO_WAIT_ERROR;
+
+ if (ret == -1) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
+
+ message_error(_("%s: poll() failed: %s"),
+ is_reading ? pair->src_name
+ : pair->dest_name,
+ strerror(errno));
+ return IO_WAIT_ERROR;
+ }
+
+ if (ret == 0) {
+ assert(opt_flush_timeout != 0);
+ flush_needed = true;
+ return IO_WAIT_TIMEOUT;
+ }
+
+ if (pfd[0].revents != 0)
+ return IO_WAIT_MORE;
+ }
+}
+#endif
+
+
/// \brief Unlink a file
///
/// This tries to verify that the file being unlinked really is the file that
@@ -294,6 +392,31 @@ io_open_src_real(file_pair *pair)
pair->src_fd = STDIN_FILENO;
#ifdef TUKLIB_DOSLIKE
setmode(STDIN_FILENO, O_BINARY);
+#else
+ // Enable O_NONBLOCK for stdin.
+ stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
+ if (stdin_flags == -1) {
+ message_error(_("Error getting the file status flags "
+ "from standard input: %s"),
+ strerror(errno));
+ return true;
+ }
+
+ if ((stdin_flags & O_NONBLOCK) == 0) {
+ if (fcntl(STDIN_FILENO, F_SETFL,
+ stdin_flags | O_NONBLOCK) == -1) {
+ message_error(_("Error setting O_NONBLOCK "
+ "on standard input: %s"),
+ strerror(errno));
+ return true;
+ }
+
+ restore_stdin_flags = true;
+ }
+#endif
+#ifdef HAVE_POSIX_FADVISE
+ // It will fail if stdin is a pipe and that's fine.
+ (void)posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL);
#endif
return false;
}
@@ -311,13 +434,12 @@ io_open_src_real(file_pair *pair)
int flags = O_RDONLY | O_BINARY | O_NOCTTY;
#ifndef TUKLIB_DOSLIKE
- // If we accept only regular files, we need to be careful to avoid
- // problems with special files like devices and FIFOs. O_NONBLOCK
- // prevents blocking when opening such files. When we want to accept
- // special files, we must not use O_NONBLOCK, or otherwise we won't
- // block waiting e.g. FIFOs to become readable.
- if (reg_files_only)
- flags |= O_NONBLOCK;
+ // Use non-blocking I/O:
+ // - It prevents blocking when opening FIFOs and some other
+ // special files, which is good if we want to accept only
+ // regular files.
+ // - It can help avoiding some race conditions with signal handling.
+ flags |= O_NONBLOCK;
#endif
#if defined(O_NOFOLLOW)
@@ -345,30 +467,13 @@ io_open_src_real(file_pair *pair)
(void)follow_symlinks;
#endif
- // Try to open the file. If we are accepting non-regular files,
- // unblock the caught signals so that open() can be interrupted
- // if it blocks e.g. due to a FIFO file.
- if (!reg_files_only)
- signals_unblock();
-
- // Maybe this wouldn't need a loop, since all the signal handlers for
- // which we don't use SA_RESTART set user_abort to true. But it
- // doesn't hurt to have it just in case.
- do {
- pair->src_fd = open(pair->src_name, flags);
- } while (pair->src_fd == -1 && errno == EINTR && !user_abort);
-
- if (!reg_files_only)
- signals_block();
+ // Try to open the file. Signals have been blocked so EINTR shouldn't
+ // be possible.
+ pair->src_fd = open(pair->src_name, flags);
if (pair->src_fd == -1) {
- // If we were interrupted, don't display any error message.
- if (errno == EINTR) {
- // All the signals that don't have SA_RESTART
- // set user_abort.
- assert(user_abort);
- return true;
- }
+ // Signals (that have a signal handler) have been blocked.
+ assert(errno != EINTR);
#ifdef O_NOFOLLOW
// Give an understandable error message if the reason
@@ -427,26 +532,20 @@ io_open_src_real(file_pair *pair)
return true;
}
-#ifndef TUKLIB_DOSLIKE
- // Drop O_NONBLOCK, which is used only when we are accepting only
- // regular files. After the open() call, we want things to block
- // instead of giving EAGAIN.
- if (reg_files_only) {
- flags = fcntl(pair->src_fd, F_GETFL);
- if (flags == -1)
- goto error_msg;
-
- flags &= ~O_NONBLOCK;
-
- if (fcntl(pair->src_fd, F_SETFL, flags) == -1)
- goto error_msg;
- }
-#endif
-
// Stat the source file. We need the result also when we copy
// the permissions, and when unlinking.
+ //
+ // NOTE: Use stat() instead of fstat() with DJGPP, because
+ // then we have a better chance to get st_ino value that can
+ // be used in io_open_dest_real() to prevent overwriting the
+ // source file.
+#ifdef __DJGPP__
+ if (stat(pair->src_name, &pair->src_st))
+ goto error_msg;
+#else
if (fstat(pair->src_fd, &pair->src_st))
goto error_msg;
+#endif
if (S_ISDIR(pair->src_st.st_mode)) {
message_warning(_("%s: Is a directory, skipping"),
@@ -492,6 +591,23 @@ io_open_src_real(file_pair *pair)
goto error;
}
}
+
+ // If it is something else than a regular file, wait until
+ // there is input available. This way reading from FIFOs
+ // will work when open() is used with O_NONBLOCK.
+ if (!S_ISREG(pair->src_st.st_mode)) {
+ signals_unblock();
+ const io_wait_ret ret = io_wait(pair, -1, true);
+ signals_block();
+
+ if (ret != IO_WAIT_MORE)
+ goto error;
+ }
+#endif
+
+#ifdef HAVE_POSIX_FADVISE
+ // It will fail with some special files like FIFOs but that is fine.
+ (void)posix_fadvise(pair->src_fd, 0, 0, POSIX_FADV_SEQUENTIAL);
#endif
return false;
@@ -542,6 +658,19 @@ io_open_src(const char *src_name)
static void
io_close_src(file_pair *pair, bool success)
{
+#ifndef TUKLIB_DOSLIKE
+ if (restore_stdin_flags) {
+ assert(pair->src_fd == STDIN_FILENO);
+
+ restore_stdin_flags = false;
+
+ if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
+ message_error(_("Error restoring the status flags "
+ "to standard input: %s"),
+ strerror(errno));
+ }
+#endif
+
if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
#ifdef TUKLIB_DOSLIKE
(void)close(pair->src_fd);
@@ -575,12 +704,58 @@ io_open_dest_real(file_pair *pair)
pair->dest_fd = STDOUT_FILENO;
#ifdef TUKLIB_DOSLIKE
setmode(STDOUT_FILENO, O_BINARY);
+#else
+ // Set O_NONBLOCK if it isn't already set.
+ //
+ // NOTE: O_APPEND may be unset later in this function
+ // and it relies on stdout_flags being set here.
+ stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
+ if (stdout_flags == -1) {
+ message_error(_("Error getting the file status flags "
+ "from standard output: %s"),
+ strerror(errno));
+ return true;
+ }
+
+ if ((stdout_flags & O_NONBLOCK) == 0) {
+ if (fcntl(STDOUT_FILENO, F_SETFL,
+ stdout_flags | O_NONBLOCK) == -1) {
+ message_error(_("Error setting O_NONBLOCK "
+ "on standard output: %s"),
+ strerror(errno));
+ return true;
+ }
+
+ restore_stdout_flags = true;
+ }
#endif
} else {
pair->dest_name = suffix_get_dest_name(pair->src_name);
if (pair->dest_name == NULL)
return true;
+#ifdef __DJGPP__
+ struct stat st;
+ if (stat(pair->dest_name, &st) == 0) {
+ // Check that it isn't a special file like "prn".
+ if (st.st_dev == -1) {
+ message_error("%s: Refusing to write to "
+ "a DOS special file",
+ pair->dest_name);
+ return true;
+ }
+
+ // Check that we aren't overwriting the source file.
+ if (st.st_dev == pair->src_st.st_dev
+ && st.st_ino == pair->src_st.st_ino) {
+ message_error("%s: Output file is the same "
+ "as the input file",
+ pair->dest_name);
+ return true;
+ }
+ }
+#endif
+
// If --force was used, unlink the target file first.
if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
message_error(_("%s: Cannot remove: %s"),
@@ -590,8 +765,11 @@ io_open_dest_real(file_pair *pair)
}
// Open the file.
- const int flags = O_WRONLY | O_BINARY | O_NOCTTY
+ int flags = O_WRONLY | O_BINARY | O_NOCTTY
| O_CREAT | O_EXCL;
+#ifndef TUKLIB_DOSLIKE
+ flags |= O_NONBLOCK;
+#endif
const mode_t mode = S_IRUSR | S_IWUSR;
pair->dest_fd = open(pair->dest_name, flags, mode);
@@ -603,17 +781,19 @@ io_open_dest_real(file_pair *pair)
}
}
- // If this really fails... well, we have a safe fallback.
+#ifndef TUKLIB_DOSLIKE
+ // dest_st isn't used on DOS-like systems except as a dummy
+ // argument to io_unlink(), so don't fstat() on such systems.
if (fstat(pair->dest_fd, &pair->dest_st)) {
-#if defined(__VMS)
+ // If fstat() really fails, we have a safe fallback here.
+# if defined(__VMS)
pair->dest_st.st_ino[0] = 0;
pair->dest_st.st_ino[1] = 0;
pair->dest_st.st_ino[2] = 0;
-#elif !defined(TUKLIB_DOSLIKE)
+# else
pair->dest_st.st_dev = 0;
pair->dest_st.st_ino = 0;
-#endif
-#ifndef TUKLIB_DOSLIKE
+# endif
} else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
// When writing to standard output, we need to be extra
// careful:
@@ -631,10 +811,6 @@ io_open_dest_real(file_pair *pair)
if (!S_ISREG(pair->dest_st.st_mode))
return false;
- stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
- if (stdout_flags == -1)
- return false;
-
if (stdout_flags & O_APPEND) {
// Creating a sparse file is not possible
// when O_APPEND is active (it's used by
@@ -653,14 +829,23 @@ io_open_dest_real(file_pair *pair)
if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
return false;
+ // O_NONBLOCK was set earlier in this function
+ // so it must be kept here too. If this
+ // fcntl() call fails, we continue but won't
+ // try to create sparse output. The original
+ // flags will still be restored if needed (to
+ // unset O_NONBLOCK) when the file is finished.
if (fcntl(STDOUT_FILENO, F_SETFL,
- stdout_flags & ~O_APPEND)
- == -1)
+ (stdout_flags | O_NONBLOCK)
+ & ~O_APPEND) == -1)
return false;
// Disabling O_APPEND succeeded. Mark
// that the flags should be restored
- // in io_close_dest().
+ // in io_close_dest(). This quite likely was
+ // already set when enabling O_NONBLOCK but
+ // just in case O_NONBLOCK was already set,
+ // set this again here.
restore_stdout_flags = true;
} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
@@ -673,8 +858,8 @@ io_open_dest_real(file_pair *pair)
}
pair->dest_try_sparse = true;
-#endif
}
+#endif
return false;
}
@@ -790,6 +975,21 @@ io_close(file_pair *pair, bool success)
}
+extern void
+io_fix_src_pos(file_pair *pair, size_t rewind_size)
+{
+ assert(rewind_size <= IO_BUFFER_SIZE);
+
+ if (rewind_size > 0) {
+ // This doesn't need to work on unseekable file descriptors,
+ // so just ignore possible errors.
+ (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
+ }
+
+ return;
+}
+
+
extern size_t
io_read(file_pair *pair, io_buf *buf_union, size_t size)
{
@@ -815,12 +1015,30 @@ io_read(file_pair *pair, io_buf *buf_union, size_t size)
continue;
}
+#ifndef TUKLIB_DOSLIKE
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ const io_wait_ret ret = io_wait(pair,
+ mytime_get_flush_timeout(),
+ true);
+ switch (ret) {
+ case IO_WAIT_MORE:
+ continue;
+
+ case IO_WAIT_ERROR:
+ return SIZE_MAX;
+
+ case IO_WAIT_TIMEOUT:
+ return size - left;
+
+ default:
+ message_bug();
+ }
+ }
+#endif
+
message_error(_("%s: Read error: %s"),
pair->src_name, strerror(errno));
- // FIXME Is this needed?
- pair->src_eof = true;
-
return SIZE_MAX;
}
@@ -885,6 +1103,15 @@ io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
continue;
}
+#ifndef TUKLIB_DOSLIKE
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ if (io_wait(pair, -1, false) == IO_WAIT_MORE)
+ continue;
+
+ return true;
+ }
+#endif
+
// Handle broken pipe specially. gzip and bzip2
// don't print anything on SIGPIPE. In addition,
// gzip --quiet uses exit status 2 (warning) on
diff --git a/src/xz/file_io.h b/src/xz/file_io.h
index 967da868b0797..2de3379238d65 100644
--- a/src/xz/file_io.h
+++ b/src/xz/file_io.h
@@ -68,6 +68,14 @@ typedef struct {
extern void io_init(void);
+#ifndef TUKLIB_DOSLIKE
+/// \brief Write a byte to user_abort_pipe[1]
+///
+/// This is called from a signal handler.
+extern void io_write_to_user_abort_pipe(void);
+#endif
+
+
/// \brief Disable creation of sparse files when decompressing
extern void io_no_sparse(void);
@@ -102,6 +110,19 @@ extern void io_close(file_pair *pair, bool success);
extern size_t io_read(file_pair *pair, io_buf *buf, size_t size);
+/// \brief Fix the position in src_fd
+///
+/// This is used when --single-thream has been specified and decompression
+/// is successful. If the input file descriptor supports seeking, this
+/// function fixes the input position to point to the next byte after the
+/// decompressed stream.
+///
+/// \param pair File pair having the source file open for reading
+/// \param rewind_size How many bytes of extra have been read i.e.
+/// how much to seek backwards.
+extern void io_fix_src_pos(file_pair *pair, size_t rewind_size);
+
+
/// \brief Read from source file from given offset to a buffer
///
/// This is remotely similar to standard pread(). This uses lseek() though,
diff --git a/src/xz/hardware.c b/src/xz/hardware.c
index a4733c27e1180..ff32f6d301484 100644
--- a/src/xz/hardware.c
+++ b/src/xz/hardware.c
@@ -11,12 +11,11 @@
///////////////////////////////////////////////////////////////////////////////
#include "private.h"
-#include "tuklib_cpucores.h"
-/// Maximum number of free *coder* threads. This can be set with
+/// Maximum number of worker threads. This can be set with
/// the --threads=NUM command line option.
-static uint32_t threadlimit;
+static uint32_t threads_max = 1;
/// Memory usage limit for compression
static uint64_t memlimit_compress;
@@ -29,15 +28,23 @@ static uint64_t total_ram;
extern void
-hardware_threadlimit_set(uint32_t new_threadlimit)
+hardware_threads_set(uint32_t n)
{
- if (new_threadlimit == 0) {
- // The default is the number of available CPU cores.
- threadlimit = tuklib_cpucores();
- if (threadlimit == 0)
- threadlimit = 1;
+ if (n == 0) {
+ // Automatic number of threads was requested.
+ // If threading support was enabled at build time,
+ // use the number of available CPU cores. Otherwise
+ // use one thread since disabling threading support
+ // omits lzma_cputhreads() from liblzma.
+#ifdef MYTHREAD_ENABLED
+ threads_max = lzma_cputhreads();
+ if (threads_max == 0)
+ threads_max = 1;
+#else
+ threads_max = 1;
+#endif
} else {
- threadlimit = new_threadlimit;
+ threads_max = n;
}
return;
@@ -45,9 +52,9 @@ hardware_threadlimit_set(uint32_t new_threadlimit)
extern uint32_t
-hardware_threadlimit_get(void)
+hardware_threads_get(void)
{
- return threadlimit;
+ return threads_max;
}
@@ -139,6 +146,5 @@ hardware_init(void)
// Set the defaults.
hardware_memlimit_set(0, true, true, false);
- hardware_threadlimit_set(0);
return;
}
diff --git a/src/xz/hardware.h b/src/xz/hardware.h
index ad526f260bc17..4fae61815656f 100644
--- a/src/xz/hardware.h
+++ b/src/xz/hardware.h
@@ -15,12 +15,11 @@
extern void hardware_init(void);
-/// Set custom value for maximum number of coder threads.
-extern void hardware_threadlimit_set(uint32_t threadlimit);
+/// Set the maximum number of worker threads.
+extern void hardware_threads_set(uint32_t threadlimit);
-/// Get the maximum number of coder threads. Some additional helper threads
-/// are allowed on top of this).
-extern uint32_t hardware_threadlimit_get(void);
+/// Get the maximum number of worker threads.
+extern uint32_t hardware_threads_get(void);
/// Set the memory usage limit. There are separate limits for compression
diff --git a/src/xz/list.c b/src/xz/list.c
index 0e73d519ea4a1..449c2bc4e02fd 100644
--- a/src/xz/list.c
+++ b/src/xz/list.c
@@ -29,9 +29,12 @@ typedef struct {
/// Uncompressed Size fields
bool all_have_sizes;
+ /// Oldest XZ Utils version that will decompress the file
+ uint32_t min_version;
+
} xz_file_info;
-#define XZ_FILE_INFO_INIT { NULL, 0, 0, true }
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
/// Information about a .xz Block
@@ -104,8 +107,32 @@ static struct {
uint64_t stream_padding;
uint64_t memusage_max;
uint32_t checks;
+ uint32_t min_version;
bool all_have_sizes;
-} totals = { 0, 0, 0, 0, 0, 0, 0, 0, true };
+} totals = { 0, 0, 0, 0, 0, 0, 0, 0, 0, true };
+
+
+/// Convert XZ Utils version number to a string.
+static const char *
+xz_ver_to_str(uint32_t ver)
+{
+ static char buf[32];
+
+ unsigned int major = ver / 10000000U;
+ ver -= major * 10000000U;
+
+ unsigned int minor = ver / 10000U;
+ ver -= minor * 10000U;
+
+ unsigned int patch = ver / 10U;
+ ver -= patch * 10U;
+
+ const char *stability = ver == 0 ? "alpha" : ver == 1 ? "beta" : "";
+
+ snprintf(buf, sizeof(buf), "%u.%u.%u%s",
+ major, minor, patch, stability);
+ return buf;
+}
/// \brief Parse the Index(es) from the given .xz file
@@ -478,6 +505,21 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
if (xfi->memusage_max < bhi->memusage)
xfi->memusage_max = bhi->memusage;
+ // Determine the minimum XZ Utils version that supports this Block.
+ //
+ // Currently the only thing that 5.0.0 doesn't support is empty
+ // LZMA2 Block. This decoder bug was fixed in 5.0.2.
+ {
+ size_t i = 0;
+ while (filters[i + 1].id != LZMA_VLI_UNKNOWN)
+ ++i;
+
+ if (filters[i].id == LZMA_FILTER_LZMA2
+ && iter->block.uncompressed_size == 0
+ && xfi->min_version < 50000022U)
+ xfi->min_version = 50000022U;
+ }
+
// Convert the filter chain to human readable form.
message_filters_to_str(bhi->filter_chain, filters, false);
@@ -856,6 +898,8 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
round_up_to_mib(xfi->memusage_max), 0));
printf(_(" Sizes in headers: %s\n"),
xfi->all_have_sizes ? _("Yes") : _("No"));
+ printf(_(" Minimum XZ Utils version: %s\n"),
+ xz_ver_to_str(xfi->min_version));
}
return false;
@@ -938,9 +982,10 @@ print_info_robot(xz_file_info *xfi, file_pair *pair)
}
if (message_verbosity_get() >= V_DEBUG)
- printf("summary\t%" PRIu64 "\t%s\n",
+ printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n",
xfi->memusage_max,
- xfi->all_have_sizes ? "yes" : "no");
+ xfi->all_have_sizes ? "yes" : "no",
+ xfi->min_version);
return false;
}
@@ -961,6 +1006,9 @@ update_totals(const xz_file_info *xfi)
if (totals.memusage_max < xfi->memusage_max)
totals.memusage_max = xfi->memusage_max;
+ if (totals.min_version < xfi->min_version)
+ totals.min_version = xfi->min_version;
+
totals.all_have_sizes &= xfi->all_have_sizes;
return;
@@ -1025,6 +1073,8 @@ print_totals_adv(void)
round_up_to_mib(totals.memusage_max), 0));
printf(_(" Sizes in headers: %s\n"),
totals.all_have_sizes ? _("Yes") : _("No"));
+ printf(_(" Minimum XZ Utils version: %s\n"),
+ xz_ver_to_str(totals.min_version));
}
return;
@@ -1050,9 +1100,10 @@ print_totals_robot(void)
totals.files);
if (message_verbosity_get() >= V_DEBUG)
- printf("\t%" PRIu64 "\t%s",
+ printf("\t%" PRIu64 "\t%s\t%" PRIu32,
totals.memusage_max,
- totals.all_have_sizes ? "yes" : "no");
+ totals.all_have_sizes ? "yes" : "no",
+ totals.min_version);
putchar('\n');
diff --git a/src/xz/main.c b/src/xz/main.c
index 8196c6e7e7746..a8f0683a47bd4 100644
--- a/src/xz/main.c
+++ b/src/xz/main.c
@@ -275,6 +275,11 @@ main(int argc, char **argv)
list_totals();
}
+#ifndef NDEBUG
+ coder_free();
+ args_free();
+#endif
+
// If we have got a signal, raise it to kill the program instead
// of calling tuklib_exit().
signals_exit();
diff --git a/src/xz/message.c b/src/xz/message.c
index 0a7a522f7afa7..8a31b00ed89c5 100644
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -12,10 +12,6 @@
#include "private.h"
-#ifdef HAVE_SYS_TIME_H
-# include <sys/time.h>
-#endif
-
#include <stdarg.h>
@@ -64,9 +60,6 @@ static lzma_stream *progress_strm;
/// and estimate remaining time.
static uint64_t expected_in_size;
-/// Time when we started processing the file
-static uint64_t start_time;
-
// Use alarm() and SIGALRM when they are supported. This has two minor
// advantages over the alternative of polling gettimeofday():
@@ -112,16 +105,6 @@ static uint64_t progress_next_update;
#endif
-/// Get the current time as microseconds since epoch
-static uint64_t
-my_time(void)
-{
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return (uint64_t)(tv.tv_sec) * UINT64_C(1000000) + tv.tv_usec;
-}
-
-
extern void
message_init(void)
{
@@ -264,11 +247,10 @@ message_progress_start(lzma_stream *strm, uint64_t in_size)
// It is needed to find out the position in the stream.
progress_strm = strm;
- // Store the processing start time of the file and its expected size.
- // If we aren't printing any statistics, then these are unused. But
- // since it is possible that the user sends us a signal to show
- // statistics, we need to have these available anyway.
- start_time = my_time();
+ // Store the expected size of the file. If we aren't printing any
+ // statistics, then is will be unused. But since it is possible
+ // that the user sends us a signal to show statistics, we need
+ // to have it available anyway.
expected_in_size = in_size;
// Indicate that progress info may need to be printed before
@@ -290,7 +272,7 @@ message_progress_start(lzma_stream *strm, uint64_t in_size)
alarm(1);
#else
progress_needs_updating = true;
- progress_next_update = 1000000;
+ progress_next_update = 1000;
#endif
}
@@ -364,7 +346,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
{
// Don't print the speed immediately, since the early values look
// somewhat random.
- if (elapsed < 3000000)
+ if (elapsed < 3000)
return "";
static const char unit[][8] = {
@@ -377,7 +359,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
// Calculate the speed as KiB/s.
double speed = (double)(uncompressed_pos)
- / ((double)(elapsed) * (1024.0 / 1e6));
+ / ((double)(elapsed) * (1024.0 / 1000.0));
// Adjust the unit of the speed if needed.
while (speed > 999.0) {
@@ -402,12 +384,12 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
/// Make a string indicating elapsed or remaining time. The format is either
/// M:SS or H:MM:SS depending on if the time is an hour or more.
static const char *
-progress_time(uint64_t useconds)
+progress_time(uint64_t mseconds)
{
// 9999 hours = 416 days
static char buf[sizeof("9999:59:59")];
- uint32_t seconds = useconds / 1000000;
+ uint32_t seconds = mseconds / 1000;
// Don't show anything if the time is zero or ridiculously big.
if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59)
@@ -445,14 +427,14 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed)
// - Only a few seconds has passed since we started (de)compressing,
// so estimate would be too inaccurate.
if (expected_in_size == 0 || in_pos > expected_in_size
- || in_pos < (UINT64_C(1) << 19) || elapsed < 8000000)
+ || in_pos < (UINT64_C(1) << 19) || elapsed < 8000)
return "";
// Calculate the estimate. Don't give an estimate of zero seconds,
// since it is possible that all the input has been already passed
// to the library, but there is still quite a bit of output pending.
uint32_t remaining = (double)(expected_in_size - in_pos)
- * ((double)(elapsed) / 1e6) / (double)(in_pos);
+ * ((double)(elapsed) / 1000.0) / (double)(in_pos);
if (remaining < 1)
remaining = 1;
@@ -518,28 +500,26 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed)
}
-/// Calculate the elapsed time as microseconds.
-static uint64_t
-progress_elapsed(void)
-{
- return my_time() - start_time;
-}
-
-
-/// Get information about position in the stream. This is currently simple,
-/// but it will become more complicated once we have multithreading support.
+/// Get how much uncompressed and compressed data has been processed.
static void
progress_pos(uint64_t *in_pos,
uint64_t *compressed_pos, uint64_t *uncompressed_pos)
{
- *in_pos = progress_strm->total_in;
+ uint64_t out_pos;
+ lzma_get_progress(progress_strm, in_pos, &out_pos);
+
+ // It cannot have processed more input than it has been given.
+ assert(*in_pos <= progress_strm->total_in);
+
+ // It cannot have produced more output than it claims to have ready.
+ assert(out_pos >= progress_strm->total_out);
if (opt_mode == MODE_COMPRESS) {
- *compressed_pos = progress_strm->total_out;
- *uncompressed_pos = progress_strm->total_in;
+ *compressed_pos = out_pos;
+ *uncompressed_pos = *in_pos;
} else {
- *compressed_pos = progress_strm->total_in;
- *uncompressed_pos = progress_strm->total_out;
+ *compressed_pos = *in_pos;
+ *uncompressed_pos = out_pos;
}
return;
@@ -553,13 +533,13 @@ message_progress_update(void)
return;
// Calculate how long we have been processing this file.
- const uint64_t elapsed = progress_elapsed();
+ const uint64_t elapsed = mytime_get_elapsed();
#ifndef SIGALRM
if (progress_next_update > elapsed)
return;
- progress_next_update = elapsed + 1000000;
+ progress_next_update = elapsed + 1000;
#endif
// Get our current position in the stream.
@@ -652,7 +632,7 @@ progress_flush(bool finished)
progress_active = false;
- const uint64_t elapsed = progress_elapsed();
+ const uint64_t elapsed = mytime_get_elapsed();
signals_block();
@@ -1122,7 +1102,10 @@ message_help(bool long_help)
" -f, --force force overwrite of output file and (de)compress links\n"
" -c, --stdout write to standard output and don't delete input files"));
- if (long_help)
+ if (long_help) {
+ puts(_(
+" --single-stream decompress only the first stream, and silently\n"
+" ignore possible remaining input data"));
puts(_(
" --no-sparse do not create sparse files when decompressing\n"
" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n"
@@ -1130,6 +1113,7 @@ message_help(bool long_help)
" omitted, filenames are read from the standard input;\n"
" filenames must be terminated with the newline character\n"
" --files0[=FILE] like --files but use the null character as terminator"));
+ }
if (long_help) {
puts(_("\n Basic file format and compression options:\n"));
@@ -1138,6 +1122,8 @@ message_help(bool long_help)
" `auto' (default), `xz', `lzma', and `raw'\n"
" -C, --check=CHECK integrity check type: `none' (use with caution),\n"
" `crc32', `crc64' (default), or `sha256'"));
+ puts(_(
+" --ignore-check don't verify the integrity check when decompressing"));
}
puts(_(
@@ -1148,7 +1134,25 @@ message_help(bool long_help)
" -e, --extreme try to improve compression ratio by using more CPU time;\n"
" does not affect decompressor memory requirements"));
+ puts(_(
+" -T, --threads=NUM use at most NUM threads; the default is 1; set to 0\n"
+" to use as many threads as there are processor cores"));
+
if (long_help) {
+ puts(_(
+" --block-size=SIZE\n"
+" start a new .xz block after every SIZE bytes of input;\n"
+" use this to set the block size for threaded compression"));
+ puts(_(
+" --block-list=SIZES\n"
+" start a new .xz block after the given comma-separated\n"
+" intervals of uncompressed data"));
+ puts(_(
+" --flush-timeout=TIMEOUT\n"
+" when compressing, if more than TIMEOUT milliseconds has\n"
+" passed since the previous flush and reading more input\n"
+" would block, all pending data is flushed out"
+ ));
puts(_( // xgettext:no-c-format
" --memlimit-compress=LIMIT\n"
" --memlimit-decompress=LIMIT\n"
@@ -1244,5 +1248,10 @@ message_help(bool long_help)
PACKAGE_BUGREPORT);
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
+#if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE
+ puts(_(
+"THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE."));
+#endif
+
tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT);
}
diff --git a/src/xz/mytime.c b/src/xz/mytime.c
new file mode 100644
index 0000000000000..4be184fd19daf
--- /dev/null
+++ b/src/xz/mytime.c
@@ -0,0 +1,89 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file mytime.c
+/// \brief Time handling functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+#if !(defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC)
+# include <sys/time.h>
+#endif
+
+uint64_t opt_flush_timeout = 0;
+bool flush_needed;
+
+static uint64_t start_time;
+static uint64_t next_flush;
+
+
+/// \brief Get the current time as milliseconds
+///
+/// It's relative to some point but not necessarily to the UNIX Epoch.
+static uint64_t
+mytime_now(void)
+{
+ // NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1.
+#if defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC
+ // If CLOCK_MONOTONIC was available at compile time but for some
+ // reason isn't at runtime, fallback to CLOCK_REALTIME which
+ // according to POSIX is mandatory for all implementations.
+ static clockid_t clk_id = CLOCK_MONOTONIC;
+ struct timespec tv;
+ while (clock_gettime(clk_id, &tv))
+ clk_id = CLOCK_REALTIME;
+
+ return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_nsec / 1000000;
+#else
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_usec / 1000;
+#endif
+}
+
+
+extern void
+mytime_set_start_time(void)
+{
+ start_time = mytime_now();
+ next_flush = start_time + opt_flush_timeout;
+ flush_needed = false;
+ return;
+}
+
+
+extern uint64_t
+mytime_get_elapsed(void)
+{
+ return mytime_now() - start_time;
+}
+
+
+extern void
+mytime_set_flush_time(void)
+{
+ next_flush = mytime_now() + opt_flush_timeout;
+ flush_needed = false;
+ return;
+}
+
+
+extern int
+mytime_get_flush_timeout(void)
+{
+ if (opt_flush_timeout == 0 || opt_mode != MODE_COMPRESS)
+ return -1;
+
+ const uint64_t now = mytime_now();
+ if (now >= next_flush)
+ return 0;
+
+ const uint64_t remaining = next_flush - now;
+ return remaining > INT_MAX ? INT_MAX : (int)remaining;
+}
diff --git a/src/xz/mytime.h b/src/xz/mytime.h
new file mode 100644
index 0000000000000..ea291eed81c7a
--- /dev/null
+++ b/src/xz/mytime.h
@@ -0,0 +1,47 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file mytime.h
+/// \brief Time handling functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// \brief Number of milliseconds to between LZMA_SYNC_FLUSHes
+///
+/// If 0, timed flushing is disabled. Otherwise if no more input is available
+/// and not at the end of the file and at least opt_flush_timeout milliseconds
+/// has elapsed since the start of compression or the previous flushing
+/// (LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH), set LZMA_SYNC_FLUSH to flush
+/// the pending data.
+extern uint64_t opt_flush_timeout;
+
+
+/// \brief True when flushing is needed due to expired timeout
+extern bool flush_needed;
+
+
+/// \brief Store the time when (de)compression was started
+///
+/// The start time is also stored as the time of the first flush.
+extern void mytime_set_start_time(void);
+
+
+/// \brief Get the number of milliseconds since the operation started
+extern uint64_t mytime_get_elapsed(void);
+
+
+/// \brief Store the time of when compressor was flushed
+extern void mytime_set_flush_time(void);
+
+
+/// \brief Get the number of milliseconds until the next flush
+///
+/// This returns -1 if no timed flushing is used.
+///
+/// The return value is inteded for use with poll().
+extern int mytime_get_flush_timeout(void);
diff --git a/src/xz/options.c b/src/xz/options.c
index f21a0ba510651..f9c7ab9e8535b 100644
--- a/src/xz/options.c
+++ b/src/xz/options.c
@@ -31,8 +31,8 @@ typedef struct {
} option_map;
-/// Parses option=value pairs that are separated with colons, semicolons,
-/// or commas: opt=val:opt=val;opt=val,opt=val
+/// Parses option=value pairs that are separated with commas:
+/// opt=val,opt=val,opt=val
///
/// Each option is a string, that is converted to an integer using the
/// index where the option string is in the array.
diff --git a/src/xz/private.h b/src/xz/private.h
index 6b01e51354e1c..4acfa8dc45583 100644
--- a/src/xz/private.h
+++ b/src/xz/private.h
@@ -12,6 +12,7 @@
#include "sysdefs.h"
#include "mythread.h"
+
#include "lzma.h"
#include <sys/types.h>
@@ -45,6 +46,7 @@
#endif
#include "main.h"
+#include "mytime.h"
#include "coder.h"
#include "message.h"
#include "args.h"
diff --git a/src/xz/signals.c b/src/xz/signals.c
index 322811f472b5a..5387c424e1a5a 100644
--- a/src/xz/signals.c
+++ b/src/xz/signals.c
@@ -41,6 +41,11 @@ signal_handler(int sig)
{
exit_signal = sig;
user_abort = true;
+
+#ifndef TUKLIB_DOSLIKE
+ io_write_to_user_abort_pipe();
+#endif
+
return;
}
diff --git a/src/xz/suffix.c b/src/xz/suffix.c
index 8e331a7022a3c..9d4fcd139b8f7 100644
--- a/src/xz/suffix.c
+++ b/src/xz/suffix.c
@@ -12,6 +12,10 @@
#include "private.h"
+#ifdef __DJGPP__
+# include <fcntl.h>
+#endif
+
// For case-insensitive filename suffix on case-insensitive systems
#if defined(TUKLIB_DOSLIKE) || defined(__VMS)
# define strcmp strcasecmp
@@ -45,6 +49,31 @@ has_dir_sep(const char *str)
}
+#ifdef __DJGPP__
+/// \brief Test for special suffix used for 8.3 short filenames (SFN)
+///
+/// \return If str matches *.?- or *.??-, true is returned. Otherwise
+/// false is returned.
+static bool
+has_sfn_suffix(const char *str, size_t len)
+{
+ if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.'
+ && !is_dir_sep(str[len - 2])) {
+ // *.?-
+ if (str[len - 3] == '.')
+ return !is_dir_sep(str[len - 4]);
+
+ // *.??-
+ if (len >= 5 && !is_dir_sep(str[len - 3])
+ && str[len - 4] == '.')
+ return !is_dir_sep(str[len - 5]);
+ }
+
+ return false;
+}
+#endif
+
+
/// \brief Checks if src_name has given compressed_suffix
///
/// \param suffix Filename suffix to look for
@@ -87,6 +116,9 @@ uncompressed_name(const char *src_name, const size_t src_len)
{ ".xz", "" },
{ ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare.
{ ".lzma", "" },
+#ifdef __DJGPP__
+ { ".lzm", "" },
+#endif
{ ".tlz", ".tar" },
// { ".gz", "" },
// { ".tgz", ".tar" },
@@ -112,6 +144,17 @@ uncompressed_name(const char *src_name, const size_t src_len)
break;
}
}
+
+#ifdef __DJGPP__
+ // Support also *.?- -> *.? and *.??- -> *.?? on DOS.
+ // This is done also when long filenames are available
+ // to keep it easy to decompress files created when
+ // long filename support wasn't available.
+ if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
+ new_suffix = "";
+ new_len = src_len - 1;
+ }
+#endif
}
if (new_len == 0 && custom_suffix != NULL)
@@ -134,21 +177,35 @@ uncompressed_name(const char *src_name, const size_t src_len)
}
+/// This message is needed in multiple places in compressed_name(),
+/// so the message has been put into its own function.
+static void
+msg_suffix(const char *src_name, const char *suffix)
+{
+ message_warning(_("%s: File already has `%s' suffix, skipping"),
+ src_name, suffix);
+ return;
+}
+
+
/// \brief Appends suffix to src_name
///
/// In contrast to uncompressed_name(), we check only suffixes that are valid
/// for the specified file format.
static char *
-compressed_name(const char *src_name, const size_t src_len)
+compressed_name(const char *src_name, size_t src_len)
{
// The order of these must match the order in args.h.
- static const char *const all_suffixes[][3] = {
+ static const char *const all_suffixes[][4] = {
{
".xz",
".txz",
NULL
}, {
".lzma",
+#ifdef __DJGPP__
+ ".lzm",
+#endif
".tlz",
NULL
/*
@@ -170,20 +227,27 @@ compressed_name(const char *src_name, const size_t src_len)
const size_t format = opt_format - 1;
const char *const *suffixes = all_suffixes[format];
+ // Look for known filename suffixes and refuse to compress them.
for (size_t i = 0; suffixes[i] != NULL; ++i) {
if (test_suffix(suffixes[i], src_name, src_len) != 0) {
- message_warning(_("%s: File already has `%s' "
- "suffix, skipping"), src_name,
- suffixes[i]);
+ msg_suffix(src_name, suffixes[i]);
return NULL;
}
}
+#ifdef __DJGPP__
+ // Recognize also the special suffix that is used when long
+ // filename (LFN) support isn't available. This suffix is
+ // recognized on LFN systems too.
+ if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
+ msg_suffix(src_name, "-");
+ return NULL;
+ }
+#endif
+
if (custom_suffix != NULL) {
if (test_suffix(custom_suffix, src_name, src_len) != 0) {
- message_warning(_("%s: File already has `%s' "
- "suffix, skipping"), src_name,
- custom_suffix);
+ msg_suffix(src_name, custom_suffix);
return NULL;
}
}
@@ -199,7 +263,101 @@ compressed_name(const char *src_name, const size_t src_len)
const char *suffix = custom_suffix != NULL
? custom_suffix : suffixes[0];
- const size_t suffix_len = strlen(suffix);
+ size_t suffix_len = strlen(suffix);
+
+#ifdef __DJGPP__
+ if (!_use_lfn(src_name)) {
+ // Long filename (LFN) support isn't available and we are
+ // limited to 8.3 short filenames (SFN).
+ //
+ // Look for suffix separator from the filename, and make sure
+ // that it is in the filename, not in a directory name.
+ const char *sufsep = strrchr(src_name, '.');
+ if (sufsep == NULL || sufsep[1] == '\0'
+ || has_dir_sep(sufsep)) {
+ // src_name has no filename extension.
+ //
+ // Examples:
+ // xz foo -> foo.xz
+ // xz -F lzma foo -> foo.lzm
+ // xz -S x foo -> foox
+ // xz -S x foo. -> foo.x
+ // xz -S x.y foo -> foox.y
+ // xz -S .x foo -> foo.x
+ // xz -S .x foo. -> foo.x
+ //
+ // Avoid double dots:
+ if (sufsep != NULL && sufsep[1] == '\0'
+ && suffix[0] == '.')
+ --src_len;
+
+ } else if (custom_suffix == NULL
+ && strcasecmp(sufsep, ".tar") == 0) {
+ // ".tar" is handled specially.
+ //
+ // Examples:
+ // xz foo.tar -> foo.txz
+ // xz -F lzma foo.tar -> foo.tlz
+ static const char *const tar_suffixes[] = {
+ ".txz",
+ ".tlz",
+ // ".tgz",
+ };
+ suffix = tar_suffixes[format];
+ suffix_len = 4;
+ src_len -= 4;
+
+ } else {
+ if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
+ // Instead of the .xz suffix, use a single
+ // character at the end of the filename
+ // extension. This is to minimize name
+ // conflicts when compressing multiple files
+ // with the same basename. E.g. foo.txt and
+ // foo.exe become foo.tx- and foo.ex-. Dash
+ // is rare as the last character of the
+ // filename extension, so it seems to be
+ // quite safe choice and it stands out better
+ // in directory listings than e.g. x. For
+ // comparison, gzip uses z.
+ suffix = "-";
+ suffix_len = 1;
+ }
+
+ if (suffix[0] == '.') {
+ // The first character of the suffix is a dot.
+ // Throw away the original filename extension
+ // and replace it with the new suffix.
+ //
+ // Examples:
+ // xz -F lzma foo.txt -> foo.lzm
+ // xz -S .x foo.txt -> foo.x
+ src_len = sufsep - src_name;
+
+ } else {
+ // The first character of the suffix is not
+ // a dot. Preserve the first 0-2 characters
+ // of the original filename extension.
+ //
+ // Examples:
+ // xz foo.txt -> foo.tx-
+ // xz -S x foo.c -> foo.cx
+ // xz -S ab foo.c -> foo.cab
+ // xz -S ab foo.txt -> foo.tab
+ // xz -S abc foo.txt -> foo.abc
+ //
+ // Truncate the suffix to three chars:
+ if (suffix_len > 3)
+ suffix_len = 3;
+
+ // If needed, overwrite 1-3 characters.
+ if (strlen(sufsep) > 4 - suffix_len)
+ src_len = sufsep - src_name
+ + 4 - suffix_len;
+ }
+ }
+ }
+#endif
char *dest_name = xmalloc(src_len + suffix_len + 1);
diff --git a/src/xz/xz.1 b/src/xz/xz.1
index 363b90cb4e551..75aead3d04393 100644
--- a/src/xz/xz.1
+++ b/src/xz/xz.1
@@ -5,7 +5,7 @@
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
-.TH XZ 1 "2013-06-21" "Tukaani" "XZ Utils"
+.TH XZ 1 "2014-12-16" "Tukaani" "XZ Utils"
.
.SH NAME
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@@ -435,6 +435,29 @@ standard output instead of a file.
This implies
.BR \-\-keep .
.TP
+.B \-\-single\-stream
+Decompress only the first
+.B .xz
+stream, and
+silently ignore possible remaining input data following the stream.
+Normally such trailing garbage makes
+.B xz
+display an error.
+.IP ""
+.B xz
+never decompresses more than one stream from
+.B .lzma
+files or raw streams, but this option still makes
+.B xz
+ignore the possible trailing data after the
+.B .lzma
+file or raw stream.
+.IP ""
+This option has no effect if the operation mode is not
+.B \-\-decompress
+or
+.BR \-\-test .
+.TP
.B \-\-no\-sparse
Disable creation of sparse files.
By default, if decompressing into a regular file,
@@ -586,6 +609,25 @@ Integrity of the
headers is always verified with CRC32.
It is not possible to change or disable it.
.TP
+.B \-\-ignore\-check
+Don't verify the integrity check of the compressed data when decompressing.
+The CRC32 values in the
+.B .xz
+headers will still be verified normally.
+.IP ""
+.B "Do not use this option unless you know what you are doing."
+Possible reasons to use this option:
+.RS
+.IP \(bu 3
+Trying to recover data from a corrupt .xz file.
+.IP \(bu 3
+Speeding up decompression.
+This matters mostly with SHA-256 or
+with files that have compressed extremely well.
+It's recommended to not use this option for this purpose
+unless the file integrity is verified externally in some other way.
+.RE
+.TP
.BR \-0 " ... " \-9
Select a compression preset level.
The default is
@@ -778,6 +820,124 @@ These are provided only for backwards compatibility
with LZMA Utils.
Avoid using these options.
.TP
+.BI \-\-block\-size= size
+When compressing to the
+.B .xz
+format, split the input data into blocks of
+.I size
+bytes.
+The blocks are compressed independently from each other,
+which helps with multi-threading and
+makes limited random-access decompression possible.
+This option is typically used to override the default
+block size in multi-threaded mode,
+but this option can be used in single-threaded mode too.
+.IP ""
+In multi-threaded mode about three times
+.I size
+bytes will be allocated in each thread for buffering input and output.
+The default
+.I size
+is three times the LZMA2 dictionary size or 1 MiB,
+whichever is more.
+Typically a good value is 2\-4 times
+the size of the LZMA2 dictionary or at least 1 MiB.
+Using
+.I size
+less than the LZMA2 dictionary size is waste of RAM
+because then the LZMA2 dictionary buffer will never get fully used.
+The sizes of the blocks are stored in the block headers,
+which a future version of
+.B xz
+will use for multi-threaded decompression.
+.IP ""
+In single-threaded mode no block splitting is done by default.
+Setting this option doesn't affect memory usage.
+No size information is stored in block headers,
+thus files created in single-threaded mode
+won't be identical to files created in multi-threaded mode.
+The lack of size information also means that a future version of
+.B xz
+won't be able decompress the files in multi-threaded mode.
+.TP
+.BI \-\-block\-list= sizes
+When compressing to the
+.B .xz
+format, start a new block after
+the given intervals of uncompressed data.
+.IP ""
+The uncompressed
+.I sizes
+of the blocks are specified as a comma-separated list.
+Omitting a size (two or more consecutive commas) is a shorthand
+to use the size of the previous block.
+.IP ""
+If the input file is bigger than the sum of
+.IR sizes ,
+the last value in
+.I sizes
+is repeated until the end of the file.
+A special value of
+.B 0
+may be used as the last value to indicate that
+the rest of the file should be encoded as a single block.
+.IP ""
+If one specifies
+.I sizes
+that exceed the encoder's block size
+(either the default value in threaded mode or
+the value specified with \fB\-\-block\-size=\fIsize\fR),
+the encoder will create additional blocks while
+keeping the boundaries specified in
+.IR sizes .
+For example, if one specifies
+.B \-\-block\-size=10MiB
+.B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB
+and the input file is 80 MiB,
+one will get 11 blocks:
+5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB.
+.IP ""
+In multi-threaded mode the sizes of the blocks
+are stored in the block headers.
+This isn't done in single-threaded mode,
+so the encoded output won't be
+identical to that of the multi-threaded mode.
+.TP
+.BI \-\-flush\-timeout= timeout
+When compressing, if more than
+.I timeout
+milliseconds (a positive integer) has passed since the previous flush and
+reading more input would block,
+all the pending input data is flushed from the encoder and
+made available in the output stream.
+This can be useful if
+.B xz
+is used to compress data that is streamed over a network.
+Small
+.I timeout
+values make the data available at the receiving end
+with a small delay, but large
+.I timeout
+values give better compression ratio.
+.IP ""
+This feature is disabled by default.
+If this option is specified more than once, the last one takes effect.
+The special
+.I timeout
+value of
+.B 0
+can be used to explicitly disable this feature.
+.IP ""
+This feature is not available on non-POSIX systems.
+.IP ""
+.\" FIXME
+.B "This feature is still experimental."
+Currently
+.B xz
+is unsuitable for decompressing the stream in real time due to how
+.B xz
+does buffering.
+.TP
.BI \-\-memlimit\-compress= limit
Set a memory usage limit for compression.
If this option is specified multiple times,
@@ -876,24 +1036,25 @@ Automatic adjusting is always disabled when creating raw streams
.TP
\fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads
Specify the number of worker threads to use.
+Setting
+.I threads
+to a special value
+.B 0
+makes
+.B xz
+use as many threads as there are CPU cores on the system.
The actual number of threads can be less than
.I threads
+if the input file is not big enough
+for threading with the given settings or
if using more threads would exceed the memory usage limit.
.IP ""
-.B "Multithreaded compression and decompression are not"
-.B "implemented yet, so this option has no effect for now."
-.IP ""
-.B "As of writing (2010-09-27), it hasn't been decided"
-.B "if threads will be used by default on multicore systems"
-.B "once support for threading has been implemented."
-.B "Comments are welcome."
-The complicating factor is that using many threads
-will increase the memory usage dramatically.
-Note that if multithreading will be the default,
-it will probably be done so that single-threaded and
-multithreaded modes produce the same output,
-so compression ratio won't be significantly affected
-if threading will be enabled by default.
+Currently the only threading method is to split the input into
+blocks and compress them independently from each other.
+The default block size depends on the compression level and
+can be overriden with the
+.BI \-\-block\-size= size
+option.
.
.SS "Custom compressor filter chains"
A custom filter chain allows specifying
@@ -1863,6 +2024,14 @@ or
.B no
indicating if all block headers have both compressed size and
uncompressed size stored in them
+.PP
+.I Since
+.B xz
+.I 5.1.2alpha:
+.IP 4. 4
+Minimum
+.B xz
+version required to decompress the file
.RE
.PD
.PP
@@ -1913,6 +2082,14 @@ or
.B no
indicating if all block headers have both compressed size and
uncompressed size stored in them
+.PP
+.I Since
+.B xz
+.I 5.1.2alpha:
+.IP 12. 4
+Minimum
+.B xz
+version required to decompress the file
.RE
.PD
.PP
@@ -2173,7 +2350,9 @@ If there is data left after the first
.B .lzma
stream,
.B xz
-considers the file to be corrupt.
+considers the file to be corrupt unless
+.B \-\-single\-stream
+was used.
This may break obscure scripts which have
assumed that trailing garbage is ignored.
.