18 files changed, 1425 insertions, 258 deletions
diff --git a/src/xz/args.c b/src/xz/args.c
index 34761d45d67e2..041c80073e6de 100644
--- a/src/xz/args.c
+++ b/src/xz/args.c
@@ -22,6 +22,7 @@ bool opt_stdout = false;
 bool opt_force = false;
 bool opt_keep_original = false;
 bool opt_robot = false;
+bool opt_ignore_check = false;
 
 // We don't modify or free() this, but we need to assign it in some
 // non-const pointers.
@@ -55,6 +56,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str,
 
 
 static void
+parse_block_list(char *str)
+{
+	// It must be non-empty and not begin with a comma.
+	if (str[0] == '\0' || str[0] == ',')
+		message_fatal(_("%s: Invalid argument to --block-list"), str);
+
+	// Count the number of comma-separated strings.
+	size_t count = 1;
+	for (size_t i = 0; str[i] != '\0'; ++i)
+		if (str[i] == ',')
+			++count;
+
+	// Prevent an unlikely integer overflow.
+	if (count > SIZE_MAX / sizeof(uint64_t) - 1)
+		message_fatal(_("%s: Too many arguments to --block-list"),
+				str);
+
+	// Allocate memory to hold all the sizes specified.
+	// If --block-list was specified already, its value is forgotten.
+	free(opt_block_list);
+	opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
+
+	for (size_t i = 0; i < count; ++i) {
+		// Locate the next comma and replace it with \0.
+		char *p = strchr(str, ',');
+		if (p != NULL)
+			*p = '\0';
+
+		if (str[0] == '\0') {
+			// There is no string, that is, a comma follows
+			// another comma. Use the previous value.
+			//
+			// NOTE: We checked earlier that the first char
+			// of the whole list cannot be a comma.
+			assert(i > 0);
+			opt_block_list[i] = opt_block_list[i - 1];
+		} else {
+			opt_block_list[i] = str_to_uint64("block-list", str,
+					0, UINT64_MAX);
+
+			// Zero indicates no more new Blocks.
+			if (opt_block_list[i] == 0) {
+				if (i + 1 != count)
+					message_fatal(_("0 can only be used "
+							"as the last element "
+							"in --block-list"));
+
+				opt_block_list[i] = UINT64_MAX;
+			}
+		}
+
+		str = p + 1;
+	}
+
+	// Terminate the array.
+	opt_block_list[count] = 0;
+	return;
+}
+
+
+static void
 parse_real(args_info *args, int argc, char **argv)
 {
 	enum {
@@ -68,14 +130,19 @@ parse_real(args_info *args, int argc, char **argv)
 		OPT_LZMA1,
 		OPT_LZMA2,
 
+		OPT_SINGLE_STREAM,
 		OPT_NO_SPARSE,
 		OPT_FILES,
 		OPT_FILES0,
+		OPT_BLOCK_SIZE,
+		OPT_BLOCK_LIST,
 		OPT_MEM_COMPRESS,
 		OPT_MEM_DECOMPRESS,
 		OPT_NO_ADJUST,
 		OPT_INFO_MEMORY,
 		OPT_ROBOT,
+		OPT_FLUSH_TIMEOUT,
+		OPT_IGNORE_CHECK,
 	};
 
 	static const char short_opts[]
@@ -94,6 +161,7 @@ parse_real(args_info *args, int argc, char **argv)
 		{ "force",        no_argument,       NULL,  'f' },
 		{ "stdout",       no_argument,       NULL,  'c' },
 		{ "to-stdout",    no_argument,       NULL,  'c' },
+		{ "single-stream", no_argument,      NULL,  OPT_SINGLE_STREAM },
 		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
 		{ "suffix",       required_argument, NULL,  'S' },
 		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
@@ -103,12 +171,16 @@ parse_real(args_info *args, int argc, char **argv)
 		// Basic compression settings
 		{ "format",       required_argument, NULL,  'F' },
 		{ "check",        required_argument, NULL,  'C' },
+		{ "ignore-check", no_argument,       NULL,  OPT_IGNORE_CHECK },
+		{ "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
+		{ "block-list",  required_argument, NULL,  OPT_BLOCK_LIST },
 		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
 		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
 		{ "memlimit",     required_argument, NULL,  'M' },
 		{ "memory",       required_argument, NULL,  'M' }, // Old alias
 		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
 		{ "threads",      required_argument, NULL,  'T' },
+		{ "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
 
 		{ "extreme",      no_argument,       NULL,  'e' },
 		{ "fast",         no_argument,       NULL,  '0' },
@@ -175,8 +247,9 @@ parse_real(args_info *args, int argc, char **argv)
 			break;
 
 		case 'T':
-			hardware_threadlimit_set(str_to_uint64(
-					"threads", optarg, 0, UINT32_MAX));
+			// The max is from src/liblzma/common/common.h.
+			hardware_threads_set(str_to_uint64("threads",
+					optarg, 0, 16384));
 			break;
 
 		// --version
@@ -368,6 +441,24 @@ parse_real(args_info *args, int argc, char **argv)
 			break;
 		}
 
+		case OPT_IGNORE_CHECK:
+			opt_ignore_check = true;
+			break;
+
+		case OPT_BLOCK_SIZE:
+			opt_block_size = str_to_uint64("block-size", optarg,
+					0, LZMA_VLI_MAX);
+			break;
+
+		case OPT_BLOCK_LIST: {
+			parse_block_list(optarg);
+			break;
+		}
+
+		case OPT_SINGLE_STREAM:
+			opt_single_stream = true;
+			break;
+
 		case OPT_NO_SPARSE:
 			io_no_sparse();
 			break;
@@ -401,6 +492,11 @@ parse_real(args_info *args, int argc, char **argv)
 			opt_auto_adjust = false;
 			break;
 
+		case OPT_FLUSH_TIMEOUT:
+			opt_flush_timeout = str_to_uint64("flush-timeout",
+					optarg, 0, UINT64_MAX);
+			break;
+
 		default:
 			message_try_help();
 			tuklib_exit(E_ERROR, E_ERROR, false);
@@ -576,3 +672,13 @@ args_parse(args_info *args, int argc, char **argv)
 
 	return;
 }
+
+
+#ifndef NDEBUG
+extern void
+args_free(void)
+{
+	free(opt_block_list);
+	return;
+}
+#endif
diff --git a/src/xz/args.h b/src/xz/args.h
index b23f4ef12c923..1defad12e0dd5 100644
--- a/src/xz/args.h
+++ b/src/xz/args.h
@@ -36,7 +36,9 @@ extern bool opt_force;
 extern bool opt_keep_original;
 // extern bool opt_recursive;
 extern bool opt_robot;
+extern bool opt_ignore_check;
 
 extern const char stdin_filename[];
 
 extern void args_parse(args_info *args, int argc, char **argv);
+extern void args_free(void);
diff --git a/src/xz/coder.c b/src/xz/coder.c
index 017e04127e952..a94bdb83266fa 100644
--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@@ -24,6 +24,9 @@ enum coder_init_ret {
 enum operation_mode opt_mode = MODE_COMPRESS;
 enum format_type opt_format = FORMAT_AUTO;
 bool opt_auto_adjust = true;
+bool opt_single_stream = false;
+uint64_t opt_block_size = 0;
+uint64_t *opt_block_list = NULL;
 
 
 /// Stream used to communicate with liblzma
@@ -48,6 +51,14 @@ static lzma_check check;
 /// This becomes false if the --check=CHECK option is used.
 static bool check_default = true;
 
+#ifdef MYTHREAD_ENABLED
+static lzma_mt mt_options = {
+	.flags = 0,
+	.timeout = 300,
+	.filters = filters,
+};
+#endif
+
 
 extern void
 coder_set_check(lzma_check new_check)
@@ -125,6 +136,15 @@ memlimit_too_small(uint64_t memory_usage)
 extern void
 coder_set_compression_settings(void)
 {
+	// The default check type is CRC64, but fall back to CRC32
+	// if CRC64 isn't supported by the copy of liblzma we are
+	// using. CRC32 is always supported.
+	if (check_default) {
+		check = LZMA_CHECK_CRC64;
+		if (!lzma_check_is_supported(check))
+			check = LZMA_CHECK_CRC32;
+	}
+
 	// Options for LZMA1 or LZMA2 in case we are using a preset.
 	static lzma_options_lzma opt_lzma;
 
@@ -175,15 +195,53 @@ coder_set_compression_settings(void)
 	// Print the selected filter chain.
 	message_filters_show(V_DEBUG, filters);
 
-	// If using --format=raw, we can be decoding. The memusage function
-	// also validates the filter chain and the options used for the
-	// filters.
+	// The --flush-timeout option requires LZMA_SYNC_FLUSH support
+	// from the filter chain. Currently threaded encoder doesn't support
+	// LZMA_SYNC_FLUSH so single-threaded mode must be used.
+	if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
+		for (size_t i = 0; i < filters_count; ++i) {
+			switch (filters[i].id) {
+			case LZMA_FILTER_LZMA2:
+			case LZMA_FILTER_DELTA:
+				break;
+
+			default:
+				message_fatal(_("The filter chain is "
+					"incompatible with --flush-timeout"));
+			}
+		}
+
+		if (hardware_threads_get() > 1) {
+			message(V_WARNING, _("Switching to single-threaded "
+					"mode due to --flush-timeout"));
+			hardware_threads_set(1);
+		}
+	}
+
+	// Get the memory usage. Note that if --format=raw was used,
+	// we can be decompressing.
 	const uint64_t memory_limit = hardware_memlimit_get(opt_mode);
 	uint64_t memory_usage;
-	if (opt_mode == MODE_COMPRESS)
-		memory_usage = lzma_raw_encoder_memusage(filters);
-	else
+	if (opt_mode == MODE_COMPRESS) {
+#ifdef MYTHREAD_ENABLED
+		if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) {
+			mt_options.threads = hardware_threads_get();
+			mt_options.block_size = opt_block_size;
+			mt_options.check = check;
+			memory_usage = lzma_stream_encoder_mt_memusage(
+					&mt_options);
+			if (memory_usage != UINT64_MAX)
+				message(V_DEBUG, _("Using up to %" PRIu32
+						" threads."),
+						mt_options.threads);
+		} else
+#endif
+		{
+			memory_usage = lzma_raw_encoder_memusage(filters);
+		}
+	} else {
 		memory_usage = lzma_raw_decoder_memusage(filters);
+	}
 
 	if (memory_usage == UINT64_MAX)
 		message_fatal(_("Unsupported filter chain or filter options"));
@@ -199,90 +257,99 @@ coder_set_compression_settings(void)
 						round_up_to_mib(decmem), 0));
 	}
 
-	if (memory_usage > memory_limit) {
-		// If --no-adjust was used or we didn't find LZMA1 or
-		// LZMA2 as the last filter, give an error immediately.
-		// --format=raw implies --no-adjust.
-		if (!opt_auto_adjust || opt_format == FORMAT_RAW)
-			memlimit_too_small(memory_usage);
-
-		assert(opt_mode == MODE_COMPRESS);
-
-		// Look for the last filter if it is LZMA2 or LZMA1, so
-		// we can make it use less RAM. With other filters we don't
-		// know what to do.
-		size_t i = 0;
-		while (filters[i].id != LZMA_FILTER_LZMA2
-				&& filters[i].id != LZMA_FILTER_LZMA1) {
-			if (filters[i].id == LZMA_VLI_UNKNOWN)
-				memlimit_too_small(memory_usage);
-
-			++i;
-		}
+	if (memory_usage <= memory_limit)
+		return;
 
-		// Decrease the dictionary size until we meet the memory
-		// usage limit. First round down to full mebibytes.
-		lzma_options_lzma *opt = filters[i].options;
-		const uint32_t orig_dict_size = opt->dict_size;
-		opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
-		while (true) {
-			// If it is below 1 MiB, auto-adjusting failed. We
-			// could be more sophisticated and scale it down even
-			// more, but let's see if many complain about this
-			// version.
-			//
-			// FIXME: Displays the scaled memory usage instead
-			// of the original.
-			if (opt->dict_size < (UINT32_C(1) << 20))
+	// If --no-adjust was used or we didn't find LZMA1 or
+	// LZMA2 as the last filter, give an error immediately.
+	// --format=raw implies --no-adjust.
+	if (!opt_auto_adjust || opt_format == FORMAT_RAW)
+		memlimit_too_small(memory_usage);
+
+	assert(opt_mode == MODE_COMPRESS);
+
+#ifdef MYTHREAD_ENABLED
+	if (opt_format == FORMAT_XZ && mt_options.threads > 1) {
+		// Try to reduce the number of threads before
+		// adjusting the compression settings down.
+		do {
+			// FIXME? The real single-threaded mode has
+			// lower memory usage, but it's not comparable
+			// because it doesn't write the size info
+			// into Block Headers.
+			if (--mt_options.threads == 0)
 				memlimit_too_small(memory_usage);
 
-			memory_usage = lzma_raw_encoder_memusage(filters);
+			memory_usage = lzma_stream_encoder_mt_memusage(
+					&mt_options);
 			if (memory_usage == UINT64_MAX)
 				message_bug();
 
-			// Accept it if it is low enough.
-			if (memory_usage <= memory_limit)
-				break;
+		} while (memory_usage > memory_limit);
 
-			// Otherwise 1 MiB down and try again. I hope this
-			// isn't too slow method for cases where the original
-			// dict_size is very big.
-			opt->dict_size -= UINT32_C(1) << 20;
-		}
+		message(V_WARNING, _("Adjusted the number of threads "
+			"from %s to %s to not exceed "
+			"the memory usage limit of %s MiB"),
+			uint64_to_str(hardware_threads_get(), 0),
+			uint64_to_str(mt_options.threads, 1),
+			uint64_to_str(round_up_to_mib(
+				memory_limit), 2));
+	}
+#endif
+
+	if (memory_usage <= memory_limit)
+		return;
 
-		// Tell the user that we decreased the dictionary size.
-		message(V_WARNING, _("Adjusted LZMA%c dictionary size "
-				"from %s MiB to %s MiB to not exceed "
-				"the memory usage limit of %s MiB"),
-				filters[i].id == LZMA_FILTER_LZMA2
-					? '2' : '1',
-				uint64_to_str(orig_dict_size >> 20, 0),
-				uint64_to_str(opt->dict_size >> 20, 1),
-				uint64_to_str(round_up_to_mib(
-					memory_limit), 2));
+	// Look for the last filter if it is LZMA2 or LZMA1, so we can make
+	// it use less RAM. With other filters we don't know what to do.
+	size_t i = 0;
+	while (filters[i].id != LZMA_FILTER_LZMA2
+			&& filters[i].id != LZMA_FILTER_LZMA1) {
+		if (filters[i].id == LZMA_VLI_UNKNOWN)
+			memlimit_too_small(memory_usage);
+
+		++i;
 	}
 
-/*
-	// Limit the number of worker threads so that memory usage
-	// limit isn't exceeded.
-	assert(memory_usage > 0);
-	size_t thread_limit = memory_limit / memory_usage;
-	if (thread_limit == 0)
-		thread_limit = 1;
+	// Decrease the dictionary size until we meet the memory
+	// usage limit. First round down to full mebibytes.
+	lzma_options_lzma *opt = filters[i].options;
+	const uint32_t orig_dict_size = opt->dict_size;
+	opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
+	while (true) {
+		// If it is below 1 MiB, auto-adjusting failed. We could be
+		// more sophisticated and scale it down even more, but let's
+		// see if many complain about this version.
+		//
+		// FIXME: Displays the scaled memory usage instead
+		// of the original.
+		if (opt->dict_size < (UINT32_C(1) << 20))
+			memlimit_too_small(memory_usage);
 
-	if (opt_threads > thread_limit)
-		opt_threads = thread_limit;
-*/
+		memory_usage = lzma_raw_encoder_memusage(filters);
+		if (memory_usage == UINT64_MAX)
+			message_bug();
 
-	if (check_default) {
-		// The default check type is CRC64, but fallback to CRC32
-		// if CRC64 isn't supported by the copy of liblzma we are
-		// using. CRC32 is always supported.
-		check = LZMA_CHECK_CRC64;
-		if (!lzma_check_is_supported(check))
-			check = LZMA_CHECK_CRC32;
+		// Accept it if it is low enough.
+		if (memory_usage <= memory_limit)
+			break;
+
+		// Otherwise 1 MiB down and try again. I hope this
+		// isn't too slow method for cases where the original
+		// dict_size is very big.
+		opt->dict_size -= UINT32_C(1) << 20;
 	}
 
+	// Tell the user that we decreased the dictionary size.
+	message(V_WARNING, _("Adjusted LZMA%c dictionary size "
+			"from %s MiB to %s MiB to not exceed "
+			"the memory usage limit of %s MiB"),
+			filters[i].id == LZMA_FILTER_LZMA2
+				? '2' : '1',
+			uint64_to_str(orig_dict_size >> 20, 0),
+			uint64_to_str(opt->dict_size >> 20, 1),
+			uint64_to_str(round_up_to_mib(memory_limit), 2));
+
 	return;
 }
 
@@ -364,7 +431,14 @@ coder_init(file_pair *pair)
 			break;
 
 		case FORMAT_XZ:
-			ret = lzma_stream_encoder(&strm, filters, check);
+#ifdef MYTHREAD_ENABLED
+			if (hardware_threads_get() > 1)
+				ret = lzma_stream_encoder_mt(
+						&strm, &mt_options);
+			else
+#endif
+				ret = lzma_stream_encoder(
+						&strm, filters, check);
 			break;
 
 		case FORMAT_LZMA:
@@ -376,8 +450,17 @@ coder_init(file_pair *pair)
 			break;
 		}
 	} else {
-		const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK
-				| LZMA_CONCATENATED;
+		uint32_t flags = 0;
+
+		// It seems silly to warn about unsupported check if the
+		// check won't be verified anyway due to --ignore-check.
+		if (opt_ignore_check)
+			flags |= LZMA_IGNORE_CHECK;
+		else
+			flags |= LZMA_TELL_UNSUPPORTED_CHECK;
+
+		if (!opt_single_stream)
+			flags |= LZMA_CONCATENATED;
 
 		// We abuse FORMAT_AUTO to indicate unknown file format,
 		// for which we may consider passthru mode.
@@ -408,7 +491,7 @@ coder_init(file_pair *pair)
 
 		switch (init_format) {
 		case FORMAT_AUTO:
-			// Uknown file format. If --decompress --stdout
+			// Unknown file format. If --decompress --stdout
 			// --force have been given, then we copy the input
 			// as is to stdout. Checking for MODE_DECOMPRESS
 			// is needed, because we don't want to do use
@@ -462,6 +545,56 @@ coder_init(file_pair *pair)
 }
 
 
+/// Resolve conflicts between opt_block_size and opt_block_list in single
+/// threaded mode. We want to default to opt_block_list, except when it is
+/// larger than opt_block_size. If this is the case for the current Block
+/// at *list_pos, then we break into smaller Blocks. Otherwise advance
+/// to the next Block in opt_block_list, and break apart if needed.
+static void
+split_block(uint64_t *block_remaining,
+	    uint64_t *next_block_remaining,
+	    size_t *list_pos)
+{
+	if (*next_block_remaining > 0) {
+		// The Block at *list_pos has previously been split up.
+		assert(hardware_threads_get() == 1);
+		assert(opt_block_size > 0);
+		assert(opt_block_list != NULL);
+
+		if (*next_block_remaining > opt_block_size) {
+			// We have to split the current Block at *list_pos
+			// into another opt_block_size length Block.
+			*block_remaining = opt_block_size;
+		} else {
+			// This is the last remaining split Block for the
+			// Block at *list_pos.
+			*block_remaining = *next_block_remaining;
+		}
+
+		*next_block_remaining -= *block_remaining;
+
+	} else {
+		// The Block at *list_pos has been finished. Go to the next
+		// entry in the list. If the end of the list has been reached,
+		// reuse the size of the last Block.
+		if (opt_block_list[*list_pos + 1] != 0)
+			++*list_pos;
+
+		*block_remaining = opt_block_list[*list_pos];
+
+		// If in single-threaded mode, split up the Block if needed.
+		// This is not needed in multi-threaded mode because liblzma
+		// will do this due to how threaded encoding works.
+		if (hardware_threads_get() == 1 && opt_block_size > 0
+				&& *block_remaining > opt_block_size) {
+			*next_block_remaining
+					= *block_remaining - opt_block_size;
+			*block_remaining = opt_block_size;
+		}
+	}
+}
+
+
 /// Compress or decompress using liblzma.
 static bool
 coder_normal(file_pair *pair)
@@ -469,8 +602,8 @@ coder_normal(file_pair *pair)
 	// Encoder needs to know when we have given all the input to it.
 	// The decoders need to know it too when we are using
 	// LZMA_CONCATENATED. We need to check for src_eof here, because
-	// the first input chunk has been already read, and that may
-	// have been the only chunk we will read.
+	// the first input chunk has been already read if decompressing,
+	// and that may have been the only chunk we will read.
 	lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;
 
 	lzma_ret ret;
@@ -478,22 +611,77 @@ coder_normal(file_pair *pair)
 	// Assume that something goes wrong.
 	bool success = false;
 
+	// block_remaining indicates how many input bytes to encode before
+	// finishing the current .xz Block. The Block size is set with
+	// --block-size=SIZE and --block-list. They have an effect only when
+	// compressing to the .xz format. If block_remaining == UINT64_MAX,
+	// only a single block is created.
+	uint64_t block_remaining = UINT64_MAX;
+
+	// next_block_remaining is for when we are in single-threaded mode and
+	// the Block in --block-list is larger than the --block-size=SIZE.
+	uint64_t next_block_remaining = 0;
+
+	// Position in opt_block_list. Unused if --block-list wasn't used.
+	size_t list_pos = 0;
+
+	// Handle --block-size for single-threaded mode and the first step
+	// of --block-list.
+	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
+		// --block-size doesn't do anything here in threaded mode,
+		// because the threaded encoder will take care of splitting
+		// to fixed-sized Blocks.
+		if (hardware_threads_get() == 1 && opt_block_size > 0)
+			block_remaining = opt_block_size;
+
+		// If --block-list was used, start with the first size.
+		//
+		// For threaded case, --block-size specifies how big Blocks
+		// the encoder needs to be prepared to create at maximum
+		// and --block-list will simultaneously cause new Blocks
+		// to be started at specified intervals. To keep things
+		// logical, the same is done in single-threaded mode. The
+		// output is still not identical because in single-threaded
+		// mode the size info isn't written into Block Headers.
+		if (opt_block_list != NULL) {
+			if (block_remaining < opt_block_list[list_pos]) {
+				assert(hardware_threads_get() == 1);
+				next_block_remaining = opt_block_list[list_pos]
+						- block_remaining;
+			} else {
+				block_remaining = opt_block_list[list_pos];
+			}
+		}
+	}
+
 	strm.next_out = out_buf.u8;
 	strm.avail_out = IO_BUFFER_SIZE;
 
 	while (!user_abort) {
-		// Fill the input buffer if it is empty and we haven't reached
-		// end of file yet.
-		if (strm.avail_in == 0 && !pair->src_eof) {
+		// Fill the input buffer if it is empty and we aren't
+		// flushing or finishing.
+		if (strm.avail_in == 0 && action == LZMA_RUN) {
 			strm.next_in = in_buf.u8;
-			strm.avail_in = io_read(
-					pair, &in_buf, IO_BUFFER_SIZE);
+			strm.avail_in = io_read(pair, &in_buf,
+					my_min(block_remaining,
+						IO_BUFFER_SIZE));
 
 			if (strm.avail_in == SIZE_MAX)
 				break;
 
-			if (pair->src_eof)
+			if (pair->src_eof) {
 				action = LZMA_FINISH;
+
+			} else if (block_remaining != UINT64_MAX) {
+				// Start a new Block after every
+				// opt_block_size bytes of input.
+				block_remaining -= strm.avail_in;
+				if (block_remaining == 0)
+					action = LZMA_FULL_BARRIER;
+			}
+
+			if (action == LZMA_RUN && flush_needed)
+				action = LZMA_SYNC_FLUSH;
 		}
 
 		// Let liblzma do the actual work.
@@ -509,7 +697,39 @@ coder_normal(file_pair *pair)
 			strm.avail_out = IO_BUFFER_SIZE;
 		}
 
-		if (ret != LZMA_OK) {
+		if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
+				|| action == LZMA_FULL_BARRIER)) {
+			if (action == LZMA_SYNC_FLUSH) {
+				// Flushing completed. Write the pending data
+				// out immediately so that the reading side
+				// can decompress everything compressed so far.
+				if (io_write(pair, &out_buf, IO_BUFFER_SIZE
+						- strm.avail_out))
+					break;
+
+				strm.next_out = out_buf.u8;
+				strm.avail_out = IO_BUFFER_SIZE;
+
+				// Set the time of the most recent flushing.
+				mytime_set_flush_time();
+			} else {
+				// Start a new Block after LZMA_FULL_BARRIER.
+				if (opt_block_list == NULL) {
+					assert(hardware_threads_get() == 1);
+					assert(opt_block_size > 0);
+					block_remaining = opt_block_size;
+				} else {
+					split_block(&block_remaining,
+							&next_block_remaining,
+							&list_pos);
+				}
+			}
+
+			// Start a new Block after LZMA_FULL_BARRIER or
+			// continue the same block after LZMA_SYNC_FLUSH.
+			action = LZMA_RUN;
+
+		} else if (ret != LZMA_OK) {
 			// Determine if the return value indicates that we
 			// won't continue coding.
 			const bool stop = ret != LZMA_NO_CHECK
@@ -528,6 +748,12 @@ coder_normal(file_pair *pair)
 			}
 
 			if (ret == LZMA_STREAM_END) {
+				if (opt_single_stream) {
+					io_fix_src_pos(pair, strm.avail_in);
+					success = true;
+					break;
+				}
+
 				// Check that there is no trailing garbage.
 				// This is needed for LZMA_Alone and raw
 				// streams.
@@ -630,10 +856,15 @@ coder_run(const char *filename)
 	// Assume that something goes wrong.
 	bool success = false;
 
-	// Read the first chunk of input data. This is needed to detect
-	// the input file type (for now, only for decompression).
-	strm.next_in = in_buf.u8;
-	strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
+	if (opt_mode == MODE_COMPRESS) {
+		strm.next_in = NULL;
+		strm.avail_in = 0;
+	} else {
+		// Read the first chunk of input data. This is needed
+		// to detect the input file type.
+		strm.next_in = in_buf.u8;
+		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
+	}
 
 	if (strm.avail_in != SIZE_MAX) {
 		// Initialize the coder. This will detect the file format
@@ -648,6 +879,11 @@ coder_run(const char *filename)
 			// Don't open the destination file when --test
 			// is used.
 			if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
+				// Remember the current time. It is needed
+				// for progress indicator and for timed
+				// flushing.
+				mytime_set_start_time();
+
 				// Initialize the progress indicator.
 				const uint64_t in_size
 						= pair->src_st.st_size <= 0
@@ -671,3 +907,13 @@ coder_run(const char *filename)
 
 	return;
 }
+
+
+#ifndef NDEBUG
+extern void
+coder_free(void)
+{
+	lzma_end(&strm);
+	return;
+}
+#endif
diff --git a/src/xz/coder.h b/src/xz/coder.h
index 2d3add9727451..583da8f68d50a 100644
--- a/src/xz/coder.h
+++ b/src/xz/coder.h
@@ -41,6 +41,16 @@ extern enum format_type opt_format;
 /// they exceed the memory usage limit.
 extern bool opt_auto_adjust;
 
+/// If true, stop after decoding the first stream.
+extern bool opt_single_stream;
+
+/// If non-zero, start a new .xz Block after every opt_block_size bytes
+/// of input. This has an effect only when compressing to the .xz format.
+extern uint64_t opt_block_size;
+
+/// This is non-NULL if --block-list was used. This contains the Block sizes
+/// as an array that is terminated with 0.
+extern uint64_t *opt_block_list;
 
 /// Set the integrity check type used when compressing
 extern void coder_set_check(lzma_check check);
@@ -59,3 +69,8 @@ extern void coder_set_compression_settings(void);
 
 /// Compress or decompress the given file
 extern void coder_run(const char *filename);
+
+#ifndef NDEBUG
+/// Free the memory allocated for the coder and kill the worker threads.
+extern void coder_free(void);
+#endif
diff --git a/src/xz/file_io.c b/src/xz/file_io.c
index 871a099b61939..f135cf7cb6bd8 100644
--- a/src/xz/file_io.c
+++ b/src/xz/file_io.c
@@ -17,6 +17,7 @@
 #ifdef TUKLIB_DOSLIKE
 #	include <io.h>
 #else
+#	include <poll.h>
 static bool warn_fchown;
 #endif
 
@@ -37,14 +38,30 @@ static bool warn_fchown;
 #endif
 
 
+typedef enum {
+	IO_WAIT_MORE,    // Reading or writing is possible.
+	IO_WAIT_ERROR,   // Error or user_abort
+	IO_WAIT_TIMEOUT, // poll() timed out
+} io_wait_ret;
+
+
 /// If true, try to create sparse files when decompressing.
 static bool try_sparse = true;
 
 #ifndef TUKLIB_DOSLIKE
+/// File status flags of standard input. This is used by io_open_src()
+/// and io_close_src().
+static int stdin_flags;
+static bool restore_stdin_flags = false;
+
 /// Original file status flags of standard output. This is used by
 /// io_open_dest() and io_close_dest() to save and restore the flags.
 static int stdout_flags;
 static bool restore_stdout_flags = false;
+
+/// Self-pipe used together with the user_abort variable to avoid
+/// race conditions with signal handling.
+static int user_abort_pipe[2];
 #endif
 
 
@@ -64,19 +81,43 @@ io_init(void)
 	// If fchown() fails setting the owner, we warn about it only if
 	// we are root.
 	warn_fchown = geteuid() == 0;
+
+	if (pipe(user_abort_pipe)
+			|| fcntl(user_abort_pipe[0], F_SETFL, O_NONBLOCK)
+				== -1
+			|| fcntl(user_abort_pipe[1], F_SETFL, O_NONBLOCK)
+				== -1)
+		message_fatal(_("Error creating a pipe: %s"),
+				strerror(errno));
 #endif
 
 #ifdef __DJGPP__
 	// Avoid doing useless things when statting files.
 	// This isn't important but doesn't hurt.
-	_djstat_flags = _STAT_INODE | _STAT_EXEC_EXT
-			| _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
+	_djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
 #endif
 
 	return;
 }
 
 
+#ifndef TUKLIB_DOSLIKE
+extern void
+io_write_to_user_abort_pipe(void)
+{
+	// If the write() fails, it's probably due to the pipe being full.
+	// Failing in that case is fine. If the reason is something else,
+	// there's not much we can do since this is called in a signal
+	// handler. So ignore the errors and try to avoid warnings with
+	// GCC and glibc when _FORTIFY_SOURCE=2 is used.
+	uint8_t b = '\0';
+	const int ret = write(user_abort_pipe[1], &b, 1);
+	(void)ret;
+	return;
+}
+#endif
+
+
 extern void
 io_no_sparse(void)
 {
@@ -85,6 +126,63 @@ io_no_sparse(void)
 }
 
 
+#ifndef TUKLIB_DOSLIKE
+/// \brief      Waits for input or output to become available or for a signal
+///
+/// This uses the self-pipe trick to avoid a race condition that can occur
+/// if a signal is caught after user_abort has been checked but before e.g.
+/// read() has been called. In that situation read() could block unless
+/// non-blocking I/O is used. With non-blocking I/O something like select()
+/// or poll() is needed to avoid a busy-wait loop, and the same race condition
+/// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
+/// POSIX) but neither is portable enough in 2013. The self-pipe trick is
+/// old and very portable.
+static io_wait_ret
+io_wait(file_pair *pair, int timeout, bool is_reading)
+{
+	struct pollfd pfd[2];
+
+	if (is_reading) {
+		pfd[0].fd = pair->src_fd;
+		pfd[0].events = POLLIN;
+	} else {
+		pfd[0].fd = pair->dest_fd;
+		pfd[0].events = POLLOUT;
+	}
+
+	pfd[1].fd = user_abort_pipe[0];
+	pfd[1].events = POLLIN;
+
+	while (true) {
+		const int ret = poll(pfd, 2, timeout);
+
+		if (user_abort)
+			return IO_WAIT_ERROR;
+
+		if (ret == -1) {
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+
+			message_error(_("%s: poll() failed: %s"),
+					is_reading ? pair->src_name
+						: pair->dest_name,
+					strerror(errno));
+			return IO_WAIT_ERROR;
+		}
+
+		if (ret == 0) {
+			assert(opt_flush_timeout != 0);
+			flush_needed = true;
+			return IO_WAIT_TIMEOUT;
+		}
+
+		if (pfd[0].revents != 0)
+			return IO_WAIT_MORE;
+	}
+}
+#endif
+
+
 /// \brief      Unlink a file
 ///
 /// This tries to verify that the file being unlinked really is the file that
@@ -294,6 +392,31 @@ io_open_src_real(file_pair *pair)
 		pair->src_fd = STDIN_FILENO;
 #ifdef TUKLIB_DOSLIKE
 		setmode(STDIN_FILENO, O_BINARY);
+#else
+		// Enable O_NONBLOCK for stdin.
+		stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
+		if (stdin_flags == -1) {
+			message_error(_("Error getting the file status flags "
+					"from standard input: %s"),
+					strerror(errno));
+			return true;
+		}
+
+		if ((stdin_flags & O_NONBLOCK) == 0) {
+			if (fcntl(STDIN_FILENO, F_SETFL,
+					stdin_flags | O_NONBLOCK) == -1) {
+				message_error(_("Error setting O_NONBLOCK "
+						"on standard input: %s"),
+						strerror(errno));
+				return true;
+			}
+
+			restore_stdin_flags = true;
+		}
+#endif
+#ifdef HAVE_POSIX_FADVISE
+		// It will fail if stdin is a pipe and that's fine.
+		(void)posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL);
 #endif
 		return false;
 	}
@@ -311,13 +434,12 @@ io_open_src_real(file_pair *pair)
 	int flags = O_RDONLY | O_BINARY | O_NOCTTY;
 
 #ifndef TUKLIB_DOSLIKE
-	// If we accept only regular files, we need to be careful to avoid
-	// problems with special files like devices and FIFOs. O_NONBLOCK
-	// prevents blocking when opening such files. When we want to accept
-	// special files, we must not use O_NONBLOCK, or otherwise we won't
-	// block waiting e.g. FIFOs to become readable.
-	if (reg_files_only)
-		flags |= O_NONBLOCK;
+	// Use non-blocking I/O:
+	//   - It prevents blocking when opening FIFOs and some other
+	//     special files, which is good if we want to accept only
+	//     regular files.
+	//   - It can help avoid some race conditions with signal handling.
+	flags |= O_NONBLOCK;
 #endif
 
 #if defined(O_NOFOLLOW)
@@ -345,30 +467,13 @@ io_open_src_real(file_pair *pair)
 	(void)follow_symlinks;
 #endif
 
-	// Try to open the file. If we are accepting non-regular files,
-	// unblock the caught signals so that open() can be interrupted
-	// if it blocks e.g. due to a FIFO file.
-	if (!reg_files_only)
-		signals_unblock();
-
-	// Maybe this wouldn't need a loop, since all the signal handlers for
-	// which we don't use SA_RESTART set user_abort to true. But it
-	// doesn't hurt to have it just in case.
-	do {
-		pair->src_fd = open(pair->src_name, flags);
-	} while (pair->src_fd == -1 && errno == EINTR && !user_abort);
-
-	if (!reg_files_only)
-		signals_block();
+	// Try to open the file. Signals have been blocked so EINTR shouldn't
+	// be possible.
+	pair->src_fd = open(pair->src_name, flags);
 
 	if (pair->src_fd == -1) {
-		// If we were interrupted, don't display any error message.
-		if (errno == EINTR) {
-			// All the signals that don't have SA_RESTART
-			// set user_abort.
-			assert(user_abort);
-			return true;
-		}
+		// Signals (that have a signal handler) have been blocked.
+		assert(errno != EINTR);
 
 #ifdef O_NOFOLLOW
 		// Give an understandable error message if the reason
@@ -427,26 +532,20 @@ io_open_src_real(file_pair *pair)
 		return true;
 	}
 
-#ifndef TUKLIB_DOSLIKE
-	// Drop O_NONBLOCK, which is used only when we are accepting only
-	// regular files. After the open() call, we want things to block
-	// instead of giving EAGAIN.
-	if (reg_files_only) {
-		flags = fcntl(pair->src_fd, F_GETFL);
-		if (flags == -1)
-			goto error_msg;
-
-		flags &= ~O_NONBLOCK;
-
-		if (fcntl(pair->src_fd, F_SETFL, flags) == -1)
-			goto error_msg;
-	}
-#endif
-
 	// Stat the source file. We need the result also when we copy
 	// the permissions, and when unlinking.
+	//
+	// NOTE: Use stat() instead of fstat() with DJGPP, because
+	// then we have a better chance to get st_ino value that can
+	// be used in io_open_dest_real() to prevent overwriting the
+	// source file.
+#ifdef __DJGPP__
+	if (stat(pair->src_name, &pair->src_st))
+		goto error_msg;
+#else
 	if (fstat(pair->src_fd, &pair->src_st))
 		goto error_msg;
+#endif
 
 	if (S_ISDIR(pair->src_st.st_mode)) {
 		message_warning(_("%s: Is a directory, skipping"),
@@ -492,6 +591,23 @@ io_open_src_real(file_pair *pair)
 			goto error;
 		}
 	}
+
+	// If it is something else than a regular file, wait until
+	// there is input available. This way reading from FIFOs
+	// will work when open() is used with O_NONBLOCK.
+	if (!S_ISREG(pair->src_st.st_mode)) {
+		signals_unblock();
+		const io_wait_ret ret = io_wait(pair, -1, true);
+		signals_block();
+
+		if (ret != IO_WAIT_MORE)
+			goto error;
+	}
+#endif
+
+#ifdef HAVE_POSIX_FADVISE
+	// It will fail with some special files like FIFOs but that is fine.
+	(void)posix_fadvise(pair->src_fd, 0, 0, POSIX_FADV_SEQUENTIAL);
 #endif
 
 	return false;
@@ -542,6 +658,19 @@ io_open_src(const char *src_name)
 static void
 io_close_src(file_pair *pair, bool success)
 {
+#ifndef TUKLIB_DOSLIKE
+	if (restore_stdin_flags) {
+		assert(pair->src_fd == STDIN_FILENO);
+
+		restore_stdin_flags = false;
+
+		if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
+			message_error(_("Error restoring the status flags "
+					"to standard input: %s"),
+					strerror(errno));
+	}
+#endif
+
 	if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
 #ifdef TUKLIB_DOSLIKE
 		(void)close(pair->src_fd);
@@ -575,12 +704,58 @@ io_open_dest_real(file_pair *pair)
 		pair->dest_fd = STDOUT_FILENO;
 #ifdef TUKLIB_DOSLIKE
 		setmode(STDOUT_FILENO, O_BINARY);
+#else
+		// Set O_NONBLOCK if it isn't already set.
+		//
+		// NOTE: O_APPEND may be unset later in this function
+		// and it relies on stdout_flags being set here.
+		stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
+		if (stdout_flags == -1) {
+			message_error(_("Error getting the file status flags "
+					"from standard output: %s"),
+					strerror(errno));
+			return true;
+		}
+
+		if ((stdout_flags & O_NONBLOCK) == 0) {
+			if (fcntl(STDOUT_FILENO, F_SETFL,
+					stdout_flags | O_NONBLOCK) == -1) {
+				message_error(_("Error setting O_NONBLOCK "
+						"on standard output: %s"),
+						strerror(errno));
+				return true;
+			}
+
+			restore_stdout_flags = true;
+		}
 #endif
 	} else {
 		pair->dest_name = suffix_get_dest_name(pair->src_name);
 		if (pair->dest_name == NULL)
 			return true;
 
+#ifdef __DJGPP__
+		struct stat st;
+		if (stat(pair->dest_name, &st) == 0) {
+			// Check that it isn't a special file like "prn".
+			if (st.st_dev == -1) {
+				message_error("%s: Refusing to write to "
+						"a DOS special file",
+						pair->dest_name);
+				return true;
+			}
+
+			// Check that we aren't overwriting the source file.
+			if (st.st_dev == pair->src_st.st_dev
+					&& st.st_ino == pair->src_st.st_ino) {
+				message_error("%s: Output file is the same "
+						"as the input file",
+						pair->dest_name);
+				return true;
+			}
+		}
+#endif
+
 		// If --force was used, unlink the target file first.
 		if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
 			message_error(_("%s: Cannot remove: %s"),
@@ -590,8 +765,11 @@ io_open_dest_real(file_pair *pair)
 		}
 
 		// Open the file.
-		const int flags = O_WRONLY | O_BINARY | O_NOCTTY
+		int flags = O_WRONLY | O_BINARY | O_NOCTTY
 				| O_CREAT | O_EXCL;
+#ifndef TUKLIB_DOSLIKE
+		flags |= O_NONBLOCK;
+#endif
 		const mode_t mode = S_IRUSR | S_IWUSR;
 		pair->dest_fd = open(pair->dest_name, flags, mode);
 
@@ -603,17 +781,19 @@ io_open_dest_real(file_pair *pair)
 		}
 	}
 
-	// If this really fails... well, we have a safe fallback.
+#ifndef TUKLIB_DOSLIKE
+	// dest_st isn't used on DOS-like systems except as a dummy
+	// argument to io_unlink(), so don't fstat() on such systems.
 	if (fstat(pair->dest_fd, &pair->dest_st)) {
-#if defined(__VMS)
+		// If fstat() really fails, we have a safe fallback here.
+#	if defined(__VMS)
 		pair->dest_st.st_ino[0] = 0;
 		pair->dest_st.st_ino[1] = 0;
 		pair->dest_st.st_ino[2] = 0;
-#elif !defined(TUKLIB_DOSLIKE)
+#	else
 		pair->dest_st.st_dev = 0;
 		pair->dest_st.st_ino = 0;
-#endif
-#ifndef TUKLIB_DOSLIKE
+#	endif
 	} else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
 		// When writing to standard output, we need to be extra
 		// careful:
@@ -631,10 +811,6 @@ io_open_dest_real(file_pair *pair)
 			if (!S_ISREG(pair->dest_st.st_mode))
 				return false;
 
-			stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
-			if (stdout_flags == -1)
-				return false;
-
 			if (stdout_flags & O_APPEND) {
 				// Creating a sparse file is not possible
 				// when O_APPEND is active (it's used by
@@ -653,14 +829,23 @@ io_open_dest_real(file_pair *pair)
 				if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
 					return false;
 
+				// O_NONBLOCK was set earlier in this function
+				// so it must be kept here too. If this
+				// fcntl() call fails, we continue but won't
+				// try to create sparse output. The original
+				// flags will still be restored if needed (to
+				// unset O_NONBLOCK) when the file is finished.
 				if (fcntl(STDOUT_FILENO, F_SETFL,
-						stdout_flags & ~O_APPEND)
-						== -1)
+						(stdout_flags | O_NONBLOCK)
+						& ~O_APPEND) == -1)
 					return false;
 
 				// Disabling O_APPEND succeeded. Mark
 				// that the flags should be restored
-				// in io_close_dest().
+				// in io_close_dest(). This quite likely was
+				// already set when enabling O_NONBLOCK but
+				// just in case O_NONBLOCK was already set,
+				// set this again here.
 				restore_stdout_flags = true;
 
 			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
@@ -673,8 +858,8 @@ io_open_dest_real(file_pair *pair)
 		}
 
 		pair->dest_try_sparse = true;
-#endif
 	}
+#endif
 
 	return false;
 }
@@ -790,6 +975,21 @@ io_close(file_pair *pair, bool success)
 }
 
 
+extern void
+io_fix_src_pos(file_pair *pair, size_t rewind_size)
+{
+	assert(rewind_size <= IO_BUFFER_SIZE);
+
+	if (rewind_size > 0) {
+		// This doesn't need to work on unseekable file descriptors,
+		// so just ignore possible errors.
+		(void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
+	}
+
+	return;
+}
+
+
 extern size_t
 io_read(file_pair *pair, io_buf *buf_union, size_t size)
 {
@@ -815,12 +1015,30 @@ io_read(file_pair *pair, io_buf *buf_union, size_t size)
 				continue;
 			}
 
+#ifndef TUKLIB_DOSLIKE
+			if (errno == EAGAIN || errno == EWOULDBLOCK) {
+				const io_wait_ret ret = io_wait(pair,
+						mytime_get_flush_timeout(),
+						true);
+				switch (ret) {
+				case IO_WAIT_MORE:
+					continue;
+
+				case IO_WAIT_ERROR:
+					return SIZE_MAX;
+
+				case IO_WAIT_TIMEOUT:
+					return size - left;
+
+				default:
+					message_bug();
+				}
+			}
+#endif
+
 			message_error(_("%s: Read error: %s"),
 					pair->src_name, strerror(errno));
 
-			// FIXME Is this needed?
-			pair->src_eof = true;
-
 			return SIZE_MAX;
 		}
 
@@ -885,6 +1103,15 @@ io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
 				continue;
 			}
 
+#ifndef TUKLIB_DOSLIKE
+			if (errno == EAGAIN || errno == EWOULDBLOCK) {
+				if (io_wait(pair, -1, false) == IO_WAIT_MORE)
+					continue;
+
+				return true;
+			}
+#endif
+
 			// Handle broken pipe specially. gzip and bzip2
 			// don't print anything on SIGPIPE. In addition,
 			// gzip --quiet uses exit status 2 (warning) on
diff --git a/src/xz/file_io.h b/src/xz/file_io.h
index 967da868b0797..2de3379238d65 100644
--- a/src/xz/file_io.h
+++ b/src/xz/file_io.h
@@ -68,6 +68,14 @@ typedef struct {
 extern void io_init(void);
 
 
+#ifndef TUKLIB_DOSLIKE
+/// \brief      Write a byte to user_abort_pipe[1]
+///
+/// This is called from a signal handler.
+extern void io_write_to_user_abort_pipe(void);
+#endif
+
+
 /// \brief      Disable creation of sparse files when decompressing
 extern void io_no_sparse(void);
 
@@ -102,6 +110,19 @@ extern void io_close(file_pair *pair, bool success);
 extern size_t io_read(file_pair *pair, io_buf *buf, size_t size);
 
 
+/// \brief      Fix the position in src_fd
+///
+/// This is used when --single-stream has been specified and decompression
+/// is successful. If the input file descriptor supports seeking, this
+/// function fixes the input position to point to the next byte after the
+/// decompressed stream.
+///
+/// \param      pair        File pair having the source file open for reading
+/// \param      rewind_size How many bytes of extra have been read i.e.
+///                         how much to seek backwards.
+extern void io_fix_src_pos(file_pair *pair, size_t rewind_size);
+
+
 /// \brief      Read from source file from given offset to a buffer
 ///
 /// This is remotely similar to standard pread(). This uses lseek() though,
diff --git a/src/xz/hardware.c b/src/xz/hardware.c
index a4733c27e1180..ff32f6d301484 100644
--- a/src/xz/hardware.c
+++ b/src/xz/hardware.c
@@ -11,12 +11,11 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #include "private.h"
-#include "tuklib_cpucores.h"
 
 
-/// Maximum number of free *coder* threads. This can be set with
+/// Maximum number of worker threads. This can be set with
 /// the --threads=NUM command line option.
-static uint32_t threadlimit;
+static uint32_t threads_max = 1;
 
 /// Memory usage limit for compression
 static uint64_t memlimit_compress;
@@ -29,15 +28,23 @@ static uint64_t total_ram;
 
 
 extern void
-hardware_threadlimit_set(uint32_t new_threadlimit)
+hardware_threads_set(uint32_t n)
 {
-	if (new_threadlimit == 0) {
-		// The default is the number of available CPU cores.
-		threadlimit = tuklib_cpucores();
-		if (threadlimit == 0)
-			threadlimit = 1;
+	if (n == 0) {
+		// Automatic number of threads was requested.
+		// If threading support was enabled at build time,
+		// use the number of available CPU cores. Otherwise
+		// use one thread since disabling threading support
+		// omits lzma_cputhreads() from liblzma.
+#ifdef MYTHREAD_ENABLED
+		threads_max = lzma_cputhreads();
+		if (threads_max == 0)
+			threads_max = 1;
+#else
+		threads_max = 1;
+#endif
 	} else {
-		threadlimit = new_threadlimit;
+		threads_max = n;
 	}
 
 	return;
@@ -45,9 +52,9 @@ hardware_threadlimit_set(uint32_t new_threadlimit)
 
 
 extern uint32_t
-hardware_threadlimit_get(void)
+hardware_threads_get(void)
 {
-	return threadlimit;
+	return threads_max;
 }
 
 
@@ -139,6 +146,5 @@ hardware_init(void)
 
 	// Set the defaults.
 	hardware_memlimit_set(0, true, true, false);
-	hardware_threadlimit_set(0);
 	return;
 }
diff --git a/src/xz/hardware.h b/src/xz/hardware.h
index ad526f260bc17..4fae61815656f 100644
--- a/src/xz/hardware.h
+++ b/src/xz/hardware.h
@@ -15,12 +15,11 @@
 extern void hardware_init(void);
 
 
-/// Set custom value for maximum number of coder threads.
-extern void hardware_threadlimit_set(uint32_t threadlimit);
+/// Set the maximum number of worker threads.
+extern void hardware_threads_set(uint32_t threadlimit);
 
-/// Get the maximum number of coder threads. Some additional helper threads
-/// are allowed on top of this).
-extern uint32_t hardware_threadlimit_get(void);
+/// Get the maximum number of worker threads.
+extern uint32_t hardware_threads_get(void);
 
 
 /// Set the memory usage limit. There are separate limits for compression
diff --git a/src/xz/list.c b/src/xz/list.c
index 0e73d519ea4a1..449c2bc4e02fd 100644
--- a/src/xz/list.c
+++ b/src/xz/list.c
@@ -29,9 +29,12 @@ typedef struct {
 	/// Uncompressed Size fields
 	bool all_have_sizes;
 
+	/// Oldest XZ Utils version that will decompress the file
+	uint32_t min_version;
+
 } xz_file_info;
 
-#define XZ_FILE_INFO_INIT { NULL, 0, 0, true }
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
 
 
 /// Information about a .xz Block
@@ -104,8 +107,32 @@ static struct {
 	uint64_t stream_padding;
 	uint64_t memusage_max;
 	uint32_t checks;
+	uint32_t min_version;
 	bool all_have_sizes;
-} totals = { 0, 0, 0, 0, 0, 0, 0, 0, true };
+} totals = { 0, 0, 0, 0, 0, 0, 0, 0, 0, true };
+
+
+/// Convert XZ Utils version number to a string.
+static const char *
+xz_ver_to_str(uint32_t ver)
+{
+	static char buf[32];
+
+	unsigned int major = ver / 10000000U;
+	ver -= major * 10000000U;
+
+	unsigned int minor = ver / 10000U;
+	ver -= minor * 10000U;
+
+	unsigned int patch = ver / 10U;
+	ver -= patch * 10U;
+
+	const char *stability = ver == 0 ? "alpha" : ver == 1 ? "beta" : "";
+
+	snprintf(buf, sizeof(buf), "%u.%u.%u%s",
+			major, minor, patch, stability);
+	return buf;
+}
 
 
 /// \brief      Parse the Index(es) from the given .xz file
@@ -478,6 +505,21 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
 	if (xfi->memusage_max < bhi->memusage)
 		xfi->memusage_max = bhi->memusage;
 
+	// Determine the minimum XZ Utils version that supports this Block.
+	//
+	// Currently the only thing that 5.0.0 doesn't support is empty
+	// LZMA2 Block. This decoder bug was fixed in 5.0.2.
+	{
+		size_t i = 0;
+		while (filters[i + 1].id != LZMA_VLI_UNKNOWN)
+			++i;
+
+		if (filters[i].id == LZMA_FILTER_LZMA2
+				&& iter->block.uncompressed_size == 0
+				&& xfi->min_version < 50000022U)
+			xfi->min_version = 50000022U;
+	}
+
 	// Convert the filter chain to human readable form.
 	message_filters_to_str(bhi->filter_chain, filters, false);
 
@@ -856,6 +898,8 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
 				round_up_to_mib(xfi->memusage_max), 0));
 		printf(_("  Sizes in headers:   %s\n"),
 				xfi->all_have_sizes ? _("Yes") : _("No"));
+		printf(_("  Minimum XZ Utils version: %s\n"),
+				xz_ver_to_str(xfi->min_version));
 	}
 
 	return false;
@@ -938,9 +982,10 @@ print_info_robot(xz_file_info *xfi, file_pair *pair)
 	}
 
 	if (message_verbosity_get() >= V_DEBUG)
-		printf("summary\t%" PRIu64 "\t%s\n",
+		printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n",
 				xfi->memusage_max,
-				xfi->all_have_sizes ? "yes" : "no");
+				xfi->all_have_sizes ? "yes" : "no",
+				xfi->min_version);
 
 	return false;
 }
@@ -961,6 +1006,9 @@ update_totals(const xz_file_info *xfi)
 	if (totals.memusage_max < xfi->memusage_max)
 		totals.memusage_max = xfi->memusage_max;
 
+	if (totals.min_version < xfi->min_version)
+		totals.min_version = xfi->min_version;
+
 	totals.all_have_sizes &= xfi->all_have_sizes;
 
 	return;
@@ -1025,6 +1073,8 @@ print_totals_adv(void)
 				round_up_to_mib(totals.memusage_max), 0));
 		printf(_("  Sizes in headers:   %s\n"),
 				totals.all_have_sizes ? _("Yes") : _("No"));
+		printf(_("  Minimum XZ Utils version: %s\n"),
+				xz_ver_to_str(totals.min_version));
 	}
 
 	return;
@@ -1050,9 +1100,10 @@ print_totals_robot(void)
 			totals.files);
 
 	if (message_verbosity_get() >= V_DEBUG)
-		printf("\t%" PRIu64 "\t%s",
+		printf("\t%" PRIu64 "\t%s\t%" PRIu32,
 				totals.memusage_max,
-				totals.all_have_sizes ? "yes" : "no");
+				totals.all_have_sizes ? "yes" : "no",
+				totals.min_version);
 
 	putchar('\n');
 
diff --git a/src/xz/main.c b/src/xz/main.c
index 8196c6e7e7746..a8f0683a47bd4 100644
--- a/src/xz/main.c
+++ b/src/xz/main.c
@@ -275,6 +275,11 @@ main(int argc, char **argv)
 		list_totals();
 	}
 
+#ifndef NDEBUG
+	coder_free();
+	args_free();
+#endif
+
 	// If we have got a signal, raise it to kill the program instead
 	// of calling tuklib_exit().
 	signals_exit();
diff --git a/src/xz/message.c b/src/xz/message.c
index 0a7a522f7afa7..8a31b00ed89c5 100644
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -12,10 +12,6 @@
 
 #include "private.h"
 
-#ifdef HAVE_SYS_TIME_H
-#	include <sys/time.h>
-#endif
-
 #include <stdarg.h>
 
 
@@ -64,9 +60,6 @@ static lzma_stream *progress_strm;
 /// and estimate remaining time.
 static uint64_t expected_in_size;
 
-/// Time when we started processing the file
-static uint64_t start_time;
-
 
 // Use alarm() and SIGALRM when they are supported. This has two minor
 // advantages over the alternative of polling gettimeofday():
@@ -112,16 +105,6 @@ static uint64_t progress_next_update;
 #endif
 
 
-/// Get the current time as microseconds since epoch
-static uint64_t
-my_time(void)
-{
-	struct timeval tv;
-	gettimeofday(&tv, NULL);
-	return (uint64_t)(tv.tv_sec) * UINT64_C(1000000) + tv.tv_usec;
-}
-
-
 extern void
 message_init(void)
 {
@@ -264,11 +247,10 @@ message_progress_start(lzma_stream *strm, uint64_t in_size)
 	// It is needed to find out the position in the stream.
 	progress_strm = strm;
 
-	// Store the processing start time of the file and its expected size.
-	// If we aren't printing any statistics, then these are unused. But
-	// since it is possible that the user sends us a signal to show
-	// statistics, we need to have these available anyway.
-	start_time = my_time();
+	// Store the expected size of the file. If we aren't printing any
+	// statistics, then it will be unused. But since it is possible
+	// that the user sends us a signal to show statistics, we need
+	// to have it available anyway.
 	expected_in_size = in_size;
 
 	// Indicate that progress info may need to be printed before
@@ -290,7 +272,7 @@ message_progress_start(lzma_stream *strm, uint64_t in_size)
 		alarm(1);
 #else
 		progress_needs_updating = true;
-		progress_next_update = 1000000;
+		progress_next_update = 1000;
 #endif
 	}
 
@@ -364,7 +346,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 {
 	// Don't print the speed immediately, since the early values look
 	// somewhat random.
-	if (elapsed < 3000000)
+	if (elapsed < 3000)
 		return "";
 
 	static const char unit[][8] = {
@@ -377,7 +359,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 
 	// Calculate the speed as KiB/s.
 	double speed = (double)(uncompressed_pos)
-			/ ((double)(elapsed) * (1024.0 / 1e6));
+			/ ((double)(elapsed) * (1024.0 / 1000.0));
 
 	// Adjust the unit of the speed if needed.
 	while (speed > 999.0) {
@@ -402,12 +384,12 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 /// Make a string indicating elapsed or remaining time. The format is either
 /// M:SS or H:MM:SS depending on if the time is an hour or more.
 static const char *
-progress_time(uint64_t useconds)
+progress_time(uint64_t mseconds)
 {
 	// 9999 hours = 416 days
 	static char buf[sizeof("9999:59:59")];
 
-	uint32_t seconds = useconds / 1000000;
+	uint32_t seconds = mseconds / 1000;
 
 	// Don't show anything if the time is zero or ridiculously big.
 	if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59)
@@ -445,14 +427,14 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed)
 	//  - Only a few seconds has passed since we started (de)compressing,
 	//    so estimate would be too inaccurate.
 	if (expected_in_size == 0 || in_pos > expected_in_size
-			|| in_pos < (UINT64_C(1) << 19) || elapsed < 8000000)
+			|| in_pos < (UINT64_C(1) << 19) || elapsed < 8000)
 		return "";
 
 	// Calculate the estimate. Don't give an estimate of zero seconds,
 	// since it is possible that all the input has been already passed
 	// to the library, but there is still quite a bit of output pending.
 	uint32_t remaining = (double)(expected_in_size - in_pos)
-			* ((double)(elapsed) / 1e6) / (double)(in_pos);
+			* ((double)(elapsed) / 1000.0) / (double)(in_pos);
 	if (remaining < 1)
 		remaining = 1;
 
@@ -518,28 +500,26 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed)
 }
 
 
-/// Calculate the elapsed time as microseconds.
-static uint64_t
-progress_elapsed(void)
-{
-	return my_time() - start_time;
-}
-
-
-/// Get information about position in the stream. This is currently simple,
-/// but it will become more complicated once we have multithreading support.
+/// Get how much uncompressed and compressed data has been processed.
 static void
 progress_pos(uint64_t *in_pos,
 		uint64_t *compressed_pos, uint64_t *uncompressed_pos)
 {
-	*in_pos = progress_strm->total_in;
+	uint64_t out_pos;
+	lzma_get_progress(progress_strm, in_pos, &out_pos);
+
+	// It cannot have processed more input than it has been given.
+	assert(*in_pos <= progress_strm->total_in);
+
+	// It cannot have produced more output than it claims to have ready.
+	assert(out_pos >= progress_strm->total_out);
 
 	if (opt_mode == MODE_COMPRESS) {
-		*compressed_pos = progress_strm->total_out;
-		*uncompressed_pos = progress_strm->total_in;
+		*compressed_pos = out_pos;
+		*uncompressed_pos = *in_pos;
 	} else {
-		*compressed_pos = progress_strm->total_in;
-		*uncompressed_pos = progress_strm->total_out;
+		*compressed_pos = *in_pos;
+		*uncompressed_pos = out_pos;
 	}
 
 	return;
@@ -553,13 +533,13 @@ message_progress_update(void)
 		return;
 
 	// Calculate how long we have been processing this file.
-	const uint64_t elapsed = progress_elapsed();
+	const uint64_t elapsed = mytime_get_elapsed();
 
 #ifndef SIGALRM
 	if (progress_next_update > elapsed)
 		return;
 
-	progress_next_update = elapsed + 1000000;
+	progress_next_update = elapsed + 1000;
 #endif
 
 	// Get our current position in the stream.
@@ -652,7 +632,7 @@ progress_flush(bool finished)
 
 	progress_active = false;
 
-	const uint64_t elapsed = progress_elapsed();
+	const uint64_t elapsed = mytime_get_elapsed();
 
 	signals_block();
 
@@ -1122,7 +1102,10 @@ message_help(bool long_help)
 "  -f, --force         force overwrite of output file and (de)compress links\n"
 "  -c, --stdout        write to standard output and don't delete input files"));
 
-	if (long_help)
+	if (long_help) {
+		puts(_(
+"      --single-stream decompress only the first stream, and silently\n"
+"                      ignore possible remaining input data"));
 		puts(_(
 "      --no-sparse     do not create sparse files when decompressing\n"
 "  -S, --suffix=.SUF   use the suffix `.SUF' on compressed files\n"
@@ -1130,6 +1113,7 @@ message_help(bool long_help)
 "                      omitted, filenames are read from the standard input;\n"
 "                      filenames must be terminated with the newline character\n"
 "      --files0[=FILE] like --files but use the null character as terminator"));
+	}
 
 	if (long_help) {
 		puts(_("\n Basic file format and compression options:\n"));
@@ -1138,6 +1122,8 @@ message_help(bool long_help)
 "                      `auto' (default), `xz', `lzma', and `raw'\n"
 "  -C, --check=CHECK   integrity check type: `none' (use with caution),\n"
 "                      `crc32', `crc64' (default), or `sha256'"));
+		puts(_(
+"      --ignore-check  don't verify the integrity check when decompressing"));
 	}
 
 	puts(_(
@@ -1148,7 +1134,25 @@ message_help(bool long_help)
 "  -e, --extreme       try to improve compression ratio by using more CPU time;\n"
 "                      does not affect decompressor memory requirements"));
 
+	puts(_(
+"  -T, --threads=NUM   use at most NUM threads; the default is 1; set to 0\n"
+"                      to use as many threads as there are processor cores"));
+
 	if (long_help) {
+		puts(_(
+"      --block-size=SIZE\n"
+"                      start a new .xz block after every SIZE bytes of input;\n"
+"                      use this to set the block size for threaded compression"));
+		puts(_(
+"      --block-list=SIZES\n"
+"                      start a new .xz block after the given comma-separated\n"
+"                      intervals of uncompressed data"));
+		puts(_(
+"      --flush-timeout=TIMEOUT\n"
+"                      when compressing, if more than TIMEOUT milliseconds has\n"
+"                      passed since the previous flush and reading more input\n"
+"                      would block, all pending data is flushed out"
+		));
 		puts(_( // xgettext:no-c-format
 "      --memlimit-compress=LIMIT\n"
 "      --memlimit-decompress=LIMIT\n"
@@ -1244,5 +1248,10 @@ message_help(bool long_help)
 			PACKAGE_BUGREPORT);
 	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
 
+#if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE
+	puts(_(
+"THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE."));
+#endif
+
 	tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT);
 }
diff --git a/src/xz/mytime.c b/src/xz/mytime.c
new file mode 100644
index 0000000000000..4be184fd19daf
--- /dev/null
+++ b/src/xz/mytime.c
@@ -0,0 +1,89 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       mytime.c
+/// \brief      Time handling functions
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+#if !(defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC)
+#	include <sys/time.h>
+#endif
+
+uint64_t opt_flush_timeout = 0;
+bool flush_needed;
+
+static uint64_t start_time;
+static uint64_t next_flush;
+
+
+/// \brief      Get the current time as milliseconds
+///
+/// It's relative to some point but not necessarily to the UNIX Epoch.
+static uint64_t
+mytime_now(void)
+{
+	// NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1.
+#if defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC
+	// If CLOCK_MONOTONIC was available at compile time but for some
+	// reason isn't at runtime, fallback to CLOCK_REALTIME which
+	// according to POSIX is mandatory for all implementations.
+	static clockid_t clk_id = CLOCK_MONOTONIC;
+	struct timespec tv;
+	while (clock_gettime(clk_id, &tv))
+		clk_id = CLOCK_REALTIME;
+
+	return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_nsec / 1000000;
+#else
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_usec / 1000;
+#endif
+}
+
+
+extern void
+mytime_set_start_time(void)
+{
+	start_time = mytime_now();
+	next_flush = start_time + opt_flush_timeout;
+	flush_needed = false;
+	return;
+}
+
+
+extern uint64_t
+mytime_get_elapsed(void)
+{
+	return mytime_now() - start_time;
+}
+
+
+extern void
+mytime_set_flush_time(void)
+{
+	next_flush = mytime_now() + opt_flush_timeout;
+	flush_needed = false;
+	return;
+}
+
+
+extern int
+mytime_get_flush_timeout(void)
+{
+	if (opt_flush_timeout == 0 || opt_mode != MODE_COMPRESS)
+		return -1;
+
+	const uint64_t now = mytime_now();
+	if (now >= next_flush)
+		return 0;
+
+	const uint64_t remaining = next_flush - now;
+	return remaining > INT_MAX ? INT_MAX : (int)remaining;
+}
diff --git a/src/xz/mytime.h b/src/xz/mytime.h
new file mode 100644
index 0000000000000..ea291eed81c7a
--- /dev/null
+++ b/src/xz/mytime.h
@@ -0,0 +1,47 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       mytime.h
+/// \brief      Time handling functions
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// \brief      Number of milliseconds between LZMA_SYNC_FLUSHes
+///
+/// If 0, timed flushing is disabled. Otherwise if no more input is available
+/// and not at the end of the file and at least opt_flush_timeout milliseconds
+/// has elapsed since the start of compression or the previous flushing
+/// (LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH), set LZMA_SYNC_FLUSH to flush
+/// the pending data.
+extern uint64_t opt_flush_timeout;
+
+
+/// \brief      True when flushing is needed due to expired timeout
+extern bool flush_needed;
+
+
+/// \brief      Store the time when (de)compression was started
+///
+/// The start time is also stored as the time of the first flush.
+extern void mytime_set_start_time(void);
+
+
+/// \brief      Get the number of milliseconds since the operation started
+extern uint64_t mytime_get_elapsed(void);
+
+
+/// \brief      Store the time when the compressor was flushed
+extern void mytime_set_flush_time(void);
+
+
+/// \brief      Get the number of milliseconds until the next flush
+///
+/// This returns -1 if no timed flushing is used.
+///
+/// The return value is intended for use with poll().
+extern int mytime_get_flush_timeout(void);
diff --git a/src/xz/options.c b/src/xz/options.c
index f21a0ba510651..f9c7ab9e8535b 100644
--- a/src/xz/options.c
+++ b/src/xz/options.c
@@ -31,8 +31,8 @@ typedef struct {
 } option_map;
 
 
-/// Parses option=value pairs that are separated with colons, semicolons,
-/// or commas: opt=val:opt=val;opt=val,opt=val
+/// Parses option=value pairs that are separated with commas:
+/// opt=val,opt=val,opt=val
 ///
 /// Each option is a string, that is converted to an integer using the
 /// index where the option string is in the array.
diff --git a/src/xz/private.h b/src/xz/private.h
index 6b01e51354e1c..4acfa8dc45583 100644
--- a/src/xz/private.h
+++ b/src/xz/private.h
@@ -12,6 +12,7 @@
 
 #include "sysdefs.h"
 #include "mythread.h"
+
 #include "lzma.h"
 
 #include <sys/types.h>
@@ -45,6 +46,7 @@
 #endif
 
 #include "main.h"
+#include "mytime.h"
 #include "coder.h"
 #include "message.h"
 #include "args.h"
diff --git a/src/xz/signals.c b/src/xz/signals.c
index 322811f472b5a..5387c424e1a5a 100644
--- a/src/xz/signals.c
+++ b/src/xz/signals.c
@@ -41,6 +41,11 @@ signal_handler(int sig)
 {
 	exit_signal = sig;
 	user_abort = true;
+
+#ifndef TUKLIB_DOSLIKE
+	io_write_to_user_abort_pipe();
+#endif
+
 	return;
 }
 
diff --git a/src/xz/suffix.c b/src/xz/suffix.c
index 8e331a7022a3c..9d4fcd139b8f7 100644
--- a/src/xz/suffix.c
+++ b/src/xz/suffix.c
@@ -12,6 +12,10 @@
 
 #include "private.h"
 
+#ifdef __DJGPP__
+#	include <fcntl.h>
+#endif
+
 // For case-insensitive filename suffix on case-insensitive systems
 #if defined(TUKLIB_DOSLIKE) || defined(__VMS)
 #	define strcmp strcasecmp
@@ -45,6 +49,31 @@ has_dir_sep(const char *str)
 }
 
 
+#ifdef __DJGPP__
+/// \brief      Test for special suffix used for 8.3 short filenames (SFN)
+///
+/// \return     If str matches *.?- or *.??-, true is returned. Otherwise
+///             false is returned.
+static bool
+has_sfn_suffix(const char *str, size_t len)
+{
+	if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.'
+			&& !is_dir_sep(str[len - 2])) {
+		// *.?-
+		if (str[len - 3] == '.')
+			return !is_dir_sep(str[len - 4]);
+
+		// *.??-
+		if (len >= 5 && !is_dir_sep(str[len - 3])
+				&& str[len - 4] == '.')
+			return !is_dir_sep(str[len - 5]);
+	}
+
+	return false;
+}
+#endif
+
+
 /// \brief      Checks if src_name has given compressed_suffix
 ///
 /// \param      suffix      Filename suffix to look for
@@ -87,6 +116,9 @@ uncompressed_name(const char *src_name, const size_t src_len)
 		{ ".xz",    "" },
 		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
 		{ ".lzma",  "" },
+#ifdef __DJGPP__
+		{ ".lzm",   "" },
+#endif
 		{ ".tlz",   ".tar" },
 		// { ".gz",    "" },
 		// { ".tgz",   ".tar" },
@@ -112,6 +144,17 @@ uncompressed_name(const char *src_name, const size_t src_len)
 				break;
 			}
 		}
+
+#ifdef __DJGPP__
+		// Support also *.?- -> *.? and *.??- -> *.?? on DOS.
+		// This is done also when long filenames are available
+		// to keep it easy to decompress files created when
+		// long filename support wasn't available.
+		if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
+			new_suffix = "";
+			new_len = src_len - 1;
+		}
+#endif
 	}
 
 	if (new_len == 0 && custom_suffix != NULL)
@@ -134,21 +177,35 @@ uncompressed_name(const char *src_name, const size_t src_len)
 }
 
 
+/// Shows the warning that src_name already has the given suffix and is being
+/// skipped. compressed_name() emits this message from several places.
+static void
+msg_suffix(const char *src_name, const char *suffix)
+{
+	message_warning(_("%s: File already has `%s' suffix, skipping"),
+			src_name, suffix);
+	return;
+}
+
+
 /// \brief      Appends suffix to src_name
 ///
 /// In contrast to uncompressed_name(), we check only suffixes that are valid
 /// for the specified file format.
 static char *
-compressed_name(const char *src_name, const size_t src_len)
+compressed_name(const char *src_name, size_t src_len)
 {
 	// The order of these must match the order in args.h.
-	static const char *const all_suffixes[][3] = {
+	static const char *const all_suffixes[][4] = {
 		{
 			".xz",
 			".txz",
 			NULL
 		}, {
 			".lzma",
+#ifdef __DJGPP__
+			".lzm",
+#endif
 			".tlz",
 			NULL
 /*
@@ -170,20 +227,27 @@ compressed_name(const char *src_name, const size_t src_len)
 	const size_t format = opt_format - 1;
 	const char *const *suffixes = all_suffixes[format];
 
+	// Look for known filename suffixes and refuse to compress them.
 	for (size_t i = 0; suffixes[i] != NULL; ++i) {
 		if (test_suffix(suffixes[i], src_name, src_len) != 0) {
-			message_warning(_("%s: File already has `%s' "
-					"suffix, skipping"), src_name,
-					suffixes[i]);
+			msg_suffix(src_name, suffixes[i]);
 			return NULL;
 		}
 	}
 
+#ifdef __DJGPP__
+	// Recognize also the special suffix that is used when long
+	// filename (LFN) support isn't available. This suffix is
+	// recognized on LFN systems too.
+	if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
+		msg_suffix(src_name, "-");
+		return NULL;
+	}
+#endif
+
 	if (custom_suffix != NULL) {
 		if (test_suffix(custom_suffix, src_name, src_len) != 0) {
-			message_warning(_("%s: File already has `%s' "
-					"suffix, skipping"), src_name,
-					custom_suffix);
+			msg_suffix(src_name, custom_suffix);
 			return NULL;
 		}
 	}
@@ -199,7 +263,101 @@ compressed_name(const char *src_name, const size_t src_len)
 
 	const char *suffix = custom_suffix != NULL
 			? custom_suffix : suffixes[0];
-	const size_t suffix_len = strlen(suffix);
+	size_t suffix_len = strlen(suffix);
+
+#ifdef __DJGPP__
+	if (!_use_lfn(src_name)) {
+		// Long filename (LFN) support isn't available and we are
+		// limited to 8.3 short filenames (SFN).
+		//
+		// Look for the suffix separator in the filename, and make sure
+		// that it is in the filename, not in a directory name.
+		const char *sufsep = strrchr(src_name, '.');
+		if (sufsep == NULL || sufsep[1] == '\0'
+				|| has_dir_sep(sufsep)) {
+			// src_name has no filename extension.
+			//
+			// Examples:
+			// xz foo         -> foo.xz
+			// xz -F lzma foo -> foo.lzm
+			// xz -S x foo    -> foox
+			// xz -S x foo.   -> foo.x
+			// xz -S x.y foo  -> foox.y
+			// xz -S .x foo   -> foo.x
+			// xz -S .x foo.  -> foo.x
+			//
+			// Avoid double dots:
+			if (sufsep != NULL && sufsep[1] == '\0'
+					&& suffix[0] == '.')
+				--src_len;
+
+		} else if (custom_suffix == NULL
+				&& strcasecmp(sufsep, ".tar") == 0) {
+			// ".tar" is handled specially.
+			//
+			// Examples:
+			// xz foo.tar          -> foo.txz
+			// xz -F lzma foo.tar  -> foo.tlz
+			static const char *const tar_suffixes[] = {
+				".txz",
+				".tlz",
+				// ".tgz",
+			};
+			suffix = tar_suffixes[format];
+			suffix_len = 4;
+			src_len -= 4;
+
+		} else {
+			if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
+				// Instead of the .xz suffix, use a single
+				// character at the end of the filename
+				// extension. This is to minimize name
+				// conflicts when compressing multiple files
+				// with the same basename. E.g. foo.txt and
+				// foo.exe become foo.tx- and foo.ex-. Dash
+				// is rare as the last character of the
+				// filename extension, so it seems to be
+				// quite a safe choice and it stands out better
+				// in directory listings than e.g. x. For
+				// comparison, gzip uses z.
+				suffix = "-";
+				suffix_len = 1;
+			}
+
+			if (suffix[0] == '.') {
+				// The first character of the suffix is a dot.
+				// Throw away the original filename extension
+				// and replace it with the new suffix.
+				//
+				// Examples:
+				// xz -F lzma foo.txt  -> foo.lzm
+				// xz -S .x  foo.txt   -> foo.x
+				src_len = sufsep - src_name;
+
+			} else {
+				// The first character of the suffix is not
+				// a dot. Preserve the first 0-2 characters
+				// of the original filename extension.
+				//
+				// Examples:
+				// xz foo.txt         -> foo.tx-
+				// xz -S x  foo.c     -> foo.cx
+				// xz -S ab foo.c     -> foo.cab
+				// xz -S ab foo.txt   -> foo.tab
+				// xz -S abc foo.txt  -> foo.abc
+				//
+				// Truncate the suffix to three chars:
+				if (suffix_len > 3)
+					suffix_len = 3;
+
+				// If needed, overwrite 1-3 characters.
+				if (strlen(sufsep) > 4 - suffix_len)
+					src_len = sufsep - src_name
+							+ 4 - suffix_len;
+			}
+		}
+	}
+#endif
 
 	char *dest_name = xmalloc(src_len + suffix_len + 1);
 
diff --git a/src/xz/xz.1 b/src/xz/xz.1
index 363b90cb4e551..75aead3d04393 100644
--- a/src/xz/xz.1
+++ b/src/xz/xz.1
@@ -5,7 +5,7 @@
 .\" This file has been put into the public domain.
 .\" You can do whatever you want with this file.
 .\"
-.TH XZ 1 "2013-06-21" "Tukaani" "XZ Utils"
+.TH XZ 1 "2014-12-16" "Tukaani" "XZ Utils"
 .
 .SH NAME
 xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@@ -435,6 +435,29 @@ standard output instead of a file.
 This implies
 .BR \-\-keep .
 .TP
+.B \-\-single\-stream
+Decompress only the first
+.B .xz
+stream, and
+silently ignore possible remaining input data following the stream.
+Normally such trailing garbage makes
+.B xz
+display an error.
+.IP ""
+.B xz
+never decompresses more than one stream from
+.B .lzma
+files or raw streams, but this option still makes
+.B xz
+ignore the possible trailing data after the
+.B .lzma
+file or raw stream.
+.IP ""
+This option has no effect if the operation mode is not
+.B \-\-decompress
+or
+.BR \-\-test .
+.TP
 .B \-\-no\-sparse
 Disable creation of sparse files.
 By default, if decompressing into a regular file,
@@ -586,6 +609,25 @@ Integrity of the
 headers is always verified with CRC32.
 It is not possible to change or disable it.
 .TP
+.B \-\-ignore\-check
+Don't verify the integrity check of the compressed data when decompressing.
+The CRC32 values in the
+.B .xz
+headers will still be verified normally.
+.IP ""
+.B "Do not use this option unless you know what you are doing."
+Possible reasons to use this option:
+.RS
+.IP \(bu 3
+Trying to recover data from a corrupt .xz file.
+.IP \(bu 3
+Speeding up decompression.
+This matters mostly with SHA-256 or
+with files that have compressed extremely well.
+It's recommended to not use this option for this purpose
+unless the file integrity is verified externally in some other way.
+.RE
+.TP
 .BR \-0 " ... " \-9
 Select a compression preset level.
 The default is
@@ -778,6 +820,124 @@ These are provided only for backwards compatibility
 with LZMA Utils.
 Avoid using these options.
 .TP
+.BI \-\-block\-size= size
+When compressing to the
+.B .xz
+format, split the input data into blocks of
+.I size
+bytes.
+The blocks are compressed independently from each other,
+which helps with multi-threading and
+makes limited random-access decompression possible.
+This option is typically used to override the default
+block size in multi-threaded mode,
+but this option can be used in single-threaded mode too.
+.IP ""
+In multi-threaded mode about three times
+.I size
+bytes will be allocated in each thread for buffering input and output.
+The default
+.I size
+is three times the LZMA2 dictionary size or 1 MiB,
+whichever is more.
+Typically a good value is 2\-4 times
+the size of the LZMA2 dictionary or at least 1 MiB.
+Using
+.I size
+less than the LZMA2 dictionary size is a waste of RAM
+because then the LZMA2 dictionary buffer will never get fully used.
+The sizes of the blocks are stored in the block headers,
+which a future version of
+.B xz
+will use for multi-threaded decompression.
+.IP ""
+In single-threaded mode no block splitting is done by default.
+Setting this option doesn't affect memory usage.
+No size information is stored in block headers,
+thus files created in single-threaded mode
+won't be identical to files created in multi-threaded mode.
+The lack of size information also means that a future version of
+.B xz
+won't be able to decompress the files in multi-threaded mode.
+.TP
+.BI \-\-block\-list= sizes
+When compressing to the
+.B .xz
+format, start a new block after
+the given intervals of uncompressed data.
+.IP ""
+The uncompressed
+.I sizes
+of the blocks are specified as a comma-separated list.
+Omitting a size (two or more consecutive commas) is a shorthand
+to use the size of the previous block.
+.IP ""
+If the input file is bigger than the sum of
+.IR sizes ,
+the last value in
+.I sizes
+is repeated until the end of the file.
+A special value of
+.B 0
+may be used as the last value to indicate that
+the rest of the file should be encoded as a single block.
+.IP ""
+If one specifies
+.I sizes
+that exceed the encoder's block size
+(either the default value in threaded mode or
+the value specified with \fB\-\-block\-size=\fIsize\fR),
+the encoder will create additional blocks while
+keeping the boundaries specified in
+.IR sizes .
+For example, if one specifies
+.B \-\-block\-size=10MiB
+.B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB
+and the input file is 80 MiB,
+one will get 11 blocks:
+5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB.
+.IP ""
+In multi-threaded mode the sizes of the blocks
+are stored in the block headers.
+This isn't done in single-threaded mode,
+so the encoded output won't be
+identical to that of the multi-threaded mode.
+.TP
+.BI \-\-flush\-timeout= timeout
+When compressing, if more than
+.I timeout
+milliseconds (a positive integer) has passed since the previous flush and
+reading more input would block,
+all the pending input data is flushed from the encoder and
+made available in the output stream.
+This can be useful if
+.B xz
+is used to compress data that is streamed over a network.
+Small
+.I timeout
+values make the data available at the receiving end
+with a small delay, but large
+.I timeout
+values give better compression ratio.
+.IP ""
+This feature is disabled by default.
+If this option is specified more than once, the last one takes effect.
+The special
+.I timeout
+value of
+.B 0
+can be used to explicitly disable this feature.
+.IP ""
+This feature is not available on non-POSIX systems.
+.IP ""
+.\" FIXME
+.B "This feature is still experimental."
+Currently
+.B xz
+is unsuitable for decompressing the stream in real time due to how
+.B xz
+does buffering.
+.TP
 .BI \-\-memlimit\-compress= limit
 Set a memory usage limit for compression.
 If this option is specified multiple times,
@@ -876,24 +1036,25 @@ Automatic adjusting is always disabled when creating raw streams
 .TP
 \fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads
 Specify the number of worker threads to use.
+Setting
+.I threads
+to a special value
+.B 0
+makes
+.B xz
+use as many threads as there are CPU cores on the system.
 The actual number of threads can be less than
 .I threads
+if the input file is not big enough
+for threading with the given settings or
 if using more threads would exceed the memory usage limit.
 .IP ""
-.B "Multithreaded compression and decompression are not"
-.B "implemented yet, so this option has no effect for now."
-.IP ""
-.B "As of writing (2010-09-27), it hasn't been decided"
-.B "if threads will be used by default on multicore systems"
-.B "once support for threading has been implemented."
-.B "Comments are welcome."
-The complicating factor is that using many threads
-will increase the memory usage dramatically.
-Note that if multithreading will be the default,
-it will probably be done so that single-threaded and
-multithreaded modes produce the same output,
-so compression ratio won't be significantly affected
-if threading will be enabled by default.
+Currently the only threading method is to split the input into
+blocks and compress them independently from each other.
+The default block size depends on the compression level and
+can be overridden with the
+.BI \-\-block\-size= size
+option.
 .
 .SS "Custom compressor filter chains"
 A custom filter chain allows specifying
@@ -1863,6 +2024,14 @@ or
 .B no
 indicating if all block headers have both compressed size and
 uncompressed size stored in them
+.PP
+.I Since
+.B xz
+.I 5.1.2alpha:
+.IP 4. 4
+Minimum
+.B xz
+version required to decompress the file
 .RE
 .PD
 .PP
@@ -1913,6 +2082,14 @@ or
 .B no
 indicating if all block headers have both compressed size and
 uncompressed size stored in them
+.PP
+.I Since
+.B xz
+.I 5.1.2alpha:
+.IP 12. 4
+Minimum
+.B xz
+version required to decompress the file
 .RE
 .PD
 .PP
@@ -2173,7 +2350,9 @@ If there is data left after the first
 .B .lzma
 stream,
 .B xz
-considers the file to be corrupt.
+considers the file to be corrupt unless
+.B \-\-single\-stream
+was used.
 This may break obscure scripts which have
 assumed that trailing garbage is ignored.
 .