14 files changed, 140 insertions, 84 deletions
diff --git a/src/xz/args.c b/src/xz/args.c
index 341f29e1b0e9..9238fb32ec00 100644
--- a/src/xz/args.c
+++ b/src/xz/args.c
@@ -88,7 +88,7 @@ parse_block_list(char *str)
 			// There is no string, that is, a comma follows
 			// another comma. Use the previous value.
 			//
-			// NOTE: We checked earler that the first char
+			// NOTE: We checked earlier that the first char
 			// of the whole list cannot be a comma.
 			assert(i > 0);
 			opt_block_list[i] = opt_block_list[i - 1];
@@ -218,7 +218,7 @@ parse_real(args_info *args, int argc, char **argv)
 		// Compression preset (also for decompression if --format=raw)
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
-			coder_set_preset(c - '0');
+			coder_set_preset((uint32_t)(c - '0'));
 			break;
 
 		// --memlimit-compress
@@ -683,7 +683,7 @@ args_parse(args_info *args, int argc, char **argv)
 		// We got at least one filename from the command line, or
 		// --files or --files0 was specified.
 		args->arg_names = argv + optind;
-		args->arg_count = argc - optind;
+		args->arg_count = (unsigned int)(argc - optind);
 	}
 
 	return;
diff --git a/src/xz/coder.c b/src/xz/coder.c
index 3c6a01cbbdbe..85f954393d8b 100644
--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@@ -612,6 +612,20 @@ split_block(uint64_t *block_remaining,
 }
 
 
+static bool
+coder_write_output(file_pair *pair)
+{
+	if (opt_mode != MODE_TEST) {
+		if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out))
+			return true;
+	}
+
+	strm.next_out = out_buf.u8;
+	strm.avail_out = IO_BUFFER_SIZE;
+	return false;
+}
+
+
 /// Compress or decompress using liblzma.
 static bool
 coder_normal(file_pair *pair)
@@ -635,7 +649,7 @@ coder_normal(file_pair *pair)
 	// only a single block is created.
 	uint64_t block_remaining = UINT64_MAX;
 
-	// next_block_remining for when we are in single-threaded mode and
+	// next_block_remaining for when we are in single-threaded mode and
 	// the Block in --block-list is larger than the --block-size=SIZE.
 	uint64_t next_block_remaining = 0;
 
@@ -697,7 +711,7 @@ coder_normal(file_pair *pair)
 					action = LZMA_FULL_BARRIER;
 			}
 
-			if (action == LZMA_RUN && flush_needed)
+			if (action == LZMA_RUN && pair->flush_needed)
 				action = LZMA_SYNC_FLUSH;
 		}
 
@@ -706,29 +720,23 @@ coder_normal(file_pair *pair)
 
 		// Write out if the output buffer became full.
 		if (strm.avail_out == 0) {
-			if (opt_mode != MODE_TEST && io_write(pair, &out_buf,
-					IO_BUFFER_SIZE - strm.avail_out))
+			if (coder_write_output(pair))
 				break;
-
-			strm.next_out = out_buf.u8;
-			strm.avail_out = IO_BUFFER_SIZE;
 		}
 
 		if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
 				|| action == LZMA_FULL_BARRIER)) {
 			if (action == LZMA_SYNC_FLUSH) {
 				// Flushing completed. Write the pending data
-				// out immediatelly so that the reading side
+				// out immediately so that the reading side
 				// can decompress everything compressed so far.
-				if (io_write(pair, &out_buf, IO_BUFFER_SIZE
-						- strm.avail_out))
+				if (coder_write_output(pair))
 					break;
 
-				strm.next_out = out_buf.u8;
-				strm.avail_out = IO_BUFFER_SIZE;
-
-				// Set the time of the most recent flushing.
-				mytime_set_flush_time();
+				// Mark that we haven't seen any new input
+				// since the previous flush.
+				pair->src_has_seen_input = false;
+				pair->flush_needed = false;
 			} else {
 				// Start a new Block after LZMA_FULL_BARRIER.
 				if (opt_block_list == NULL) {
@@ -758,9 +766,7 @@ coder_normal(file_pair *pair)
 				// as much data as possible, which can be good
 				// when trying to get at least some useful
 				// data out of damaged files.
-				if (opt_mode != MODE_TEST && io_write(pair,
-						&out_buf, IO_BUFFER_SIZE
-							- strm.avail_out))
+				if (coder_write_output(pair))
 					break;
 			}
 
@@ -897,21 +903,23 @@ coder_run(const char *filename)
 			// is used.
 			if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
 				// Remember the current time. It is needed
-				// for progress indicator and for timed
-				// flushing.
+				// for progress indicator.
 				mytime_set_start_time();
 
 				// Initialize the progress indicator.
+				const bool is_passthru = init_ret
+						== CODER_INIT_PASSTHRU;
 				const uint64_t in_size
-						= pair->src_st.st_size <= 0
-						? 0 : pair->src_st.st_size;
-				message_progress_start(&strm, in_size);
+					= pair->src_st.st_size <= 0
+					? 0 : (uint64_t)(pair->src_st.st_size);
+				message_progress_start(&strm,
+						is_passthru, in_size);
 
 				// Do the actual coding or passthru.
-				if (init_ret == CODER_INIT_NORMAL)
-					success = coder_normal(pair);
-				else
+				if (is_passthru)
 					success = coder_passthru(pair);
+				else
+					success = coder_normal(pair);
 
 				message_progress_end(success);
 			}
diff --git a/src/xz/file_io.c b/src/xz/file_io.c
index 041bed88e006..0ba8db8fbc4c 100644
--- a/src/xz/file_io.c
+++ b/src/xz/file_io.c
@@ -170,8 +170,11 @@ static void
 io_sandbox_enter(int src_fd)
 {
 	if (!sandbox_allowed) {
-		message(V_DEBUG, _("Sandbox is disabled due "
-				"to incompatible command line arguments"));
+		// This message is more often annoying than useful so
+		// it's commented out. It can be useful when developing
+		// the sandboxing code.
+		//message(V_DEBUG, _("Sandbox is disabled due "
+		//		"to incompatible command line arguments"));
 		return;
 	}
 
@@ -213,7 +216,8 @@ io_sandbox_enter(int src_fd)
 #	error ENABLE_SANDBOX is defined but no sandboxing method was found.
 #endif
 
-	message(V_DEBUG, _("Sandbox was successfully enabled"));
+	// This message is annoying in xz -lvv.
+	//message(V_DEBUG, _("Sandbox was successfully enabled"));
 	return;
 
 error:
@@ -266,11 +270,8 @@ io_wait(file_pair *pair, int timeout, bool is_reading)
 			return IO_WAIT_ERROR;
 		}
 
-		if (ret == 0) {
-			assert(opt_flush_timeout != 0);
-			flush_needed = true;
+		if (ret == 0)
 			return IO_WAIT_TIMEOUT;
-		}
 
 		if (pfd[0].revents != 0)
 			return IO_WAIT_MORE;
@@ -360,13 +361,14 @@ io_copy_attrs(const file_pair *pair)
 	// Try changing the owner of the file. If we aren't root or the owner
 	// isn't already us, fchown() probably doesn't succeed. We warn
 	// about failing fchown() only if we are root.
-	if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown)
+	if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1))
+			&& warn_fchown)
 		message_warning(_("%s: Cannot set the file owner: %s"),
 				pair->dest_name, strerror(errno));
 
 	mode_t mode;
 
-	if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
+	if (fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) {
 		message_warning(_("%s: Cannot set the file group: %s"),
 				pair->dest_name, strerror(errno));
 		// We can still safely copy some additional permissions:
@@ -751,6 +753,8 @@ io_open_src(const char *src_name)
 		.src_fd = -1,
 		.dest_fd = -1,
 		.src_eof = false,
+		.src_has_seen_input = false,
+		.flush_needed = false,
 		.dest_try_sparse = false,
 		.dest_pending_sparse = 0,
 	};
@@ -1109,16 +1113,16 @@ io_fix_src_pos(file_pair *pair, size_t rewind_size)
 
 
 extern size_t
-io_read(file_pair *pair, io_buf *buf_union, size_t size)
+io_read(file_pair *pair, io_buf *buf, size_t size)
 {
 	// We use small buffers here.
 	assert(size < SSIZE_MAX);
 
-	uint8_t *buf = buf_union->u8;
-	size_t left = size;
+	size_t pos = 0;
 
-	while (left > 0) {
-		const ssize_t amount = read(pair->src_fd, buf, left);
+	while (pos < size) {
+		const ssize_t amount = read(
+				pair->src_fd, buf->u8 + pos, size - pos);
 
 		if (amount == 0) {
 			pair->src_eof = true;
@@ -1135,10 +1139,15 @@ io_read(file_pair *pair, io_buf *buf_union, size_t size)
 
 #ifndef TUKLIB_DOSLIKE
 			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
-				const io_wait_ret ret = io_wait(pair,
-						mytime_get_flush_timeout(),
-						true);
-				switch (ret) {
+				// Disable the flush-timeout if no input has
+				// been seen since the previous flush and thus
+				// there would be nothing to flush after the
+				// timeout expires (avoids busy waiting).
+				const int timeout = pair->src_has_seen_input
+						? mytime_get_flush_timeout()
+						: -1;
+
+				switch (io_wait(pair, timeout, true)) {
 				case IO_WAIT_MORE:
 					continue;
 
@@ -1146,7 +1155,8 @@ io_read(file_pair *pair, io_buf *buf_union, size_t size)
 					return SIZE_MAX;
 
 				case IO_WAIT_TIMEOUT:
-					return size - left;
+					pair->flush_needed = true;
+					return pos;
 
 				default:
 					message_bug();
@@ -1160,11 +1170,15 @@ io_read(file_pair *pair, io_buf *buf_union, size_t size)
 			return SIZE_MAX;
 		}
 
-		buf += (size_t)(amount);
-		left -= (size_t)(amount);
+		pos += (size_t)(amount);
+
+		if (!pair->src_has_seen_input) {
+			pair->src_has_seen_input = true;
+			mytime_set_flush_time();
+		}
 	}
 
-	return size - left;
+	return pos;
 }
 
 
@@ -1272,8 +1286,15 @@ io_write(file_pair *pair, const io_buf *buf, size_t size)
 		// if the file ends with sparse block, we must also return
 		// if size == 0 to avoid doing the lseek().
 		if (size == IO_BUFFER_SIZE) {
-			if (is_sparse(buf)) {
-				pair->dest_pending_sparse += size;
+			// Even if the block was sparse, treat it as non-sparse
+			// if the pending sparse amount is large compared to
+			// the size of off_t. In practice this only matters
+			// on 32-bit systems where off_t isn't always 64 bits.
+			const off_t pending_max
+				= (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
+			if (is_sparse(buf) && pair->dest_pending_sparse
+					< pending_max) {
+				pair->dest_pending_sparse += (off_t)(size);
 				return false;
 			}
 		} else if (size == 0) {
diff --git a/src/xz/file_io.h b/src/xz/file_io.h
index 6722aef84092..c533d641badb 100644
--- a/src/xz/file_io.h
+++ b/src/xz/file_io.h
@@ -20,7 +20,10 @@
 
 
 /// is_sparse() accesses the buffer as uint64_t for maximum speed.
-/// Use an union to make sure that the buffer is properly aligned.
+/// The u32 and u64 members must only be accessed through this union
+/// to avoid strict aliasing violations. Taking a pointer of u8
+/// should be fine as long as uint8_t maps to unsigned char which
+/// can alias anything.
 typedef union {
 	uint8_t u8[IO_BUFFER_SIZE];
 	uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
@@ -46,6 +49,13 @@ typedef struct {
 	/// True once end of the source file has been detected.
 	bool src_eof;
 
+	/// For --flush-timeout: True if at least one byte has been read
+	/// since the previous flush or the start of the file.
+	bool src_has_seen_input;
+
+	/// For --flush-timeout: True when flushing is needed.
+	bool flush_needed;
+
 	/// If true, we look for long chunks of zeros and try to create
 	/// a sparse file.
 	bool dest_try_sparse;
diff --git a/src/xz/main.c b/src/xz/main.c
index af550c4585d9..ca8a4680b688 100644
--- a/src/xz/main.c
+++ b/src/xz/main.c
@@ -159,7 +159,7 @@ main(int argc, char **argv)
 	// Initialize handling of error/warning/other messages.
 	message_init();
 
-	// Set hardware-dependent default values. These can be overriden
+	// Set hardware-dependent default values. These can be overridden
 	// on the command line, thus this must be done before args_parse().
 	hardware_init();
 
@@ -326,5 +326,5 @@ main(int argc, char **argv)
 	if (es == E_WARNING && no_warn)
 		es = E_SUCCESS;
 
-	tuklib_exit(es, E_ERROR, message_verbosity_get() != V_SILENT);
+	tuklib_exit((int)es, E_ERROR, message_verbosity_get() != V_SILENT);
 }
diff --git a/src/xz/message.c b/src/xz/message.c
index f88c1231e7d5..aa915d2d60d1 100644
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -56,6 +56,11 @@ static bool progress_active = false;
 /// Pointer to lzma_stream used to do the encoding or decoding.
 static lzma_stream *progress_strm;
 
+/// This is true if we are in passthru mode (not actually compressing or
+/// decompressing) and thus cannot use lzma_get_progress(progress_strm, ...).
+/// That is, we are using coder_passthru() in coder.c.
+static bool progress_is_from_passthru;
+
 /// Expected size of the input stream is needed to show completion percentage
 /// and estimate remaining time.
 static uint64_t expected_in_size;
@@ -241,11 +246,12 @@ message_filename(const char *src_name)
 
 
 extern void
-message_progress_start(lzma_stream *strm, uint64_t in_size)
+message_progress_start(lzma_stream *strm, bool is_passthru, uint64_t in_size)
 {
 	// Store the pointer to the lzma_stream used to do the coding.
 	// It is needed to find out the position in the stream.
 	progress_strm = strm;
+	progress_is_from_passthru = is_passthru;
 
 	// Store the expected size of the file. If we aren't printing any
 	// statistics, then is will be unused. But since it is possible
@@ -434,8 +440,8 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed)
 	// Calculate the estimate. Don't give an estimate of zero seconds,
 	// since it is possible that all the input has been already passed
 	// to the library, but there is still quite a bit of output pending.
-	uint32_t remaining = (double)(expected_in_size - in_pos)
-			* ((double)(elapsed) / 1000.0) / (double)(in_pos);
+	uint32_t remaining = (uint32_t)((double)(expected_in_size - in_pos)
+			* ((double)(elapsed) / 1000.0) / (double)(in_pos));
 	if (remaining < 1)
 		remaining = 1;
 
@@ -507,7 +513,15 @@ progress_pos(uint64_t *in_pos,
 		uint64_t *compressed_pos, uint64_t *uncompressed_pos)
 {
 	uint64_t out_pos;
-	lzma_get_progress(progress_strm, in_pos, &out_pos);
+	if (progress_is_from_passthru) {
+		// In passthru mode the progress info is in total_in/out but
+		// the *progress_strm itself isn't initialized and thus we
+		// cannot use lzma_get_progress().
+		*in_pos = progress_strm->total_in;
+		out_pos = progress_strm->total_out;
+	} else {
+		lzma_get_progress(progress_strm, in_pos, &out_pos);
+	}
 
 	// It cannot have processed more input than it has been given.
 	assert(*in_pos <= progress_strm->total_in);
diff --git a/src/xz/message.h b/src/xz/message.h
index 74599bd978a1..894ac7835f71 100644
--- a/src/xz/message.h
+++ b/src/xz/message.h
@@ -150,7 +150,8 @@ extern void message_filename(const char *src_name);
 /// \param      strm      Pointer to lzma_stream used for the coding.
 /// \param      in_size   Size of the input file, or zero if unknown.
 ///
-extern void message_progress_start(lzma_stream *strm, uint64_t in_size);
+extern void message_progress_start(lzma_stream *strm,
+		bool is_passthru, uint64_t in_size);
 
 
 /// Update the progress info if in verbose mode and enough time has passed
diff --git a/src/xz/mytime.c b/src/xz/mytime.c
index 4be184fd19da..70444001bdd7 100644
--- a/src/xz/mytime.c
+++ b/src/xz/mytime.c
@@ -17,7 +17,6 @@
 #endif
 
 uint64_t opt_flush_timeout = 0;
-bool flush_needed;
 
 static uint64_t start_time;
 static uint64_t next_flush;
@@ -39,11 +38,11 @@ mytime_now(void)
 	while (clock_gettime(clk_id, &tv))
 		clk_id = CLOCK_REALTIME;
 
-	return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_nsec / 1000000;
+	return (uint64_t)tv.tv_sec * 1000 + (uint64_t)(tv.tv_nsec / 1000000);
 #else
 	struct timeval tv;
 	gettimeofday(&tv, NULL);
-	return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_usec / 1000;
+	return (uint64_t)tv.tv_sec * 1000 + (uint64_t)(tv.tv_usec / 1000);
 #endif
 }
 
@@ -52,8 +51,6 @@ extern void
 mytime_set_start_time(void)
 {
 	start_time = mytime_now();
-	next_flush = start_time + opt_flush_timeout;
-	flush_needed = false;
 	return;
 }
 
@@ -69,7 +66,6 @@ extern void
 mytime_set_flush_time(void)
 {
 	next_flush = mytime_now() + opt_flush_timeout;
-	flush_needed = false;
 	return;
 }
 
diff --git a/src/xz/mytime.h b/src/xz/mytime.h
index ea291eed81c7..a7be2aa7ca62 100644
--- a/src/xz/mytime.h
+++ b/src/xz/mytime.h
@@ -21,10 +21,6 @@
 extern uint64_t opt_flush_timeout;
 
 
-/// \brief      True when flushing is needed due to expired timeout
-extern bool flush_needed;
-
-
 /// \brief      Store the time when (de)compression was started
 ///
 /// The start time is also stored as the time of the first flush.
@@ -43,5 +39,5 @@ extern void mytime_set_flush_time(void);
 ///
 /// This returns -1 if no timed flushing is used.
 ///
-/// The return value is inteded for use with poll().
+/// The return value is intended for use with poll().
 extern int mytime_get_flush_timeout(void);
diff --git a/src/xz/options.c b/src/xz/options.c
index de05364ba1b0..0c1ee221b22e 100644
--- a/src/xz/options.c
+++ b/src/xz/options.c
@@ -258,7 +258,7 @@ set_lzma(void *options, unsigned key, uint64_t value, const char *valuestr)
 		if (valuestr[0] < '0' || valuestr[0] > '9')
 			error_lzma_preset(valuestr);
 
-		uint32_t preset = valuestr[0] - '0';
+		uint32_t preset = (uint32_t)(valuestr[0] - '0');
 
 		// Currently only "e" is supported as a modifier,
 		// so keep this simple for now.
diff --git a/src/xz/private.h b/src/xz/private.h
index e61563ac72af..d97c22cc6674 100644
--- a/src/xz/private.h
+++ b/src/xz/private.h
@@ -1,7 +1,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 //
 /// \file       private.h
-/// \brief      Common includes, definions, and prototypes
+/// \brief      Common includes, definitions, and prototypes
 //
 //  Author:     Lasse Collin
 //
diff --git a/src/xz/signals.c b/src/xz/signals.c
index 5387c424e1a5..7aef463c7570 100644
--- a/src/xz/signals.c
+++ b/src/xz/signals.c
@@ -23,7 +23,7 @@ volatile sig_atomic_t user_abort = false;
 /// been done.
 static volatile sig_atomic_t exit_signal = 0;
 
-/// Mask of signals for which have have established a signal handler to set
+/// Mask of signals for which we have established a signal handler to set
 /// user_abort to true.
 static sigset_t hooked_signals;
 
@@ -152,7 +152,7 @@ signals_unblock(void)
 extern void
 signals_exit(void)
 {
-	const int sig = exit_signal;
+	const int sig = (int)exit_signal;
 
 	if (sig != 0) {
 #if defined(TUKLIB_DOSLIKE) || defined(__VMS)
@@ -166,7 +166,7 @@ signals_exit(void)
 		sigfillset(&sa.sa_mask);
 		sa.sa_flags = 0;
 		sigaction(sig, &sa, NULL);
-		raise(exit_signal);
+		raise(sig);
 #endif
 	}
 
diff --git a/src/xz/util.c b/src/xz/util.c
index 35850f4c9046..a1339f4fdf3c 100644
--- a/src/xz/util.c
+++ b/src/xz/util.c
@@ -79,7 +79,7 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
 		result *= 10;
 
 		// Another overflow check
-		const uint32_t add = *value - '0';
+		const uint32_t add = (uint32_t)(*value - '0');
 		if (UINT64_MAX - add < result)
 			goto error;
 
@@ -142,14 +142,24 @@ round_up_to_mib(uint64_t n)
 }
 
 
-/// Check if thousand separator is supported. Run-time checking is easiest,
-/// because it seems to be sometimes lacking even on POSIXish system.
+/// Check if thousands separator is supported. Run-time checking is easiest
+/// because it seems to be sometimes lacking even on a POSIXish system.
+/// Note that trying to use thousands separators when snprintf() doesn't
+/// support them results in undefined behavior. It has just happened to
+/// work well enough in practice.
+///
+/// DJGPP 2.05 added support for thousands separators but it's broken
+/// at least under WinXP with Finnish locale that uses a non-breaking space
+/// as the thousands separator. Work around this by disabling thousands
+/// separators for DJGPP builds.
 static void
 check_thousand_sep(uint32_t slot)
 {
 	if (thousand == UNKNOWN) {
 		bufs[slot][0] = '\0';
+#ifndef __DJGPP__
 		snprintf(bufs[slot], sizeof(bufs[slot]), "%'u", 1U);
+#endif
 		thousand = bufs[slot][0] == '1' ? WORKS : BROKEN;
 	}
 
@@ -243,7 +253,7 @@ my_snprintf(char **pos, size_t *left, const char *fmt, ...)
 		*left = 0;
 	} else {
 		*pos += len;
-		*left -= len;
+		*left -= (size_t)(len);
 	}
 
 	return;
diff --git a/src/xz/xz.1 b/src/xz/xz.1
index a4aaa0027ecb..540d1364dccb 100644
--- a/src/xz/xz.1
+++ b/src/xz/xz.1
@@ -5,7 +5,7 @@
 .\" This file has been put into the public domain.
 .\" You can do whatever you want with this file.
 .\"
-.TH XZ 1 "2017-04-19" "Tukaani" "XZ Utils"
+.TH XZ 1 "2020-02-01" "Tukaani" "XZ Utils"
 .
 .SH NAME
 xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@@ -1071,7 +1071,7 @@ if using more threads would exceed the memory usage limit.
 Currently the only threading method is to split the input into
 blocks and compress them independently from each other.
 The default block size depends on the compression level and
-can be overriden with the
+can be overridden with the
 .BI \-\-block\-size= size
 option.
 .IP ""
@@ -1570,7 +1570,7 @@ The old BCJ filters will still be useful in embedded systems,
 because the decoder of the new filter will be bigger
 and use more memory.
 .IP ""
-Different instruction sets have have different alignment:
+Different instruction sets have different alignment:
 .RS
 .RS
 .PP