115 files changed, 5024 insertions, 942 deletions
diff --git a/src/common/mythread.h b/src/common/mythread.h
index 476c2fc9e103..be22654240aa 100644
--- a/src/common/mythread.h
+++ b/src/common/mythread.h
@@ -1,7 +1,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 //
 /// \file       mythread.h
-/// \brief      Wrappers for threads
+/// \brief      Some threading related helper macros and functions
 //
 //  Author:     Lasse Collin
 //
@@ -10,33 +10,512 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
+#ifndef MYTHREAD_H
+#define MYTHREAD_H
+
 #include "sysdefs.h"
 
+// If any type of threading is enabled, #define MYTHREAD_ENABLED.
+#if defined(MYTHREAD_POSIX) || defined(MYTHREAD_WIN95) \
+		|| defined(MYTHREAD_VISTA)
+#	define MYTHREAD_ENABLED 1
+#endif
+
+
+#ifdef MYTHREAD_ENABLED
+
+////////////////////////////////////////
+// Shared between all threading types //
+////////////////////////////////////////
+
+// Locks a mutex for a duration of a block.
+//
+// Perform mythread_mutex_lock(&mutex) in the beginning of a block
+// and mythread_mutex_unlock(&mutex) at the end of the block. "break"
+// may be used to unlock the mutex and jump out of the block.
+// mythread_sync blocks may be nested.
+//
+// Example:
+//
+//     mythread_sync(mutex) {
+//         foo();
+//         if (some_error)
+//             break; // Skips bar()
+//         bar();
+//     }
+//
+// At least GCC optimizes the loops completely away so it doesn't slow
+// things down at all compared to plain mythread_mutex_lock(&mutex)
+// and mythread_mutex_unlock(&mutex) calls.
+//
+#define mythread_sync(mutex) mythread_sync_helper1(mutex, __LINE__)
+#define mythread_sync_helper1(mutex, line) mythread_sync_helper2(mutex, line)
+#define mythread_sync_helper2(mutex, line) \
+	for (unsigned int mythread_i_ ## line = 0; \
+			mythread_i_ ## line \
+				? (mythread_mutex_unlock(&(mutex)), 0) \
+				: (mythread_mutex_lock(&(mutex)), 1); \
+			mythread_i_ ## line = 1) \
+		for (unsigned int mythread_j_ ## line = 0; \
+				!mythread_j_ ## line; \
+				mythread_j_ ## line = 1)
+#endif
+
+
+#if !defined(MYTHREAD_ENABLED)
+
+//////////////////
+// No threading //
+//////////////////
+
+// Calls the given function once. This isn't thread safe.
+#define mythread_once(func) \
+do { \
+	static bool once_ = false; \
+	if (!once_) { \
+		func(); \
+		once_ = true; \
+	} \
+} while (0)
+
+
+#if !(defined(_WIN32) && !defined(__CYGWIN__))
+// Use sigprocmask() to set the signal mask in single-threaded programs.
+#include <signal.h>
+
+static inline void
+mythread_sigmask(int how, const sigset_t *restrict set,
+		sigset_t *restrict oset)
+{
+	int ret = sigprocmask(how, set, oset);
+	assert(ret == 0);
+	(void)ret;
+}
+#endif
+
+
+#elif defined(MYTHREAD_POSIX)
+
+////////////////////
+// Using pthreads //
+////////////////////
+
+#include <sys/time.h>
+#include <pthread.h>
+#include <signal.h>
+#include <time.h>
+#include <errno.h>
+
+#define MYTHREAD_RET_TYPE void *
+#define MYTHREAD_RET_VALUE NULL
+
+typedef pthread_t mythread;
+typedef pthread_mutex_t mythread_mutex;
+
+typedef struct {
+	pthread_cond_t cond;
+#ifdef HAVE_CLOCK_GETTIME
+	// Clock ID (CLOCK_REALTIME or CLOCK_MONOTONIC) associated with
+	// the condition variable.
+	clockid_t clk_id;
+#endif
+} mythread_cond;
+
+typedef struct timespec mythread_condtime;
 
-#ifdef HAVE_PTHREAD
-#	include <pthread.h>
 
-#	define mythread_once(func) \
+// Calls the given function once in a thread-safe way.
+#define mythread_once(func) \
 	do { \
 		static pthread_once_t once_ = PTHREAD_ONCE_INIT; \
 		pthread_once(&once_, &func); \
 	} while (0)
 
-#	define mythread_sigmask(how, set, oset) \
-		pthread_sigmask(how, set, oset)
 
+// Use pthread_sigmask() to set the signal mask in multi-threaded programs.
+// Do nothing on OpenVMS since it lacks pthread_sigmask().
+static inline void
+mythread_sigmask(int how, const sigset_t *restrict set,
+		sigset_t *restrict oset)
+{
+#ifdef __VMS
+	(void)how;
+	(void)set;
+	(void)oset;
+#else
+	int ret = pthread_sigmask(how, set, oset);
+	assert(ret == 0);
+	(void)ret;
+#endif
+}
+
+
+// Creates a new thread with all signals blocked. Returns zero on success
+// and non-zero on error.
+static inline int
+mythread_create(mythread *thread, void *(*func)(void *arg), void *arg)
+{
+	sigset_t old;
+	sigset_t all;
+	sigfillset(&all);
+
+	mythread_sigmask(SIG_SETMASK, &all, &old);
+	const int ret = pthread_create(thread, NULL, func, arg);
+	mythread_sigmask(SIG_SETMASK, &old, NULL);
+
+	return ret;
+}
+
+// Joins a thread. Returns zero on success and non-zero on error.
+static inline int
+mythread_join(mythread thread)
+{
+	return pthread_join(thread, NULL);
+}
+
+
+// Initiatlizes a mutex. Returns zero on success and non-zero on error.
+static inline int
+mythread_mutex_init(mythread_mutex *mutex)
+{
+	return pthread_mutex_init(mutex, NULL);
+}
+
+static inline void
+mythread_mutex_destroy(mythread_mutex *mutex)
+{
+	int ret = pthread_mutex_destroy(mutex);
+	assert(ret == 0);
+	(void)ret;
+}
+
+static inline void
+mythread_mutex_lock(mythread_mutex *mutex)
+{
+	int ret = pthread_mutex_lock(mutex);
+	assert(ret == 0);
+	(void)ret;
+}
+
+static inline void
+mythread_mutex_unlock(mythread_mutex *mutex)
+{
+	int ret = pthread_mutex_unlock(mutex);
+	assert(ret == 0);
+	(void)ret;
+}
+
+
+// Initializes a condition variable.
+//
+// Using CLOCK_MONOTONIC instead of the default CLOCK_REALTIME makes the
+// timeout in pthread_cond_timedwait() work correctly also if system time
+// is suddenly changed. Unfortunately CLOCK_MONOTONIC isn't available
+// everywhere while the default CLOCK_REALTIME is, so the default is
+// used if CLOCK_MONOTONIC isn't available.
+//
+// If clock_gettime() isn't available at all, gettimeofday() will be used.
+static inline int
+mythread_cond_init(mythread_cond *mycond)
+{
+#ifdef HAVE_CLOCK_GETTIME
+	// NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1.
+#	if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && HAVE_DECL_CLOCK_MONOTONIC
+	struct timespec ts;
+	pthread_condattr_t condattr;
+
+	// POSIX doesn't seem to *require* that pthread_condattr_setclock()
+	// will fail if given an unsupported clock ID. Test that
+	// CLOCK_MONOTONIC really is supported using clock_gettime().
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0
+			&& pthread_condattr_init(&condattr) == 0) {
+		int ret = pthread_condattr_setclock(
+				&condattr, CLOCK_MONOTONIC);
+		if (ret == 0)
+			ret = pthread_cond_init(&mycond->cond, &condattr);
+
+		pthread_condattr_destroy(&condattr);
+
+		if (ret == 0) {
+			mycond->clk_id = CLOCK_MONOTONIC;
+			return 0;
+		}
+	}
+
+	// If anything above fails, fall back to the default CLOCK_REALTIME.
+	// POSIX requires that all implementations of clock_gettime() must
+	// support at least CLOCK_REALTIME.
+#	endif
+
+	mycond->clk_id = CLOCK_REALTIME;
+#endif
+
+	return pthread_cond_init(&mycond->cond, NULL);
+}
+
+static inline void
+mythread_cond_destroy(mythread_cond *cond)
+{
+	int ret = pthread_cond_destroy(&cond->cond);
+	assert(ret == 0);
+	(void)ret;
+}
+
+static inline void
+mythread_cond_signal(mythread_cond *cond)
+{
+	int ret = pthread_cond_signal(&cond->cond);
+	assert(ret == 0);
+	(void)ret;
+}
+
+static inline void
+mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex)
+{
+	int ret = pthread_cond_wait(&cond->cond, mutex);
+	assert(ret == 0);
+	(void)ret;
+}
+
+// Waits on a condition or until a timeout expires. If the timeout expires,
+// non-zero is returned, otherwise zero is returned.
+static inline int
+mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex,
+		const mythread_condtime *condtime)
+{
+	int ret = pthread_cond_timedwait(&cond->cond, mutex, condtime);
+	assert(ret == 0 || ret == ETIMEDOUT);
+	return ret;
+}
+
+// Sets condtime to the absolute time that is timeout_ms milliseconds
+// in the future. The type of the clock to use is taken from cond.
+static inline void
+mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond,
+		uint32_t timeout_ms)
+{
+	condtime->tv_sec = timeout_ms / 1000;
+	condtime->tv_nsec = (timeout_ms % 1000) * 1000000;
+
+#ifdef HAVE_CLOCK_GETTIME
+	struct timespec now;
+	int ret = clock_gettime(cond->clk_id, &now);
+	assert(ret == 0);
+	(void)ret;
+
+	condtime->tv_sec += now.tv_sec;
+	condtime->tv_nsec += now.tv_nsec;
 #else
+	(void)cond;
+
+	struct timeval now;
+	gettimeofday(&now, NULL);
+
+	condtime->tv_sec += now.tv_sec;
+	condtime->tv_nsec += now.tv_usec * 1000L;
+#endif
+
+	// tv_nsec must stay in the range [0, 999_999_999].
+	if (condtime->tv_nsec >= 1000000000L) {
+		condtime->tv_nsec -= 1000000000L;
+		++condtime->tv_sec;
+	}
+}
+
+
+#elif defined(MYTHREAD_WIN95) || defined(MYTHREAD_VISTA)
 
-#	define mythread_once(func) \
+/////////////////////
+// Windows threads //
+/////////////////////
+
+#define WIN32_LEAN_AND_MEAN
+#ifdef MYTHREAD_VISTA
+#	undef _WIN32_WINNT
+#	define _WIN32_WINNT 0x0600
+#endif
+#include <windows.h>
+#include <process.h>
+
+#define MYTHREAD_RET_TYPE unsigned int __stdcall
+#define MYTHREAD_RET_VALUE 0
+
+typedef HANDLE mythread;
+typedef CRITICAL_SECTION mythread_mutex;
+
+#ifdef MYTHREAD_WIN95
+typedef HANDLE mythread_cond;
+#else
+typedef CONDITION_VARIABLE mythread_cond;
+#endif
+
+typedef struct {
+	// Tick count (milliseconds) in the beginning of the timeout.
+	// NOTE: This is 32 bits so it wraps around after 49.7 days.
+	// Multi-day timeouts may not work as expected.
+	DWORD start;
+
+	// Length of the timeout in milliseconds. The timeout expires
+	// when the current tick count minus "start" is equal or greater
+	// than "timeout".
+	DWORD timeout;
+} mythread_condtime;
+
+
+// mythread_once() is only available with Vista threads.
+#ifdef MYTHREAD_VISTA
+#define mythread_once(func) \
 	do { \
-		static bool once_ = false; \
-		if (!once_) { \
+		static INIT_ONCE once_ = INIT_ONCE_STATIC_INIT; \
+		BOOL pending_; \
+		if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \
+			abort(); \
+		if (pending_) \
 			func(); \
-			once_ = true; \
-		} \
+		if (!InitOnceComplete(&once, 0, NULL)) \
+			abort(); \
 	} while (0)
+#endif
+
+
+// mythread_sigmask() isn't available on Windows. Even a dummy version would
+// make no sense because the other POSIX signal functions are missing anyway.
+
+
+static inline int
+mythread_create(mythread *thread,
+		unsigned int (__stdcall *func)(void *arg), void *arg)
+{
+	uintptr_t ret = _beginthreadex(NULL, 0, func, arg, 0, NULL);
+	if (ret == 0)
+		return -1;
+
+	*thread = (HANDLE)ret;
+	return 0;
+}
+
+static inline int
+mythread_join(mythread thread)
+{
+	int ret = 0;
+
+	if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0)
+		ret = -1;
+
+	if (!CloseHandle(thread))
+		ret = -1;
+
+	return ret;
+}
 
-#	define mythread_sigmask(how, set, oset) \
-		sigprocmask(how, set, oset)
+
+static inline int
+mythread_mutex_init(mythread_mutex *mutex)
+{
+	InitializeCriticalSection(mutex);
+	return 0;
+}
+
+static inline void
+mythread_mutex_destroy(mythread_mutex *mutex)
+{
+	DeleteCriticalSection(mutex);
+}
+
+static inline void
+mythread_mutex_lock(mythread_mutex *mutex)
+{
+	EnterCriticalSection(mutex);
+}
+
+static inline void
+mythread_mutex_unlock(mythread_mutex *mutex)
+{
+	LeaveCriticalSection(mutex);
+}
+
+
+static inline int
+mythread_cond_init(mythread_cond *cond)
+{
+#ifdef MYTHREAD_WIN95
+	*cond = CreateEvent(NULL, FALSE, FALSE, NULL);
+	return *cond == NULL ? -1 : 0;
+#else
+	InitializeConditionVariable(cond);
+	return 0;
+#endif
+}
+
+static inline void
+mythread_cond_destroy(mythread_cond *cond)
+{
+#ifdef MYTHREAD_WIN95
+	CloseHandle(*cond);
+#else
+	(void)cond;
+#endif
+}
+
+static inline void
+mythread_cond_signal(mythread_cond *cond)
+{
+#ifdef MYTHREAD_WIN95
+	SetEvent(*cond);
+#else
+	WakeConditionVariable(cond);
+#endif
+}
+
+static inline void
+mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex)
+{
+#ifdef MYTHREAD_WIN95
+	LeaveCriticalSection(mutex);
+	WaitForSingleObject(*cond, INFINITE);
+	EnterCriticalSection(mutex);
+#else
+	BOOL ret = SleepConditionVariableCS(cond, mutex, INFINITE);
+	assert(ret);
+	(void)ret;
+#endif
+}
+
+static inline int
+mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex,
+		const mythread_condtime *condtime)
+{
+#ifdef MYTHREAD_WIN95
+	LeaveCriticalSection(mutex);
+#endif
+
+	DWORD elapsed = GetTickCount() - condtime->start;
+	DWORD timeout = elapsed >= condtime->timeout
+			? 0 : condtime->timeout - elapsed;
+
+#ifdef MYTHREAD_WIN95
+	DWORD ret = WaitForSingleObject(*cond, timeout);
+	assert(ret == WAIT_OBJECT_0 || ret == WAIT_TIMEOUT);
+
+	EnterCriticalSection(mutex);
+
+	return ret == WAIT_TIMEOUT;
+#else
+	BOOL ret = SleepConditionVariableCS(cond, mutex, timeout);
+	assert(ret || GetLastError() == ERROR_TIMEOUT);
+	return !ret;
+#endif
+}
+
+static inline void
+mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond,
+		uint32_t timeout)
+{
+	(void)cond;
+	condtime->start = GetTickCount();
+	condtime->timeout = timeout;
+}
+
+#endif
 
 #endif
diff --git a/src/common/sysdefs.h b/src/common/sysdefs.h
index 5ea6bdae4aed..e056ca4ac908 100644
--- a/src/common/sysdefs.h
+++ b/src/common/sysdefs.h
@@ -165,6 +165,16 @@ typedef unsigned char _Bool;
 #	include <memory.h>
 #endif
 
+// As of MSVC 2013, inline and restrict are supported with
+// non-standard keywords.
+#if defined(_WIN32) && defined(_MSC_VER)
+#	ifndef inline
+#		define inline __inline
+#	endif
+#	ifndef restrict
+#		define restrict __restrict
+#	endif
+#endif
 
 ////////////
 // Macros //
diff --git a/src/common/tuklib_cpucores.c b/src/common/tuklib_cpucores.c
index 1da13df7f5f6..7574bc9c0645 100644
--- a/src/common/tuklib_cpucores.c
+++ b/src/common/tuklib_cpucores.c
@@ -12,7 +12,13 @@
 
 #include "tuklib_cpucores.h"
 
-#if defined(TUKLIB_CPUCORES_SYSCTL)
+#if defined(_WIN32) || defined(__CYGWIN__)
+#	ifndef _WIN32_WINNT
+#		define _WIN32_WINNT 0x0500
+#	endif
+#	include <windows.h>
+
+#elif defined(TUKLIB_CPUCORES_SYSCTL)
 #	ifdef HAVE_SYS_PARAM_H
 #		include <sys/param.h>
 #	endif
@@ -33,7 +39,12 @@ tuklib_cpucores(void)
 {
 	uint32_t ret = 0;
 
-#if defined(TUKLIB_CPUCORES_SYSCTL)
+#if defined(_WIN32) || defined(__CYGWIN__)
+	SYSTEM_INFO sysinfo;
+	GetSystemInfo(&sysinfo);
+	ret = sysinfo.dwNumberOfProcessors;
+
+#elif defined(TUKLIB_CPUCORES_SYSCTL)
 	int name[2] = { CTL_HW, HW_NCPU };
 	int cpus;
 	size_t cpus_size = sizeof(cpus);
diff --git a/src/common/tuklib_physmem.c b/src/common/tuklib_physmem.c
index 623b6e70b7f2..3cc7d12a171f 100644
--- a/src/common/tuklib_physmem.c
+++ b/src/common/tuklib_physmem.c
@@ -33,6 +33,10 @@
 #	include <syidef.h>
 #	include <ssdef.h>
 
+#elif defined(AMIGA) || defined(__AROS__)
+#	define __USE_INLINE__
+#	include <proto/exec.h>
+
 // AIX
 #elif defined(TUKLIB_PHYSMEM_AIX)
 #	include <sys/systemcfg.h>
@@ -119,6 +123,9 @@ tuklib_physmem(void)
 	if (LIB$GETSYI(&val, &vms_mem, 0, 0, 0, 0) == SS$_NORMAL)
 		ret = (uint64_t)vms_mem * 8192;
 
+#elif defined(AMIGA) || defined(__AROS__)
+	ret = AvailMem(MEMF_TOTAL);
+
 #elif defined(TUKLIB_PHYSMEM_AIX)
 	ret = _system_configuration.physmem;
 
diff --git a/src/liblzma/api/lzma.h b/src/liblzma/api/lzma.h
index fb874c3e1377..72c51b2411c3 100644
--- a/src/liblzma/api/lzma.h
+++ b/src/liblzma/api/lzma.h
@@ -286,7 +286,7 @@ extern "C" {
 #include "lzma/filter.h"
 #include "lzma/bcj.h"
 #include "lzma/delta.h"
-#include "lzma/lzma.h"
+#include "lzma/lzma12.h"
 
 /* Container formats */
 #include "lzma/container.h"
diff --git a/src/liblzma/api/lzma/base.h b/src/liblzma/api/lzma/base.h
index 43dde8d60f3d..7a31a4205136 100644
--- a/src/liblzma/api/lzma/base.h
+++ b/src/liblzma/api/lzma/base.h
@@ -240,12 +240,12 @@ typedef enum {
 /**
  * \brief       The `action' argument for lzma_code()
  *
- * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH,
- * the same `action' must is used until lzma_code() returns LZMA_STREAM_END.
- * Also, the amount of input (that is, strm->avail_in) must not be modified
- * by the application until lzma_code() returns LZMA_STREAM_END. Changing the
- * `action' or modifying the amount of input will make lzma_code() return
- * LZMA_PROG_ERROR.
+ * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FULL_BARRIER,
+ * or LZMA_FINISH, the same `action' must is used until lzma_code() returns
+ * LZMA_STREAM_END. Also, the amount of input (that is, strm->avail_in) must
+ * not be modified by the application until lzma_code() returns
+ * LZMA_STREAM_END. Changing the `action' or modifying the amount of input
+ * will make lzma_code() return LZMA_PROG_ERROR.
  */
 typedef enum {
 	LZMA_RUN = 0,
@@ -293,7 +293,7 @@ typedef enum {
 		 *
 		 * All the input data going to the current Block must have
 		 * been given to the encoder (the last bytes can still be
-		 * pending in* next_in). Call lzma_code() with LZMA_FULL_FLUSH
+		 * pending in *next_in). Call lzma_code() with LZMA_FULL_FLUSH
 		 * until it returns LZMA_STREAM_END. Then continue normally
 		 * with LZMA_RUN or finish the Stream with LZMA_FINISH.
 		 *
@@ -302,6 +302,29 @@ typedef enum {
 		 * no unfinished Block, no empty Block is created.
 		 */
 
+	LZMA_FULL_BARRIER = 4,
+		/**<
+		 * \brief       Finish encoding of the current Block
+		 *
+		 * This is like LZMA_FULL_FLUSH except that this doesn't
+		 * necessarily wait until all the input has been made
+		 * available via the output buffer. That is, lzma_code()
+		 * might return LZMA_STREAM_END as soon as all the input
+		 * has been consumed (avail_in == 0).
+		 *
+		 * LZMA_FULL_BARRIER is useful with a threaded encoder if
+		 * one wants to split the .xz Stream into Blocks at specific
+		 * offsets but doesn't care if the output isn't flushed
+		 * immediately. Using LZMA_FULL_BARRIER allows keeping
+		 * the threads busy while LZMA_FULL_FLUSH would make
+		 * lzma_code() wait until all the threads have finished
+		 * until more data could be passed to the encoder.
+		 *
+		 * With a lzma_stream initialized with the single-threaded
+		 * lzma_stream_encoder() or lzma_easy_encoder(),
+		 * LZMA_FULL_BARRIER is an alias for LZMA_FULL_FLUSH.
+		 */
+
 	LZMA_FINISH = 3
 		/**<
 		 * \brief       Finish the coding operation
@@ -332,11 +355,19 @@ typedef enum {
  * malloc() and free(). C++ users should note that the custom memory
  * handling functions must not throw exceptions.
  *
- * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is
- * OK to change these function pointers in the middle of the coding
- * process, but obviously it must be done carefully to make sure that the
- * replacement `free' can deallocate memory allocated by the earlier
- * `alloc' function(s).
+ * Single-threaded mode only: liblzma doesn't make an internal copy of
+ * lzma_allocator. Thus, it is OK to change these function pointers in
+ * the middle of the coding process, but obviously it must be done
+ * carefully to make sure that the replacement `free' can deallocate
+ * memory allocated by the earlier `alloc' function(s).
+ *
+ * Multithreaded mode: liblzma might internally store pointers to the
+ * lzma_allocator given via the lzma_stream structure. The application
+ * must not change the allocator pointer in lzma_stream or the contents
+ * of the pointed lzma_allocator structure until lzma_end() has been used
+ * to free the memory associated with that lzma_stream. The allocation
+ * functions might be called simultaneously from multiple threads, and
+ * thus they must be thread safe.
  */
 typedef struct {
 	/**
@@ -448,7 +479,8 @@ typedef struct lzma_internal_s lzma_internal;
  *
  * Application may modify the values of total_in and total_out as it wants.
  * They are updated by liblzma to match the amount of data read and
- * written, but aren't used for anything else.
+ * written but aren't used for anything else except as a possible return
+ * values from lzma_get_progress().
  */
 typedef struct {
 	const uint8_t *next_in; /**< Pointer to the next input byte. */
@@ -464,8 +496,10 @@ typedef struct {
 	 *
 	 * In most cases this is NULL which makes liblzma use
 	 * the standard malloc() and free().
+	 *
+	 * \note        In 5.0.x this is not a const pointer.
 	 */
-	lzma_allocator *allocator;
+	const lzma_allocator *allocator;
 
 	/** Internal state is not visible to applications. */
 	lzma_internal *internal;
@@ -547,6 +581,25 @@ extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow;
 
 
 /**
+ * \brief       Get progress information
+ *
+ * In single-threaded mode, applications can get progress information from
+ * strm->total_in and strm->total_out. In multi-threaded mode this is less
+ * useful because a significant amount of both input and output data gets
+ * buffered internally by liblzma. This makes total_in and total_out give
+ * misleading information and also makes the progress indicator updates
+ * non-smooth.
+ *
+ * This function gives realistic progress information also in multi-threaded
+ * mode by taking into account the progress made by each thread. In
+ * single-threaded mode *progress_in and *progress_out are set to
+ * strm->total_in and strm->total_out, respectively.
+ */
+extern LZMA_API(void) lzma_get_progress(lzma_stream *strm,
+		uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow;
+
+
+/**
  * \brief       Get the memory usage of decoder filter chain
  *
  * This function is currently supported only when *strm has been initialized
diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h
index e6710a7bc816..7bdcfd7cbe0c 100644
--- a/src/liblzma/api/lzma/block.h
+++ b/src/liblzma/api/lzma/block.h
@@ -31,11 +31,16 @@ typedef struct {
 	/**
 	 * \brief       Block format version
 	 *
-	 * To prevent API and ABI breakages if new features are needed in
-	 * the Block field, a version number is used to indicate which
-	 * fields in this structure are in use. For now, version must always
-	 * be zero. With non-zero version, most Block related functions will
-	 * return LZMA_OPTIONS_ERROR.
+	 * To prevent API and ABI breakages when new features are needed,
+	 * a version number is used to indicate which fields in this
+	 * structure are in use:
+	 *   - liblzma >= 5.0.0: version = 0 is supported.
+	 *   - liblzma >= 5.1.4beta: Support for version = 1 was added,
+	 *     which adds the ignore_check field.
+	 *
+	 * If version is greater than one, most Block related functions
+	 * will return LZMA_OPTIONS_ERROR (lzma_block_header_decode() works
+	 * with any version value).
 	 *
 	 * Read by:
 	 *  - All functions that take pointer to lzma_block as argument,
@@ -233,7 +238,28 @@ typedef struct {
 	lzma_reserved_enum reserved_enum2;
 	lzma_reserved_enum reserved_enum3;
 	lzma_reserved_enum reserved_enum4;
-	lzma_bool reserved_bool1;
+
+	/**
+	 * \brief       A flag to Block decoder to not verify the Check field
+	 *
+	 * This field is supported by liblzma >= 5.1.4beta if .version >= 1.
+	 *
+	 * If this is set to true, the integrity check won't be calculated
+	 * and verified. Unless you know what you are doing, you should
+	 * leave this to false. (A reason to set this to true is when the
+	 * file integrity is verified externally anyway and you want to
+	 * speed up the decompression, which matters mostly when using
+	 * SHA-256 as the integrity check.)
+	 *
+	 * If .version >= 1, read by:
+	 *   - lzma_block_decoder()
+	 *   - lzma_block_buffer_decode()
+	 *
+	 * Written by (.version is ignored):
+	 *   - lzma_block_header_decode() always sets this to false
+	 */
+	lzma_bool ignore_check;
+
 	lzma_bool reserved_bool2;
 	lzma_bool reserved_bool3;
 	lzma_bool reserved_bool4;
@@ -310,10 +336,14 @@ extern LZMA_API(lzma_ret) lzma_block_header_encode(
 /**
  * \brief       Decode Block Header
  *
- * block->version should be set to the highest value supported by the
- * application; currently the only possible version is zero. This function
- * will set version to the lowest value that still supports all the features
- * required by the Block Header.
+ * block->version should (usually) be set to the highest value supported
+ * by the application. If the application sets block->version to a value
+ * higher than supported by the current liblzma version, this function will
+ * downgrade block->version to the highest value supported by it. Thus one
+ * should check the value of block->version after calling this function if
+ * block->version was set to a non-zero value and the application doesn't
+ * otherwise know that the liblzma version being used is new enough to
+ * support the specified block->version.
  *
  * The size of the Block Header must have already been decoded with
  * lzma_block_header_size_decode() macro and stored to block->header_size.
@@ -344,7 +374,7 @@ extern LZMA_API(lzma_ret) lzma_block_header_encode(
  *                block->header_size is invalid or block->filters is NULL.
  */
 extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block,
-		lzma_allocator *allocator, const uint8_t *in)
+		const lzma_allocator *allocator, const uint8_t *in)
 		lzma_nothrow lzma_attr_warn_unused_result;
 
 
@@ -493,7 +523,25 @@ extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size)
  *              - LZMA_PROG_ERROR
  */
 extern LZMA_API(lzma_ret) lzma_block_buffer_encode(
-		lzma_block *block, lzma_allocator *allocator,
+		lzma_block *block, const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+		lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
+ * \brief       Single-call uncompressed .xz Block encoder
+ *
+ * This is like lzma_block_buffer_encode() except this doesn't try to
+ * compress the data and instead encodes the data using LZMA2 uncompressed
+ * chunks. The required output buffer size can be determined with
+ * lzma_block_buffer_bound().
+ *
+ * Since the data won't be compressed, this function ignores block->filters.
+ * This function doesn't take lzma_allocator because this function doesn't
+ * allocate any memory from the heap.
+ */
+extern LZMA_API(lzma_ret) lzma_block_uncomp_encode(lzma_block *block,
 		const uint8_t *in, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 		lzma_nothrow lzma_attr_warn_unused_result;
@@ -527,7 +575,7 @@ extern LZMA_API(lzma_ret) lzma_block_buffer_encode(
  *              - LZMA_PROG_ERROR
  */
 extern LZMA_API(lzma_ret) lzma_block_buffer_decode(
-		lzma_block *block, lzma_allocator *allocator,
+		lzma_block *block, const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 		lzma_nothrow;
diff --git a/src/liblzma/api/lzma/container.h b/src/liblzma/api/lzma/container.h
index 7a9ffc645788..86991add1967 100644
--- a/src/liblzma/api/lzma/container.h
+++ b/src/liblzma/api/lzma/container.h
@@ -61,6 +61,131 @@
 
 
 /**
+ * \brief       Multithreading options
+ */
+typedef struct {
+	/**
+	 * \brief       Flags
+	 *
+	 * Set this to zero if no flags are wanted.
+	 *
+	 * No flags are currently supported.
+	 */
+	uint32_t flags;
+
+	/**
+	 * \brief       Number of worker threads to use
+	 */
+	uint32_t threads;
+
+	/**
+	 * \brief       Maximum uncompressed size of a Block
+	 *
+	 * The encoder will start a new .xz Block every block_size bytes.
+	 * Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code()
+	 * the caller may tell liblzma to start a new Block earlier.
+	 *
+	 * With LZMA2, a recommended block size is 2-4 times the LZMA2
+	 * dictionary size. With very small dictionaries, it is recommended
+	 * to use at least 1 MiB block size for good compression ratio, even
+	 * if this is more than four times the dictionary size. Note that
+	 * these are only recommendations for typical use cases; feel free
+	 * to use other values. Just keep in mind that using a block size
+	 * less than the LZMA2 dictionary size is waste of RAM.
+	 *
+	 * Set this to 0 to let liblzma choose the block size depending
+	 * on the compression options. For LZMA2 it will be 3*dict_size
+	 * or 1 MiB, whichever is more.
+	 *
+	 * For each thread, about 3 * block_size bytes of memory will be
+	 * allocated. This may change in later liblzma versions. If so,
+	 * the memory usage will probably be reduced, not increased.
+	 */
+	uint64_t block_size;
+
+	/**
+	 * \brief       Timeout to allow lzma_code() to return early
+	 *
+	 * Multithreading can make liblzma to consume input and produce
+	 * output in a very bursty way: it may first read a lot of input
+	 * to fill internal buffers, then no input or output occurs for
+	 * a while.
+	 *
+	 * In single-threaded mode, lzma_code() won't return until it has
+	 * either consumed all the input or filled the output buffer. If
+	 * this is done in multithreaded mode, it may cause a call
+	 * lzma_code() to take even tens of seconds, which isn't acceptable
+	 * in all applications.
+	 *
+	 * To avoid very long blocking times in lzma_code(), a timeout
+	 * (in milliseconds) may be set here. If lzma_code() would block
+	 * longer than this number of milliseconds, it will return with
+	 * LZMA_OK. Reasonable values are 100 ms or more. The xz command
+	 * line tool uses 300 ms.
+	 *
+	 * If long blocking times are fine for you, set timeout to a special
+	 * value of 0, which will disable the timeout mechanism and will make
+	 * lzma_code() block until all the input is consumed or the output
+	 * buffer has been filled.
+	 *
+	 * \note        Even with a timeout, lzma_code() might sometimes take
+	 *              somewhat long time to return. No timing guarantees
+	 *              are made.
+	 */
+	uint32_t timeout;
+
+	/**
+	 * \brief       Compression preset (level and possible flags)
+	 *
+	 * The preset is set just like with lzma_easy_encoder().
+	 * The preset is ignored if filters below is non-NULL.
+	 */
+	uint32_t preset;
+
+	/**
+	 * \brief       Filter chain (alternative to a preset)
+	 *
+	 * If this is NULL, the preset above is used. Otherwise the preset
+	 * is ignored and the filter chain specified here is used.
+	 */
+	const lzma_filter *filters;
+
+	/**
+	 * \brief       Integrity check type
+	 *
+	 * See check.h for available checks. The xz command line tool
+	 * defaults to LZMA_CHECK_CRC64, which is a good choice if you
+	 * are unsure.
+	 */
+	lzma_check check;
+
+	/*
+	 * Reserved space to allow possible future extensions without
+	 * breaking the ABI. You should not touch these, because the names
+	 * of these variables may change. These are and will never be used
+	 * with the currently supported options, so it is safe to leave these
+	 * uninitialized.
+	 */
+	lzma_reserved_enum reserved_enum1;
+	lzma_reserved_enum reserved_enum2;
+	lzma_reserved_enum reserved_enum3;
+	uint32_t reserved_int1;
+	uint32_t reserved_int2;
+	uint32_t reserved_int3;
+	uint32_t reserved_int4;
+	uint64_t reserved_int5;
+	uint64_t reserved_int6;
+	uint64_t reserved_int7;
+	uint64_t reserved_int8;
+	void *reserved_ptr1;
+	void *reserved_ptr2;
+	void *reserved_ptr3;
+	void *reserved_ptr4;
+
+} lzma_mt;
+
+
+/**
  * \brief       Calculate approximate memory usage of easy encoder
  *
  * This function is a wrapper for lzma_raw_encoder_memusage().
@@ -165,7 +290,8 @@ extern LZMA_API(lzma_ret) lzma_easy_encoder(
  */
 extern LZMA_API(lzma_ret) lzma_easy_buffer_encode(
 		uint32_t preset, lzma_check check,
-		lzma_allocator *allocator, const uint8_t *in, size_t in_size,
+		const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow;
 
 
@@ -191,6 +317,49 @@ extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm,
 
 
 /**
+ * \brief       Calculate approximate memory usage of multithreaded .xz encoder
+ *
+ * Since doing the encoding in threaded mode doesn't affect the memory
+ * requirements of single-threaded decompressor, you can use
+ * lzma_easy_decoder_memusage(options->preset) or
+ * lzma_raw_decoder_memusage(options->filters) to calculate
+ * the decompressor memory requirements.
+ *
+ * \param       options Compression options
+ *
+ * \return      Number of bytes of memory required for encoding with the
+ *              given options. If an error occurs, for example due to
+ *              unsupported preset or filter chain, UINT64_MAX is returned.
+ */
+extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage(
+		const lzma_mt *options) lzma_nothrow lzma_attr_pure;
+
+
+/**
+ * \brief       Initialize multithreaded .xz Stream encoder
+ *
+ * This provides the functionality of lzma_easy_encoder() and
+ * lzma_stream_encoder() as a single function for multithreaded use.
+ *
+ * The supported actions for lzma_code() are LZMA_RUN, LZMA_FULL_FLUSH,
+ * LZMA_FULL_BARRIER, and LZMA_FINISH. Support for LZMA_SYNC_FLUSH might be
+ * added in the future.
+ *
+ * \param       strm    Pointer to properly prepared lzma_stream
+ * \param       options Pointer to multithreaded compression options
+ *
+ * \return      - LZMA_OK
+ *              - LZMA_MEM_ERROR
+ *              - LZMA_UNSUPPORTED_CHECK
+ *              - LZMA_OPTIONS_ERROR
+ *              - LZMA_PROG_ERROR
+ */
+extern LZMA_API(lzma_ret) lzma_stream_encoder_mt(
+		lzma_stream *strm, const lzma_mt *options)
+		lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
  * \brief       Initialize .lzma encoder (legacy file format)
  *
  * The .lzma format is sometimes called the LZMA_Alone format, which is the
@@ -269,7 +438,8 @@ extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size)
  */
 extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
 		lzma_filter *filters, lzma_check check,
-		lzma_allocator *allocator, const uint8_t *in, size_t in_size,
+		const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 		lzma_nothrow lzma_attr_warn_unused_result;
 
@@ -305,6 +475,30 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
 
 
 /**
+ * This flag makes lzma_code() not calculate and verify the integrity check
+ * of the compressed data in .xz files. This means that invalid integrity
+ * check values won't be detected and LZMA_DATA_ERROR won't be returned in
+ * such cases.
+ *
+ * This flag only affects the checks of the compressed data itself; the CRC32
+ * values in the .xz headers will still be verified normally.
+ *
+ * Don't use this flag unless you know what you are doing. Possible reasons
+ * to use this flag:
+ *
+ *   - Trying to recover data from a corrupt .xz file.
+ *
+ *   - Speeding up decompression, which matters mostly with SHA-256
+ *     or with files that have compressed extremely well. It's recommended
+ *     to not use this flag for this purpose unless the file integrity is
+ *     verified externally in some other way.
+ *
+ * Support for this flag was added in liblzma 5.1.4beta.
+ */
+#define LZMA_IGNORE_CHECK               UINT32_C(0x10)
+
+
+/**
  * This flag enables decoding of concatenated files with file formats that
  * allow concatenating compressed files as is. From the formats currently
  * supported by liblzma, only the .xz format allows concatenated files.
@@ -418,7 +612,8 @@ extern LZMA_API(lzma_ret) lzma_alone_decoder(
  *              - LZMA_PROG_ERROR
  */
 extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(
-		uint64_t *memlimit, uint32_t flags, lzma_allocator *allocator,
+		uint64_t *memlimit, uint32_t flags,
+		const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 		lzma_nothrow lzma_attr_warn_unused_result;
diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h
index e0bc163ad35a..4e78752b8f79 100644
--- a/src/liblzma/api/lzma/filter.h
+++ b/src/liblzma/api/lzma/filter.h
@@ -116,8 +116,9 @@ extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id)
  *                is not NULL.
  *              - LZMA_PROG_ERROR: src or dest is NULL.
  */
-extern LZMA_API(lzma_ret) lzma_filters_copy(const lzma_filter *src,
-		lzma_filter *dest, lzma_allocator *allocator) lzma_nothrow;
+extern LZMA_API(lzma_ret) lzma_filters_copy(
+		const lzma_filter *src, lzma_filter *dest,
+		const lzma_allocator *allocator) lzma_nothrow;
 
 
 /**
@@ -256,7 +257,7 @@ extern LZMA_API(lzma_ret) lzma_filters_update(
  *              won't necessarily meet that bound.)
  */
 extern LZMA_API(lzma_ret) lzma_raw_buffer_encode(
-		const lzma_filter *filters, lzma_allocator *allocator,
+		const lzma_filter *filters, const lzma_allocator *allocator,
 		const uint8_t *in, size_t in_size, uint8_t *out,
 		size_t *out_pos, size_t out_size) lzma_nothrow;
 
@@ -280,7 +281,7 @@ extern LZMA_API(lzma_ret) lzma_raw_buffer_encode(
  *                          which no data is written to is out[out_size].
  */
 extern LZMA_API(lzma_ret) lzma_raw_buffer_decode(
-		const lzma_filter *filters, lzma_allocator *allocator,
+		const lzma_filter *filters, const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow;
 
@@ -356,7 +357,7 @@ extern LZMA_API(lzma_ret) lzma_properties_encode(
  *              - LZMA_MEM_ERROR
  */
 extern LZMA_API(lzma_ret) lzma_properties_decode(
-		lzma_filter *filter, lzma_allocator *allocator,
+		lzma_filter *filter, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size) lzma_nothrow;
 
 
@@ -419,6 +420,6 @@ extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filter,
  *              - LZMA_PROG_ERROR
  */
 extern LZMA_API(lzma_ret) lzma_filter_flags_decode(
-		lzma_filter *filter, lzma_allocator *allocator,
+		lzma_filter *filter, const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size)
 		lzma_nothrow lzma_attr_warn_unused_result;
diff --git a/src/liblzma/api/lzma/hardware.h b/src/liblzma/api/lzma/hardware.h
index e7dd03c3e8dd..5321d9af8e46 100644
--- a/src/liblzma/api/lzma/hardware.h
+++ b/src/liblzma/api/lzma/hardware.h
@@ -48,3 +48,17 @@
  *              of RAM on the specific operating system.
  */
 extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow;
+
+
+/**
+ * \brief       Get the number of processor cores or threads
+ *
+ * This function may be useful when determining how many threads to use.
+ * If the hardware supports more than one thread per CPU core, the number
+ * of hardware threads is returned if that information is available.
+ *
+ * \brief       On success, the number of available CPU threads or cores is
+ *              returned. If this information isn't available or an error
+ *              occurs, zero is returned.
+ */
+extern LZMA_API(uint32_t) lzma_cputhreads(void) lzma_nothrow;
diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h
index 16bacc287dfc..dda60ec1c185 100644
--- a/src/liblzma/api/lzma/index.h
+++ b/src/liblzma/api/lzma/index.h
@@ -303,7 +303,7 @@ extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i)
  * \return      On success, a pointer to an empty initialized lzma_index is
  *              returned. If allocation fails, NULL is returned.
  */
-extern LZMA_API(lzma_index *) lzma_index_init(lzma_allocator *allocator)
+extern LZMA_API(lzma_index *) lzma_index_init(const lzma_allocator *allocator)
 		lzma_nothrow;
 
 
@@ -312,8 +312,8 @@ extern LZMA_API(lzma_index *) lzma_index_init(lzma_allocator *allocator)
  *
  * If i is NULL, this does nothing.
  */
-extern LZMA_API(void) lzma_index_end(lzma_index *i, lzma_allocator *allocator)
-		lzma_nothrow;
+extern LZMA_API(void) lzma_index_end(
+		lzma_index *i, const lzma_allocator *allocator) lzma_nothrow;
 
 
 /**
@@ -341,7 +341,7 @@ extern LZMA_API(void) lzma_index_end(lzma_index *i, lzma_allocator *allocator)
  *              - LZMA_PROG_ERROR
  */
 extern LZMA_API(lzma_ret) lzma_index_append(
-		lzma_index *i, lzma_allocator *allocator,
+		lzma_index *i, const lzma_allocator *allocator,
 		lzma_vli unpadded_size, lzma_vli uncompressed_size)
 		lzma_nothrow lzma_attr_warn_unused_result;
 
@@ -564,8 +564,8 @@ extern LZMA_API(lzma_bool) lzma_index_iter_locate(
  *              - LZMA_MEM_ERROR
  *              - LZMA_PROG_ERROR
  */
-extern LZMA_API(lzma_ret) lzma_index_cat(
-		lzma_index *dest, lzma_index *src, lzma_allocator *allocator)
+extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *dest, lzma_index *src,
+		const lzma_allocator *allocator)
 		lzma_nothrow lzma_attr_warn_unused_result;
 
 
@@ -575,7 +575,7 @@ extern LZMA_API(lzma_ret) lzma_index_cat(
  * \return      A copy of the lzma_index, or NULL if memory allocation failed.
  */
 extern LZMA_API(lzma_index *) lzma_index_dup(
-		const lzma_index *i, lzma_allocator *allocator)
+		const lzma_index *i, const lzma_allocator *allocator)
 		lzma_nothrow lzma_attr_warn_unused_result;
 
 
@@ -677,6 +677,6 @@ extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i,
  *              - LZMA_PROG_ERROR
  */
 extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i,
-		uint64_t *memlimit, lzma_allocator *allocator,
+		uint64_t *memlimit, const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size)
 		lzma_nothrow;
diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h
index fa2e048d552d..9287f1dfdb56 100644
--- a/src/liblzma/api/lzma/index_hash.h
+++ b/src/liblzma/api/lzma/index_hash.h
@@ -37,7 +37,7 @@ typedef struct lzma_index_hash_s lzma_index_hash;
  * pointer than the index_hash that was given as an argument.
  */
 extern LZMA_API(lzma_index_hash *) lzma_index_hash_init(
-		lzma_index_hash *index_hash, lzma_allocator *allocator)
+		lzma_index_hash *index_hash, const lzma_allocator *allocator)
 		lzma_nothrow lzma_attr_warn_unused_result;
 
 
@@ -45,7 +45,7 @@ extern LZMA_API(lzma_index_hash *) lzma_index_hash_init(
  * \brief       Deallocate lzma_index_hash structure
  */
 extern LZMA_API(void) lzma_index_hash_end(
-		lzma_index_hash *index_hash, lzma_allocator *allocator)
+		lzma_index_hash *index_hash, const lzma_allocator *allocator)
 		lzma_nothrow;
 
 
diff --git a/src/liblzma/api/lzma/lzma.h b/src/liblzma/api/lzma/lzma12.h
index 3f8e095f70e8..4e32fa3a214a 100644
--- a/src/liblzma/api/lzma/lzma.h
+++ b/src/liblzma/api/lzma/lzma12.h
@@ -1,5 +1,5 @@
 /**
- * \file        lzma/lzma.h
+ * \file        lzma/lzma12.h
  * \brief       LZMA1 and LZMA2 filters
  */
 
diff --git a/src/liblzma/api/lzma/version.h b/src/liblzma/api/lzma/version.h
index beec18e20440..d9614da76f7b 100644
--- a/src/liblzma/api/lzma/version.h
+++ b/src/liblzma/api/lzma/version.h
@@ -21,8 +21,8 @@
  * Version number split into components
  */
 #define LZMA_VERSION_MAJOR 5
-#define LZMA_VERSION_MINOR 0
-#define LZMA_VERSION_PATCH 7
+#define LZMA_VERSION_MINOR 2
+#define LZMA_VERSION_PATCH 0
 #define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE
 
 #ifndef LZMA_VERSION_COMMIT
diff --git a/src/liblzma/check/check.h b/src/liblzma/check/check.h
index e100d2b85303..0f96f65b360c 100644
--- a/src/liblzma/check/check.h
+++ b/src/liblzma/check/check.h
@@ -15,6 +15,43 @@
 
 #include "common.h"
 
+#if defined(HAVE_COMMONCRYPTO_COMMONDIGEST_H)
+#	include <CommonCrypto/CommonDigest.h>
+#elif defined(HAVE_SHA256_H)
+#	include <sys/types.h>
+#	include <sha256.h>
+#elif defined(HAVE_SHA2_H)
+#	include <sys/types.h>
+#	include <sha2.h>
+#elif defined(HAVE_MINIX_SHA2_H)
+#	include <sys/types.h>
+#	include <minix/sha2.h>
+#endif
+
+#if defined(HAVE_CC_SHA256_CTX)
+typedef CC_SHA256_CTX lzma_sha256_state;
+#elif defined(HAVE_SHA256_CTX)
+typedef SHA256_CTX lzma_sha256_state;
+#elif defined(HAVE_SHA2_CTX)
+typedef SHA2_CTX lzma_sha256_state;
+#else
+/// State for the internal SHA-256 implementation
+typedef struct {
+	/// Internal state
+	uint32_t state[8];
+
+	/// Size of the message excluding padding
+	uint64_t size;
+} lzma_sha256_state;
+#endif
+
+#if defined(HAVE_CC_SHA256_INIT)
+#	define LZMA_SHA256FUNC(x) CC_SHA256_ ## x
+#elif defined(HAVE_SHA256_INIT)
+#	define LZMA_SHA256FUNC(x) SHA256_ ## x
+#elif defined(HAVE_SHA256INIT)
+#	define LZMA_SHA256FUNC(x) SHA256 ## x
+#endif
 
 // Index hashing needs the best possible hash function (preferably
 // a cryptographic hash) for maximum reliability.
@@ -43,14 +80,7 @@ typedef struct {
 	union {
 		uint32_t crc32;
 		uint64_t crc64;
-
-		struct {
-			/// Internal state
-			uint32_t state[8];
-
-			/// Size of the message excluding padding
-			uint64_t size;
-		} sha256;
+		lzma_sha256_state sha256;
 	} state;
 
 } lzma_check_state;
@@ -82,6 +112,8 @@ extern void lzma_check_update(lzma_check_state *check, lzma_check type,
 extern void lzma_check_finish(lzma_check_state *check, lzma_check type);
 
 
+#ifndef LZMA_SHA256FUNC
+
 /// Prepare SHA-256 state for new input.
 extern void lzma_sha256_init(lzma_check_state *check);
 
@@ -92,4 +124,39 @@ extern void lzma_sha256_update(
 /// Finish the SHA-256 calculation and store the result to check->buffer.u8.
 extern void lzma_sha256_finish(lzma_check_state *check);
 
+
+#else
+
+static inline void
+lzma_sha256_init(lzma_check_state *check)
+{
+	LZMA_SHA256FUNC(Init)(&check->state.sha256);
+}
+
+
+static inline void
+lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check_state *check)
+{
+#if defined(HAVE_CC_SHA256_INIT) && SIZE_MAX > UINT32_MAX
+	// Darwin's CC_SHA256_Update takes uint32_t as the buffer size,
+	// so use a loop to support size_t.
+	while (size > UINT32_MAX) {
+		LZMA_SHA256FUNC(Update)(&check->state.sha256, buf, UINT32_MAX);
+		buf += UINT32_MAX;
+		size -= UINT32_MAX;
+	}
+#endif
+
+	LZMA_SHA256FUNC(Update)(&check->state.sha256, buf, size);
+}
+
+
+static inline void
+lzma_sha256_finish(lzma_check_state *check)
+{
+	LZMA_SHA256FUNC(Final)(check->buffer.u8, &check->state.sha256);
+}
+
+#endif
+
 #endif
diff --git a/src/liblzma/check/sha256.c b/src/liblzma/check/sha256.c
index f2cc0d71ac63..5eede5ce05bc 100644
--- a/src/liblzma/check/sha256.c
+++ b/src/liblzma/check/sha256.c
@@ -21,22 +21,22 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
-// Avoid bogus warnings in transform().
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __GNUC__ > 4
-#	pragma GCC diagnostic ignored "-Wuninitialized"
-#endif
-
 #include "check.h"
 
-// At least on x86, GCC is able to optimize this to a rotate instruction.
-#define rotr_32(num, amount) ((num) >> (amount) | (num) << (32 - (amount)))
+// Rotate a uint32_t. GCC can optimize this to a rotate instruction
+// at least on x86.
+static inline uint32_t
+rotr_32(uint32_t num, unsigned amount)
+{
+        return (num >> amount) | (num << (32 - amount));
+}
 
-#define blk0(i) (W[i] = data[i])
+#define blk0(i) (W[i] = conv32be(data[i]))
 #define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \
 		+ s0(W[(i - 15) & 15]))
 
 #define Ch(x, y, z) (z ^ (x & (y ^ z)))
-#define Maj(x, y, z) ((x & y) | (z & (x | y)))
+#define Maj(x, y, z) ((x & (y ^ z)) + (y & z))
 
 #define a(i) T[(0 - i) & 7]
 #define b(i) T[(1 - i) & 7]
@@ -47,16 +47,17 @@
 #define g(i) T[(6 - i) & 7]
 #define h(i) T[(7 - i) & 7]
 
-#define R(i) \
-	h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] \
-		+ (j ? blk2(i) : blk0(i)); \
+#define R(i, j, blk) \
+	h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] + blk; \
 	d(i) += h(i); \
 	h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
+#define R0(i) R(i, 0, blk0(i))
+#define R2(i) R(i, j, blk2(i))
 
-#define S0(x) (rotr_32(x, 2) ^ rotr_32(x, 13) ^ rotr_32(x, 22))
-#define S1(x) (rotr_32(x, 6) ^ rotr_32(x, 11) ^ rotr_32(x, 25))
-#define s0(x) (rotr_32(x, 7) ^ rotr_32(x, 18) ^ (x >> 3))
-#define s1(x) (rotr_32(x, 17) ^ rotr_32(x, 19) ^ (x >> 10))
+#define S0(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 9), 11), 2)
+#define S1(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 14), 5), 6)
+#define s0(x) (rotr_32(x ^ rotr_32(x, 11), 7) ^ (x >> 3))
+#define s1(x) (rotr_32(x ^ rotr_32(x, 2), 17) ^ (x >> 10))
 
 
 static const uint32_t SHA256_K[64] = {
@@ -88,12 +89,18 @@ transform(uint32_t state[8], const uint32_t data[16])
 	// Copy state[] to working vars.
 	memcpy(T, state, sizeof(T));
 
-	// 64 operations, partially loop unrolled
-	for (unsigned int j = 0; j < 64; j += 16) {
-		R( 0); R( 1); R( 2); R( 3);
-		R( 4); R( 5); R( 6); R( 7);
-		R( 8); R( 9); R(10); R(11);
-		R(12); R(13); R(14); R(15);
+	// The first 16 operations unrolled
+	R0( 0); R0( 1); R0( 2); R0( 3);
+	R0( 4); R0( 5); R0( 6); R0( 7);
+	R0( 8); R0( 9); R0(10); R0(11);
+	R0(12); R0(13); R0(14); R0(15);
+
+	// The remaining 48 operations partially unrolled
+	for (unsigned int j = 16; j < 64; j += 16) {
+		R2( 0); R2( 1); R2( 2); R2( 3);
+		R2( 4); R2( 5); R2( 6); R2( 7);
+		R2( 8); R2( 9); R2(10); R2(11);
+		R2(12); R2(13); R2(14); R2(15);
 	}
 
 	// Add the working vars back into state[].
@@ -111,18 +118,7 @@ transform(uint32_t state[8], const uint32_t data[16])
 static void
 process(lzma_check_state *check)
 {
-#ifdef WORDS_BIGENDIAN
 	transform(check->state.sha256.state, check->buffer.u32);
-
-#else
-	uint32_t data[16];
-
-	for (size_t i = 0; i < 16; ++i)
-		data[i] = bswap32(check->buffer.u32[i]);
-
-	transform(check->state.sha256.state, data);
-#endif
-
 	return;
 }
 
diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c
index c25112e6875f..c1360ca1eb7a 100644
--- a/src/liblzma/common/alone_decoder.c
+++ b/src/liblzma/common/alone_decoder.c
@@ -51,7 +51,7 @@ struct lzma_coder_s {
 
 static lzma_ret
 alone_decode(lzma_coder *coder,
-		lzma_allocator *allocator lzma_attribute((__unused__)),
+		const lzma_allocator *allocator lzma_attribute((__unused__)),
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size,
@@ -166,7 +166,7 @@ alone_decode(lzma_coder *coder,
 
 
 static void
-alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+alone_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder, allocator);
@@ -193,7 +193,7 @@ alone_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
 
 
 extern lzma_ret
-lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_alone_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		uint64_t memlimit, bool picky)
 {
 	lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator);
diff --git a/src/liblzma/common/alone_decoder.h b/src/liblzma/common/alone_decoder.h
index f666fc3823e1..dfa031aa77dd 100644
--- a/src/liblzma/common/alone_decoder.h
+++ b/src/liblzma/common/alone_decoder.h
@@ -17,7 +17,7 @@
 
 
 extern lzma_ret lzma_alone_decoder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		uint64_t memlimit, bool picky);
 
 #endif
diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c
index eb1697e99753..a2bc9eee1fa9 100644
--- a/src/liblzma/common/alone_encoder.c
+++ b/src/liblzma/common/alone_encoder.c
@@ -32,7 +32,7 @@ struct lzma_coder_s {
 
 static lzma_ret
 alone_encode(lzma_coder *coder,
-		lzma_allocator *allocator lzma_attribute((__unused__)),
+		const lzma_allocator *allocator lzma_attribute((__unused__)),
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size,
@@ -65,7 +65,7 @@ alone_encode(lzma_coder *coder,
 
 
 static void
-alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+alone_encoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder, allocator);
@@ -75,7 +75,7 @@ alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 // At least for now, this is not used by any internal function.
 static lzma_ret
-alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_options_lzma *options)
 {
 	lzma_next_coder_init(&alone_encoder_init, next, allocator);
@@ -137,7 +137,7 @@ alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 /*
 extern lzma_ret
-lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_options_alone *options)
 {
 	lzma_next_coder_init(&alone_encoder_init, next, allocator, options);
diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c
index 35c895fd14c2..bf3550701fe6 100644
--- a/src/liblzma/common/auto_decoder.c
+++ b/src/liblzma/common/auto_decoder.c
@@ -30,7 +30,7 @@ struct lzma_coder_s {
 
 
 static lzma_ret
-auto_decode(lzma_coder *coder, lzma_allocator *allocator,
+auto_decode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -100,7 +100,7 @@ auto_decode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static void
-auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+auto_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder, allocator);
@@ -143,7 +143,7 @@ auto_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
 
 
 static lzma_ret
-auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+auto_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		uint64_t memlimit, uint32_t flags)
 {
 	lzma_next_coder_init(&auto_decoder_init, next, allocator);
diff --git a/src/liblzma/common/block_buffer_decoder.c b/src/liblzma/common/block_buffer_decoder.c
index ff27a11ccfe6..b0ded90ddc3e 100644
--- a/src/liblzma/common/block_buffer_decoder.c
+++ b/src/liblzma/common/block_buffer_decoder.c
@@ -14,7 +14,7 @@
 
 
 extern LZMA_API(lzma_ret)
-lzma_block_buffer_decode(lzma_block *block, lzma_allocator *allocator,
+lzma_block_buffer_decode(lzma_block *block, const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 {
diff --git a/src/liblzma/common/block_buffer_encoder.c b/src/liblzma/common/block_buffer_encoder.c
index 519c6a684d76..39e263aa4765 100644
--- a/src/liblzma/common/block_buffer_encoder.c
+++ b/src/liblzma/common/block_buffer_encoder.c
@@ -10,6 +10,7 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
+#include "block_buffer_encoder.h"
 #include "block_encoder.h"
 #include "filter_encoder.h"
 #include "lzma2_encoder.h"
@@ -28,8 +29,8 @@
 		+ LZMA_CHECK_SIZE_MAX + 3) & ~3)
 
 
-static lzma_vli
-lzma2_bound(lzma_vli uncompressed_size)
+static uint64_t
+lzma2_bound(uint64_t uncompressed_size)
 {
 	// Prevent integer overflow in overhead calculation.
 	if (uncompressed_size > COMPRESSED_SIZE_MAX)
@@ -39,7 +40,7 @@ lzma2_bound(lzma_vli uncompressed_size)
 	// uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX,
 	// multiply by the size of per-chunk header, and add one byte for
 	// the end marker.
-	const lzma_vli overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
+	const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
 				/ LZMA2_CHUNK_MAX)
 			* LZMA2_HEADER_UNCOMPRESSED + 1;
 
@@ -51,30 +52,36 @@ lzma2_bound(lzma_vli uncompressed_size)
 }
 
 
-extern LZMA_API(size_t)
-lzma_block_buffer_bound(size_t uncompressed_size)
+extern uint64_t
+lzma_block_buffer_bound64(uint64_t uncompressed_size)
 {
-	// For now, if the data doesn't compress, we always use uncompressed
-	// chunks of LZMA2. In future we may use Subblock filter too, but
-	// but for simplicity we probably will still use the same bound
-	// calculation even though Subblock filter would have slightly less
-	// overhead.
-	lzma_vli lzma2_size = lzma2_bound(uncompressed_size);
+	// If the data doesn't compress, we always use uncompressed
+	// LZMA2 chunks.
+	uint64_t lzma2_size = lzma2_bound(uncompressed_size);
 	if (lzma2_size == 0)
 		return 0;
 
 	// Take Block Padding into account.
-	lzma2_size = (lzma2_size + 3) & ~LZMA_VLI_C(3);
+	lzma2_size = (lzma2_size + 3) & ~UINT64_C(3);
 
-#if SIZE_MAX < LZMA_VLI_MAX
-	// Catch the possible integer overflow on 32-bit systems. There's no
-	// overflow on 64-bit systems, because lzma2_bound() already takes
+	// No risk of integer overflow because lzma2_bound() already takes
 	// into account the size of the headers in the Block.
-	if (SIZE_MAX - HEADERS_BOUND < lzma2_size)
+	return HEADERS_BOUND + lzma2_size;
+}
+
+
+extern LZMA_API(size_t)
+lzma_block_buffer_bound(size_t uncompressed_size)
+{
+	uint64_t ret = lzma_block_buffer_bound64(uncompressed_size);
+
+#if SIZE_MAX < UINT64_MAX
+	// Catch the possible integer overflow on 32-bit systems.
+	if (ret > SIZE_MAX)
 		return 0;
 #endif
 
-	return HEADERS_BOUND + lzma2_size;
+	return ret;
 }
 
 
@@ -82,9 +89,6 @@ static lzma_ret
 block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 {
-	// TODO: Figure out if the last filter is LZMA2 or Subblock and use
-	// that filter to encode the uncompressed chunks.
-
 	// Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at
 	// all, but LZMA2 always requires a dictionary, so use the minimum
 	// value to minimize memory usage of the decoder.
@@ -160,16 +164,11 @@ block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size,
 
 
 static lzma_ret
-block_encode_normal(lzma_block *block, lzma_allocator *allocator,
+block_encode_normal(lzma_block *block, const lzma_allocator *allocator,
 		const uint8_t *in, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 {
 	// Find out the size of the Block Header.
-	block->compressed_size = lzma2_bound(in_size);
-	if (block->compressed_size == 0)
-		return LZMA_DATA_ERROR;
-
-	block->uncompressed_size = in_size;
 	return_if_error(lzma_block_header_size(block));
 
 	// Reserve space for the Block Header and skip it for now.
@@ -221,10 +220,11 @@ block_encode_normal(lzma_block *block, lzma_allocator *allocator,
 }
 
 
-extern LZMA_API(lzma_ret)
-lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator,
+static lzma_ret
+block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
 		const uint8_t *in, size_t in_size,
-		uint8_t *out, size_t *out_pos, size_t out_size)
+		uint8_t *out, size_t *out_pos, size_t out_size,
+		bool try_to_compress)
 {
 	// Validate the arguments.
 	if (block == NULL || (in == NULL && in_size != 0) || out == NULL
@@ -233,11 +233,11 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator,
 
 	// The contents of the structure may depend on the version so
 	// check the version before validating the contents of *block.
-	if (block->version != 0)
+	if (block->version > 1)
 		return LZMA_OPTIONS_ERROR;
 
 	if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
-			|| block->filters == NULL)
+			|| (try_to_compress && block->filters == NULL))
 		return LZMA_PROG_ERROR;
 
 	if (!lzma_check_is_supported(block->check))
@@ -258,9 +258,19 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator,
 
 	out_size -= check_size;
 
+	// Initialize block->uncompressed_size and calculate the worst-case
+	// value for block->compressed_size.
+	block->uncompressed_size = in_size;
+	block->compressed_size = lzma2_bound(in_size);
+	if (block->compressed_size == 0)
+		return LZMA_DATA_ERROR;
+
 	// Do the actual compression.
-	const lzma_ret ret = block_encode_normal(block, allocator,
-			in, in_size, out, out_pos, out_size);
+	lzma_ret ret = LZMA_BUF_ERROR;
+	if (try_to_compress)
+		ret = block_encode_normal(block, allocator,
+				in, in_size, out, out_pos, out_size);
+
 	if (ret != LZMA_OK) {
 		// If the error was something else than output buffer
 		// becoming full, return the error now.
@@ -303,3 +313,25 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator,
 
 	return LZMA_OK;
 }
+
+
+extern LZMA_API(lzma_ret)
+lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	return block_buffer_encode(block, allocator,
+			in, in_size, out, out_pos, out_size, true);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_block_uncomp_encode(lzma_block *block,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// It won't allocate any memory from heap so no need
+	// for lzma_allocator.
+	return block_buffer_encode(block, NULL,
+			in, in_size, out, out_pos, out_size, false);
+}
diff --git a/src/liblzma/common/block_buffer_encoder.h b/src/liblzma/common/block_buffer_encoder.h
new file mode 100644
index 000000000000..653207f73498
--- /dev/null
+++ b/src/liblzma/common/block_buffer_encoder.h
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       block_buffer_encoder.h
+/// \brief      Single-call .xz Block encoder
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_BLOCK_BUFFER_ENCODER_H
+#define LZMA_BLOCK_BUFFER_ENCODER_H
+
+#include "common.h"
+
+
+/// uint64_t version of lzma_block_buffer_bound(). It is used by
+/// stream_encoder_mt.c. Probably the original lzma_block_buffer_bound()
+/// should have been 64-bit, but fixing it would break the ABI.
+extern uint64_t lzma_block_buffer_bound64(uint64_t uncompressed_size);
+
+#endif
diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c
index a3ce6f49500c..685c3b038fc3 100644
--- a/src/liblzma/common/block_decoder.c
+++ b/src/liblzma/common/block_decoder.c
@@ -45,6 +45,9 @@ struct lzma_coder_s {
 
 	/// Check of the uncompressed data
 	lzma_check_state check;
+
+	/// True if the integrity check won't be calculated and verified.
+	bool ignore_check;
 };
 
 
@@ -71,7 +74,7 @@ is_size_valid(lzma_vli size, lzma_vli reference)
 
 
 static lzma_ret
-block_decode(lzma_coder *coder, lzma_allocator *allocator,
+block_decode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -97,8 +100,9 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator,
 					coder->block->uncompressed_size))
 			return LZMA_DATA_ERROR;
 
-		lzma_check_update(&coder->check, coder->block->check,
-				out + out_start, out_used);
+		if (!coder->ignore_check)
+			lzma_check_update(&coder->check, coder->block->check,
+					out + out_start, out_used);
 
 		if (ret != LZMA_STREAM_END)
 			return ret;
@@ -140,7 +144,9 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator,
 		if (coder->block->check == LZMA_CHECK_NONE)
 			return LZMA_STREAM_END;
 
-		lzma_check_finish(&coder->check, coder->block->check);
+		if (!coder->ignore_check)
+			lzma_check_finish(&coder->check, coder->block->check);
+
 		coder->sequence = SEQ_CHECK;
 
 	// Fall through
@@ -155,7 +161,8 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator,
 		// Validate the Check only if we support it.
 		// coder->check.buffer may be uninitialized
 		// when the Check ID is not supported.
-		if (lzma_check_is_supported(coder->block->check)
+		if (!coder->ignore_check
+				&& lzma_check_is_supported(coder->block->check)
 				&& memcmp(coder->block->raw_check,
 					coder->check.buffer.u8,
 					check_size) != 0)
@@ -170,7 +177,7 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static void
-block_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+block_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder, allocator);
@@ -179,7 +186,7 @@ block_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 extern lzma_ret
-lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		lzma_block *block)
 {
 	lzma_next_coder_init(&lzma_block_decoder_init, next, allocator);
@@ -224,6 +231,9 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 	next->coder->check_pos = 0;
 	lzma_check_init(&next->coder->check, block->check);
 
+	next->coder->ignore_check = block->version >= 1
+			? block->ignore_check : false;
+
 	// Initialize the filter chain.
 	return lzma_raw_decoder_init(&next->coder->next, allocator,
 			block->filters);
diff --git a/src/liblzma/common/block_decoder.h b/src/liblzma/common/block_decoder.h
index 7da9df63f767..718c5ced886c 100644
--- a/src/liblzma/common/block_decoder.h
+++ b/src/liblzma/common/block_decoder.h
@@ -17,6 +17,6 @@
 
 
 extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, lzma_block *block);
+		const lzma_allocator *allocator, lzma_block *block);
 
 #endif
diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c
index 1eeb502b7f83..def586410d28 100644
--- a/src/liblzma/common/block_encoder.c
+++ b/src/liblzma/common/block_encoder.c
@@ -45,7 +45,7 @@ struct lzma_coder_s {
 
 
 static lzma_ret
-block_encode(lzma_coder *coder, lzma_allocator *allocator,
+block_encode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -134,7 +134,7 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static void
-block_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+block_encoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder, allocator);
@@ -143,7 +143,7 @@ block_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 static lzma_ret
-block_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
+block_encoder_update(lzma_coder *coder, const lzma_allocator *allocator,
 		const lzma_filter *filters lzma_attribute((__unused__)),
 		const lzma_filter *reversed_filters)
 {
@@ -156,7 +156,7 @@ block_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_block_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		lzma_block *block)
 {
 	lzma_next_coder_init(&lzma_block_encoder_init, next, allocator);
@@ -166,7 +166,7 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 	// The contents of the structure may depend on the version so
 	// check the version first.
-	if (block->version != 0)
+	if (block->version > 1)
 		return LZMA_OPTIONS_ERROR;
 
 	// If the Check ID is not supported, we cannot calculate the check and
diff --git a/src/liblzma/common/block_encoder.h b/src/liblzma/common/block_encoder.h
index b9eff0be2736..bd97c186e503 100644
--- a/src/liblzma/common/block_encoder.h
+++ b/src/liblzma/common/block_encoder.h
@@ -42,6 +42,6 @@
 
 
 extern lzma_ret lzma_block_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, lzma_block *block);
+		const lzma_allocator *allocator, lzma_block *block);
 
 #endif
diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c
index 2c9573ee204c..1dd982f6bd68 100644
--- a/src/liblzma/common/block_header_decoder.c
+++ b/src/liblzma/common/block_header_decoder.c
@@ -15,7 +15,7 @@
 
 
 static void
-free_properties(lzma_block *block, lzma_allocator *allocator)
+free_properties(lzma_block *block, const lzma_allocator *allocator)
 {
 	// Free allocated filter options. The last array member is not
 	// touched after the initialization in the beginning of
@@ -32,7 +32,7 @@ free_properties(lzma_block *block, lzma_allocator *allocator)
 
 extern LZMA_API(lzma_ret)
 lzma_block_header_decode(lzma_block *block,
-		lzma_allocator *allocator, const uint8_t *in)
+		const lzma_allocator *allocator, const uint8_t *in)
 {
 	// NOTE: We consider the header to be corrupt not only when the
 	// CRC32 doesn't match, but also when variable-length integers
@@ -46,8 +46,16 @@ lzma_block_header_decode(lzma_block *block,
 		block->filters[i].options = NULL;
 	}
 
-	// Always zero for now.
-	block->version = 0;
+	// Versions 0 and 1 are supported. If a newer version was specified,
+	// we need to downgrade it.
+	if (block->version > 1)
+		block->version = 1;
+
+	// This isn't a Block Header option, but since the decompressor will
+	// read it if version >= 1, it's better to initialize it here than
+	// to expect the caller to do it since in almost all cases this
+	// should be false.
+	block->ignore_check = false;
 
 	// Validate Block Header Size and Check type. The caller must have
 	// already set these, so it is a programming error if this test fails.
diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c
index 707dd0cb14a2..5c5f5424ae85 100644
--- a/src/liblzma/common/block_header_encoder.c
+++ b/src/liblzma/common/block_header_encoder.c
@@ -17,7 +17,7 @@
 extern LZMA_API(lzma_ret)
 lzma_block_header_size(lzma_block *block)
 {
-	if (block->version != 0)
+	if (block->version > 1)
 		return LZMA_OPTIONS_ERROR;
 
 	// Block Header Size + Block Flags + CRC32.
diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c
index 62c934547c9e..00c7fe8d5196 100644
--- a/src/liblzma/common/block_util.c
+++ b/src/liblzma/common/block_util.c
@@ -51,7 +51,7 @@ lzma_block_unpadded_size(const lzma_block *block)
 	// NOTE: This function is used for validation too, so it is
 	// essential that these checks are always done even if
 	// Compressed Size is unknown.
-	if (block == NULL || block->version != 0
+	if (block == NULL || block->version > 1
 			|| block->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
 			|| block->header_size > LZMA_BLOCK_HEADER_SIZE_MAX
 			|| (block->header_size & 3)
diff --git a/src/liblzma/common/common.c b/src/liblzma/common/common.c
index b9e386027368..28aa2b7142f4 100644
--- a/src/liblzma/common/common.c
+++ b/src/liblzma/common/common.c
@@ -36,7 +36,7 @@ lzma_version_string(void)
 ///////////////////////
 
 extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1)
-lzma_alloc(size_t size, lzma_allocator *allocator)
+lzma_alloc(size_t size, const lzma_allocator *allocator)
 {
 	// Some malloc() variants return NULL if called with size == 0.
 	if (size == 0)
@@ -53,8 +53,29 @@ lzma_alloc(size_t size, lzma_allocator *allocator)
 }
 
 
+extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1)
+lzma_alloc_zero(size_t size, const lzma_allocator *allocator)
+{
+	// Some calloc() variants return NULL if called with size == 0.
+	if (size == 0)
+		size = 1;
+
+	void *ptr;
+
+	if (allocator != NULL && allocator->alloc != NULL) {
+		ptr = allocator->alloc(allocator->opaque, 1, size);
+		if (ptr != NULL)
+			memzero(ptr, size);
+	} else {
+		ptr = calloc(1, size);
+	}
+
+	return ptr;
+}
+
+
 extern void
-lzma_free(void *ptr, lzma_allocator *allocator)
+lzma_free(void *ptr, const lzma_allocator *allocator)
 {
 	if (allocator != NULL && allocator->free != NULL)
 		allocator->free(allocator->opaque, ptr);
@@ -88,7 +109,7 @@ lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos,
 
 
 extern lzma_ret
-lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	lzma_next_coder_init(filters[0].init, next, allocator);
@@ -99,7 +120,7 @@ lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_next_filter_update(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *reversed_filters)
 {
 	// Check that the application isn't trying to change the Filter ID.
@@ -117,7 +138,7 @@ lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator,
 
 
 extern void
-lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator)
+lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator)
 {
 	if (next->init != (uintptr_t)(NULL)) {
 		// To avoid tiny end functions that simply call
@@ -156,10 +177,8 @@ lzma_strm_init(lzma_stream *strm)
 		strm->internal->next = LZMA_NEXT_CODER_INIT;
 	}
 
-	strm->internal->supported_actions[LZMA_RUN] = false;
-	strm->internal->supported_actions[LZMA_SYNC_FLUSH] = false;
-	strm->internal->supported_actions[LZMA_FULL_FLUSH] = false;
-	strm->internal->supported_actions[LZMA_FINISH] = false;
+	memzero(strm->internal->supported_actions,
+			sizeof(strm->internal->supported_actions));
 	strm->internal->sequence = ISEQ_RUN;
 	strm->internal->allow_buf_error = false;
 
@@ -178,7 +197,7 @@ lzma_code(lzma_stream *strm, lzma_action action)
 			|| (strm->next_out == NULL && strm->avail_out != 0)
 			|| strm->internal == NULL
 			|| strm->internal->next.code == NULL
-			|| (unsigned int)(action) > LZMA_FINISH
+			|| (unsigned int)(action) > LZMA_ACTION_MAX
 			|| !strm->internal->supported_actions[action])
 		return LZMA_PROG_ERROR;
 
@@ -213,6 +232,10 @@ lzma_code(lzma_stream *strm, lzma_action action)
 		case LZMA_FINISH:
 			strm->internal->sequence = ISEQ_FINISH;
 			break;
+
+		case LZMA_FULL_BARRIER:
+			strm->internal->sequence = ISEQ_FULL_BARRIER;
+			break;
 		}
 
 		break;
@@ -240,6 +263,13 @@ lzma_code(lzma_stream *strm, lzma_action action)
 
 		break;
 
+	case ISEQ_FULL_BARRIER:
+		if (action != LZMA_FULL_BARRIER
+				|| strm->internal->avail_in != strm->avail_in)
+			return LZMA_PROG_ERROR;
+
+		break;
+
 	case ISEQ_END:
 		return LZMA_STREAM_END;
 
@@ -265,7 +295,9 @@ lzma_code(lzma_stream *strm, lzma_action action)
 
 	strm->internal->avail_in = strm->avail_in;
 
-	switch (ret) {
+	// Cast is needed to silence a warning about LZMA_TIMED_OUT, which
+	// isn't part of lzma_ret enumeration.
+	switch ((unsigned int)(ret)) {
 	case LZMA_OK:
 		// Don't return LZMA_BUF_ERROR when it happens the first time.
 		// This is to avoid returning LZMA_BUF_ERROR when avail_out
@@ -281,9 +313,16 @@ lzma_code(lzma_stream *strm, lzma_action action)
 		}
 		break;
 
+	case LZMA_TIMED_OUT:
+		strm->internal->allow_buf_error = false;
+		ret = LZMA_OK;
+		break;
+
 	case LZMA_STREAM_END:
 		if (strm->internal->sequence == ISEQ_SYNC_FLUSH
-				|| strm->internal->sequence == ISEQ_FULL_FLUSH)
+				|| strm->internal->sequence == ISEQ_FULL_FLUSH
+				|| strm->internal->sequence
+					== ISEQ_FULL_BARRIER)
 			strm->internal->sequence = ISEQ_RUN;
 		else
 			strm->internal->sequence = ISEQ_END;
@@ -323,6 +362,22 @@ lzma_end(lzma_stream *strm)
 }
 
 
+extern LZMA_API(void)
+lzma_get_progress(lzma_stream *strm,
+		uint64_t *progress_in, uint64_t *progress_out)
+{
+	if (strm->internal->next.get_progress != NULL) {
+		strm->internal->next.get_progress(strm->internal->next.coder,
+				progress_in, progress_out);
+	} else {
+		*progress_in = strm->total_in;
+		*progress_out = strm->total_out;
+	}
+
+	return;
+}
+
+
 extern LZMA_API(lzma_check)
 lzma_get_check(const lzma_stream *strm)
 {
diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h
index 45aba4f06b27..955d784a5b6a 100644
--- a/src/liblzma/common/common.h
+++ b/src/liblzma/common/common.h
@@ -49,6 +49,13 @@
 #define LZMA_BUFFER_SIZE 4096
 
 
+/// Maximum number of worker threads within one multithreaded component.
+/// The limit exists solely to make it simpler to prevent integer overflows
+/// when allocating structures etc. This should be big enough for now...
+/// the code won't scale anywhere close to this number anyway.
+#define LZMA_THREADS_MAX 16384
+
+
 /// Starting value for memory usage estimates. Instead of calculating size
 /// of _every_ structure and taking into account malloc() overhead etc., we
 /// add a base size to all memory usage estimates. It's not very accurate
@@ -66,9 +73,21 @@
 	( LZMA_TELL_NO_CHECK \
 	| LZMA_TELL_UNSUPPORTED_CHECK \
 	| LZMA_TELL_ANY_CHECK \
+	| LZMA_IGNORE_CHECK \
 	| LZMA_CONCATENATED )
 
 
+/// Largest valid lzma_action value as unsigned integer.
+#define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER))
+
+
+/// Special return value (lzma_ret) to indicate that a timeout was reached
+/// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to
+/// LZMA_OK in lzma_code(). This is not in the lzma_ret enumeration because
+/// there's no need to have it in the public API.
+#define LZMA_TIMED_OUT 32
+
+
 /// Type of encoder/decoder specific data; the actual structure is defined
 /// differently in different coders.
 typedef struct lzma_coder_s lzma_coder;
@@ -80,7 +99,7 @@ typedef struct lzma_filter_info_s lzma_filter_info;
 
 /// Type of a function used to initialize a filter encoder or decoder
 typedef lzma_ret (*lzma_init_function)(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters);
 
 /// Type of a function to do some kind of coding work (filters, Stream,
@@ -88,7 +107,7 @@ typedef lzma_ret (*lzma_init_function)(
 /// input and output buffers, but for simplicity they still use this same
 /// function prototype.
 typedef lzma_ret (*lzma_code_function)(
-		lzma_coder *coder, lzma_allocator *allocator,
+		lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size,
@@ -96,7 +115,7 @@ typedef lzma_ret (*lzma_code_function)(
 
 /// Type of a function to free the memory allocated for the coder
 typedef void (*lzma_end_function)(
-		lzma_coder *coder, lzma_allocator *allocator);
+		lzma_coder *coder, const lzma_allocator *allocator);
 
 
 /// Raw coder validates and converts an array of lzma_filter structures to
@@ -139,6 +158,11 @@ struct lzma_next_coder_s {
 	/// lzma_next_coder.coder.
 	lzma_end_function end;
 
+	/// Pointer to a function to get progress information. If this is NULL,
+	/// lzma_stream.total_in and .total_out are used instead.
+	void (*get_progress)(lzma_coder *coder,
+			uint64_t *progress_in, uint64_t *progress_out);
+
 	/// Pointer to function to return the type of the integrity check.
 	/// Most coders won't support this.
 	lzma_check (*get_check)(const lzma_coder *coder);
@@ -150,7 +174,7 @@ struct lzma_next_coder_s {
 
 	/// Update the filter-specific options or the whole filter chain
 	/// in the encoder.
-	lzma_ret (*update)(lzma_coder *coder, lzma_allocator *allocator,
+	lzma_ret (*update)(lzma_coder *coder, const lzma_allocator *allocator,
 			const lzma_filter *filters,
 			const lzma_filter *reversed_filters);
 };
@@ -164,6 +188,7 @@ struct lzma_next_coder_s {
 		.id = LZMA_VLI_UNKNOWN, \
 		.code = NULL, \
 		.end = NULL, \
+		.get_progress = NULL, \
 		.get_check = NULL, \
 		.memconfig = NULL, \
 		.update = NULL, \
@@ -185,6 +210,7 @@ struct lzma_internal_s {
 		ISEQ_SYNC_FLUSH,
 		ISEQ_FULL_FLUSH,
 		ISEQ_FINISH,
+		ISEQ_FULL_BARRIER,
 		ISEQ_END,
 		ISEQ_ERROR,
 	} sequence;
@@ -195,7 +221,7 @@ struct lzma_internal_s {
 	size_t avail_in;
 
 	/// Indicates which lzma_action values are allowed by next.code.
-	bool supported_actions[4];
+	bool supported_actions[LZMA_ACTION_MAX + 1];
 
 	/// If true, lzma_code will return LZMA_BUF_ERROR if no progress was
 	/// made (no input consumed and no output produced by next.code).
@@ -204,11 +230,17 @@ struct lzma_internal_s {
 
 
 /// Allocates memory
-extern void *lzma_alloc(size_t size, lzma_allocator *allocator)
+extern void *lzma_alloc(size_t size, const lzma_allocator *allocator)
 		lzma_attribute((__malloc__)) lzma_attr_alloc_size(1);
 
+/// Allocates memory and zeroes it (like calloc()). This can be faster
+/// than lzma_alloc() + memzero() while being backward compatible with
+/// custom allocators.
+extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1)
+		lzma_alloc_zero(size_t size, const lzma_allocator *allocator);
+
 /// Frees memory
-extern void lzma_free(void *ptr, lzma_allocator *allocator);
+extern void lzma_free(void *ptr, const lzma_allocator *allocator);
 
 
 /// Allocates strm->internal if it is NULL, and initializes *strm and
@@ -220,17 +252,19 @@ extern lzma_ret lzma_strm_init(lzma_stream *strm);
 /// than the filter being initialized now. This way the actual filter
 /// initialization functions don't need to use lzma_next_coder_init macro.
 extern lzma_ret lzma_next_filter_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 /// Update the next filter in the chain, if any. This checks that
 /// the application is not trying to change the Filter IDs.
 extern lzma_ret lzma_next_filter_update(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *reversed_filters);
 
 /// Frees the memory allocated for next->coder either using next->end or,
 /// if next->end is NULL, using lzma_free.
-extern void lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator);
+extern void lzma_next_end(lzma_next_coder *next,
+		const lzma_allocator *allocator);
 
 
 /// Copy as much data as possible from in[] to out[] and update *in_pos
diff --git a/src/liblzma/common/easy_buffer_encoder.c b/src/liblzma/common/easy_buffer_encoder.c
index c4be34ccfa27..48eb56f5cc91 100644
--- a/src/liblzma/common/easy_buffer_encoder.c
+++ b/src/liblzma/common/easy_buffer_encoder.c
@@ -15,8 +15,8 @@
 
 extern LZMA_API(lzma_ret)
 lzma_easy_buffer_encode(uint32_t preset, lzma_check check,
-		lzma_allocator *allocator, const uint8_t *in, size_t in_size,
-		uint8_t *out, size_t *out_pos, size_t out_size)
+		const lzma_allocator *allocator, const uint8_t *in,
+		size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size)
 {
 	lzma_options_easy opt_easy;
 	if (lzma_easy_preset(&opt_easy, preset))
diff --git a/src/liblzma/common/easy_encoder.c b/src/liblzma/common/easy_encoder.c
index d13ccd7351f1..5cb492dd0681 100644
--- a/src/liblzma/common/easy_encoder.c
+++ b/src/liblzma/common/easy_encoder.c
@@ -11,7 +11,6 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #include "easy_preset.h"
-#include "stream_encoder.h"
 
 
 extern LZMA_API(lzma_ret)
diff --git a/src/liblzma/common/filter_buffer_decoder.c b/src/liblzma/common/filter_buffer_decoder.c
index 2d35ef8e0af4..6620986eea8a 100644
--- a/src/liblzma/common/filter_buffer_decoder.c
+++ b/src/liblzma/common/filter_buffer_decoder.c
@@ -14,7 +14,8 @@
 
 
 extern LZMA_API(lzma_ret)
-lzma_raw_buffer_decode(const lzma_filter *filters, lzma_allocator *allocator,
+lzma_raw_buffer_decode(
+		const lzma_filter *filters, const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 {
diff --git a/src/liblzma/common/filter_buffer_encoder.c b/src/liblzma/common/filter_buffer_encoder.c
index 646e1b30374e..dda18e3d8e5e 100644
--- a/src/liblzma/common/filter_buffer_encoder.c
+++ b/src/liblzma/common/filter_buffer_encoder.c
@@ -14,9 +14,10 @@
 
 
 extern LZMA_API(lzma_ret)
-lzma_raw_buffer_encode(const lzma_filter *filters, lzma_allocator *allocator,
-		const uint8_t *in, size_t in_size, uint8_t *out,
-		size_t *out_pos, size_t out_size)
+lzma_raw_buffer_encode(
+		const lzma_filter *filters, const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
 {
 	// Validate what isn't validated later in filter_common.c.
 	if ((in == NULL && in_size != 0) || out == NULL
diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c
index 7c95b05f2a3f..9ad5d5d8e2af 100644
--- a/src/liblzma/common/filter_common.c
+++ b/src/liblzma/common/filter_common.c
@@ -123,7 +123,7 @@ static const struct {
 
 extern LZMA_API(lzma_ret)
 lzma_filters_copy(const lzma_filter *src, lzma_filter *dest,
-		lzma_allocator *allocator)
+		const lzma_allocator *allocator)
 {
 	if (src == NULL || dest == NULL)
 		return LZMA_PROG_ERROR;
@@ -239,7 +239,7 @@ validate_chain(const lzma_filter *filters, size_t *count)
 
 
 extern lzma_ret
-lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_raw_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *options,
 		lzma_filter_find coder_find, bool is_encoder)
 {
diff --git a/src/liblzma/common/filter_common.h b/src/liblzma/common/filter_common.h
index cd61fc0724f3..42a26a24a47f 100644
--- a/src/liblzma/common/filter_common.h
+++ b/src/liblzma/common/filter_common.h
@@ -36,7 +36,7 @@ typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id);
 
 
 extern lzma_ret lzma_raw_coder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *filters,
 		lzma_filter_find coder_find, bool is_encoder);
 
diff --git a/src/liblzma/common/filter_decoder.c b/src/liblzma/common/filter_decoder.c
index 1ebbe2afef0c..c75b0a89c30f 100644
--- a/src/liblzma/common/filter_decoder.c
+++ b/src/liblzma/common/filter_decoder.c
@@ -35,7 +35,8 @@ typedef struct {
 	/// \return     - LZMA_OK: Properties decoded successfully.
 	///             - LZMA_OPTIONS_ERROR: Unsupported properties
 	///             - LZMA_MEM_ERROR: Memory allocation failed.
-	lzma_ret (*props_decode)(void **options, lzma_allocator *allocator,
+	lzma_ret (*props_decode)(
+			void **options, const lzma_allocator *allocator,
 			const uint8_t *props, size_t props_size);
 
 } lzma_filter_decoder;
@@ -136,7 +137,7 @@ lzma_filter_decoder_is_supported(lzma_vli id)
 
 
 extern lzma_ret
-lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_raw_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *options)
 {
 	return lzma_raw_coder_init(next, allocator,
@@ -165,7 +166,7 @@ lzma_raw_decoder_memusage(const lzma_filter *filters)
 
 
 extern LZMA_API(lzma_ret)
-lzma_properties_decode(lzma_filter *filter, lzma_allocator *allocator,
+lzma_properties_decode(lzma_filter *filter, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size)
 {
 	// Make it always NULL so that the caller can always safely free() it.
diff --git a/src/liblzma/common/filter_decoder.h b/src/liblzma/common/filter_decoder.h
index d5c68bdd4a68..a2e255fe5f06 100644
--- a/src/liblzma/common/filter_decoder.h
+++ b/src/liblzma/common/filter_decoder.h
@@ -17,7 +17,7 @@
 
 
 extern lzma_ret lzma_raw_decoder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *options);
 
 #endif
diff --git a/src/liblzma/common/filter_encoder.c b/src/liblzma/common/filter_encoder.c
index 635d81223472..c5d8f39721fe 100644
--- a/src/liblzma/common/filter_encoder.c
+++ b/src/liblzma/common/filter_encoder.c
@@ -30,11 +30,11 @@ typedef struct {
 	/// invalid, UINT64_MAX is returned.
 	uint64_t (*memusage)(const void *options);
 
-	/// Calculates the minimum sane size for Blocks (or other types of
-	/// chunks) to which the input data can be split to make
-	/// multithreaded encoding possible. If this is NULL, it is assumed
-	/// that the encoder is fast enough with single thread.
-	lzma_vli (*chunk_size)(const void *options);
+	/// Calculates the recommended Uncompressed Size for .xz Blocks to
+	/// which the input data can be split to make multithreaded
+	/// encoding possible. If this is NULL, it is assumed that
+	/// the encoder is fast enough with single thread.
+	uint64_t (*block_size)(const void *options);
 
 	/// Tells the size of the Filter Properties field. If options are
 	/// invalid, UINT32_MAX is returned. If this is NULL, props_size_fixed
@@ -59,7 +59,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_LZMA1,
 		.init = &lzma_lzma_encoder_init,
 		.memusage = &lzma_lzma_encoder_memusage,
-		.chunk_size = NULL, // FIXME
+		.block_size = NULL, // FIXME
 		.props_size_get = NULL,
 		.props_size_fixed = 5,
 		.props_encode = &lzma_lzma_props_encode,
@@ -70,7 +70,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_LZMA2,
 		.init = &lzma_lzma2_encoder_init,
 		.memusage = &lzma_lzma2_encoder_memusage,
-		.chunk_size = NULL, // FIXME
+		.block_size = &lzma_lzma2_block_size, // FIXME
 		.props_size_get = NULL,
 		.props_size_fixed = 1,
 		.props_encode = &lzma_lzma2_props_encode,
@@ -81,7 +81,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_X86,
 		.init = &lzma_simple_x86_encoder_init,
 		.memusage = NULL,
-		.chunk_size = NULL,
+		.block_size = NULL,
 		.props_size_get = &lzma_simple_props_size,
 		.props_encode = &lzma_simple_props_encode,
 	},
@@ -91,7 +91,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_POWERPC,
 		.init = &lzma_simple_powerpc_encoder_init,
 		.memusage = NULL,
-		.chunk_size = NULL,
+		.block_size = NULL,
 		.props_size_get = &lzma_simple_props_size,
 		.props_encode = &lzma_simple_props_encode,
 	},
@@ -101,7 +101,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_IA64,
 		.init = &lzma_simple_ia64_encoder_init,
 		.memusage = NULL,
-		.chunk_size = NULL,
+		.block_size = NULL,
 		.props_size_get = &lzma_simple_props_size,
 		.props_encode = &lzma_simple_props_encode,
 	},
@@ -111,7 +111,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_ARM,
 		.init = &lzma_simple_arm_encoder_init,
 		.memusage = NULL,
-		.chunk_size = NULL,
+		.block_size = NULL,
 		.props_size_get = &lzma_simple_props_size,
 		.props_encode = &lzma_simple_props_encode,
 	},
@@ -121,7 +121,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_ARMTHUMB,
 		.init = &lzma_simple_armthumb_encoder_init,
 		.memusage = NULL,
-		.chunk_size = NULL,
+		.block_size = NULL,
 		.props_size_get = &lzma_simple_props_size,
 		.props_encode = &lzma_simple_props_encode,
 	},
@@ -131,7 +131,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_SPARC,
 		.init = &lzma_simple_sparc_encoder_init,
 		.memusage = NULL,
-		.chunk_size = NULL,
+		.block_size = NULL,
 		.props_size_get = &lzma_simple_props_size,
 		.props_encode = &lzma_simple_props_encode,
 	},
@@ -141,7 +141,7 @@ static const lzma_filter_encoder encoders[] = {
 		.id = LZMA_FILTER_DELTA,
 		.init = &lzma_delta_encoder_init,
 		.memusage = &lzma_delta_coder_memusage,
-		.chunk_size = NULL,
+		.block_size = NULL,
 		.props_size_get = NULL,
 		.props_size_fixed = 1,
 		.props_encode = &lzma_delta_props_encode,
@@ -196,7 +196,7 @@ lzma_filters_update(lzma_stream *strm, const lzma_filter *filters)
 
 
 extern lzma_ret
-lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_raw_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *options)
 {
 	return lzma_raw_coder_init(next, allocator,
@@ -226,20 +226,19 @@ lzma_raw_encoder_memusage(const lzma_filter *filters)
 }
 
 
-/*
-extern LZMA_API(lzma_vli)
-lzma_chunk_size(const lzma_filter *filters)
+extern uint64_t
+lzma_mt_block_size(const lzma_filter *filters)
 {
-	lzma_vli max = 0;
+	uint64_t max = 0;
 
 	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
 		const lzma_filter_encoder *const fe
 				= encoder_find(filters[i].id);
-		if (fe->chunk_size != NULL) {
-			const lzma_vli size
-					= fe->chunk_size(filters[i].options);
-			if (size == LZMA_VLI_UNKNOWN)
-				return LZMA_VLI_UNKNOWN;
+		if (fe->block_size != NULL) {
+			const uint64_t size
+					= fe->block_size(filters[i].options);
+			if (size == 0)
+				return 0;
 
 			if (size > max)
 				max = size;
@@ -248,7 +247,6 @@ lzma_chunk_size(const lzma_filter *filters)
 
 	return max;
 }
-*/
 
 
 extern LZMA_API(lzma_ret)
diff --git a/src/liblzma/common/filter_encoder.h b/src/liblzma/common/filter_encoder.h
index 5bc137f64584..f1d5683fe793 100644
--- a/src/liblzma/common/filter_encoder.h
+++ b/src/liblzma/common/filter_encoder.h
@@ -16,12 +16,12 @@
 #include "common.h"
 
 
-// FIXME: Might become a part of the public API once finished.
-// extern lzma_vli lzma_chunk_size(const lzma_filter *filters);
+// FIXME: Might become a part of the public API.
+extern uint64_t lzma_mt_block_size(const lzma_filter *filters);
 
 
 extern lzma_ret lzma_raw_encoder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *filters);
 
 #endif
diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c
index caae10ce79a0..ddfb085943d0 100644
--- a/src/liblzma/common/filter_flags_decoder.c
+++ b/src/liblzma/common/filter_flags_decoder.c
@@ -15,7 +15,7 @@
 
 extern LZMA_API(lzma_ret)
 lzma_filter_flags_decode(
-		lzma_filter *filter, lzma_allocator *allocator,
+		lzma_filter *filter, const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size)
 {
 	// Set the pointer to NULL so the caller can always safely free it.
diff --git a/src/liblzma/common/stream_encoder.h b/src/liblzma/common/hardware_cputhreads.c
index 46a7aed72eaa..f468366a6045 100644
--- a/src/liblzma/common/stream_encoder.h
+++ b/src/liblzma/common/hardware_cputhreads.c
@@ -1,7 +1,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 //
-/// \file       stream_encoder.h
-/// \brief      Encodes .xz Streams
+/// \file       hardware_cputhreads.c
+/// \brief      Get the number of CPU threads or cores
 //
 //  Author:     Lasse Collin
 //
@@ -10,14 +10,13 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
-#ifndef LZMA_STREAM_ENCODER_H
-#define LZMA_STREAM_ENCODER_H
-
 #include "common.h"
 
+#include "tuklib_cpucores.h"
 
-extern lzma_ret lzma_stream_encoder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
-		const lzma_filter *filters, lzma_check check);
 
-#endif
+extern LZMA_API(uint32_t)
+lzma_cputhreads(void)
+{
+	return tuklib_cpucores();
+}
diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c
index 9af4bc19d1b5..11f45f4009ea 100644
--- a/src/liblzma/common/index.c
+++ b/src/liblzma/common/index.c
@@ -191,8 +191,8 @@ index_tree_init(index_tree *tree)
 
 /// Helper for index_tree_end()
 static void
-index_tree_node_end(index_tree_node *node, lzma_allocator *allocator,
-		void (*free_func)(void *node, lzma_allocator *allocator))
+index_tree_node_end(index_tree_node *node, const lzma_allocator *allocator,
+		void (*free_func)(void *node, const lzma_allocator *allocator))
 {
 	// The tree won't ever be very huge, so recursion should be fine.
 	// 20 levels in the tree is likely quite a lot already in practice.
@@ -215,8 +215,8 @@ index_tree_node_end(index_tree_node *node, lzma_allocator *allocator,
 /// to free the Record groups from each index_stream before freeing
 /// the index_stream itself.
 static void
-index_tree_end(index_tree *tree, lzma_allocator *allocator,
-		void (*free_func)(void *node, lzma_allocator *allocator))
+index_tree_end(index_tree *tree, const lzma_allocator *allocator,
+		void (*free_func)(void *node, const lzma_allocator *allocator))
 {
 	if (tree->root != NULL)
 		index_tree_node_end(tree->root, allocator, free_func);
@@ -340,7 +340,7 @@ index_tree_locate(const index_tree *tree, lzma_vli target)
 static index_stream *
 index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base,
 		lzma_vli stream_number, lzma_vli block_number_base,
-		lzma_allocator *allocator)
+		const lzma_allocator *allocator)
 {
 	index_stream *s = lzma_alloc(sizeof(index_stream), allocator);
 	if (s == NULL)
@@ -368,7 +368,7 @@ index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base,
 
 /// Free the memory allocated for a Stream and its Record groups.
 static void
-index_stream_end(void *node, lzma_allocator *allocator)
+index_stream_end(void *node, const lzma_allocator *allocator)
 {
 	index_stream *s = node;
 	index_tree_end(&s->groups, allocator, NULL);
@@ -377,7 +377,7 @@ index_stream_end(void *node, lzma_allocator *allocator)
 
 
 static lzma_index *
-index_init_plain(lzma_allocator *allocator)
+index_init_plain(const lzma_allocator *allocator)
 {
 	lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
 	if (i != NULL) {
@@ -395,7 +395,7 @@ index_init_plain(lzma_allocator *allocator)
 
 
 extern LZMA_API(lzma_index *)
-lzma_index_init(lzma_allocator *allocator)
+lzma_index_init(const lzma_allocator *allocator)
 {
 	lzma_index *i = index_init_plain(allocator);
 	if (i == NULL)
@@ -414,7 +414,7 @@ lzma_index_init(lzma_allocator *allocator)
 
 
 extern LZMA_API(void)
-lzma_index_end(lzma_index *i, lzma_allocator *allocator)
+lzma_index_end(lzma_index *i, const lzma_allocator *allocator)
 {
 	// NOTE: If you modify this function, check also the bottom
 	// of lzma_index_cat().
@@ -637,7 +637,7 @@ lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding)
 
 
 extern LZMA_API(lzma_ret)
-lzma_index_append(lzma_index *i, lzma_allocator *allocator,
+lzma_index_append(lzma_index *i, const lzma_allocator *allocator,
 		lzma_vli unpadded_size, lzma_vli uncompressed_size)
 {
 	// Validate.
@@ -765,7 +765,7 @@ index_cat_helper(const index_cat_info *info, index_stream *this)
 
 extern LZMA_API(lzma_ret)
 lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
-		lzma_allocator *allocator)
+		const lzma_allocator *allocator)
 {
 	const lzma_vli dest_file_size = lzma_index_file_size(dest);
 
@@ -859,7 +859,7 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
 
 /// Duplicate an index_stream.
 static index_stream *
-index_dup_stream(const index_stream *src, lzma_allocator *allocator)
+index_dup_stream(const index_stream *src, const lzma_allocator *allocator)
 {
 	// Catch a somewhat theoretical integer overflow.
 	if (src->record_count > PREALLOC_MAX)
@@ -919,7 +919,7 @@ index_dup_stream(const index_stream *src, lzma_allocator *allocator)
 
 
 extern LZMA_API(lzma_index *)
-lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
+lzma_index_dup(const lzma_index *src, const lzma_allocator *allocator)
 {
 	// Allocate the base structure (no initial Stream).
 	lzma_index *dest = index_init_plain(allocator);
diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c
index 83c8a3af1db8..795d1834cc58 100644
--- a/src/liblzma/common/index_decoder.c
+++ b/src/liblzma/common/index_decoder.c
@@ -54,7 +54,7 @@ struct lzma_coder_s {
 
 
 static lzma_ret
-index_decode(lzma_coder *coder, lzma_allocator *allocator,
+index_decode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size,
 		uint8_t *restrict out lzma_attribute((__unused__)),
@@ -207,7 +207,7 @@ out:
 
 
 static void
-index_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+index_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_index_end(coder->index, allocator);
 	lzma_free(coder, allocator);
@@ -234,7 +234,7 @@ index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
 
 
 static lzma_ret
-index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator,
+index_decoder_reset(lzma_coder *coder, const lzma_allocator *allocator,
 		lzma_index **i, uint64_t memlimit)
 {
 	// Remember the pointer given by the application. We will set it
@@ -261,7 +261,7 @@ index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static lzma_ret
-index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		lzma_index **i, uint64_t memlimit)
 {
 	lzma_next_coder_init(&index_decoder_init, next, allocator);
@@ -299,8 +299,8 @@ lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
 
 
 extern LZMA_API(lzma_ret)
-lzma_index_buffer_decode(
-		lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator,
+lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
+		const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size)
 {
 	// Sanity checks
diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c
index 45919f094c42..d25ac7d3372b 100644
--- a/src/liblzma/common/index_encoder.c
+++ b/src/liblzma/common/index_encoder.c
@@ -42,7 +42,7 @@ struct lzma_coder_s {
 
 static lzma_ret
 index_encode(lzma_coder *coder,
-		lzma_allocator *allocator lzma_attribute((__unused__)),
+		const lzma_allocator *allocator lzma_attribute((__unused__)),
 		const uint8_t *restrict in lzma_attribute((__unused__)),
 		size_t *restrict in_pos lzma_attribute((__unused__)),
 		size_t in_size lzma_attribute((__unused__)),
@@ -159,7 +159,7 @@ out:
 
 
 static void
-index_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+index_encoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_free(coder, allocator);
 	return;
@@ -181,7 +181,7 @@ index_encoder_reset(lzma_coder *coder, const lzma_index *i)
 
 
 extern lzma_ret
-lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_index_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_index *i)
 {
 	lzma_next_coder_init(&lzma_index_encoder_init, next, allocator);
diff --git a/src/liblzma/common/index_encoder.h b/src/liblzma/common/index_encoder.h
index a13c94dcd077..4d55cd104785 100644
--- a/src/liblzma/common/index_encoder.h
+++ b/src/liblzma/common/index_encoder.h
@@ -17,7 +17,7 @@
 
 
 extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_index *i);
+		const lzma_allocator *allocator, const lzma_index *i);
 
 
 #endif
diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c
index e3e9386ae8e3..d7a0344b76c3 100644
--- a/src/liblzma/common/index_hash.c
+++ b/src/liblzma/common/index_hash.c
@@ -70,7 +70,8 @@ struct lzma_index_hash_s {
 
 
 extern LZMA_API(lzma_index_hash *)
-lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
+lzma_index_hash_init(lzma_index_hash *index_hash,
+		const lzma_allocator *allocator)
 {
 	if (index_hash == NULL) {
 		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
@@ -101,7 +102,8 @@ lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
 
 
 extern LZMA_API(void)
-lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator)
+lzma_index_hash_end(lzma_index_hash *index_hash,
+		const lzma_allocator *allocator)
 {
 	lzma_free(index_hash, allocator);
 	return;
diff --git a/src/liblzma/common/memcmplen.h b/src/liblzma/common/memcmplen.h
new file mode 100644
index 000000000000..f66e7cdb66c6
--- /dev/null
+++ b/src/liblzma/common/memcmplen.h
@@ -0,0 +1,170 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       memcmplen.h
+/// \brief      Optimized comparison of two buffers
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_MEMCMPLEN_H
+#define LZMA_MEMCMPLEN_H
+
+#include "common.h"
+
+#ifdef HAVE_IMMINTRIN_H
+#	include <immintrin.h>
+#endif
+
+/// How many extra bytes lzma_memcmplen() may read. This depends on
+/// the method but since it is just a few bytes the biggest possible
+/// value is used here.
+#define LZMA_MEMCMPLEN_EXTRA 16
+
+
+/// Find out how many equal bytes the two buffers have.
+///
+/// \param      buf1    First buffer
+/// \param      buf2    Second buffer
+/// \param      len     How many bytes have already been compared and will
+///                     be assumed to match
+/// \param      limit   How many bytes to compare at most, including the
+///                     already-compared bytes. This must be significantly
+///                     smaller than UINT32_MAX to avoid integer overflows.
+///                     Up to LZMA_MEMCMPLEN_EXTRA bytes may be read past
+///                     the specified limit from both buf1 and buf2.
+///
+/// \return     Number of equal bytes in the buffers is returned.
+///             This is always at least len and at most limit.
+static inline uint32_t lzma_attribute((__always_inline__))
+lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
+		uint32_t len, uint32_t limit)
+{
+	assert(len <= limit);
+	assert(limit <= UINT32_MAX / 2);
+
+#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
+		&& ((TUKLIB_GNUC_REQ(3, 4) && defined(__x86_64__)) \
+			|| (defined(__INTEL_COMPILER) && defined(__x86_64__)) \
+			|| (defined(__INTEL_COMPILER) && defined(_M_X64)) \
+			|| (defined(_MSC_VER) && defined(_M_X64)))
+	// NOTE: This will use 64-bit unaligned access which
+	// TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, but
+	// it's convenient here at least as long as it's x86-64 only.
+	//
+	// I keep this x86-64 only for now since that's where I know this
+	// to be a good method. This may be fine on other 64-bit CPUs too.
+	// On big endian one should use xor instead of subtraction and switch
+	// to __builtin_clzll().
+	while (len < limit) {
+		const uint64_t x = *(const uint64_t *)(buf1 + len)
+				- *(const uint64_t *)(buf2 + len);
+		if (x != 0) {
+#	if defined(_M_X64) // MSVC or Intel C compiler on Windows
+			unsigned long tmp;
+			_BitScanForward64(&tmp, x);
+			len += (uint32_t)tmp >> 3;
+#	else // GCC, clang, or Intel C compiler
+			len += (uint32_t)__builtin_ctzll(x) >> 3;
+#	endif
+			return my_min(len, limit);
+		}
+
+		len += 8;
+	}
+
+	return limit;
+
+#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
+		&& defined(HAVE__MM_MOVEMASK_EPI8) \
+		&& ((defined(__GNUC__) && defined(__SSE2_MATH__)) \
+			|| (defined(__INTEL_COMPILER) && defined(__SSE2__)) \
+			|| (defined(_MSC_VER) && defined(_M_IX86_FP) \
+				&& _M_IX86_FP >= 2))
+	// NOTE: Like above, this will use 128-bit unaligned access which
+	// TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit.
+	//
+	// SSE2 version for 32-bit and 64-bit x86. On x86-64 the above
+	// version is sometimes significantly faster and sometimes
+	// slightly slower than this SSE2 version, so this SSE2
+	// version isn't used on x86-64.
+	while (len < limit) {
+		const uint32_t x = 0xFFFF ^ _mm_movemask_epi8(_mm_cmpeq_epi8(
+			_mm_loadu_si128((const __m128i *)(buf1 + len)),
+			_mm_loadu_si128((const __m128i *)(buf2 + len))));
+
+		if (x != 0) {
+#	if defined(__INTEL_COMPILER)
+			len += _bit_scan_forward(x);
+#	elif defined(_MSC_VER)
+			unsigned long tmp;
+			_BitScanForward(&tmp, x);
+			len += tmp;
+#	else
+			len += __builtin_ctz(x);
+#	endif
+			return my_min(len, limit);
+		}
+
+		len += 16;
+	}
+
+	return limit;
+
+#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && !defined(WORDS_BIGENDIAN)
+	// Generic 32-bit little endian method
+	while (len < limit) {
+		uint32_t x = *(const uint32_t *)(buf1 + len)
+				- *(const uint32_t *)(buf2 + len);
+		if (x != 0) {
+			if ((x & 0xFFFF) == 0) {
+				len += 2;
+				x >>= 16;
+			}
+
+			if ((x & 0xFF) == 0)
+				++len;
+
+			return my_min(len, limit);
+		}
+
+		len += 4;
+	}
+
+	return limit;
+
+#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && defined(WORDS_BIGENDIAN)
+	// Generic 32-bit big endian method
+	while (len < limit) {
+		uint32_t x = *(const uint32_t *)(buf1 + len)
+				^ *(const uint32_t *)(buf2 + len);
+		if (x != 0) {
+			if ((x & 0xFFFF0000) == 0) {
+				len += 2;
+				x <<= 16;
+			}
+
+			if ((x & 0xFF000000) == 0)
+				++len;
+
+			return my_min(len, limit);
+		}
+
+		len += 4;
+	}
+
+	return limit;
+
+#else
+	// Simple portable version that doesn't use unaligned access.
+	while (len < limit && buf1[len] == buf2[len])
+		++len;
+
+	return len;
+#endif
+}
+
+#endif
diff --git a/src/liblzma/common/outqueue.c b/src/liblzma/common/outqueue.c
new file mode 100644
index 000000000000..2dc8a38d1be3
--- /dev/null
+++ b/src/liblzma/common/outqueue.c
@@ -0,0 +1,184 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       outqueue.c
+/// \brief      Output queue handling in multithreaded coding
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "outqueue.h"
+
+
+/// This is to ease integer overflow checking: We may allocate up to
+/// 2 * LZMA_THREADS_MAX buffers and we need some extra memory for other
+/// data structures (that's the second /2).
+#define BUF_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX / 2 / 2)
+
+
+static lzma_ret
+get_options(uint64_t *bufs_alloc_size, uint32_t *bufs_count,
+		uint64_t buf_size_max, uint32_t threads)
+{
+	if (threads > LZMA_THREADS_MAX || buf_size_max > BUF_SIZE_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	// The number of buffers is twice the number of threads.
+	// This wastes RAM but keeps the threads busy when buffers
+	// finish out of order.
+	//
+	// NOTE: If this is changed, update BUF_SIZE_MAX too.
+	*bufs_count = threads * 2;
+	*bufs_alloc_size = *bufs_count * buf_size_max;
+
+	return LZMA_OK;
+}
+
+
+extern uint64_t
+lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads)
+{
+	uint64_t bufs_alloc_size;
+	uint32_t bufs_count;
+
+	if (get_options(&bufs_alloc_size, &bufs_count, buf_size_max, threads)
+			!= LZMA_OK)
+		return UINT64_MAX;
+
+	return sizeof(lzma_outq) + bufs_count * sizeof(lzma_outbuf)
+			+ bufs_alloc_size;
+}
+
+
+extern lzma_ret
+lzma_outq_init(lzma_outq *outq, const lzma_allocator *allocator,
+		uint64_t buf_size_max, uint32_t threads)
+{
+	uint64_t bufs_alloc_size;
+	uint32_t bufs_count;
+
+	// Set bufs_count and bufs_alloc_size.
+	return_if_error(get_options(&bufs_alloc_size, &bufs_count,
+			buf_size_max, threads));
+
+	// Allocate memory if needed.
+	if (outq->buf_size_max != buf_size_max
+			|| outq->bufs_allocated != bufs_count) {
+		lzma_outq_end(outq, allocator);
+
+#if SIZE_MAX < UINT64_MAX
+		if (bufs_alloc_size > SIZE_MAX)
+			return LZMA_MEM_ERROR;
+#endif
+
+		outq->bufs = lzma_alloc(bufs_count * sizeof(lzma_outbuf),
+				allocator);
+		outq->bufs_mem = lzma_alloc((size_t)(bufs_alloc_size),
+				allocator);
+
+		if (outq->bufs == NULL || outq->bufs_mem == NULL) {
+			lzma_outq_end(outq, allocator);
+			return LZMA_MEM_ERROR;
+		}
+	}
+
+	// Initialize the rest of the main structure. Initialization of
+	// outq->bufs[] is done when they are actually needed.
+	outq->buf_size_max = (size_t)(buf_size_max);
+	outq->bufs_allocated = bufs_count;
+	outq->bufs_pos = 0;
+	outq->bufs_used = 0;
+	outq->read_pos = 0;
+
+	return LZMA_OK;
+}
+
+
+extern void
+lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator)
+{
+	lzma_free(outq->bufs, allocator);
+	outq->bufs = NULL;
+
+	lzma_free(outq->bufs_mem, allocator);
+	outq->bufs_mem = NULL;
+
+	return;
+}
+
+
+extern lzma_outbuf *
+lzma_outq_get_buf(lzma_outq *outq)
+{
+	// Caller must have checked it with lzma_outq_has_buf().
+	assert(outq->bufs_used < outq->bufs_allocated);
+
+	// Initialize the new buffer.
+	lzma_outbuf *buf = &outq->bufs[outq->bufs_pos];
+	buf->buf = outq->bufs_mem + outq->bufs_pos * outq->buf_size_max;
+	buf->size = 0;
+	buf->finished = false;
+
+	// Update the queue state.
+	if (++outq->bufs_pos == outq->bufs_allocated)
+		outq->bufs_pos = 0;
+
+	++outq->bufs_used;
+
+	return buf;
+}
+
+
+extern bool
+lzma_outq_is_readable(const lzma_outq *outq)
+{
+	uint32_t i = outq->bufs_pos - outq->bufs_used;
+	if (outq->bufs_pos < outq->bufs_used)
+		i += outq->bufs_allocated;
+
+	return outq->bufs[i].finished;
+}
+
+
+extern lzma_ret
+lzma_outq_read(lzma_outq *restrict outq, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size,
+		lzma_vli *restrict unpadded_size,
+		lzma_vli *restrict uncompressed_size)
+{
+	// There must be at least one buffer from which to read.
+	if (outq->bufs_used == 0)
+		return LZMA_OK;
+
+	// Get the buffer.
+	uint32_t i = outq->bufs_pos - outq->bufs_used;
+	if (outq->bufs_pos < outq->bufs_used)
+		i += outq->bufs_allocated;
+
+	lzma_outbuf *buf = &outq->bufs[i];
+
+	// If it isn't finished yet, we cannot read from it.
+	if (!buf->finished)
+		return LZMA_OK;
+
+	// Copy from the buffer to output.
+	lzma_bufcpy(buf->buf, &outq->read_pos, buf->size,
+			out, out_pos, out_size);
+
+	// Return if we didn't get all the data from the buffer.
+	if (outq->read_pos < buf->size)
+		return LZMA_OK;
+
+	// The buffer was finished. Tell the caller its size information.
+	*unpadded_size = buf->unpadded_size;
+	*uncompressed_size = buf->uncompressed_size;
+
+	// Free this buffer for further use.
+	--outq->bufs_used;
+	outq->read_pos = 0;
+
+	return LZMA_STREAM_END;
+}
diff --git a/src/liblzma/common/outqueue.h b/src/liblzma/common/outqueue.h
new file mode 100644
index 000000000000..079634de4587
--- /dev/null
+++ b/src/liblzma/common/outqueue.h
@@ -0,0 +1,156 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       outqueue.h
+/// \brief      Output queue handling in multithreaded coding
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/// Output buffer for a single thread
+typedef struct {
+	/// Pointer to the output buffer of lzma_outq.buf_size_max bytes
+	uint8_t *buf;
+
+	/// Amount of data written to buf
+	size_t size;
+
+	/// Additional size information
+	lzma_vli unpadded_size;
+	lzma_vli uncompressed_size;
+
+	/// True when no more data will be written into this buffer.
+	///
+	/// \note       This is read by another thread and thus access
+	///             to this variable needs a mutex.
+	bool finished;
+
+} lzma_outbuf;
+
+
+typedef struct {
+	/// Array of buffers that are used cyclically.
+	lzma_outbuf *bufs;
+
+	/// Memory allocated for all the buffers
+	uint8_t *bufs_mem;
+
+	/// Amount of buffer space available in each buffer
+	size_t buf_size_max;
+
+	/// Number of buffers allocated
+	uint32_t bufs_allocated;
+
+	/// Position in the bufs array. The next buffer to be taken
+	/// into use is bufs[bufs_pos].
+	uint32_t bufs_pos;
+
+	/// Number of buffers in use
+	uint32_t bufs_used;
+
+	/// Position in the buffer in lzma_outq_read()
+	size_t read_pos;
+
+} lzma_outq;
+
+
+/**
+ * \brief       Calculate the memory usage of an output queue
+ *
+ * \return      Approximate memory usage in bytes or UINT64_MAX on error.
+ */
+extern uint64_t lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads);
+
+
+/// \brief      Initialize an output queue
+///
+/// \param      outq            Pointer to an output queue. Before calling
+///                             this function the first time, *outq should
+///                             have been zeroed with memzero() so that this
+///                             function knows that there are no previous
+///                             allocations to free.
+/// \param      allocator       Pointer to allocator or NULL
+/// \param      buf_size_max    Maximum amount of data that a single buffer
+///                             in the queue may need to store.
+/// \param      threads         Number of buffers that may be in use
+///                             concurrently. Note that more than this number
+///                             of buffers will actually get allocated to
+///                             improve performance when buffers finish
+///                             out of order.
+///
+/// \return     - LZMA_OK
+///             - LZMA_MEM_ERROR
+///
+extern lzma_ret lzma_outq_init(
+		lzma_outq *outq, const lzma_allocator *allocator,
+		uint64_t buf_size_max, uint32_t threads);
+
+
+/// \brief      Free the memory associated with the output queue
+extern void lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator);
+
+
+/// \brief      Get a new buffer
+///
+/// lzma_outq_has_buf() must be used to check that there is a buffer
+/// available before calling lzma_outq_get_buf().
+///
+extern lzma_outbuf *lzma_outq_get_buf(lzma_outq *outq);
+
+
+/// \brief      Test if there is data ready to be read
+///
+/// Call to this function must be protected with the same mutex that
+/// is used to protect lzma_outbuf.finished.
+///
+extern bool lzma_outq_is_readable(const lzma_outq *outq);
+
+
+/// \brief      Read finished data
+///
+/// \param      outq            Pointer to an output queue
+/// \param      out             Beginning of the output buffer
+/// \param      out_pos         The next byte will be written to
+///                             out[*out_pos].
+/// \param      out_size        Size of the out buffer; the first byte into
+///                             which no data is written to is out[out_size].
+/// \param      unpadded_size   Unpadded Size from the Block encoder
+/// \param      uncompressed_size Uncompressed Size from the Block encoder
+///
+/// \return     - LZMA: All OK. Either no data was available or the buffer
+///               being read didn't become empty yet.
+///             - LZMA_STREAM_END: The buffer being read was finished.
+///               *unpadded_size and *uncompressed_size were set.
+///
+/// \note       This reads lzma_outbuf.finished variables and thus call
+///             to this function needs to be protected with a mutex.
+///
+extern lzma_ret lzma_outq_read(lzma_outq *restrict outq,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size, lzma_vli *restrict unpadded_size,
+		lzma_vli *restrict uncompressed_size);
+
+
+/// \brief      Test if there is at least one buffer free
+///
+/// This must be used before getting a new buffer with lzma_outq_get_buf().
+///
+static inline bool
+lzma_outq_has_buf(const lzma_outq *outq)
+{
+	return outq->bufs_used < outq->bufs_allocated;
+}
+
+
+/// \brief      Test if the queue is completely empty
+static inline bool
+lzma_outq_is_empty(const lzma_outq *outq)
+{
+	return outq->bufs_used == 0;
+}
diff --git a/src/liblzma/common/stream_buffer_decoder.c b/src/liblzma/common/stream_buffer_decoder.c
index ae753155180d..b9745b5dbe18 100644
--- a/src/liblzma/common/stream_buffer_decoder.c
+++ b/src/liblzma/common/stream_buffer_decoder.c
@@ -15,7 +15,7 @@
 
 extern LZMA_API(lzma_ret)
 lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags,
-		lzma_allocator *allocator,
+		const lzma_allocator *allocator,
 		const uint8_t *in, size_t *in_pos, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 {
diff --git a/src/liblzma/common/stream_buffer_encoder.c b/src/liblzma/common/stream_buffer_encoder.c
index 2450ee2e1fa0..af49554a6b0c 100644
--- a/src/liblzma/common/stream_buffer_encoder.c
+++ b/src/liblzma/common/stream_buffer_encoder.c
@@ -42,7 +42,8 @@ lzma_stream_buffer_bound(size_t uncompressed_size)
 
 extern LZMA_API(lzma_ret)
 lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check,
-		lzma_allocator *allocator, const uint8_t *in, size_t in_size,
+		const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
 		uint8_t *out, size_t *out_pos_ptr, size_t out_size)
 {
 	// Sanity checks
diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c
index 37ea71edbd71..3ab938c9f142 100644
--- a/src/liblzma/common/stream_decoder.c
+++ b/src/liblzma/common/stream_decoder.c
@@ -57,6 +57,10 @@ struct lzma_coder_s {
 	/// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
 	bool tell_any_check;
 
+	/// If true, we will tell the Block decoder to skip calculating
+	/// and verifying the integrity check.
+	bool ignore_check;
+
 	/// If true, we will decode concatenated Streams that possibly have
 	/// Stream Padding between or after them. LZMA_STREAM_END is returned
 	/// once the application isn't giving us any new input, and we aren't
@@ -80,7 +84,7 @@ struct lzma_coder_s {
 
 
 static lzma_ret
-stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator)
+stream_decoder_reset(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	// Initialize the Index hash used to verify the Index.
 	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
@@ -96,7 +100,7 @@ stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator)
 
 
 static lzma_ret
-stream_decode(lzma_coder *coder, lzma_allocator *allocator,
+stream_decode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -182,8 +186,8 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
 
 		coder->pos = 0;
 
-		// Version 0 is currently the only possible version.
-		coder->block_options.version = 0;
+		// Version 1 is needed to support the .ignore_check option.
+		coder->block_options.version = 1;
 
 		// Set up a buffer to hold the filter chain. Block Header
 		// decoder will initialize all members of this array so
@@ -195,6 +199,11 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
 		return_if_error(lzma_block_header_decode(&coder->block_options,
 				allocator, coder->buffer));
 
+		// If LZMA_IGNORE_CHECK was used, this flag needs to be set.
+		// It has to be set after lzma_block_header_decode() because
+		// it always resets this to false.
+		coder->block_options.ignore_check = coder->ignore_check;
+
 		// Check the memory usage limit.
 		const uint64_t memusage = lzma_raw_decoder_memusage(filters);
 		lzma_ret ret;
@@ -366,7 +375,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static void
-stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+stream_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->block_decoder, allocator);
 	lzma_index_hash_end(coder->index_hash, allocator);
@@ -401,7 +410,8 @@ stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
 
 
 extern lzma_ret
-lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_stream_decoder_init(
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		uint64_t memlimit, uint32_t flags)
 {
 	lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
@@ -432,6 +442,7 @@ lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 	next->coder->tell_unsupported_check
 			= (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
 	next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
+	next->coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
 	next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
 	next->coder->first_stream = true;
 
diff --git a/src/liblzma/common/stream_decoder.h b/src/liblzma/common/stream_decoder.h
index e54ac28f44a5..c13c6ba12706 100644
--- a/src/liblzma/common/stream_decoder.h
+++ b/src/liblzma/common/stream_decoder.h
@@ -15,7 +15,8 @@
 
 #include "common.h"
 
-extern lzma_ret lzma_stream_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, uint64_t memlimit, uint32_t flags);
+extern lzma_ret lzma_stream_decoder_init(
+		lzma_next_coder *next, const lzma_allocator *allocator,
+		uint64_t memlimit, uint32_t flags);
 
 #endif
diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c
index 97a7a23a8135..a7663bc48db3 100644
--- a/src/liblzma/common/stream_encoder.c
+++ b/src/liblzma/common/stream_encoder.c
@@ -10,7 +10,6 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
-#include "stream_encoder.h"
 #include "block_encoder.h"
 #include "index_encoder.h"
 
@@ -26,7 +25,7 @@ struct lzma_coder_s {
 	} sequence;
 
 	/// True if Block encoder has been initialized by
-	/// lzma_stream_encoder_init() or stream_encoder_update()
+	/// stream_encoder_init() or stream_encoder_update()
 	/// and thus doesn't need to be initialized in stream_encode().
 	bool block_encoder_is_initialized;
 
@@ -60,7 +59,7 @@ struct lzma_coder_s {
 
 
 static lzma_ret
-block_encoder_init(lzma_coder *coder, lzma_allocator *allocator)
+block_encoder_init(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	// Prepare the Block options. Even though Block encoder doesn't need
 	// compressed_size, uncompressed_size, and header_size to be
@@ -79,7 +78,7 @@ block_encoder_init(lzma_coder *coder, lzma_allocator *allocator)
 
 
 static lzma_ret
-stream_encode(lzma_coder *coder, lzma_allocator *allocator,
+stream_encode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -126,7 +125,7 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator,
 		}
 
 		// Initialize the Block encoder unless it was already
-		// initialized by lzma_stream_encoder_init() or
+		// initialized by stream_encoder_init() or
 		// stream_encoder_update().
 		if (!coder->block_encoder_is_initialized)
 			return_if_error(block_encoder_init(coder, allocator));
@@ -147,11 +146,12 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator,
 	}
 
 	case SEQ_BLOCK_ENCODE: {
-		static const lzma_action convert[4] = {
+		static const lzma_action convert[LZMA_ACTION_MAX + 1] = {
 			LZMA_RUN,
 			LZMA_SYNC_FLUSH,
 			LZMA_FINISH,
 			LZMA_FINISH,
+			LZMA_FINISH,
 		};
 
 		const lzma_ret ret = coder->block_encoder.code(
@@ -209,7 +209,7 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static void
-stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+stream_encoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->block_encoder, allocator);
 	lzma_next_end(&coder->index_encoder, allocator);
@@ -224,7 +224,7 @@ stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 static lzma_ret
-stream_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
+stream_encoder_update(lzma_coder *coder, const lzma_allocator *allocator,
 		const lzma_filter *filters,
 		const lzma_filter *reversed_filters)
 {
@@ -262,11 +262,11 @@ stream_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
 }
 
 
-extern lzma_ret
-lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+static lzma_ret
+stream_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter *filters, lzma_check check)
 {
-	lzma_next_coder_init(&lzma_stream_encoder_init, next, allocator);
+	lzma_next_coder_init(&stream_encoder_init, next, allocator);
 
 	if (filters == NULL)
 		return LZMA_PROG_ERROR;
@@ -320,11 +320,12 @@ extern LZMA_API(lzma_ret)
 lzma_stream_encoder(lzma_stream *strm,
 		const lzma_filter *filters, lzma_check check)
 {
-	lzma_next_strm_init(lzma_stream_encoder_init, strm, filters, check);
+	lzma_next_strm_init(stream_encoder_init, strm, filters, check);
 
 	strm->internal->supported_actions[LZMA_RUN] = true;
 	strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
 	strm->internal->supported_actions[LZMA_FULL_FLUSH] = true;
+	strm->internal->supported_actions[LZMA_FULL_BARRIER] = true;
 	strm->internal->supported_actions[LZMA_FINISH] = true;
 
 	return LZMA_OK;
diff --git a/src/liblzma/common/stream_encoder_mt.c b/src/liblzma/common/stream_encoder_mt.c
new file mode 100644
index 000000000000..9780ed04cac4
--- /dev/null
+++ b/src/liblzma/common/stream_encoder_mt.c
@@ -0,0 +1,1131 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       stream_encoder_mt.c
+/// \brief      Multithreaded .xz Stream encoder
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+#include "easy_preset.h"
+#include "block_encoder.h"
+#include "block_buffer_encoder.h"
+#include "index_encoder.h"
+#include "outqueue.h"
+
+
+/// Maximum supported block size. This makes it simpler to prevent integer
+/// overflows if we are given unusually large block size.
+#define BLOCK_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX)
+
+
+typedef enum {
+	/// Waiting for work.
+	THR_IDLE,
+
+	/// Encoding is in progress.
+	THR_RUN,
+
+	/// Encoding is in progress but no more input data will
+	/// be read.
+	THR_FINISH,
+
+	/// The main thread wants the thread to stop whatever it was doing
+	/// but not exit.
+	THR_STOP,
+
+	/// The main thread wants the thread to exit. We could use
+	/// cancellation but since there's stopped anyway, this is lazier.
+	THR_EXIT,
+
+} worker_state;
+
+
+typedef struct worker_thread_s worker_thread;
+struct worker_thread_s {
+	worker_state state;
+
+	/// Input buffer of coder->block_size bytes. The main thread will
+	/// put new input into this and update in_size accordingly. Once
+	/// no more input is coming, state will be set to THR_FINISH.
+	uint8_t *in;
+
+	/// Amount of data available in the input buffer. This is modified
+	/// only by the main thread.
+	size_t in_size;
+
+	/// Output buffer for this thread. This is set by the main
+	/// thread every time a new Block is started with this thread
+	/// structure.
+	lzma_outbuf *outbuf;
+
+	/// Pointer to the main structure is needed when putting this
+	/// thread back to the stack of free threads.
+	lzma_coder *coder;
+
+	/// The allocator is set by the main thread. Since a copy of the
+	/// pointer is kept here, the application must not change the
+	/// allocator before calling lzma_end().
+	const lzma_allocator *allocator;
+
+	/// Amount of uncompressed data that has already been compressed.
+	uint64_t progress_in;
+
+	/// Amount of compressed data that is ready.
+	uint64_t progress_out;
+
+	/// Block encoder
+	lzma_next_coder block_encoder;
+
+	/// Compression options for this Block
+	lzma_block block_options;
+
+	/// Next structure in the stack of free worker threads.
+	worker_thread *next;
+
+	mythread_mutex mutex;
+	mythread_cond cond;
+
+	/// The ID of this thread is used to join the thread
+	/// when it's not needed anymore.
+	mythread thread_id;
+};
+
+
+struct lzma_coder_s {
+	enum {
+		SEQ_STREAM_HEADER,
+		SEQ_BLOCK,
+		SEQ_INDEX,
+		SEQ_STREAM_FOOTER,
+	} sequence;
+
+	/// Start a new Block every block_size bytes of input unless
+	/// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier.
+	size_t block_size;
+
+	/// The filter chain currently in use
+	lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+
+	/// Index to hold sizes of the Blocks
+	lzma_index *index;
+
+	/// Index encoder
+	lzma_next_coder index_encoder;
+
+
+	/// Stream Flags for encoding the Stream Header and Stream Footer.
+	lzma_stream_flags stream_flags;
+
+	/// Buffer to hold Stream Header and Stream Footer.
+	uint8_t header[LZMA_STREAM_HEADER_SIZE];
+
+	/// Read position in header[]
+	size_t header_pos;
+
+
+	/// Output buffer queue for compressed data
+	lzma_outq outq;
+
+
+	/// Maximum wait time if cannot use all the input and cannot
+	/// fill the output buffer. This is in milliseconds.
+	uint32_t timeout;
+
+
+	/// Error code from a worker thread
+	lzma_ret thread_error;
+
+	/// Array of allocated thread-specific structures
+	worker_thread *threads;
+
+	/// Number of structures in "threads" above. This is also the
+	/// number of threads that will be created at maximum.
+	uint32_t threads_max;
+
+	/// Number of thread structures that have been initialized, and
+	/// thus the number of worker threads actually created so far.
+	uint32_t threads_initialized;
+
+	/// Stack of free threads. When a thread finishes, it puts itself
+	/// back into this stack. This starts as empty because threads
+	/// are created only when actually needed.
+	worker_thread *threads_free;
+
+	/// The most recent worker thread to which the main thread writes
+	/// the new input from the application.
+	worker_thread *thr;
+
+
+	/// Amount of uncompressed data in Blocks that have already
+	/// been finished.
+	uint64_t progress_in;
+
+	/// Amount of compressed data in Stream Header + Blocks that
+	/// have already been finished.
+	uint64_t progress_out;
+
+
+	mythread_mutex mutex;
+	mythread_cond cond;
+};
+
+
+/// Tell the main thread that something has gone wrong.
+static void
+worker_error(worker_thread *thr, lzma_ret ret)
+{
+	assert(ret != LZMA_OK);
+	assert(ret != LZMA_STREAM_END);
+
+	mythread_sync(thr->coder->mutex) {
+		if (thr->coder->thread_error == LZMA_OK)
+			thr->coder->thread_error = ret;
+
+		mythread_cond_signal(&thr->coder->cond);
+	}
+
+	return;
+}
+
+
+static worker_state
+worker_encode(worker_thread *thr, worker_state state)
+{
+	assert(thr->progress_in == 0);
+	assert(thr->progress_out == 0);
+
+	// Set the Block options.
+	thr->block_options = (lzma_block){
+		.version = 0,
+		.check = thr->coder->stream_flags.check,
+		.compressed_size = thr->coder->outq.buf_size_max,
+		.uncompressed_size = thr->coder->block_size,
+
+		// TODO: To allow changing the filter chain, the filters
+		// array must be copied to each worker_thread.
+		.filters = thr->coder->filters,
+	};
+
+	// Calculate maximum size of the Block Header. This amount is
+	// reserved in the beginning of the buffer so that Block Header
+	// along with Compressed Size and Uncompressed Size can be
+	// written there.
+	lzma_ret ret = lzma_block_header_size(&thr->block_options);
+	if (ret != LZMA_OK) {
+		worker_error(thr, ret);
+		return THR_STOP;
+	}
+
+	// Initialize the Block encoder.
+	ret = lzma_block_encoder_init(&thr->block_encoder,
+			thr->allocator, &thr->block_options);
+	if (ret != LZMA_OK) {
+		worker_error(thr, ret);
+		return THR_STOP;
+	}
+
+	size_t in_pos = 0;
+	size_t in_size = 0;
+
+	thr->outbuf->size = thr->block_options.header_size;
+	const size_t out_size = thr->coder->outq.buf_size_max;
+
+	do {
+		mythread_sync(thr->mutex) {
+			// Store in_pos and out_pos into *thr so that
+			// an application may read them via
+			// lzma_get_progress() to get progress information.
+			//
+			// NOTE: These aren't updated when the encoding
+			// finishes. Instead, the final values are taken
+			// later from thr->outbuf.
+			thr->progress_in = in_pos;
+			thr->progress_out = thr->outbuf->size;
+
+			while (in_size == thr->in_size
+					&& thr->state == THR_RUN)
+				mythread_cond_wait(&thr->cond, &thr->mutex);
+
+			state = thr->state;
+			in_size = thr->in_size;
+		}
+
+		// Return if we were asked to stop or exit.
+		if (state >= THR_STOP)
+			return state;
+
+		lzma_action action = state == THR_FINISH
+				? LZMA_FINISH : LZMA_RUN;
+
+		// Limit the amount of input given to the Block encoder
+		// at once. This way this thread can react fairly quickly
+		// if the main thread wants us to stop or exit.
+		static const size_t in_chunk_max = 16384;
+		size_t in_limit = in_size;
+		if (in_size - in_pos > in_chunk_max) {
+			in_limit = in_pos + in_chunk_max;
+			action = LZMA_RUN;
+		}
+
+		ret = thr->block_encoder.code(
+				thr->block_encoder.coder, thr->allocator,
+				thr->in, &in_pos, in_limit, thr->outbuf->buf,
+				&thr->outbuf->size, out_size, action);
+	} while (ret == LZMA_OK && thr->outbuf->size < out_size);
+
+	switch (ret) {
+	case LZMA_STREAM_END:
+		assert(state == THR_FINISH);
+
+		// Encode the Block Header. By doing it after
+		// the compression, we can store the Compressed Size
+		// and Uncompressed Size fields.
+		ret = lzma_block_header_encode(&thr->block_options,
+				thr->outbuf->buf);
+		if (ret != LZMA_OK) {
+			worker_error(thr, ret);
+			return THR_STOP;
+		}
+
+		break;
+
+	case LZMA_OK:
+		// The data was incompressible. Encode it using uncompressed
+		// LZMA2 chunks.
+		//
+		// First wait that we have gotten all the input.
+		mythread_sync(thr->mutex) {
+			while (thr->state == THR_RUN)
+				mythread_cond_wait(&thr->cond, &thr->mutex);
+
+			state = thr->state;
+			in_size = thr->in_size;
+		}
+
+		if (state >= THR_STOP)
+			return state;
+
+		// Do the encoding. This takes care of the Block Header too.
+		thr->outbuf->size = 0;
+		ret = lzma_block_uncomp_encode(&thr->block_options,
+				thr->in, in_size, thr->outbuf->buf,
+				&thr->outbuf->size, out_size);
+
+		// It shouldn't fail.
+		if (ret != LZMA_OK) {
+			worker_error(thr, LZMA_PROG_ERROR);
+			return THR_STOP;
+		}
+
+		break;
+
+	default:
+		worker_error(thr, ret);
+		return THR_STOP;
+	}
+
+	// Set the size information that will be read by the main thread
+	// to write the Index field.
+	thr->outbuf->unpadded_size
+			= lzma_block_unpadded_size(&thr->block_options);
+	assert(thr->outbuf->unpadded_size != 0);
+	thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size;
+
+	return THR_FINISH;
+}
+
+
+static MYTHREAD_RET_TYPE
+worker_start(void *thr_ptr)
+{
+	worker_thread *thr = thr_ptr;
+	worker_state state = THR_IDLE; // Init to silence a warning
+
+	while (true) {
+		// Wait for work.
+		mythread_sync(thr->mutex) {
+			while (true) {
+				// The thread is already idle so if we are
+				// requested to stop, just set the state.
+				if (thr->state == THR_STOP) {
+					thr->state = THR_IDLE;
+					mythread_cond_signal(&thr->cond);
+				}
+
+				state = thr->state;
+				if (state != THR_IDLE)
+					break;
+
+				mythread_cond_wait(&thr->cond, &thr->mutex);
+			}
+		}
+
+		assert(state != THR_IDLE);
+		assert(state != THR_STOP);
+
+		if (state <= THR_FINISH)
+			state = worker_encode(thr, state);
+
+		if (state == THR_EXIT)
+			break;
+
+		// Mark the thread as idle unless the main thread has
+		// told us to exit. Signal is needed for the case
+		// where the main thread is waiting for the threads to stop.
+		mythread_sync(thr->mutex) {
+			if (thr->state != THR_EXIT) {
+				thr->state = THR_IDLE;
+				mythread_cond_signal(&thr->cond);
+			}
+		}
+
+		mythread_sync(thr->coder->mutex) {
+			// Mark the output buffer as finished if
+			// no errors occurred.
+			thr->outbuf->finished = state == THR_FINISH;
+
+			// Update the main progress info.
+			thr->coder->progress_in
+					+= thr->outbuf->uncompressed_size;
+			thr->coder->progress_out += thr->outbuf->size;
+			thr->progress_in = 0;
+			thr->progress_out = 0;
+
+			// Return this thread to the stack of free threads.
+			thr->next = thr->coder->threads_free;
+			thr->coder->threads_free = thr;
+
+			mythread_cond_signal(&thr->coder->cond);
+		}
+	}
+
+	// Exiting, free the resources.
+	mythread_mutex_destroy(&thr->mutex);
+	mythread_cond_destroy(&thr->cond);
+
+	lzma_next_end(&thr->block_encoder, thr->allocator);
+	lzma_free(thr->in, thr->allocator);
+	return MYTHREAD_RET_VALUE;
+}
+
+
+/// Make the threads stop but not exit. Optionally wait for them to stop.
+static void
+threads_stop(lzma_coder *coder, bool wait_for_threads)
+{
+	// Tell the threads to stop.
+	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+		mythread_sync(coder->threads[i].mutex) {
+			coder->threads[i].state = THR_STOP;
+			mythread_cond_signal(&coder->threads[i].cond);
+		}
+	}
+
+	if (!wait_for_threads)
+		return;
+
+	// Wait for the threads to settle in the idle state.
+	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+		mythread_sync(coder->threads[i].mutex) {
+			while (coder->threads[i].state != THR_IDLE)
+				mythread_cond_wait(&coder->threads[i].cond,
+						&coder->threads[i].mutex);
+		}
+	}
+
+	return;
+}
+
+
+/// Stop the threads and free the resources associated with them.
+/// Wait until the threads have exited.
+static void
+threads_end(lzma_coder *coder, const lzma_allocator *allocator)
+{
+	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+		mythread_sync(coder->threads[i].mutex) {
+			coder->threads[i].state = THR_EXIT;
+			mythread_cond_signal(&coder->threads[i].cond);
+		}
+	}
+
+	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+		int ret = mythread_join(coder->threads[i].thread_id);
+		assert(ret == 0);
+		(void)ret;
+	}
+
+	lzma_free(coder->threads, allocator);
+	return;
+}
+
+
+/// Initialize a new worker_thread structure and create a new thread.
+static lzma_ret
+initialize_new_thread(lzma_coder *coder, const lzma_allocator *allocator)
+{
+	worker_thread *thr = &coder->threads[coder->threads_initialized];
+
+	thr->in = lzma_alloc(coder->block_size, allocator);
+	if (thr->in == NULL)
+		return LZMA_MEM_ERROR;
+
+	if (mythread_mutex_init(&thr->mutex))
+		goto error_mutex;
+
+	if (mythread_cond_init(&thr->cond))
+		goto error_cond;
+
+	thr->state = THR_IDLE;
+	thr->allocator = allocator;
+	thr->coder = coder;
+	thr->progress_in = 0;
+	thr->progress_out = 0;
+	thr->block_encoder = LZMA_NEXT_CODER_INIT;
+
+	if (mythread_create(&thr->thread_id, &worker_start, thr))
+		goto error_thread;
+
+	++coder->threads_initialized;
+	coder->thr = thr;
+
+	return LZMA_OK;
+
+error_thread:
+	mythread_cond_destroy(&thr->cond);
+
+error_cond:
+	mythread_mutex_destroy(&thr->mutex);
+
+error_mutex:
+	lzma_free(thr->in, allocator);
+	return LZMA_MEM_ERROR;
+}
+
+
+static lzma_ret
+get_thread(lzma_coder *coder, const lzma_allocator *allocator)
+{
+	// If there are no free output subqueues, there is no
+	// point to try getting a thread.
+	if (!lzma_outq_has_buf(&coder->outq))
+		return LZMA_OK;
+
+	// If there is a free structure on the stack, use it.
+	mythread_sync(coder->mutex) {
+		if (coder->threads_free != NULL) {
+			coder->thr = coder->threads_free;
+			coder->threads_free = coder->threads_free->next;
+		}
+	}
+
+	if (coder->thr == NULL) {
+		// If there are no uninitialized structures left, return.
+		if (coder->threads_initialized == coder->threads_max)
+			return LZMA_OK;
+
+		// Initialize a new thread.
+		return_if_error(initialize_new_thread(coder, allocator));
+	}
+
+	// Reset the parts of the thread state that have to be done
+	// in the main thread.
+	mythread_sync(coder->thr->mutex) {
+		coder->thr->state = THR_RUN;
+		coder->thr->in_size = 0;
+		coder->thr->outbuf = lzma_outq_get_buf(&coder->outq);
+		mythread_cond_signal(&coder->thr->cond);
+	}
+
+	return LZMA_OK;
+}
+
+
+static lzma_ret
+stream_encode_in(lzma_coder *coder, const lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, lzma_action action)
+{
+	while (*in_pos < in_size
+			|| (coder->thr != NULL && action != LZMA_RUN)) {
+		if (coder->thr == NULL) {
+			// Get a new thread.
+			const lzma_ret ret = get_thread(coder, allocator);
+			if (coder->thr == NULL)
+				return ret;
+		}
+
+		// Copy the input data to thread's buffer.
+		size_t thr_in_size = coder->thr->in_size;
+		lzma_bufcpy(in, in_pos, in_size, coder->thr->in,
+				&thr_in_size, coder->block_size);
+
+		// Tell the Block encoder to finish if
+		//  - it has got block_size bytes of input; or
+		//  - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH,
+		//    or LZMA_FULL_BARRIER was used.
+		//
+		// TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER.
+		const bool finish = thr_in_size == coder->block_size
+				|| (*in_pos == in_size && action != LZMA_RUN);
+
+		bool block_error = false;
+
+		mythread_sync(coder->thr->mutex) {
+			if (coder->thr->state == THR_IDLE) {
+				// Something has gone wrong with the Block
+				// encoder. It has set coder->thread_error
+				// which we will read a few lines later.
+				block_error = true;
+			} else {
+				// Tell the Block encoder its new amount
+				// of input and update the state if needed.
+				coder->thr->in_size = thr_in_size;
+
+				if (finish)
+					coder->thr->state = THR_FINISH;
+
+				mythread_cond_signal(&coder->thr->cond);
+			}
+		}
+
+		if (block_error) {
+			lzma_ret ret;
+
+			mythread_sync(coder->mutex) {
+				ret = coder->thread_error;
+			}
+
+			return ret;
+		}
+
+		if (finish)
+			coder->thr = NULL;
+	}
+
+	return LZMA_OK;
+}
+
+
+/// Wait until more input can be consumed, more output can be read, or
+/// an optional timeout is reached.
+static bool
+wait_for_work(lzma_coder *coder, mythread_condtime *wait_abs,
+		bool *has_blocked, bool has_input)
+{
+	if (coder->timeout != 0 && !*has_blocked) {
+		// Every time when stream_encode_mt() is called via
+		// lzma_code(), *has_blocked starts as false. We set it
+		// to true here and calculate the absolute time when
+		// we must return if there's nothing to do.
+		//
+		// The idea of *has_blocked is to avoid unneeded calls
+		// to mythread_condtime_set(), which may do a syscall
+		// depending on the operating system.
+		*has_blocked = true;
+		mythread_condtime_set(wait_abs, &coder->cond, coder->timeout);
+	}
+
+	bool timed_out = false;
+
+	mythread_sync(coder->mutex) {
+		// There are four things that we wait. If one of them
+		// becomes possible, we return.
+		//  - If there is input left, we need to get a free
+		//    worker thread and an output buffer for it.
+		//  - Data ready to be read from the output queue.
+		//  - A worker thread indicates an error.
+		//  - Time out occurs.
+		while ((!has_input || coder->threads_free == NULL
+					|| !lzma_outq_has_buf(&coder->outq))
+				&& !lzma_outq_is_readable(&coder->outq)
+				&& coder->thread_error == LZMA_OK
+				&& !timed_out) {
+			if (coder->timeout != 0)
+				timed_out = mythread_cond_timedwait(
+						&coder->cond, &coder->mutex,
+						wait_abs) != 0;
+			else
+				mythread_cond_wait(&coder->cond,
+						&coder->mutex);
+		}
+	}
+
+	return timed_out;
+}
+
+
+static lzma_ret
+stream_encode_mt(lzma_coder *coder, const lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	switch (coder->sequence) {
+	case SEQ_STREAM_HEADER:
+		lzma_bufcpy(coder->header, &coder->header_pos,
+				sizeof(coder->header),
+				out, out_pos, out_size);
+		if (coder->header_pos < sizeof(coder->header))
+			return LZMA_OK;
+
+		coder->header_pos = 0;
+		coder->sequence = SEQ_BLOCK;
+
+	// Fall through
+
+	case SEQ_BLOCK: {
+		// Initialized to silence warnings.
+		lzma_vli unpadded_size = 0;
+		lzma_vli uncompressed_size = 0;
+		lzma_ret ret = LZMA_OK;
+
+		// These are for wait_for_work().
+		bool has_blocked = false;
+		mythread_condtime wait_abs;
+
+		while (true) {
+			mythread_sync(coder->mutex) {
+				// Check for Block encoder errors.
+				ret = coder->thread_error;
+				if (ret != LZMA_OK) {
+					assert(ret != LZMA_STREAM_END);
+					break;
+				}
+
+				// Try to read compressed data to out[].
+				ret = lzma_outq_read(&coder->outq,
+						out, out_pos, out_size,
+						&unpadded_size,
+						&uncompressed_size);
+			}
+
+			if (ret == LZMA_STREAM_END) {
+				// End of Block. Add it to the Index.
+				ret = lzma_index_append(coder->index,
+						allocator, unpadded_size,
+						uncompressed_size);
+
+				// If we didn't fill the output buffer yet,
+				// try to read more data. Maybe the next
+				// outbuf has been finished already too.
+				if (*out_pos < out_size)
+					continue;
+			}
+
+			if (ret != LZMA_OK) {
+				// coder->thread_error was set or
+				// lzma_index_append() failed.
+				threads_stop(coder, false);
+				return ret;
+			}
+
+			// Try to give uncompressed data to a worker thread.
+			ret = stream_encode_in(coder, allocator,
+					in, in_pos, in_size, action);
+			if (ret != LZMA_OK) {
+				threads_stop(coder, false);
+				return ret;
+			}
+
+			// See if we should wait or return.
+			//
+			// TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER.
+			if (*in_pos == in_size) {
+				// LZMA_RUN: More data is probably coming
+				// so return to let the caller fill the
+				// input buffer.
+				if (action == LZMA_RUN)
+					return LZMA_OK;
+
+				// LZMA_FULL_BARRIER: The same as with
+				// LZMA_RUN but tell the caller that the
+				// barrier was completed.
+				if (action == LZMA_FULL_BARRIER)
+					return LZMA_STREAM_END;
+
+				// Finishing or flushing isn't completed until
+				// all input data has been encoded and copied
+				// to the output buffer.
+				if (lzma_outq_is_empty(&coder->outq)) {
+					// LZMA_FINISH: Continue to encode
+					// the Index field.
+					if (action == LZMA_FINISH)
+						break;
+
+					// LZMA_FULL_FLUSH: Return to tell
+					// the caller that flushing was
+					// completed.
+					if (action == LZMA_FULL_FLUSH)
+						return LZMA_STREAM_END;
+				}
+			}
+
+			// Return if there is no output space left.
+			// This check must be done after testing the input
+			// buffer, because we might want to use a different
+			// return code.
+			if (*out_pos == out_size)
+				return LZMA_OK;
+
+			// Neither in nor out has been used completely.
+			// Wait until there's something we can do.
+			if (wait_for_work(coder, &wait_abs, &has_blocked,
+					*in_pos < in_size))
+				return LZMA_TIMED_OUT;
+		}
+
+		// All Blocks have been encoded and the threads have stopped.
+		// Prepare to encode the Index field.
+		return_if_error(lzma_index_encoder_init(
+				&coder->index_encoder, allocator,
+				coder->index));
+		coder->sequence = SEQ_INDEX;
+
+		// Update the progress info to take the Index and
+		// Stream Footer into account. Those are very fast to encode
+		// so in terms of progress information they can be thought
+		// to be ready to be copied out.
+		coder->progress_out += lzma_index_size(coder->index)
+				+ LZMA_STREAM_HEADER_SIZE;
+	}
+
+	// Fall through
+
+	case SEQ_INDEX: {
+		// Call the Index encoder. It doesn't take any input, so
+		// those pointers can be NULL.
+		const lzma_ret ret = coder->index_encoder.code(
+				coder->index_encoder.coder, allocator,
+				NULL, NULL, 0,
+				out, out_pos, out_size, LZMA_RUN);
+		if (ret != LZMA_STREAM_END)
+			return ret;
+
+		// Encode the Stream Footer into coder->buffer.
+		coder->stream_flags.backward_size
+				= lzma_index_size(coder->index);
+		if (lzma_stream_footer_encode(&coder->stream_flags,
+				coder->header) != LZMA_OK)
+			return LZMA_PROG_ERROR;
+
+		coder->sequence = SEQ_STREAM_FOOTER;
+	}
+
+	// Fall through
+
+	case SEQ_STREAM_FOOTER:
+		lzma_bufcpy(coder->header, &coder->header_pos,
+				sizeof(coder->header),
+				out, out_pos, out_size);
+		return coder->header_pos < sizeof(coder->header)
+				? LZMA_OK : LZMA_STREAM_END;
+	}
+
+	assert(0);
+	return LZMA_PROG_ERROR;
+}
+
+
+static void
+stream_encoder_mt_end(lzma_coder *coder, const lzma_allocator *allocator)
+{
+	// Threads must be killed before the output queue can be freed.
+	threads_end(coder, allocator);
+	lzma_outq_end(&coder->outq, allocator);
+
+	for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
+		lzma_free(coder->filters[i].options, allocator);
+
+	lzma_next_end(&coder->index_encoder, allocator);
+	lzma_index_end(coder->index, allocator);
+
+	mythread_cond_destroy(&coder->cond);
+	mythread_mutex_destroy(&coder->mutex);
+
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+/// Options handling for lzma_stream_encoder_mt_init() and
+/// lzma_stream_encoder_mt_memusage()
+static lzma_ret
+get_options(const lzma_mt *options, lzma_options_easy *opt_easy,
+		const lzma_filter **filters, uint64_t *block_size,
+		uint64_t *outbuf_size_max)
+{
+	// Validate some of the options.
+	if (options == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (options->flags != 0 || options->threads == 0
+			|| options->threads > LZMA_THREADS_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	if (options->filters != NULL) {
+		// Filter chain was given, use it as is.
+		*filters = options->filters;
+	} else {
+		// Use a preset.
+		if (lzma_easy_preset(opt_easy, options->preset))
+			return LZMA_OPTIONS_ERROR;
+
+		*filters = opt_easy->filters;
+	}
+
+	// Block size
+	if (options->block_size > 0) {
+		if (options->block_size > BLOCK_SIZE_MAX)
+			return LZMA_OPTIONS_ERROR;
+
+		*block_size = options->block_size;
+	} else {
+		// Determine the Block size from the filter chain.
+		*block_size = lzma_mt_block_size(*filters);
+		if (*block_size == 0)
+			return LZMA_OPTIONS_ERROR;
+
+		assert(*block_size <= BLOCK_SIZE_MAX);
+	}
+
+	// Calculate the maximum amount output that a single output buffer
+	// may need to hold. This is the same as the maximum total size of
+	// a Block.
+	*outbuf_size_max = lzma_block_buffer_bound64(*block_size);
+	if (*outbuf_size_max == 0)
+		return LZMA_MEM_ERROR;
+
+	return LZMA_OK;
+}
+
+
+static void
+get_progress(lzma_coder *coder, uint64_t *progress_in, uint64_t *progress_out)
+{
+	// Lock coder->mutex to prevent finishing threads from moving their
+	// progress info from the worker_thread structure to lzma_coder.
+	mythread_sync(coder->mutex) {
+		*progress_in = coder->progress_in;
+		*progress_out = coder->progress_out;
+
+		for (size_t i = 0; i < coder->threads_initialized; ++i) {
+			mythread_sync(coder->threads[i].mutex) {
+				*progress_in += coder->threads[i].progress_in;
+				*progress_out += coder->threads[i]
+						.progress_out;
+			}
+		}
+	}
+
+	return;
+}
+
+
+static lzma_ret
+stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_mt *options)
+{
+	lzma_next_coder_init(&stream_encoder_mt_init, next, allocator);
+
+	// Get the filter chain.
+	lzma_options_easy easy;
+	const lzma_filter *filters;
+	uint64_t block_size;
+	uint64_t outbuf_size_max;
+	return_if_error(get_options(options, &easy, &filters,
+			&block_size, &outbuf_size_max));
+
+#if SIZE_MAX < UINT64_MAX
+	if (block_size > SIZE_MAX)
+		return LZMA_MEM_ERROR;
+#endif
+
+	// Validate the filter chain so that we can give an error in this
+	// function instead of delaying it to the first call to lzma_code().
+	// The memory usage calculation verifies the filter chain as
+	// a side effect so we take advatange of that.
+	if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	// Validate the Check ID.
+	if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
+		return LZMA_PROG_ERROR;
+
+	if (!lzma_check_is_supported(options->check))
+		return LZMA_UNSUPPORTED_CHECK;
+
+	// Allocate and initialize the base structure if needed.
+	if (next->coder == NULL) {
+		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (next->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		// For the mutex and condition variable initializations
+		// the error handling has to be done here because
+		// stream_encoder_mt_end() doesn't know if they have
+		// already been initialized or not.
+		if (mythread_mutex_init(&next->coder->mutex)) {
+			lzma_free(next->coder, allocator);
+			next->coder = NULL;
+			return LZMA_MEM_ERROR;
+		}
+
+		if (mythread_cond_init(&next->coder->cond)) {
+			mythread_mutex_destroy(&next->coder->mutex);
+			lzma_free(next->coder, allocator);
+			next->coder = NULL;
+			return LZMA_MEM_ERROR;
+		}
+
+		next->code = &stream_encode_mt;
+		next->end = &stream_encoder_mt_end;
+		next->get_progress = &get_progress;
+// 		next->update = &stream_encoder_mt_update;
+
+		next->coder->filters[0].id = LZMA_VLI_UNKNOWN;
+		next->coder->index_encoder = LZMA_NEXT_CODER_INIT;
+		next->coder->index = NULL;
+		memzero(&next->coder->outq, sizeof(next->coder->outq));
+		next->coder->threads = NULL;
+		next->coder->threads_max = 0;
+		next->coder->threads_initialized = 0;
+	}
+
+	// Basic initializations
+	next->coder->sequence = SEQ_STREAM_HEADER;
+	next->coder->block_size = (size_t)(block_size);
+	next->coder->thread_error = LZMA_OK;
+	next->coder->thr = NULL;
+
+	// Allocate the thread-specific base structures.
+	assert(options->threads > 0);
+	if (next->coder->threads_max != options->threads) {
+		threads_end(next->coder, allocator);
+
+		next->coder->threads = NULL;
+		next->coder->threads_max = 0;
+
+		next->coder->threads_initialized = 0;
+		next->coder->threads_free = NULL;
+
+		next->coder->threads = lzma_alloc(
+				options->threads * sizeof(worker_thread),
+				allocator);
+		if (next->coder->threads == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->coder->threads_max = options->threads;
+	} else {
+		// Reuse the old structures and threads. Tell the running
+		// threads to stop and wait until they have stopped.
+		threads_stop(next->coder, true);
+	}
+
+	// Output queue
+	return_if_error(lzma_outq_init(&next->coder->outq, allocator,
+			outbuf_size_max, options->threads));
+
+	// Timeout
+	next->coder->timeout = options->timeout;
+
+	// Free the old filter chain and copy the new one.
+	for (size_t i = 0; next->coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
+		lzma_free(next->coder->filters[i].options, allocator);
+
+	return_if_error(lzma_filters_copy(
+			filters, next->coder->filters, allocator));
+
+	// Index
+	lzma_index_end(next->coder->index, allocator);
+	next->coder->index = lzma_index_init(allocator);
+	if (next->coder->index == NULL)
+		return LZMA_MEM_ERROR;
+
+	// Stream Header
+	next->coder->stream_flags.version = 0;
+	next->coder->stream_flags.check = options->check;
+	return_if_error(lzma_stream_header_encode(
+			&next->coder->stream_flags, next->coder->header));
+
+	next->coder->header_pos = 0;
+
+	// Progress info
+	next->coder->progress_in = 0;
+	next->coder->progress_out = LZMA_STREAM_HEADER_SIZE;
+
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options)
+{
+	lzma_next_strm_init(stream_encoder_mt_init, strm, options);
+
+	strm->internal->supported_actions[LZMA_RUN] = true;
+// 	strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
+	strm->internal->supported_actions[LZMA_FULL_FLUSH] = true;
+	strm->internal->supported_actions[LZMA_FULL_BARRIER] = true;
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+
+	return LZMA_OK;
+}
+
+
+// This function name is a monster but it's consistent with the older
+// monster names. :-( 31 chars is the max that C99 requires so in that
+// sense it's not too long. ;-)
+extern LZMA_API(uint64_t)
+lzma_stream_encoder_mt_memusage(const lzma_mt *options)
+{
+	lzma_options_easy easy;
+	const lzma_filter *filters;
+	uint64_t block_size;
+	uint64_t outbuf_size_max;
+
+	if (get_options(options, &easy, &filters, &block_size,
+			&outbuf_size_max) != LZMA_OK)
+		return UINT64_MAX;
+
+	// Memory usage of the input buffers
+	const uint64_t inbuf_memusage = options->threads * block_size;
+
+	// Memory usage of the filter encoders
+	uint64_t filters_memusage = lzma_raw_encoder_memusage(filters);
+	if (filters_memusage == UINT64_MAX)
+		return UINT64_MAX;
+
+	filters_memusage *= options->threads;
+
+	// Memory usage of the output queue
+	const uint64_t outq_memusage = lzma_outq_memusage(
+			outbuf_size_max, options->threads);
+	if (outq_memusage == UINT64_MAX)
+		return UINT64_MAX;
+
+	// Sum them with overflow checking.
+	uint64_t total_memusage = LZMA_MEMUSAGE_BASE + sizeof(lzma_coder)
+			+ options->threads * sizeof(worker_thread);
+
+	if (UINT64_MAX - total_memusage < inbuf_memusage)
+		return UINT64_MAX;
+
+	total_memusage += inbuf_memusage;
+
+	if (UINT64_MAX - total_memusage < filters_memusage)
+		return UINT64_MAX;
+
+	total_memusage += filters_memusage;
+
+	if (UINT64_MAX - total_memusage < outq_memusage)
+		return UINT64_MAX;
+
+	return total_memusage + outq_memusage;
+}
diff --git a/src/liblzma/delta/delta_common.c b/src/liblzma/delta/delta_common.c
index 930ad215131c..13dd46828d87 100644
--- a/src/liblzma/delta/delta_common.c
+++ b/src/liblzma/delta/delta_common.c
@@ -15,7 +15,7 @@
 
 
 static void
-delta_coder_end(lzma_coder *coder, lzma_allocator *allocator)
+delta_coder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder, allocator);
@@ -24,7 +24,7 @@ delta_coder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 extern lzma_ret
-lzma_delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_delta_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	// Allocate memory for the decoder if needed.
diff --git a/src/liblzma/delta/delta_decoder.c b/src/liblzma/delta/delta_decoder.c
index 2cf60d5bdc7c..726d023991c0 100644
--- a/src/liblzma/delta/delta_decoder.c
+++ b/src/liblzma/delta/delta_decoder.c
@@ -27,7 +27,7 @@ decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size)
 
 
 static lzma_ret
-delta_decode(lzma_coder *coder, lzma_allocator *allocator,
+delta_decode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -47,7 +47,7 @@ delta_decode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_delta_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_delta_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	next->code = &delta_decode;
@@ -56,7 +56,7 @@ lzma_delta_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_delta_props_decode(void **options, lzma_allocator *allocator,
+lzma_delta_props_decode(void **options, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size)
 {
 	if (props_size != 1)
diff --git a/src/liblzma/delta/delta_decoder.h b/src/liblzma/delta/delta_decoder.h
index ae89acc59f8c..ad89cc659764 100644
--- a/src/liblzma/delta/delta_decoder.h
+++ b/src/liblzma/delta/delta_decoder.h
@@ -16,10 +16,11 @@
 #include "delta_common.h"
 
 extern lzma_ret lzma_delta_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern lzma_ret lzma_delta_props_decode(
-		void **options, lzma_allocator *allocator,
+		void **options, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size);
 
 #endif
diff --git a/src/liblzma/delta/delta_encoder.c b/src/liblzma/delta/delta_encoder.c
index 15c7951e104a..5a842636ba1b 100644
--- a/src/liblzma/delta/delta_encoder.c
+++ b/src/liblzma/delta/delta_encoder.c
@@ -49,7 +49,7 @@ encode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size)
 
 
 static lzma_ret
-delta_encode(lzma_coder *coder, lzma_allocator *allocator,
+delta_encode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -84,7 +84,7 @@ delta_encode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static lzma_ret
-delta_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
+delta_encoder_update(lzma_coder *coder, const lzma_allocator *allocator,
 		const lzma_filter *filters_null lzma_attribute((__unused__)),
 		const lzma_filter *reversed_filters)
 {
@@ -97,7 +97,7 @@ delta_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_delta_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_delta_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	next->code = &delta_encode;
diff --git a/src/liblzma/delta/delta_encoder.h b/src/liblzma/delta/delta_encoder.h
index a447862f205b..4ab984785171 100644
--- a/src/liblzma/delta/delta_encoder.h
+++ b/src/liblzma/delta/delta_encoder.h
@@ -16,7 +16,8 @@
 #include "delta_common.h"
 
 extern lzma_ret lzma_delta_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern lzma_ret lzma_delta_props_encode(const void *options, uint8_t *out);
 
diff --git a/src/liblzma/delta/delta_private.h b/src/liblzma/delta/delta_private.h
index 62b7fed86e45..46ce0c645091 100644
--- a/src/liblzma/delta/delta_private.h
+++ b/src/liblzma/delta/delta_private.h
@@ -31,7 +31,7 @@ struct lzma_coder_s {
 
 
 extern lzma_ret lzma_delta_coder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters);
 
 #endif
diff --git a/src/liblzma/liblzma.map b/src/liblzma/liblzma.map
new file mode 100644
index 000000000000..f53a4ea30a3c
--- /dev/null
+++ b/src/liblzma/liblzma.map
@@ -0,0 +1,108 @@
+XZ_5.0 {
+global:
+	lzma_alone_decoder;
+	lzma_alone_encoder;
+	lzma_auto_decoder;
+	lzma_block_buffer_bound;
+	lzma_block_buffer_decode;
+	lzma_block_buffer_encode;
+	lzma_block_compressed_size;
+	lzma_block_decoder;
+	lzma_block_encoder;
+	lzma_block_header_decode;
+	lzma_block_header_encode;
+	lzma_block_header_size;
+	lzma_block_total_size;
+	lzma_block_unpadded_size;
+	lzma_check_is_supported;
+	lzma_check_size;
+	lzma_code;
+	lzma_crc32;
+	lzma_crc64;
+	lzma_easy_buffer_encode;
+	lzma_easy_decoder_memusage;
+	lzma_easy_encoder;
+	lzma_easy_encoder_memusage;
+	lzma_end;
+	lzma_filter_decoder_is_supported;
+	lzma_filter_encoder_is_supported;
+	lzma_filter_flags_decode;
+	lzma_filter_flags_encode;
+	lzma_filter_flags_size;
+	lzma_filters_copy;
+	lzma_filters_update;
+	lzma_get_check;
+	lzma_index_append;
+	lzma_index_block_count;
+	lzma_index_buffer_decode;
+	lzma_index_buffer_encode;
+	lzma_index_cat;
+	lzma_index_checks;
+	lzma_index_decoder;
+	lzma_index_dup;
+	lzma_index_encoder;
+	lzma_index_end;
+	lzma_index_file_size;
+	lzma_index_hash_append;
+	lzma_index_hash_decode;
+	lzma_index_hash_end;
+	lzma_index_hash_init;
+	lzma_index_hash_size;
+	lzma_index_init;
+	lzma_index_iter_init;
+	lzma_index_iter_locate;
+	lzma_index_iter_next;
+	lzma_index_iter_rewind;
+	lzma_index_memusage;
+	lzma_index_memused;
+	lzma_index_size;
+	lzma_index_stream_count;
+	lzma_index_stream_flags;
+	lzma_index_stream_padding;
+	lzma_index_stream_size;
+	lzma_index_total_size;
+	lzma_index_uncompressed_size;
+	lzma_lzma_preset;
+	lzma_memlimit_get;
+	lzma_memlimit_set;
+	lzma_memusage;
+	lzma_mf_is_supported;
+	lzma_mode_is_supported;
+	lzma_physmem;
+	lzma_properties_decode;
+	lzma_properties_encode;
+	lzma_properties_size;
+	lzma_raw_buffer_decode;
+	lzma_raw_buffer_encode;
+	lzma_raw_decoder;
+	lzma_raw_decoder_memusage;
+	lzma_raw_encoder;
+	lzma_raw_encoder_memusage;
+	lzma_stream_buffer_bound;
+	lzma_stream_buffer_decode;
+	lzma_stream_buffer_encode;
+	lzma_stream_decoder;
+	lzma_stream_encoder;
+	lzma_stream_flags_compare;
+	lzma_stream_footer_decode;
+	lzma_stream_footer_encode;
+	lzma_stream_header_decode;
+	lzma_stream_header_encode;
+	lzma_version_number;
+	lzma_version_string;
+	lzma_vli_decode;
+	lzma_vli_encode;
+	lzma_vli_size;
+};
+
+XZ_5.2 {
+global:
+	lzma_block_uncomp_encode;
+	lzma_cputhreads;
+	lzma_get_progress;
+	lzma_stream_encoder_mt;
+	lzma_stream_encoder_mt_memusage;
+
+local:
+	*;
+} XZ_5.0;
diff --git a/src/liblzma/liblzma.pc.in b/src/liblzma/liblzma.pc.in
index 7f11f1a2009e..9fa489115a0a 100644
--- a/src/liblzma/liblzma.pc.in
+++ b/src/liblzma/liblzma.pc.in
@@ -16,4 +16,4 @@ URL: @PACKAGE_URL@
 Version: @PACKAGE_VERSION@
 Cflags: -I${includedir}
 Libs: -L${libdir} -llzma
-Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
+Libs.private: @PTHREAD_CFLAGS@ @LIBS@
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index d74085cf4471..2328a8e73f07 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -126,7 +126,7 @@ decode_buffer(lzma_coder *coder,
 
 static lzma_ret
 lz_decode(lzma_coder *coder,
-		lzma_allocator *allocator lzma_attribute((__unused__)),
+		const lzma_allocator *allocator lzma_attribute((__unused__)),
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size,
@@ -184,7 +184,7 @@ lz_decode(lzma_coder *coder,
 
 
 static void
-lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+lz_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder->dict.buf, allocator);
@@ -200,10 +200,10 @@ lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 extern lzma_ret
-lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
-			lzma_allocator *allocator, const void *options,
+			const lzma_allocator *allocator, const void *options,
 			lzma_lz_options *lz_options))
 {
 	// Allocate the base structure if it isn't already allocated.
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
index 7266e803165f..277900afb714 100644
--- a/src/liblzma/lz/lz_decoder.h
+++ b/src/liblzma/lz/lz_decoder.h
@@ -67,7 +67,7 @@ typedef struct {
 			lzma_vli uncompressed_size);
 
 	/// Free allocated resources
-	void (*end)(lzma_coder *coder, lzma_allocator *allocator);
+	void (*end)(lzma_coder *coder, const lzma_allocator *allocator);
 
 } lzma_lz_decoder;
 
@@ -83,9 +83,10 @@ typedef struct {
 
 
 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters,
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
-			lzma_allocator *allocator, const void *options,
+			const lzma_allocator *allocator, const void *options,
 			lzma_lz_options *lz_options));
 
 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c
index e2406965882c..2033844f5d35 100644
--- a/src/liblzma/lz/lz_encoder.c
+++ b/src/liblzma/lz/lz_encoder.c
@@ -20,6 +20,8 @@
 #	include "lz_encoder_hash_table.h"
 #endif
 
+#include "memcmplen.h"
+
 
 struct lzma_coder_s {
 	/// LZ-based encoder e.g. LZMA
@@ -76,8 +78,9 @@ move_window(lzma_mf *mf)
 /// This function must not be called once it has returned LZMA_STREAM_END.
 ///
 static lzma_ret
-fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
-		size_t *in_pos, size_t in_size, lzma_action action)
+fill_window(lzma_coder *coder, const lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos, size_t in_size,
+		lzma_action action)
 {
 	assert(coder->mf.read_pos <= coder->mf.write_pos);
 
@@ -148,7 +151,7 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
 
 
 static lzma_ret
-lz_encode(lzma_coder *coder, lzma_allocator *allocator,
+lz_encode(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size,
 		uint8_t *restrict out, size_t *restrict out_pos,
@@ -179,7 +182,7 @@ lz_encode(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static bool
-lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
+lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
 		const lzma_lz_options *lz_options)
 {
 	// For now, the dictionary size is limited to 1.5 GiB. This may grow
@@ -325,25 +328,22 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
 		hs += HASH_4_SIZE;
 */
 
-	// If the above code calculating hs is modified, make sure that
-	// this assertion stays valid (UINT32_MAX / 5 is not strictly the
-	// exact limit). If it doesn't, you need to calculate that
-	// hash_size_sum + sons_count cannot overflow.
-	assert(hs < UINT32_MAX / 5);
-
-	const uint32_t old_count = mf->hash_size_sum + mf->sons_count;
-	mf->hash_size_sum = hs;
+	const uint32_t old_hash_count = mf->hash_count;
+	const uint32_t old_sons_count = mf->sons_count;
+	mf->hash_count = hs;
 	mf->sons_count = mf->cyclic_size;
 	if (is_bt)
 		mf->sons_count *= 2;
 
-	const uint32_t new_count = mf->hash_size_sum + mf->sons_count;
-
 	// Deallocate the old hash array if it exists and has different size
 	// than what is needed now.
-	if (old_count != new_count) {
+	if (old_hash_count != mf->hash_count
+			|| old_sons_count != mf->sons_count) {
 		lzma_free(mf->hash, allocator);
 		mf->hash = NULL;
+
+		lzma_free(mf->son, allocator);
+		mf->son = NULL;
 	}
 
 	// Maximum number of match finder cycles
@@ -360,14 +360,23 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
 
 
 static bool
-lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
+lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator,
 		const lzma_lz_options *lz_options)
 {
 	// Allocate the history buffer.
 	if (mf->buffer == NULL) {
-		mf->buffer = lzma_alloc(mf->size, allocator);
+		// lzma_memcmplen() is used for the dictionary buffer
+		// so we need to allocate a few extra bytes to prevent
+		// it from reading past the end of the buffer.
+		mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,
+				allocator);
 		if (mf->buffer == NULL)
 			return true;
+
+		// Keep Valgrind happy with lzma_memcmplen() and initialize
+		// the extra bytes whose value may get read but which will
+		// effectively get ignored.
+		memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);
 	}
 
 	// Use cyclic_size as initial mf->offset. This allows
@@ -381,43 +390,48 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
 	mf->write_pos = 0;
 	mf->pending = 0;
 
-	// Allocate match finder's hash array.
-	const size_t alloc_count = mf->hash_size_sum + mf->sons_count;
-
 #if UINT32_MAX >= SIZE_MAX / 4
 	// Check for integer overflow. (Huge dictionaries are not
 	// possible on 32-bit CPU.)
-	if (alloc_count > SIZE_MAX / sizeof(uint32_t))
+	if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
+			|| mf->sons_count > SIZE_MAX / sizeof(uint32_t))
 		return true;
 #endif
 
+	// Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
+	// is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
+	//
+	// We don't need to initialize mf->son, but not doing that may
+	// make Valgrind complain in normalization (see normalize() in
+	// lz_encoder_mf.c). Skipping the initialization is *very* good
+	// when big dictionary is used but only small amount of data gets
+	// actually compressed: most of the mf->son won't get actually
+	// allocated by the kernel, so we avoid wasting RAM and improve
+	// initialization speed a lot.
 	if (mf->hash == NULL) {
-		mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t),
+		mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),
+				allocator);
+		mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
 				allocator);
-		if (mf->hash == NULL)
-			return true;
-	}
 
-	mf->son = mf->hash + mf->hash_size_sum;
-	mf->cyclic_pos = 0;
+		if (mf->hash == NULL || mf->son == NULL) {
+			lzma_free(mf->hash, allocator);
+			mf->hash = NULL;
+
+			lzma_free(mf->son, allocator);
+			mf->son = NULL;
 
-	// Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we
-	// can use memset().
+			return true;
+		}
+	} else {
 /*
-	for (uint32_t i = 0; i < hash_size_sum; ++i)
-		mf->hash[i] = EMPTY_HASH_VALUE;
+		for (uint32_t i = 0; i < mf->hash_count; ++i)
+			mf->hash[i] = EMPTY_HASH_VALUE;
 */
-	memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t));
+		memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
+	}
 
-	// We don't need to initialize mf->son, but not doing that will
-	// make Valgrind complain in normalization (see normalize() in
-	// lz_encoder_mf.c).
-	//
-	// Skipping this initialization is *very* good when big dictionary is
-	// used but only small amount of data gets actually compressed: most
-	// of the mf->hash won't get actually allocated by the kernel, so
-	// we avoid wasting RAM and improve initialization speed a lot.
-	//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
+	mf->cyclic_pos = 0;
 
 	// Handle preset dictionary.
 	if (lz_options->preset_dict != NULL
@@ -445,7 +459,8 @@ lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
 	lzma_mf mf = {
 		.buffer = NULL,
 		.hash = NULL,
-		.hash_size_sum = 0,
+		.son = NULL,
+		.hash_count = 0,
 		.sons_count = 0,
 	};
 
@@ -454,17 +469,17 @@ lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
 		return UINT64_MAX;
 
 	// Calculate the memory usage.
-	return (uint64_t)(mf.hash_size_sum + mf.sons_count)
-				* sizeof(uint32_t)
-			+ (uint64_t)(mf.size) + sizeof(lzma_coder);
+	return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
+			+ mf.size + sizeof(lzma_coder);
 }
 
 
 static void
-lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+lz_encoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 
+	lzma_free(coder->mf.son, allocator);
 	lzma_free(coder->mf.hash, allocator);
 	lzma_free(coder->mf.buffer, allocator);
 
@@ -479,7 +494,7 @@ lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 static lzma_ret
-lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
+lz_encoder_update(lzma_coder *coder, const lzma_allocator *allocator,
 		const lzma_filter *filters_null lzma_attribute((__unused__)),
 		const lzma_filter *reversed_filters)
 {
@@ -495,10 +510,10 @@ lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
-			lzma_allocator *allocator, const void *options,
+			const lzma_allocator *allocator, const void *options,
 			lzma_lz_options *lz_options))
 {
 #ifdef HAVE_SMALL
@@ -522,7 +537,8 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 		next->coder->mf.buffer = NULL;
 		next->coder->mf.hash = NULL;
-		next->coder->mf.hash_size_sum = 0;
+		next->coder->mf.son = NULL;
+		next->coder->mf.hash_count = 0;
 		next->coder->mf.sons_count = 0;
 
 		next->coder->next = LZMA_NEXT_CODER_INIT;
diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h
index 741c4532280d..dad9c6b29980 100644
--- a/src/liblzma/lz/lz_encoder.h
+++ b/src/liblzma/lz/lz_encoder.h
@@ -119,7 +119,7 @@ struct lzma_mf_s {
 	lzma_action action;
 
 	/// Number of elements in hash[]
-	uint32_t hash_size_sum;
+	uint32_t hash_count;
 
 	/// Number of elements in son[]
 	uint32_t sons_count;
@@ -199,7 +199,7 @@ typedef struct {
 			size_t *restrict out_pos, size_t out_size);
 
 	/// Free allocated resources
-	void (*end)(lzma_coder *coder, lzma_allocator *allocator);
+	void (*end)(lzma_coder *coder, const lzma_allocator *allocator);
 
 	/// Update the options in the middle of the encoding.
 	lzma_ret (*options_update)(lzma_coder *coder,
@@ -296,10 +296,10 @@ mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size,
 
 
 extern lzma_ret lzma_lz_encoder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
-			lzma_allocator *allocator, const void *options,
+			const lzma_allocator *allocator, const void *options,
 			lzma_lz_options *lz_options));
 
 
diff --git a/src/liblzma/lz/lz_encoder_mf.c b/src/liblzma/lz/lz_encoder_mf.c
index f82a1c1d2959..78520779f1b6 100644
--- a/src/liblzma/lz/lz_encoder_mf.c
+++ b/src/liblzma/lz/lz_encoder_mf.c
@@ -13,6 +13,7 @@
 
 #include "lz_encoder.h"
 #include "lz_encoder_hash.h"
+#include "memcmplen.h"
 
 
 /// \brief      Find matches starting from the current byte
@@ -65,9 +66,7 @@ lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches)
 			// here because the match distances are zero based.
 			const uint8_t *p2 = p1 - matches[count - 1].dist - 1;
 
-			while (len_best < limit
-					&& p1[len_best] == p2[len_best])
-				++len_best;
+			len_best = lzma_memcmplen(p1, p2, len_best, limit);
 		}
 	}
 
@@ -116,24 +115,27 @@ normalize(lzma_mf *mf)
 			= (MUST_NORMALIZE_POS - mf->cyclic_size);
 				// & (~(UINT32_C(1) << 10) - 1);
 
-	const uint32_t count = mf->hash_size_sum + mf->sons_count;
-	uint32_t *hash = mf->hash;
-
-	for (uint32_t i = 0; i < count; ++i) {
+	for (uint32_t i = 0; i < mf->hash_count; ++i) {
 		// If the distance is greater than the dictionary size,
 		// we can simply mark the hash element as empty.
+		if (mf->hash[i] <= subvalue)
+			mf->hash[i] = EMPTY_HASH_VALUE;
+		else
+			mf->hash[i] -= subvalue;
+	}
+
+	for (uint32_t i = 0; i < mf->sons_count; ++i) {
+		// Do the same for mf->son.
 		//
-		// NOTE: Only the first mf->hash_size_sum elements are
-		// initialized for sure. There may be uninitialized elements
-		// in mf->son. Since we go through both mf->hash and
-		// mf->son here in normalization, Valgrind may complain
-		// that the "if" below depends on uninitialized value. In
-		// this case it is safe to ignore the warning. See also the
-		// comments in lz_encoder_init() in lz_encoder.c.
-		if (hash[i] <= subvalue)
-			hash[i] = EMPTY_HASH_VALUE;
+		// NOTE: There may be uninitialized elements in mf->son.
+		// Valgrind may complain that the "if" below depends on
+		// an uninitialized value. In this case it is safe to ignore
+		// the warning. See also the comments in lz_encoder_init()
+		// in lz_encoder.c.
+		if (mf->son[i] <= subvalue)
+			mf->son[i] = EMPTY_HASH_VALUE;
 		else
-			hash[i] -= subvalue;
+			mf->son[i] -= subvalue;
 	}
 
 	// Update offset to match the new locations.
@@ -269,10 +271,7 @@ hc_find_func(
 				+ (delta > cyclic_pos ? cyclic_size : 0)];
 
 		if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {
-			uint32_t len = 0;
-			while (++len != len_limit)
-				if (pb[len] != cur[len])
-					break;
+			uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit);
 
 			if (len_best < len) {
 				len_best = len;
@@ -318,9 +317,8 @@ lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches)
 	uint32_t len_best = 2;
 
 	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
-		for ( ; len_best != len_limit; ++len_best)
-			if (*(cur + len_best - delta2) != cur[len_best])
-				break;
+		len_best = lzma_memcmplen(cur - delta2, cur,
+				len_best, len_limit);
 
 		matches[0].len = len_best;
 		matches[0].dist = delta2 - 1;
@@ -397,9 +395,8 @@ lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches)
 	}
 
 	if (matches_count != 0) {
-		for ( ; len_best != len_limit; ++len_best)
-			if (*(cur + len_best - delta2) != cur[len_best])
-				break;
+		len_best = lzma_memcmplen(cur - delta2, cur,
+				len_best, len_limit);
 
 		matches[matches_count - 1].len = len_best;
 
@@ -484,9 +481,7 @@ bt_find_func(
 		uint32_t len = my_min(len0, len1);
 
 		if (pb[len] == cur[len]) {
-			while (++len != len_limit)
-				if (pb[len] != cur[len])
-					break;
+			len = lzma_memcmplen(pb, cur, len + 1, len_limit);
 
 			if (len_best < len) {
 				len_best = len;
@@ -549,9 +544,7 @@ bt_skip_func(
 		uint32_t len = my_min(len0, len1);
 
 		if (pb[len] == cur[len]) {
-			while (++len != len_limit)
-				if (pb[len] != cur[len])
-					break;
+			len = lzma_memcmplen(pb, cur, len + 1, len_limit);
 
 			if (len == len_limit) {
 				*ptr1 = pair[0];
@@ -639,9 +632,8 @@ lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches)
 	uint32_t len_best = 2;
 
 	if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
-		for ( ; len_best != len_limit; ++len_best)
-			if (*(cur + len_best - delta2) != cur[len_best])
-				break;
+		len_best = lzma_memcmplen(
+				cur, cur - delta2, len_best, len_limit);
 
 		matches[0].len = len_best;
 		matches[0].dist = delta2 - 1;
@@ -712,9 +704,8 @@ lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches)
 	}
 
 	if (matches_count != 0) {
-		for ( ; len_best != len_limit; ++len_best)
-			if (*(cur + len_best - delta2) != cur[len_best])
-				break;
+		len_best = lzma_memcmplen(
+				cur, cur - delta2, len_best, len_limit);
 
 		matches[matches_count - 1].len = len_best;
 
diff --git a/src/liblzma/lzma/fastpos.h b/src/liblzma/lzma/fastpos.h
index 4aea23181ab6..a3feea58d8d3 100644
--- a/src/liblzma/lzma/fastpos.h
+++ b/src/liblzma/lzma/fastpos.h
@@ -14,15 +14,15 @@
 #ifndef LZMA_FASTPOS_H
 #define LZMA_FASTPOS_H
 
-// LZMA encodes match distances (positions) by storing the highest two
-// bits using a six-bit value [0, 63], and then the missing lower bits.
-// Dictionary size is also stored using this encoding in the new .lzma
+// LZMA encodes match distances by storing the highest two bits using
+// a six-bit value [0, 63], and then the missing lower bits.
+// Dictionary size is also stored using this encoding in the .xz
 // file format header.
 //
 // fastpos.h provides a way to quickly find out the correct six-bit
 // values. The following table gives some examples of this encoding:
 //
-//      pos   return
+//     dist   return
 //       0       0
 //       1       1
 //       2       2
@@ -48,10 +48,10 @@
 // Provided functions or macros
 // ----------------------------
 //
-// get_pos_slot(pos) is the basic version. get_pos_slot_2(pos)
-// assumes that pos >= FULL_DISTANCES, thus the result is at least
-// FULL_DISTANCES_BITS * 2. Using get_pos_slot(pos) instead of
-// get_pos_slot_2(pos) would give the same result, but get_pos_slot_2(pos)
+// get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
+// assumes that dist >= FULL_DISTANCES, thus the result is at least
+// FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
+// get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
 // should be tiny bit faster due to the assumption being made.
 //
 //
@@ -76,13 +76,14 @@
 // slightly faster, but sometimes it is a lot slower.
 
 #ifdef HAVE_SMALL
-#	define get_pos_slot(pos) ((pos) <= 4 ? (pos) : get_pos_slot_2(pos))
+#	define get_dist_slot(dist) \
+		((dist) <= 4 ? (dist) : get_dist_slot_2(dist))
 
 static inline uint32_t
-get_pos_slot_2(uint32_t pos)
+get_dist_slot_2(uint32_t dist)
 {
-	const uint32_t i = bsr32(pos);
-	return (i + i) + ((pos >> (i - 1)) & 1);
+	const uint32_t i = bsr32(dist);
+	return (i + i) + ((dist >> (i - 1)) & 1);
 }
 
 
@@ -99,39 +100,39 @@ extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
 #define fastpos_limit(extra, n) \
 	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
 
-#define fastpos_result(pos, extra, n) \
-	lzma_fastpos[(pos) >> fastpos_shift(extra, n)] \
+#define fastpos_result(dist, extra, n) \
+	lzma_fastpos[(dist) >> fastpos_shift(extra, n)] \
 			+ 2 * fastpos_shift(extra, n)
 
 
 static inline uint32_t
-get_pos_slot(uint32_t pos)
+get_dist_slot(uint32_t dist)
 {
 	// If it is small enough, we can pick the result directly from
 	// the precalculated table.
-	if (pos < fastpos_limit(0, 0))
-		return lzma_fastpos[pos];
+	if (dist < fastpos_limit(0, 0))
+		return lzma_fastpos[dist];
 
-	if (pos < fastpos_limit(0, 1))
-		return fastpos_result(pos, 0, 1);
+	if (dist < fastpos_limit(0, 1))
+		return fastpos_result(dist, 0, 1);
 
-	return fastpos_result(pos, 0, 2);
+	return fastpos_result(dist, 0, 2);
 }
 
 
 #ifdef FULL_DISTANCES_BITS
 static inline uint32_t
-get_pos_slot_2(uint32_t pos)
+get_dist_slot_2(uint32_t dist)
 {
-	assert(pos >= FULL_DISTANCES);
+	assert(dist >= FULL_DISTANCES);
 
-	if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
-		return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0);
+	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
+		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
 
-	if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
-		return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 1);
+	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
+		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
 
-	return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 2);
+	return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
 }
 #endif
 
diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c
index 3e42575d5b80..84982d2c4286 100644
--- a/src/liblzma/lzma/lzma2_decoder.c
+++ b/src/liblzma/lzma/lzma2_decoder.c
@@ -209,7 +209,7 @@ lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict,
 
 
 static void
-lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+lzma2_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	assert(coder->lzma.end == NULL);
 	lzma_free(coder->lzma.coder, allocator);
@@ -221,7 +221,7 @@ lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 static lzma_ret
-lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
+lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
 		const void *opt, lzma_lz_options *lz_options)
 {
 	if (lz->coder == NULL) {
@@ -248,7 +248,7 @@ lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	// LZMA2 can only be the last filter in the chain. This is enforced
@@ -269,7 +269,7 @@ lzma_lzma2_decoder_memusage(const void *options)
 
 
 extern lzma_ret
-lzma_lzma2_props_decode(void **options, lzma_allocator *allocator,
+lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size)
 {
 	if (props_size != 1)
diff --git a/src/liblzma/lzma/lzma2_decoder.h b/src/liblzma/lzma/lzma2_decoder.h
index fac4ac487b07..ef2dcbfa76f0 100644
--- a/src/liblzma/lzma/lzma2_decoder.h
+++ b/src/liblzma/lzma/lzma2_decoder.h
@@ -17,12 +17,13 @@
 #include "common.h"
 
 extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern uint64_t lzma_lzma2_decoder_memusage(const void *options);
 
 extern lzma_ret lzma_lzma2_props_decode(
-		void **options, lzma_allocator *allocator,
+		void **options, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size);
 
 #endif
diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c
index 992720ca6d5a..b6756bfc2b1c 100644
--- a/src/liblzma/lzma/lzma2_encoder.c
+++ b/src/liblzma/lzma/lzma2_encoder.c
@@ -262,7 +262,7 @@ lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
 
 
 static void
-lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+lzma2_encoder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_free(coder->lzma, allocator);
 	lzma_free(coder, allocator);
@@ -304,7 +304,7 @@ lzma2_encoder_options_update(lzma_coder *coder, const lzma_filter *filter)
 
 
 static lzma_ret
-lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
+lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
 		const void *options, lzma_lz_options *lz_options)
 {
 	if (options == NULL)
@@ -349,7 +349,7 @@ lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_lzma2_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	return lzma_lz_encoder_init(
@@ -387,7 +387,17 @@ lzma_lzma2_props_encode(const void *options, uint8_t *out)
 	if (d == UINT32_MAX)
 		out[0] = 40;
 	else
-		out[0] = get_pos_slot(d + 1) - 24;
+		out[0] = get_dist_slot(d + 1) - 24;
 
 	return LZMA_OK;
 }
+
+
+extern uint64_t
+lzma_lzma2_block_size(const void *options)
+{
+	const lzma_options_lzma *const opt = options;
+
+	// Use at least 1 MiB to keep compression ratio better.
+	return my_max((uint64_t)(opt->dict_size) * 3, UINT64_C(1) << 20);
+}
diff --git a/src/liblzma/lzma/lzma2_encoder.h b/src/liblzma/lzma/lzma2_encoder.h
index ca19ef4691cc..515f1839347a 100644
--- a/src/liblzma/lzma/lzma2_encoder.h
+++ b/src/liblzma/lzma/lzma2_encoder.h
@@ -31,11 +31,13 @@
 
 
 extern lzma_ret lzma_lzma2_encoder_init(
-		lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters);
 
 extern uint64_t lzma_lzma2_encoder_memusage(const void *options);
 
 extern lzma_ret lzma_lzma2_props_encode(const void *options, uint8_t *out);
 
+extern uint64_t lzma_lzma2_block_size(const void *options);
+
 #endif
diff --git a/src/liblzma/lzma/lzma_common.h b/src/liblzma/lzma/lzma_common.h
index e31e285f9a52..09efd3872917 100644
--- a/src/liblzma/lzma/lzma_common.h
+++ b/src/liblzma/lzma/lzma_common.h
@@ -171,53 +171,54 @@ literal_init(probability (*probs)[LITERAL_CODER_SIZE],
 // Match distance //
 ////////////////////
 
-// Different set of probabilities is used for match distances that have very
+// Different sets of probabilities are used for match distances that have very
 // short match length: Lengths of 2, 3, and 4 bytes have a separate set of
 // probabilities for each length. The matches with longer length use a shared
 // set of probabilities.
-#define LEN_TO_POS_STATES 4
+#define DIST_STATES 4
 
 // Macro to get the index of the appropriate probability array.
-#define get_len_to_pos_state(len) \
-	((len) < LEN_TO_POS_STATES + MATCH_LEN_MIN \
+#define get_dist_state(len) \
+	((len) < DIST_STATES + MATCH_LEN_MIN \
 		? (len) - MATCH_LEN_MIN \
-		: LEN_TO_POS_STATES - 1)
+		: DIST_STATES - 1)
 
-// The highest two bits of a match distance (pos slot) are encoded using six
-// bits. See fastpos.h for more explanation.
-#define POS_SLOT_BITS 6
-#define POS_SLOTS (1 << POS_SLOT_BITS)
+// The highest two bits of a match distance (distance slot) are encoded
+// using six bits. See fastpos.h for more explanation.
+#define DIST_SLOT_BITS 6
+#define DIST_SLOTS (1 << DIST_SLOT_BITS)
 
 // Match distances up to 127 are fully encoded using probabilities. Since
-// the highest two bits (pos slot) are always encoded using six bits, the
-// distances 0-3 don't need any additional bits to encode, since the pos
-// slot itself is the same as the actual distance. START_POS_MODEL_INDEX
-// indicates the first pos slot where at least one additional bit is needed.
-#define START_POS_MODEL_INDEX 4
+// the highest two bits (distance slot) are always encoded using six bits,
+// the distances 0-3 don't need any additional bits to encode, since the
+// distance slot itself is the same as the actual distance. DIST_MODEL_START
+// indicates the first distance slot where at least one additional bit is
+// needed.
+#define DIST_MODEL_START 4
 
 // Match distances greater than 127 are encoded in three pieces:
-//   - pos slot: the highest two bits
+//   - distance slot: the highest two bits
 //   - direct bits: 2-26 bits below the highest two bits
 //   - alignment bits: four lowest bits
 //
 // Direct bits don't use any probabilities.
 //
-// The pos slot value of 14 is for distances 128-191 (see the table in
+// The distance slot value of 14 is for distances 128-191 (see the table in
 // fastpos.h to understand why).
-#define END_POS_MODEL_INDEX 14
+#define DIST_MODEL_END 14
 
-// Pos slots that indicate a distance <= 127.
-#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2)
+// Distance slots that indicate a distance <= 127.
+#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
 #define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
 
 // For match distances greater than 127, only the highest two bits and the
 // lowest four bits (alignment) is encoded using probabilities.
 #define ALIGN_BITS 4
-#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS)
-#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1)
+#define ALIGN_SIZE (1 << ALIGN_BITS)
+#define ALIGN_MASK (ALIGN_SIZE - 1)
 
 // LZMA remembers the four most recent match distances. Reusing these distances
 // tends to take less space than re-encoding the actual distance value.
-#define REP_DISTANCES 4
+#define REPS 4
 
 #endif
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
index 9979bb4261b8..b8f931705bf9 100644
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -193,15 +193,15 @@ struct lzma_coder_s {
 	/// Probability tree for the highest two bits of the match distance.
 	/// There is a separate probability tree for match lengths of
 	/// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
-	probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];
+	probability dist_slot[DIST_STATES][DIST_SLOTS];
 
 	/// Probability trees for additional bits for match distance when the
 	/// distance is in the range [4, 127].
-	probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];
+	probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
 
 	/// Probability tree for the lowest four bits of a match distance
 	/// that is equal to or greater than 128.
-	probability pos_align[ALIGN_TABLE_SIZE];
+	probability pos_align[ALIGN_SIZE];
 
 	/// Length of a normal match
 	lzma_length_decoder match_len_decoder;
@@ -245,8 +245,8 @@ struct lzma_coder_s {
 		SEQ_LITERAL_WRITE,
 		SEQ_IS_REP,
 		seq_len(SEQ_MATCH_LEN),
-		seq_6(SEQ_POS_SLOT),
-		SEQ_POS_MODEL,
+		seq_6(SEQ_DIST_SLOT),
+		SEQ_DIST_MODEL,
 		SEQ_DIRECT,
 		seq_4(SEQ_ALIGN),
 		SEQ_EOPM,
@@ -289,8 +289,12 @@ lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
 	// Initialization //
 	////////////////////
 
-	if (!rc_read_init(&coder->rc, in, in_pos, in_size))
-		return LZMA_OK;
+	{
+		const lzma_ret ret = rc_read_init(
+				&coder->rc, in, in_pos, in_size);
+		if (ret != LZMA_STREAM_END)
+			return ret;
+	}
 
 	///////////////
 	// Variables //
@@ -502,28 +506,28 @@ lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
 
 			// Prepare to decode the highest two bits of the
 			// match distance.
-			probs = coder->pos_slot[get_len_to_pos_state(len)];
+			probs = coder->dist_slot[get_dist_state(len)];
 			symbol = 1;
 
 #ifdef HAVE_SMALL
-	case SEQ_POS_SLOT:
+	case SEQ_DIST_SLOT:
 			do {
-				rc_bit(probs[symbol], , , SEQ_POS_SLOT);
-			} while (symbol < POS_SLOTS);
+				rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
+			} while (symbol < DIST_SLOTS);
 #else
-			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0);
-			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1);
-			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2);
-			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3);
-			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4);
-			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5);
+			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
+			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
+			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
+			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
+			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
+			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
 #endif
 			// Get rid of the highest bit that was needed for
 			// indexing of the probability array.
-			symbol -= POS_SLOTS;
+			symbol -= DIST_SLOTS;
 			assert(symbol <= 63);
 
-			if (symbol < START_POS_MODEL_INDEX) {
+			if (symbol < DIST_MODEL_START) {
 				// Match distances [0, 3] have only two bits.
 				rep0 = symbol;
 			} else {
@@ -533,7 +537,7 @@ lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
 				assert(limit >= 1 && limit <= 30);
 				rep0 = 2 + (symbol & 1);
 
-				if (symbol < END_POS_MODEL_INDEX) {
+				if (symbol < DIST_MODEL_END) {
 					// Prepare to decode the low bits for
 					// a distance of [4, 127].
 					assert(limit <= 5);
@@ -553,12 +557,12 @@ lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
 							- symbol - 1;
 					symbol = 1;
 					offset = 0;
-	case SEQ_POS_MODEL:
+	case SEQ_DIST_MODEL:
 #ifdef HAVE_SMALL
 					do {
 						rc_bit(probs[symbol], ,
 							rep0 += 1 << offset,
-							SEQ_POS_MODEL);
+							SEQ_DIST_MODEL);
 					} while (++offset < limit);
 #else
 					switch (limit) {
@@ -566,25 +570,25 @@ lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
 						assert(offset == 0);
 						rc_bit(probs[symbol], ,
 							rep0 += 1,
-							SEQ_POS_MODEL);
+							SEQ_DIST_MODEL);
 						++offset;
 						--limit;
 					case 4:
 						rc_bit(probs[symbol], ,
 							rep0 += 1 << offset,
-							SEQ_POS_MODEL);
+							SEQ_DIST_MODEL);
 						++offset;
 						--limit;
 					case 3:
 						rc_bit(probs[symbol], ,
 							rep0 += 1 << offset,
-							SEQ_POS_MODEL);
+							SEQ_DIST_MODEL);
 						++offset;
 						--limit;
 					case 2:
 						rc_bit(probs[symbol], ,
 							rep0 += 1 << offset,
-							SEQ_POS_MODEL);
+							SEQ_DIST_MODEL);
 						++offset;
 						--limit;
 					case 1:
@@ -596,7 +600,7 @@ lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
 						// "symbol".
 						rc_bit_last(probs[symbol], ,
 							rep0 += 1 << offset,
-							SEQ_POS_MODEL);
+							SEQ_DIST_MODEL);
 					}
 #endif
 				} else {
@@ -637,7 +641,7 @@ lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
 					rc_bit(coder->pos_align[symbol], ,
 							rep0 += 4, SEQ_ALIGN2);
 	case SEQ_ALIGN3:
-					// Like in SEQ_POS_MODEL, we don't
+					// Like in SEQ_DIST_MODEL, we don't
 					// need "symbol" for anything else
 					// than indexing the probability array.
 					rc_bit_last(coder->pos_align[symbol], ,
@@ -891,10 +895,10 @@ lzma_decoder_reset(lzma_coder *coder, const void *opt)
 		bit_reset(coder->is_rep2[i]);
 	}
 
-	for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i)
-		bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
+	for (uint32_t i = 0; i < DIST_STATES; ++i)
+		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
 
-	for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
+	for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
 		bit_reset(coder->pos_special[i]);
 
 	bittree_reset(coder->pos_align, ALIGN_BITS);
@@ -933,7 +937,7 @@ lzma_decoder_reset(lzma_coder *coder, const void *opt)
 
 
 extern lzma_ret
-lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
+lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
 		const void *opt, lzma_lz_options *lz_options)
 {
 	if (lz->coder == NULL) {
@@ -961,7 +965,7 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
 /// initialization (lzma_lzma_decoder_init() passes function pointer to
 /// the LZ initialization).
 static lzma_ret
-lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
+lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
 		const void *options, lzma_lz_options *lz_options)
 {
 	if (!is_lclppb_valid(options))
@@ -978,7 +982,7 @@ lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	// LZMA can only be the last filter in the chain. This is enforced
@@ -1025,7 +1029,7 @@ lzma_lzma_decoder_memusage(const void *options)
 
 
 extern lzma_ret
-lzma_lzma_props_decode(void **options, lzma_allocator *allocator,
+lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size)
 {
 	if (props_size != 5)
diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h
index a463a76fc694..fa8ecb23e43c 100644
--- a/src/liblzma/lzma/lzma_decoder.h
+++ b/src/liblzma/lzma/lzma_decoder.h
@@ -19,12 +19,13 @@
 
 /// Allocates and initializes LZMA decoder
 extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern uint64_t lzma_lzma_decoder_memusage(const void *options);
 
 extern lzma_ret lzma_lzma_props_decode(
-		void **options, lzma_allocator *allocator,
+		void **options, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size);
 
 
@@ -40,7 +41,7 @@ extern bool lzma_lzma_lclppb_decode(
 /// Allocate and setup function pointers only. This is used by LZMA1 and
 /// LZMA2 decoders.
 extern lzma_ret lzma_lzma_decoder_create(
-		lzma_lz_decoder *lz, lzma_allocator *allocator,
+		lzma_lz_decoder *lz, const lzma_allocator *allocator,
 		const void *opt, lzma_lz_options *lz_options);
 
 /// Gets memory usage without validating lc/lp/pb. This is used by LZMA2
diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c
index 0b9ee9e15079..31cb4f044829 100644
--- a/src/liblzma/lzma/lzma_encoder.c
+++ b/src/liblzma/lzma/lzma_encoder.c
@@ -148,28 +148,28 @@ match(lzma_coder *coder, const uint32_t pos_state,
 	length(&coder->rc, &coder->match_len_encoder, pos_state, len,
 			coder->fast_mode);
 
-	const uint32_t pos_slot = get_pos_slot(distance);
-	const uint32_t len_to_pos_state = get_len_to_pos_state(len);
-	rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state],
-			POS_SLOT_BITS, pos_slot);
-
-	if (pos_slot >= START_POS_MODEL_INDEX) {
-		const uint32_t footer_bits = (pos_slot >> 1) - 1;
-		const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
-		const uint32_t pos_reduced = distance - base;
-
-		if (pos_slot < END_POS_MODEL_INDEX) {
-			// Careful here: base - pos_slot - 1 can be -1, but
+	const uint32_t dist_slot = get_dist_slot(distance);
+	const uint32_t dist_state = get_dist_state(len);
+	rc_bittree(&coder->rc, coder->dist_slot[dist_state],
+			DIST_SLOT_BITS, dist_slot);
+
+	if (dist_slot >= DIST_MODEL_START) {
+		const uint32_t footer_bits = (dist_slot >> 1) - 1;
+		const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
+		const uint32_t dist_reduced = distance - base;
+
+		if (dist_slot < DIST_MODEL_END) {
+			// Careful here: base - dist_slot - 1 can be -1, but
 			// rc_bittree_reverse starts at probs[1], not probs[0].
 			rc_bittree_reverse(&coder->rc,
-				coder->pos_special + base - pos_slot - 1,
-				footer_bits, pos_reduced);
+				coder->dist_special + base - dist_slot - 1,
+				footer_bits, dist_reduced);
 		} else {
-			rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS,
+			rc_direct(&coder->rc, dist_reduced >> ALIGN_BITS,
 					footer_bits - ALIGN_BITS);
 			rc_bittree_reverse(
-					&coder->rc, coder->pos_align,
-					ALIGN_BITS, pos_reduced & ALIGN_MASK);
+					&coder->rc, coder->dist_align,
+					ALIGN_BITS, dist_reduced & ALIGN_MASK);
 			++coder->align_price_count;
 		}
 	}
@@ -247,7 +247,7 @@ encode_symbol(lzma_coder *coder, lzma_mf *mf,
 		rc_bit(&coder->rc,
 			&coder->is_match[coder->state][pos_state], 1);
 
-		if (back < REP_DISTANCES) {
+		if (back < REPS) {
 			// It's a repeated match i.e. the same distance
 			// has been used earlier.
 			rc_bit(&coder->rc, &coder->is_rep[coder->state], 1);
@@ -255,7 +255,7 @@ encode_symbol(lzma_coder *coder, lzma_mf *mf,
 		} else {
 			// Normal match
 			rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
-			match(coder, pos_state, back - REP_DISTANCES, len);
+			match(coder, pos_state, back - REPS, len);
 		}
 	}
 
@@ -353,9 +353,9 @@ lzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
 
 		// Get optimal match (repeat position and length).
 		// Value ranges for pos:
-		//   - [0, REP_DISTANCES): repeated match
-		//   - [REP_DISTANCES, UINT32_MAX):
-		//     match at (pos - REP_DISTANCES)
+		//   - [0, REPS): repeated match
+		//   - [REPS, UINT32_MAX):
+		//     match at (pos - REPS)
 		//   - UINT32_MAX: not a match but a literal
 		// Value ranges for len:
 		//   - [MATCH_LEN_MIN, MATCH_LEN_MAX]
@@ -487,7 +487,7 @@ lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options)
 
 	// State
 	coder->state = STATE_LIT_LIT;
-	for (size_t i = 0; i < REP_DISTANCES; ++i)
+	for (size_t i = 0; i < REPS; ++i)
 		coder->reps[i] = 0;
 
 	literal_init(coder->literal, options->lc, options->lp);
@@ -505,14 +505,14 @@ lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options)
 		bit_reset(coder->is_rep2[i]);
 	}
 
-	for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
-		bit_reset(coder->pos_special[i]);
+	for (size_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
+		bit_reset(coder->dist_special[i]);
 
 	// Bit tree encoders
-	for (size_t i = 0; i < LEN_TO_POS_STATES; ++i)
-		bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
+	for (size_t i = 0; i < DIST_STATES; ++i)
+		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
 
-	bittree_reset(coder->pos_align, ALIGN_BITS);
+	bittree_reset(coder->dist_align, ALIGN_BITS);
 
 	// Length encoders
 	length_encoder_reset(&coder->match_len_encoder,
@@ -545,7 +545,8 @@ lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options)
 
 
 extern lzma_ret
-lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
+lzma_lzma_encoder_create(lzma_coder **coder_ptr,
+		const lzma_allocator *allocator,
 		const lzma_options_lzma *options, lzma_lz_options *lz_options)
 {
 	// Allocate lzma_coder if it wasn't already allocated.
@@ -604,7 +605,7 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
 
 
 static lzma_ret
-lzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
+lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
 		const void *options, lzma_lz_options *lz_options)
 {
 	lz->code = &lzma_encode;
@@ -614,7 +615,7 @@ lzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_lzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	return lzma_lz_encoder_init(
diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h
index 835e1f583304..cc9cc2f27ecb 100644
--- a/src/liblzma/lzma/lzma_encoder.h
+++ b/src/liblzma/lzma/lzma_encoder.h
@@ -18,7 +18,8 @@
 
 
 extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 
 extern uint64_t lzma_lzma_encoder_memusage(const void *options);
@@ -35,7 +36,7 @@ extern bool lzma_lzma_lclppb_encode(
 
 /// Initializes raw LZMA encoder; this is used by LZMA2.
 extern lzma_ret lzma_lzma_encoder_create(
-		lzma_coder **coder_ptr, lzma_allocator *allocator,
+		lzma_coder **coder_ptr, const lzma_allocator *allocator,
 		const lzma_options_lzma *options, lzma_lz_options *lz_options);
 
 
diff --git a/src/liblzma/lzma/lzma_encoder_optimum_fast.c b/src/liblzma/lzma/lzma_encoder_optimum_fast.c
index f835f69356bd..8922cbd91d3f 100644
--- a/src/liblzma/lzma/lzma_encoder_optimum_fast.c
+++ b/src/liblzma/lzma/lzma_encoder_optimum_fast.c
@@ -10,6 +10,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #include "lzma_encoder_private.h"
+#include "memcmplen.h"
 
 
 #define change_pair(small_dist, big_dist) \
@@ -46,7 +47,7 @@ lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf,
 	uint32_t rep_len = 0;
 	uint32_t rep_index = 0;
 
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
+	for (uint32_t i = 0; i < REPS; ++i) {
 		// Pointer to the beginning of the match candidate
 		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
 
@@ -57,9 +58,8 @@ lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf,
 
 		// The first two bytes matched.
 		// Calculate the length of the match.
-		uint32_t len;
-		for (len = 2; len < buf_avail
-				&& buf[len] == buf_back[len]; ++len) ;
+		const uint32_t len = lzma_memcmplen(
+				buf, buf_back, 2, buf_avail);
 
 		// If we have found a repeated match that is at least
 		// nice_len long, return it immediately.
@@ -79,8 +79,7 @@ lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf,
 	// We didn't find a long enough repeated match. Encode it as a normal
 	// match if the match length is at least nice_len.
 	if (len_main >= nice_len) {
-		*back_res = coder->matches[matches_count - 1].dist
-				+ REP_DISTANCES;
+		*back_res = coder->matches[matches_count - 1].dist + REPS;
 		*len_res = len_main;
 		mf_skip(mf, len_main - 1);
 		return;
@@ -155,24 +154,15 @@ lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf,
 
 	const uint32_t limit = len_main - 1;
 
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
-		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
-
-		if (not_equal_16(buf, buf_back))
-			continue;
-
-		uint32_t len;
-		for (len = 2; len < limit
-				&& buf[len] == buf_back[len]; ++len) ;
-
-		if (len >= limit) {
+	for (uint32_t i = 0; i < REPS; ++i) {
+		if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) {
 			*back_res = UINT32_MAX;
 			*len_res = 1;
 			return;
 		}
 	}
 
-	*back_res = back_main + REP_DISTANCES;
+	*back_res = back_main + REPS;
 	*len_res = len_main;
 	mf_skip(mf, len_main - 2);
 	return;
diff --git a/src/liblzma/lzma/lzma_encoder_optimum_normal.c b/src/liblzma/lzma/lzma_encoder_optimum_normal.c
index 7e856493c8cd..a360579885db 100644
--- a/src/liblzma/lzma/lzma_encoder_optimum_normal.c
+++ b/src/liblzma/lzma/lzma_encoder_optimum_normal.c
@@ -11,6 +11,7 @@
 
 #include "lzma_encoder_private.h"
 #include "fastpos.h"
+#include "memcmplen.h"
 
 
 ////////////
@@ -108,18 +109,18 @@ get_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
 
 
 static inline uint32_t
-get_pos_len_price(const lzma_coder *const coder, const uint32_t pos,
+get_dist_len_price(const lzma_coder *const coder, const uint32_t dist,
 		const uint32_t len, const uint32_t pos_state)
 {
-	const uint32_t len_to_pos_state = get_len_to_pos_state(len);
+	const uint32_t dist_state = get_dist_state(len);
 	uint32_t price;
 
-	if (pos < FULL_DISTANCES) {
-		price = coder->distances_prices[len_to_pos_state][pos];
+	if (dist < FULL_DISTANCES) {
+		price = coder->dist_prices[dist_state][dist];
 	} else {
-		const uint32_t pos_slot = get_pos_slot_2(pos);
-		price = coder->pos_slot_prices[len_to_pos_state][pos_slot]
-				+ coder->align_prices[pos & ALIGN_MASK];
+		const uint32_t dist_slot = get_dist_slot_2(dist);
+		price = coder->dist_slot_prices[dist_state][dist_slot]
+				+ coder->align_prices[dist & ALIGN_MASK];
 	}
 
 	price += get_len_price(&coder->match_len_encoder, len, pos_state);
@@ -129,55 +130,53 @@ get_pos_len_price(const lzma_coder *const coder, const uint32_t pos,
 
 
 static void
-fill_distances_prices(lzma_coder *coder)
+fill_dist_prices(lzma_coder *coder)
 {
-	for (uint32_t len_to_pos_state = 0;
-			len_to_pos_state < LEN_TO_POS_STATES;
-			++len_to_pos_state) {
+	for (uint32_t dist_state = 0; dist_state < DIST_STATES; ++dist_state) {
 
-		uint32_t *const pos_slot_prices
-				= coder->pos_slot_prices[len_to_pos_state];
+		uint32_t *const dist_slot_prices
+				= coder->dist_slot_prices[dist_state];
 
-		// Price to encode the pos_slot.
-		for (uint32_t pos_slot = 0;
-				pos_slot < coder->dist_table_size; ++pos_slot)
-			pos_slot_prices[pos_slot] = rc_bittree_price(
-					coder->pos_slot[len_to_pos_state],
-					POS_SLOT_BITS, pos_slot);
+		// Price to encode the dist_slot.
+		for (uint32_t dist_slot = 0;
+				dist_slot < coder->dist_table_size; ++dist_slot)
+			dist_slot_prices[dist_slot] = rc_bittree_price(
+					coder->dist_slot[dist_state],
+					DIST_SLOT_BITS, dist_slot);
 
 		// For matches with distance >= FULL_DISTANCES, add the price
 		// of the direct bits part of the match distance. (Align bits
 		// are handled by fill_align_prices()).
-		for (uint32_t pos_slot = END_POS_MODEL_INDEX;
-				pos_slot < coder->dist_table_size; ++pos_slot)
-			pos_slot_prices[pos_slot] += rc_direct_price(
-					((pos_slot >> 1) - 1) - ALIGN_BITS);
+		for (uint32_t dist_slot = DIST_MODEL_END;
+				dist_slot < coder->dist_table_size;
+				++dist_slot)
+			dist_slot_prices[dist_slot] += rc_direct_price(
+					((dist_slot >> 1) - 1) - ALIGN_BITS);
 
 		// Distances in the range [0, 3] are fully encoded with
-		// pos_slot, so they are used for coder->distances_prices
+		// dist_slot, so they are used for coder->dist_prices
 		// as is.
-		for (uint32_t i = 0; i < START_POS_MODEL_INDEX; ++i)
-			coder->distances_prices[len_to_pos_state][i]
-					= pos_slot_prices[i];
+		for (uint32_t i = 0; i < DIST_MODEL_START; ++i)
+			coder->dist_prices[dist_state][i]
+					= dist_slot_prices[i];
 	}
 
-	// Distances in the range [4, 127] depend on pos_slot and pos_special.
-	// We do this in a loop separate from the above loop to avoid
-	// redundant calls to get_pos_slot().
-	for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) {
-		const uint32_t pos_slot = get_pos_slot(i);
-		const uint32_t footer_bits = ((pos_slot >> 1) - 1);
-		const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
+	// Distances in the range [4, 127] depend on dist_slot and
+	// dist_special. We do this in a loop separate from the above
+	// loop to avoid redundant calls to get_dist_slot().
+	for (uint32_t i = DIST_MODEL_START; i < FULL_DISTANCES; ++i) {
+		const uint32_t dist_slot = get_dist_slot(i);
+		const uint32_t footer_bits = ((dist_slot >> 1) - 1);
+		const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
 		const uint32_t price = rc_bittree_reverse_price(
-				coder->pos_special + base - pos_slot - 1,
+				coder->dist_special + base - dist_slot - 1,
 				footer_bits, i - base);
 
-		for (uint32_t len_to_pos_state = 0;
-				len_to_pos_state < LEN_TO_POS_STATES;
-				++len_to_pos_state)
-			coder->distances_prices[len_to_pos_state][i]
-					= price + coder->pos_slot_prices[
-						len_to_pos_state][pos_slot];
+		for (uint32_t dist_state = 0; dist_state < DIST_STATES;
+				++dist_state)
+			coder->dist_prices[dist_state][i]
+					= price + coder->dist_slot_prices[
+						dist_state][dist_slot];
 	}
 
 	coder->match_price_count = 0;
@@ -188,9 +187,9 @@ fill_distances_prices(lzma_coder *coder)
 static void
 fill_align_prices(lzma_coder *coder)
 {
-	for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i)
+	for (uint32_t i = 0; i < ALIGN_SIZE; ++i)
 		coder->align_prices[i] = rc_bittree_reverse_price(
-				coder->pos_align, ALIGN_BITS, i);
+				coder->dist_align, ALIGN_BITS, i);
 
 	coder->align_price_count = 0;
 	return;
@@ -296,10 +295,10 @@ helper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
 
 	const uint8_t *const buf = mf_ptr(mf) - 1;
 
-	uint32_t rep_lens[REP_DISTANCES];
+	uint32_t rep_lens[REPS];
 	uint32_t rep_max_index = 0;
 
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
+	for (uint32_t i = 0; i < REPS; ++i) {
 		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
 
 		if (not_equal_16(buf, buf_back)) {
@@ -307,13 +306,9 @@ helper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
 			continue;
 		}
 
-		uint32_t len_test;
-		for (len_test = 2; len_test < buf_avail
-				&& buf[len_test] == buf_back[len_test];
-				++len_test) ;
+		rep_lens[i] = lzma_memcmplen(buf, buf_back, 2, buf_avail);
 
-		rep_lens[i] = len_test;
-		if (len_test > rep_lens[rep_max_index])
+		if (rep_lens[i] > rep_lens[rep_max_index])
 			rep_max_index = i;
 	}
 
@@ -326,8 +321,7 @@ helper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
 
 
 	if (len_main >= nice_len) {
-		*back_res = coder->matches[matches_count - 1].dist
-				+ REP_DISTANCES;
+		*back_res = coder->matches[matches_count - 1].dist + REPS;
 		*len_res = len_main;
 		mf_skip(mf, len_main - 1);
 		return UINT32_MAX;
@@ -381,7 +375,7 @@ helper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
 
 	coder->opts[1].pos_prev = 0;
 
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
+	for (uint32_t i = 0; i < REPS; ++i)
 		coder->opts[0].backs[i] = coder->reps[i];
 
 	uint32_t len = len_end;
@@ -390,7 +384,7 @@ helper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
 	} while (--len >= 2);
 
 
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
+	for (uint32_t i = 0; i < REPS; ++i) {
 		uint32_t rep_len = rep_lens[i];
 		if (rep_len < 2)
 			continue;
@@ -426,14 +420,13 @@ helper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
 		for(; ; ++len) {
 			const uint32_t dist = coder->matches[i].dist;
 			const uint32_t cur_and_len_price = normal_match_price
-					+ get_pos_len_price(coder,
+					+ get_dist_len_price(coder,
 						dist, len, pos_state);
 
 			if (cur_and_len_price < coder->opts[len].price) {
 				coder->opts[len].price = cur_and_len_price;
 				coder->opts[len].pos_prev = 0;
-				coder->opts[len].back_prev
-						= dist + REP_DISTANCES;
+				coder->opts[len].back_prev = dist + REPS;
 				coder->opts[len].prev_1_is_literal = false;
 			}
 
@@ -463,7 +456,7 @@ helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
 		if (coder->opts[cur].prev_2) {
 			state = coder->opts[coder->opts[cur].pos_prev_2].state;
 
-			if (coder->opts[cur].back_prev_2 < REP_DISTANCES)
+			if (coder->opts[cur].back_prev_2 < REPS)
 				update_long_rep(state);
 			else
 				update_match(state);
@@ -492,33 +485,33 @@ helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
 			update_long_rep(state);
 		} else {
 			pos = coder->opts[cur].back_prev;
-			if (pos < REP_DISTANCES)
+			if (pos < REPS)
 				update_long_rep(state);
 			else
 				update_match(state);
 		}
 
-		if (pos < REP_DISTANCES) {
+		if (pos < REPS) {
 			reps[0] = coder->opts[pos_prev].backs[pos];
 
 			uint32_t i;
 			for (i = 1; i <= pos; ++i)
 				reps[i] = coder->opts[pos_prev].backs[i - 1];
 
-			for (; i < REP_DISTANCES; ++i)
+			for (; i < REPS; ++i)
 				reps[i] = coder->opts[pos_prev].backs[i];
 
 		} else {
-			reps[0] = pos - REP_DISTANCES;
+			reps[0] = pos - REPS;
 
-			for (uint32_t i = 1; i < REP_DISTANCES; ++i)
+			for (uint32_t i = 1; i < REPS; ++i)
 				reps[i] = coder->opts[pos_prev].backs[i - 1];
 		}
 	}
 
 	coder->opts[cur].state = state;
 
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
+	for (uint32_t i = 0; i < REPS; ++i)
 		coder->opts[cur].backs[i] = reps[i];
 
 	const uint32_t cur_price = coder->opts[cur].price;
@@ -572,11 +565,7 @@ helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
 		const uint8_t *const buf_back = buf - reps[0] - 1;
 		const uint32_t limit = my_min(buf_avail_full, nice_len + 1);
 
-		uint32_t len_test = 1;
-		while (len_test < limit && buf[len_test] == buf_back[len_test])
-			++len_test;
-
-		--len_test;
+		const uint32_t len_test = lzma_memcmplen(buf, buf_back, 1, limit) - 1;
 
 		if (len_test >= 2) {
 			lzma_lzma_state state_2 = state;
@@ -611,15 +600,12 @@ helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
 
 	uint32_t start_len = 2; // speed optimization
 
-	for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) {
+	for (uint32_t rep_index = 0; rep_index < REPS; ++rep_index) {
 		const uint8_t *const buf_back = buf - reps[rep_index] - 1;
 		if (not_equal_16(buf, buf_back))
 			continue;
 
-		uint32_t len_test;
-		for (len_test = 2; len_test < buf_avail
-				&& buf[len_test] == buf_back[len_test];
-				++len_test) ;
+		uint32_t len_test = lzma_memcmplen(buf, buf_back, 2, buf_avail);
 
 		while (len_end < cur + len_test)
 			coder->opts[++len_end].price = RC_INFINITY_PRICE;
@@ -728,14 +714,14 @@ helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
 		for (uint32_t len_test = start_len; ; ++len_test) {
 			const uint32_t cur_back = coder->matches[i].dist;
 			uint32_t cur_and_len_price = normal_match_price
-					+ get_pos_len_price(coder,
+					+ get_dist_len_price(coder,
 						cur_back, len_test, pos_state);
 
 			if (cur_and_len_price < coder->opts[cur + len_test].price) {
 				coder->opts[cur + len_test].price = cur_and_len_price;
 				coder->opts[cur + len_test].pos_prev = cur;
 				coder->opts[cur + len_test].back_prev
-						= cur_back + REP_DISTANCES;
+						= cur_back + REPS;
 				coder->opts[cur + len_test].prev_1_is_literal = false;
 			}
 
@@ -795,7 +781,7 @@ helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
 						coder->opts[offset].prev_2 = true;
 						coder->opts[offset].pos_prev_2 = cur;
 						coder->opts[offset].back_prev_2
-								= cur_back + REP_DISTANCES;
+								= cur_back + REPS;
 					}
 					//}
 				}
@@ -831,9 +817,9 @@ lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf,
 	// In liblzma they were moved into this single place.
 	if (mf->read_ahead == 0) {
 		if (coder->match_price_count >= (1 << 7))
-			fill_distances_prices(coder);
+			fill_dist_prices(coder);
 
-		if (coder->align_price_count >= ALIGN_TABLE_SIZE)
+		if (coder->align_price_count >= ALIGN_SIZE)
 			fill_align_prices(coder);
 	}
 
@@ -845,7 +831,7 @@ lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf,
 	if (len_end == UINT32_MAX)
 		return;
 
-	uint32_t reps[REP_DISTANCES];
+	uint32_t reps[REPS];
 	memcpy(reps, coder->reps, sizeof(reps));
 
 	uint32_t cur;
diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h
index 684745236c82..2f62d6cba0b7 100644
--- a/src/liblzma/lzma/lzma_encoder_private.h
+++ b/src/liblzma/lzma/lzma_encoder_private.h
@@ -64,7 +64,7 @@ typedef struct {
 	uint32_t pos_prev;  // pos_next;
 	uint32_t back_prev;
 
-	uint32_t backs[REP_DISTANCES];
+	uint32_t backs[REPS];
 
 } lzma_optimal;
 
@@ -77,7 +77,7 @@ struct lzma_coder_s {
 	lzma_lzma_state state;
 
 	/// The four most recent match distances
-	uint32_t reps[REP_DISTANCES];
+	uint32_t reps[REPS];
 
 	/// Array of match candidates
 	lzma_match matches[MATCH_LEN_MAX + 1];
@@ -112,9 +112,9 @@ struct lzma_coder_s {
 	probability is_rep1[STATES];
 	probability is_rep2[STATES];
 	probability is_rep0_long[STATES][POS_STATES_MAX];
-	probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];
-	probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];
-	probability pos_align[ALIGN_TABLE_SIZE];
+	probability dist_slot[DIST_STATES][DIST_SLOTS];
+	probability dist_special[FULL_DISTANCES - DIST_MODEL_END];
+	probability dist_align[ALIGN_SIZE];
 
 	// These are the same as in lzma_decoder.c except that the encoders
 	// include also price tables.
@@ -122,12 +122,12 @@ struct lzma_coder_s {
 	lzma_length_encoder rep_len_encoder;
 
 	// Price tables
-	uint32_t pos_slot_prices[LEN_TO_POS_STATES][POS_SLOTS];
-	uint32_t distances_prices[LEN_TO_POS_STATES][FULL_DISTANCES];
+	uint32_t dist_slot_prices[DIST_STATES][DIST_SLOTS];
+	uint32_t dist_prices[DIST_STATES][FULL_DISTANCES];
 	uint32_t dist_table_size;
 	uint32_t match_price_count;
 
-	uint32_t align_prices[ALIGN_TABLE_SIZE];
+	uint32_t align_prices[ALIGN_SIZE];
 	uint32_t align_price_count;
 
 	// Optimal
diff --git a/src/liblzma/rangecoder/range_decoder.h b/src/liblzma/rangecoder/range_decoder.h
index fb96180fb399..e0b051fac2d2 100644
--- a/src/liblzma/rangecoder/range_decoder.h
+++ b/src/liblzma/rangecoder/range_decoder.h
@@ -25,20 +25,26 @@ typedef struct {
 
 
 /// Reads the first five bytes to initialize the range decoder.
-static inline bool
+static inline lzma_ret
 rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in,
 		size_t *restrict in_pos, size_t in_size)
 {
 	while (rc->init_bytes_left > 0) {
 		if (*in_pos == in_size)
-			return false;
+			return LZMA_OK;
+
+		// The first byte is always 0x00. It could have been omitted
+		// in LZMA2 but it wasn't, so one byte is wasted in every
+		// LZMA2 chunk.
+		if (rc->init_bytes_left == 5 && in[*in_pos] != 0x00)
+			return LZMA_DATA_ERROR;
 
 		rc->code = (rc->code << 8) | in[*in_pos];
 		++*in_pos;
 		--rc->init_bytes_left;
 	}
 
-	return true;
+	return LZMA_STREAM_END;
 }
 
 
diff --git a/src/liblzma/simple/arm.c b/src/liblzma/simple/arm.c
index a84702ac62f2..258d870fead3 100644
--- a/src/liblzma/simple/arm.c
+++ b/src/liblzma/simple/arm.c
@@ -45,7 +45,7 @@ arm_code(lzma_simple *simple lzma_attribute((__unused__)),
 
 
 static lzma_ret
-arm_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+arm_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters, bool is_encoder)
 {
 	return lzma_simple_coder_init(next, allocator, filters,
@@ -54,7 +54,8 @@ arm_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_simple_arm_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_simple_arm_encoder_init(lzma_next_coder *next,
+		const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	return arm_coder_init(next, allocator, filters, true);
@@ -62,7 +63,8 @@ lzma_simple_arm_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_simple_arm_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_simple_arm_decoder_init(lzma_next_coder *next,
+		const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	return arm_coder_init(next, allocator, filters, false);
diff --git a/src/liblzma/simple/armthumb.c b/src/liblzma/simple/armthumb.c
index 4b49175fec4d..06c21e4067a5 100644
--- a/src/liblzma/simple/armthumb.c
+++ b/src/liblzma/simple/armthumb.c
@@ -50,7 +50,7 @@ armthumb_code(lzma_simple *simple lzma_attribute((__unused__)),
 
 
 static lzma_ret
-armthumb_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+armthumb_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters, bool is_encoder)
 {
 	return lzma_simple_coder_init(next, allocator, filters,
@@ -60,7 +60,8 @@ armthumb_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 extern lzma_ret
 lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters)
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
 {
 	return armthumb_coder_init(next, allocator, filters, true);
 }
@@ -68,7 +69,8 @@ lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
 
 extern lzma_ret
 lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters)
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
 {
 	return armthumb_coder_init(next, allocator, filters, false);
 }
diff --git a/src/liblzma/simple/ia64.c b/src/liblzma/simple/ia64.c
index ce3692b98f1d..ba7249c00178 100644
--- a/src/liblzma/simple/ia64.c
+++ b/src/liblzma/simple/ia64.c
@@ -86,7 +86,7 @@ ia64_code(lzma_simple *simple lzma_attribute((__unused__)),
 
 
 static lzma_ret
-ia64_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+ia64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters, bool is_encoder)
 {
 	return lzma_simple_coder_init(next, allocator, filters,
@@ -96,7 +96,8 @@ ia64_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 extern lzma_ret
 lzma_simple_ia64_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters)
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
 {
 	return ia64_coder_init(next, allocator, filters, true);
 }
@@ -104,7 +105,8 @@ lzma_simple_ia64_encoder_init(lzma_next_coder *next,
 
 extern lzma_ret
 lzma_simple_ia64_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters)
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
 {
 	return ia64_coder_init(next, allocator, filters, false);
 }
diff --git a/src/liblzma/simple/powerpc.c b/src/liblzma/simple/powerpc.c
index 6f8351176c5e..46899196af50 100644
--- a/src/liblzma/simple/powerpc.c
+++ b/src/liblzma/simple/powerpc.c
@@ -49,7 +49,7 @@ powerpc_code(lzma_simple *simple lzma_attribute((__unused__)),
 
 
 static lzma_ret
-powerpc_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+powerpc_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters, bool is_encoder)
 {
 	return lzma_simple_coder_init(next, allocator, filters,
@@ -59,7 +59,8 @@ powerpc_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 extern lzma_ret
 lzma_simple_powerpc_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters)
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
 {
 	return powerpc_coder_init(next, allocator, filters, true);
 }
@@ -67,7 +68,8 @@ lzma_simple_powerpc_encoder_init(lzma_next_coder *next,
 
 extern lzma_ret
 lzma_simple_powerpc_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters)
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
 {
 	return powerpc_coder_init(next, allocator, filters, false);
 }
diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c
index a02b039aa0bb..dba5417b4264 100644
--- a/src/liblzma/simple/simple_coder.c
+++ b/src/liblzma/simple/simple_coder.c
@@ -18,7 +18,7 @@
 
 /// Copied or encodes/decodes more data to out[].
 static lzma_ret
-copy_or_code(lzma_coder *coder, lzma_allocator *allocator,
+copy_or_code(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -66,7 +66,7 @@ call_filter(lzma_coder *coder, uint8_t *buffer, size_t size)
 
 
 static lzma_ret
-simple_code(lzma_coder *coder, lzma_allocator *allocator,
+simple_code(lzma_coder *coder, const lzma_allocator *allocator,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size, lzma_action action)
@@ -198,7 +198,7 @@ simple_code(lzma_coder *coder, lzma_allocator *allocator,
 
 
 static void
-simple_coder_end(lzma_coder *coder, lzma_allocator *allocator)
+simple_coder_end(lzma_coder *coder, const lzma_allocator *allocator)
 {
 	lzma_next_end(&coder->next, allocator);
 	lzma_free(coder->simple, allocator);
@@ -208,7 +208,7 @@ simple_coder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 
 static lzma_ret
-simple_coder_update(lzma_coder *coder, lzma_allocator *allocator,
+simple_coder_update(lzma_coder *coder, const lzma_allocator *allocator,
 		const lzma_filter *filters_null lzma_attribute((__unused__)),
 		const lzma_filter *reversed_filters)
 {
@@ -219,7 +219,7 @@ simple_coder_update(lzma_coder *coder, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_simple_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_simple_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters,
 		size_t (*filter)(lzma_simple *simple, uint32_t now_pos,
 			bool is_encoder, uint8_t *buffer, size_t size),
diff --git a/src/liblzma/simple/simple_coder.h b/src/liblzma/simple/simple_coder.h
index 0952fad33b34..19c2ee03affd 100644
--- a/src/liblzma/simple/simple_coder.h
+++ b/src/liblzma/simple/simple_coder.h
@@ -17,44 +17,56 @@
 
 
 extern lzma_ret lzma_simple_x86_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern lzma_ret lzma_simple_x86_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 
 extern lzma_ret lzma_simple_powerpc_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern lzma_ret lzma_simple_powerpc_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 
 extern lzma_ret lzma_simple_ia64_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern lzma_ret lzma_simple_ia64_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 
 extern lzma_ret lzma_simple_arm_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern lzma_ret lzma_simple_arm_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 
 extern lzma_ret lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 
 extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 extern lzma_ret lzma_simple_sparc_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters);
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters);
 
 #endif
diff --git a/src/liblzma/simple/simple_decoder.c b/src/liblzma/simple/simple_decoder.c
index 0beccd32a7d8..1d864f2bf781 100644
--- a/src/liblzma/simple/simple_decoder.c
+++ b/src/liblzma/simple/simple_decoder.c
@@ -14,7 +14,7 @@
 
 
 extern lzma_ret
-lzma_simple_props_decode(void **options, lzma_allocator *allocator,
+lzma_simple_props_decode(void **options, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size)
 {
 	if (props_size == 0)
diff --git a/src/liblzma/simple/simple_decoder.h b/src/liblzma/simple/simple_decoder.h
index b8bf590f7681..bed8d37a9653 100644
--- a/src/liblzma/simple/simple_decoder.h
+++ b/src/liblzma/simple/simple_decoder.h
@@ -16,7 +16,7 @@
 #include "simple_coder.h"
 
 extern lzma_ret lzma_simple_props_decode(
-		void **options, lzma_allocator *allocator,
+		void **options, const lzma_allocator *allocator,
 		const uint8_t *props, size_t props_size);
 
 #endif
diff --git a/src/liblzma/simple/simple_private.h b/src/liblzma/simple/simple_private.h
index fcf9f7c1980f..bb20cb419f8c 100644
--- a/src/liblzma/simple/simple_private.h
+++ b/src/liblzma/simple/simple_private.h
@@ -66,7 +66,8 @@ struct lzma_coder_s {
 
 
 extern lzma_ret lzma_simple_coder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters,
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters,
 		size_t (*filter)(lzma_simple *simple, uint32_t now_pos,
 			bool is_encoder, uint8_t *buffer, size_t size),
 		size_t simple_size, size_t unfiltered_max,
diff --git a/src/liblzma/simple/sparc.c b/src/liblzma/simple/sparc.c
index 8270d6ab19f1..53ee49d154f1 100644
--- a/src/liblzma/simple/sparc.c
+++ b/src/liblzma/simple/sparc.c
@@ -57,7 +57,7 @@ sparc_code(lzma_simple *simple lzma_attribute((__unused__)),
 
 
 static lzma_ret
-sparc_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+sparc_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters, bool is_encoder)
 {
 	return lzma_simple_coder_init(next, allocator, filters,
@@ -67,7 +67,8 @@ sparc_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 extern lzma_ret
 lzma_simple_sparc_encoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters)
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
 {
 	return sparc_coder_init(next, allocator, filters, true);
 }
@@ -75,7 +76,8 @@ lzma_simple_sparc_encoder_init(lzma_next_coder *next,
 
 extern lzma_ret
 lzma_simple_sparc_decoder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_filter_info *filters)
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
 {
 	return sparc_coder_init(next, allocator, filters, false);
 }
diff --git a/src/liblzma/simple/x86.c b/src/liblzma/simple/x86.c
index 5d1509bb2278..3b2b4f852067 100644
--- a/src/liblzma/simple/x86.c
+++ b/src/liblzma/simple/x86.c
@@ -123,7 +123,7 @@ x86_code(lzma_simple *simple, uint32_t now_pos, bool is_encoder,
 
 
 static lzma_ret
-x86_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
+x86_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const lzma_filter_info *filters, bool is_encoder)
 {
 	const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters,
@@ -139,7 +139,8 @@ x86_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_simple_x86_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_simple_x86_encoder_init(lzma_next_coder *next,
+		const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	return x86_coder_init(next, allocator, filters, true);
@@ -147,7 +148,8 @@ lzma_simple_x86_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 
 extern lzma_ret
-lzma_simple_x86_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+lzma_simple_x86_decoder_init(lzma_next_coder *next,
+		const lzma_allocator *allocator,
 		const lzma_filter_info *filters)
 {
 	return x86_coder_init(next, allocator, filters, false);
diff --git a/src/liblzma/validate_map.sh b/src/liblzma/validate_map.sh
new file mode 100755
index 000000000000..3aee46687c38
--- /dev/null
+++ b/src/liblzma/validate_map.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Check liblzma.map for certain types of errors
+#
+# Author: Lasse Collin
+#
+# This file has been put into the public domain.
+# You can do whatever you want with this file.
+#
+###############################################################################
+
+LC_ALL=C
+export LC_ALL
+
+STATUS=0
+
+cd "$(dirname "$0")"
+
+# Get the list of symbols that aren't defined in liblzma.map.
+SYMS=$(sed -n 's/^extern LZMA_API([^)]*) \([a-z0-9_]*\)(.*$/\1;/p' \
+		api/lzma/*.h \
+	| sort \
+	| grep -Fve "$(sed '/[{}:*]/d;/^$/d;s/^	//' liblzma.map)")
+
+# Check that there are no old alpha or beta versions listed.
+VER=$(cd ../.. && sh build-aux/version.sh)
+NAMES=
+case $VER in
+	*alpha | *beta)
+		NAMES=$(sed -n 's/^.*XZ_\([^ ]*\)\(alpha\|beta\) .*$/\1\2/p' \
+			liblzma.map | grep -Fv "$VER")
+		;;
+esac
+
+# Check for duplicate lines. It can catch missing dependencies.
+DUPS=$(sort liblzma.map | sed '/^$/d;/^global:$/d' | uniq -d)
+
+# Print error messages if needed.
+if test -n "$SYMS$NAMES$DUPS"; then
+	echo
+	echo 'validate_map.sh found problems from liblzma.map:'
+	echo
+
+	if test -n "$SYMS"; then
+		echo 'liblzma.map lacks the following symbols:'
+		echo "$SYMS"
+		echo
+	fi
+
+	if test -n "$NAMES"; then
+		echo 'Obsolete alpha or beta version names:'
+		echo "$NAMES"
+		echo
+	fi
+
+	if test -n "$DUPS"; then
+		echo 'Duplicate lines:'
+		echo "$DUPS"
+		echo
+	fi
+
+	STATUS=1
+fi
+
+# Exit status is 1 if problems were found, 0 otherwise.
+exit "$STATUS"
diff --git a/src/xz/args.c b/src/xz/args.c
index 34761d45d67e..041c80073e6d 100644
--- a/src/xz/args.c
+++ b/src/xz/args.c
@@ -22,6 +22,7 @@ bool opt_stdout = false;
 bool opt_force = false;
 bool opt_keep_original = false;
 bool opt_robot = false;
+bool opt_ignore_check = false;
 
 // We don't modify or free() this, but we need to assign it in some
 // non-const pointers.
@@ -55,6 +56,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str,
 
 
 static void
+parse_block_list(char *str)
+{
+	// It must be non-empty and not begin with a comma.
+	if (str[0] == '\0' || str[0] == ',')
+		message_fatal(_("%s: Invalid argument to --block-list"), str);
+
+	// Count the number of comma-separated strings.
+	size_t count = 1;
+	for (size_t i = 0; str[i] != '\0'; ++i)
+		if (str[i] == ',')
+			++count;
+
+	// Prevent an unlikely integer overflow.
+	if (count > SIZE_MAX / sizeof(uint64_t) - 1)
+		message_fatal(_("%s: Too many arguments to --block-list"),
+				str);
+
+	// Allocate memory to hold all the sizes specified.
+	// If --block-list was specified already, its value is forgotten.
+	free(opt_block_list);
+	opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
+
+	for (size_t i = 0; i < count; ++i) {
+		// Locate the next comma and replace it with \0.
+		char *p = strchr(str, ',');
+		if (p != NULL)
+			*p = '\0';
+
+		if (str[0] == '\0') {
+			// There is no string, that is, a comma follows
+			// another comma. Use the previous value.
+			//
+			// NOTE: We checked earler that the first char
+			// of the whole list cannot be a comma.
+			assert(i > 0);
+			opt_block_list[i] = opt_block_list[i - 1];
+		} else {
+			opt_block_list[i] = str_to_uint64("block-list", str,
+					0, UINT64_MAX);
+
+			// Zero indicates no more new Blocks.
+			if (opt_block_list[i] == 0) {
+				if (i + 1 != count)
+					message_fatal(_("0 can only be used "
+							"as the last element "
+							"in --block-list"));
+
+				opt_block_list[i] = UINT64_MAX;
+			}
+		}
+
+		str = p + 1;
+	}
+
+	// Terminate the array.
+	opt_block_list[count] = 0;
+	return;
+}
+
+
+static void
 parse_real(args_info *args, int argc, char **argv)
 {
 	enum {
@@ -68,14 +130,19 @@ parse_real(args_info *args, int argc, char **argv)
 		OPT_LZMA1,
 		OPT_LZMA2,
 
+		OPT_SINGLE_STREAM,
 		OPT_NO_SPARSE,
 		OPT_FILES,
 		OPT_FILES0,
+		OPT_BLOCK_SIZE,
+		OPT_BLOCK_LIST,
 		OPT_MEM_COMPRESS,
 		OPT_MEM_DECOMPRESS,
 		OPT_NO_ADJUST,
 		OPT_INFO_MEMORY,
 		OPT_ROBOT,
+		OPT_FLUSH_TIMEOUT,
+		OPT_IGNORE_CHECK,
 	};
 
 	static const char short_opts[]
@@ -94,6 +161,7 @@ parse_real(args_info *args, int argc, char **argv)
 		{ "force",        no_argument,       NULL,  'f' },
 		{ "stdout",       no_argument,       NULL,  'c' },
 		{ "to-stdout",    no_argument,       NULL,  'c' },
+		{ "single-stream", no_argument,      NULL,  OPT_SINGLE_STREAM },
 		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
 		{ "suffix",       required_argument, NULL,  'S' },
 		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
@@ -103,12 +171,16 @@ parse_real(args_info *args, int argc, char **argv)
 		// Basic compression settings
 		{ "format",       required_argument, NULL,  'F' },
 		{ "check",        required_argument, NULL,  'C' },
+		{ "ignore-check", no_argument,       NULL,  OPT_IGNORE_CHECK },
+		{ "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
+		{ "block-list",  required_argument, NULL,  OPT_BLOCK_LIST },
 		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
 		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
 		{ "memlimit",     required_argument, NULL,  'M' },
 		{ "memory",       required_argument, NULL,  'M' }, // Old alias
 		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
 		{ "threads",      required_argument, NULL,  'T' },
+		{ "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
 
 		{ "extreme",      no_argument,       NULL,  'e' },
 		{ "fast",         no_argument,       NULL,  '0' },
@@ -175,8 +247,9 @@ parse_real(args_info *args, int argc, char **argv)
 			break;
 
 		case 'T':
-			hardware_threadlimit_set(str_to_uint64(
-					"threads", optarg, 0, UINT32_MAX));
+			// The max is from src/liblzma/common/common.h.
+			hardware_threads_set(str_to_uint64("threads",
+					optarg, 0, 16384));
 			break;
 
 		// --version
@@ -368,6 +441,24 @@ parse_real(args_info *args, int argc, char **argv)
 			break;
 		}
 
+		case OPT_IGNORE_CHECK:
+			opt_ignore_check = true;
+			break;
+
+		case OPT_BLOCK_SIZE:
+			opt_block_size = str_to_uint64("block-size", optarg,
+					0, LZMA_VLI_MAX);
+			break;
+
+		case OPT_BLOCK_LIST: {
+			parse_block_list(optarg);
+			break;
+		}
+
+		case OPT_SINGLE_STREAM:
+			opt_single_stream = true;
+			break;
+
 		case OPT_NO_SPARSE:
 			io_no_sparse();
 			break;
@@ -401,6 +492,11 @@ parse_real(args_info *args, int argc, char **argv)
 			opt_auto_adjust = false;
 			break;
 
+		case OPT_FLUSH_TIMEOUT:
+			opt_flush_timeout = str_to_uint64("flush-timeout",
+					optarg, 0, UINT64_MAX);
+			break;
+
 		default:
 			message_try_help();
 			tuklib_exit(E_ERROR, E_ERROR, false);
@@ -576,3 +672,13 @@ args_parse(args_info *args, int argc, char **argv)
 
 	return;
 }
+
+
+#ifndef NDEBUG
+extern void
+args_free(void)
+{
+	free(opt_block_list);
+	return;
+}
+#endif
diff --git a/src/xz/args.h b/src/xz/args.h
index b23f4ef12c92..1defad12e0dd 100644
--- a/src/xz/args.h
+++ b/src/xz/args.h
@@ -36,7 +36,9 @@ extern bool opt_force;
 extern bool opt_keep_original;
 // extern bool opt_recursive;
 extern bool opt_robot;
+extern bool opt_ignore_check;
 
 extern const char stdin_filename[];
 
 extern void args_parse(args_info *args, int argc, char **argv);
+extern void args_free(void);
diff --git a/src/xz/coder.c b/src/xz/coder.c
index 017e04127e95..a94bdb83266f 100644
--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@@ -24,6 +24,9 @@ enum coder_init_ret {
 enum operation_mode opt_mode = MODE_COMPRESS;
 enum format_type opt_format = FORMAT_AUTO;
 bool opt_auto_adjust = true;
+bool opt_single_stream = false;
+uint64_t opt_block_size = 0;
+uint64_t *opt_block_list = NULL;
 
 
 /// Stream used to communicate with liblzma
@@ -48,6 +51,14 @@ static lzma_check check;
 /// This becomes false if the --check=CHECK option is used.
 static bool check_default = true;
 
+#ifdef MYTHREAD_ENABLED
+static lzma_mt mt_options = {
+	.flags = 0,
+	.timeout = 300,
+	.filters = filters,
+};
+#endif
+
 
 extern void
 coder_set_check(lzma_check new_check)
@@ -125,6 +136,15 @@ memlimit_too_small(uint64_t memory_usage)
 extern void
 coder_set_compression_settings(void)
 {
+	// The default check type is CRC64, but fallback to CRC32
+	// if CRC64 isn't supported by the copy of liblzma we are
+	// using. CRC32 is always supported.
+	if (check_default) {
+		check = LZMA_CHECK_CRC64;
+		if (!lzma_check_is_supported(check))
+			check = LZMA_CHECK_CRC32;
+	}
+
 	// Options for LZMA1 or LZMA2 in case we are using a preset.
 	static lzma_options_lzma opt_lzma;
 
@@ -175,15 +195,53 @@ coder_set_compression_settings(void)
 	// Print the selected filter chain.
 	message_filters_show(V_DEBUG, filters);
 
-	// If using --format=raw, we can be decoding. The memusage function
-	// also validates the filter chain and the options used for the
-	// filters.
+	// The --flush-timeout option requires LZMA_SYNC_FLUSH support
+	// from the filter chain. Currently threaded encoder doesn't support
+	// LZMA_SYNC_FLUSH so single-threaded mode must be used.
+	if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
+		for (size_t i = 0; i < filters_count; ++i) {
+			switch (filters[i].id) {
+			case LZMA_FILTER_LZMA2:
+			case LZMA_FILTER_DELTA:
+				break;
+
+			default:
+				message_fatal(_("The filter chain is "
+					"incompatible with --flush-timeout"));
+			}
+		}
+
+		if (hardware_threads_get() > 1) {
+			message(V_WARNING, _("Switching to single-threaded "
+					"mode due to --flush-timeout"));
+			hardware_threads_set(1);
+		}
+	}
+
+	// Get the memory usage. Note that if --format=raw was used,
+	// we can be decompressing.
 	const uint64_t memory_limit = hardware_memlimit_get(opt_mode);
 	uint64_t memory_usage;
-	if (opt_mode == MODE_COMPRESS)
-		memory_usage = lzma_raw_encoder_memusage(filters);
-	else
+	if (opt_mode == MODE_COMPRESS) {
+#ifdef MYTHREAD_ENABLED
+		if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) {
+			mt_options.threads = hardware_threads_get();
+			mt_options.block_size = opt_block_size;
+			mt_options.check = check;
+			memory_usage = lzma_stream_encoder_mt_memusage(
+					&mt_options);
+			if (memory_usage != UINT64_MAX)
+				message(V_DEBUG, _("Using up to %" PRIu32
+						" threads."),
+						mt_options.threads);
+		} else
+#endif
+		{
+			memory_usage = lzma_raw_encoder_memusage(filters);
+		}
+	} else {
 		memory_usage = lzma_raw_decoder_memusage(filters);
+	}
 
 	if (memory_usage == UINT64_MAX)
 		message_fatal(_("Unsupported filter chain or filter options"));
@@ -199,90 +257,99 @@ coder_set_compression_settings(void)
 						round_up_to_mib(decmem), 0));
 	}
 
-	if (memory_usage > memory_limit) {
-		// If --no-adjust was used or we didn't find LZMA1 or
-		// LZMA2 as the last filter, give an error immediately.
-		// --format=raw implies --no-adjust.
-		if (!opt_auto_adjust || opt_format == FORMAT_RAW)
-			memlimit_too_small(memory_usage);
-
-		assert(opt_mode == MODE_COMPRESS);
-
-		// Look for the last filter if it is LZMA2 or LZMA1, so
-		// we can make it use less RAM. With other filters we don't
-		// know what to do.
-		size_t i = 0;
-		while (filters[i].id != LZMA_FILTER_LZMA2
-				&& filters[i].id != LZMA_FILTER_LZMA1) {
-			if (filters[i].id == LZMA_VLI_UNKNOWN)
-				memlimit_too_small(memory_usage);
-
-			++i;
-		}
+	if (memory_usage <= memory_limit)
+		return;
 
-		// Decrease the dictionary size until we meet the memory
-		// usage limit. First round down to full mebibytes.
-		lzma_options_lzma *opt = filters[i].options;
-		const uint32_t orig_dict_size = opt->dict_size;
-		opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
-		while (true) {
-			// If it is below 1 MiB, auto-adjusting failed. We
-			// could be more sophisticated and scale it down even
-			// more, but let's see if many complain about this
-			// version.
-			//
-			// FIXME: Displays the scaled memory usage instead
-			// of the original.
-			if (opt->dict_size < (UINT32_C(1) << 20))
+	// If --no-adjust was used or we didn't find LZMA1 or
+	// LZMA2 as the last filter, give an error immediately.
+	// --format=raw implies --no-adjust.
+	if (!opt_auto_adjust || opt_format == FORMAT_RAW)
+		memlimit_too_small(memory_usage);
+
+	assert(opt_mode == MODE_COMPRESS);
+
+#ifdef MYTHREAD_ENABLED
+	if (opt_format == FORMAT_XZ && mt_options.threads > 1) {
+		// Try to reduce the number of threads before
+		// adjusting the compression settings down.
+		do {
+			// FIXME? The real single-threaded mode has
+			// lower memory usage, but it's not comparable
+			// because it doesn't write the size info
+			// into Block Headers.
+			if (--mt_options.threads == 0)
 				memlimit_too_small(memory_usage);
 
-			memory_usage = lzma_raw_encoder_memusage(filters);
+			memory_usage = lzma_stream_encoder_mt_memusage(
+					&mt_options);
 			if (memory_usage == UINT64_MAX)
 				message_bug();
 
-			// Accept it if it is low enough.
-			if (memory_usage <= memory_limit)
-				break;
+		} while (memory_usage > memory_limit);
 
-			// Otherwise 1 MiB down and try again. I hope this
-			// isn't too slow method for cases where the original
-			// dict_size is very big.
-			opt->dict_size -= UINT32_C(1) << 20;
-		}
+		message(V_WARNING, _("Adjusted the number of threads "
+			"from %s to %s to not exceed "
+			"the memory usage limit of %s MiB"),
+			uint64_to_str(hardware_threads_get(), 0),
+			uint64_to_str(mt_options.threads, 1),
+			uint64_to_str(round_up_to_mib(
+				memory_limit), 2));
+	}
+#endif
+
+	if (memory_usage <= memory_limit)
+		return;
 
-		// Tell the user that we decreased the dictionary size.
-		message(V_WARNING, _("Adjusted LZMA%c dictionary size "
-				"from %s MiB to %s MiB to not exceed "
-				"the memory usage limit of %s MiB"),
-				filters[i].id == LZMA_FILTER_LZMA2
-					? '2' : '1',
-				uint64_to_str(orig_dict_size >> 20, 0),
-				uint64_to_str(opt->dict_size >> 20, 1),
-				uint64_to_str(round_up_to_mib(
-					memory_limit), 2));
+	// Look for the last filter if it is LZMA2 or LZMA1, so we can make
+	// it use less RAM. With other filters we don't know what to do.
+	size_t i = 0;
+	while (filters[i].id != LZMA_FILTER_LZMA2
+			&& filters[i].id != LZMA_FILTER_LZMA1) {
+		if (filters[i].id == LZMA_VLI_UNKNOWN)
+			memlimit_too_small(memory_usage);
+
+		++i;
 	}
 
-/*
-	// Limit the number of worker threads so that memory usage
-	// limit isn't exceeded.
-	assert(memory_usage > 0);
-	size_t thread_limit = memory_limit / memory_usage;
-	if (thread_limit == 0)
-		thread_limit = 1;
+	// Decrease the dictionary size until we meet the memory
+	// usage limit. First round down to full mebibytes.
+	lzma_options_lzma *opt = filters[i].options;
+	const uint32_t orig_dict_size = opt->dict_size;
+	opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
+	while (true) {
+		// If it is below 1 MiB, auto-adjusting failed. We could be
+		// more sophisticated and scale it down even more, but let's
+		// see if many complain about this version.
+		//
+		// FIXME: Displays the scaled memory usage instead
+		// of the original.
+		if (opt->dict_size < (UINT32_C(1) << 20))
+			memlimit_too_small(memory_usage);
 
-	if (opt_threads > thread_limit)
-		opt_threads = thread_limit;
-*/
+		memory_usage = lzma_raw_encoder_memusage(filters);
+		if (memory_usage == UINT64_MAX)
+			message_bug();
 
-	if (check_default) {
-		// The default check type is CRC64, but fallback to CRC32
-		// if CRC64 isn't supported by the copy of liblzma we are
-		// using. CRC32 is always supported.
-		check = LZMA_CHECK_CRC64;
-		if (!lzma_check_is_supported(check))
-			check = LZMA_CHECK_CRC32;
+		// Accept it if it is low enough.
+		if (memory_usage <= memory_limit)
+			break;
+
+		// Otherwise 1 MiB down and try again. I hope this
+		// isn't too slow method for cases where the original
+		// dict_size is very big.
+		opt->dict_size -= UINT32_C(1) << 20;
 	}
 
+	// Tell the user that we decreased the dictionary size.
+	message(V_WARNING, _("Adjusted LZMA%c dictionary size "
+			"from %s MiB to %s MiB to not exceed "
+			"the memory usage limit of %s MiB"),
+			filters[i].id == LZMA_FILTER_LZMA2
+				? '2' : '1',
+			uint64_to_str(orig_dict_size >> 20, 0),
+			uint64_to_str(opt->dict_size >> 20, 1),
+			uint64_to_str(round_up_to_mib(memory_limit), 2));
+
 	return;
 }
 
@@ -364,7 +431,14 @@ coder_init(file_pair *pair)
 			break;
 
 		case FORMAT_XZ:
-			ret = lzma_stream_encoder(&strm, filters, check);
+#ifdef MYTHREAD_ENABLED
+			if (hardware_threads_get() > 1)
+				ret = lzma_stream_encoder_mt(
+						&strm, &mt_options);
+			else
+#endif
+				ret = lzma_stream_encoder(
+						&strm, filters, check);
 			break;
 
 		case FORMAT_LZMA:
@@ -376,8 +450,17 @@ coder_init(file_pair *pair)
 			break;
 		}
 	} else {
-		const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK
-				| LZMA_CONCATENATED;
+		uint32_t flags = 0;
+
+		// It seems silly to warn about unsupported check if the
+		// check won't be verified anyway due to --ignore-check.
+		if (opt_ignore_check)
+			flags |= LZMA_IGNORE_CHECK;
+		else
+			flags |= LZMA_TELL_UNSUPPORTED_CHECK;
+
+		if (!opt_single_stream)
+			flags |= LZMA_CONCATENATED;
 
 		// We abuse FORMAT_AUTO to indicate unknown file format,
 		// for which we may consider passthru mode.
@@ -408,7 +491,7 @@ coder_init(file_pair *pair)
 
 		switch (init_format) {
 		case FORMAT_AUTO:
-			// Uknown file format. If --decompress --stdout
+			// Unknown file format. If --decompress --stdout
 			// --force have been given, then we copy the input
 			// as is to stdout. Checking for MODE_DECOMPRESS
 			// is needed, because we don't want to do use
@@ -462,6 +545,56 @@ coder_init(file_pair *pair)
 }
 
 
+/// Resolve conflicts between opt_block_size and opt_block_list in single
+/// threaded mode. We want to default to opt_block_list, except when it is
+/// larger than opt_block_size. If this is the case for the current Block
+/// at *list_pos, then we break into smaller Blocks. Otherwise advance
+/// to the next Block in opt_block_list, and break apart if needed.
+static void
+split_block(uint64_t *block_remaining,
+	    uint64_t *next_block_remaining,
+	    size_t *list_pos)
+{
+	if (*next_block_remaining > 0) {
+		// The Block at *list_pos has previously been split up.
+		assert(hardware_threads_get() == 1);
+		assert(opt_block_size > 0);
+		assert(opt_block_list != NULL);
+
+		if (*next_block_remaining > opt_block_size) {
+			// We have to split the current Block at *list_pos
+			// into another opt_block_size length Block.
+			*block_remaining = opt_block_size;
+		} else {
+			// This is the last remaining split Block for the
+			// Block at *list_pos.
+			*block_remaining = *next_block_remaining;
+		}
+
+		*next_block_remaining -= *block_remaining;
+
+	} else {
+		// The Block at *list_pos has been finished. Go to the next
+		// entry in the list. If the end of the list has been reached,
+		// reuse the size of the last Block.
+		if (opt_block_list[*list_pos + 1] != 0)
+			++*list_pos;
+
+		*block_remaining = opt_block_list[*list_pos];
+
+		// If in single-threaded mode, split up the Block if needed.
+		// This is not needed in multi-threaded mode because liblzma
+		// will do this due to how threaded encoding works.
+		if (hardware_threads_get() == 1 && opt_block_size > 0
+				&& *block_remaining > opt_block_size) {
+			*next_block_remaining
+					= *block_remaining - opt_block_size;
+			*block_remaining = opt_block_size;
+		}
+	}
+}
+
+
 /// Compress or decompress using liblzma.
 static bool
 coder_normal(file_pair *pair)
@@ -469,8 +602,8 @@ coder_normal(file_pair *pair)
 	// Encoder needs to know when we have given all the input to it.
 	// The decoders need to know it too when we are using
 	// LZMA_CONCATENATED. We need to check for src_eof here, because
-	// the first input chunk has been already read, and that may
-	// have been the only chunk we will read.
+	// the first input chunk has been already read if decompressing,
+	// and that may have been the only chunk we will read.
 	lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;
 
 	lzma_ret ret;
@@ -478,22 +611,77 @@ coder_normal(file_pair *pair)
 	// Assume that something goes wrong.
 	bool success = false;
 
+	// block_remaining indicates how many input bytes to encode before
+	// finishing the current .xz Block. The Block size is set with
+	// --block-size=SIZE and --block-list. They have an effect only when
+	// compressing to the .xz format. If block_remaining == UINT64_MAX,
+	// only a single block is created.
+	uint64_t block_remaining = UINT64_MAX;
+
+	// next_block_remining for when we are in single-threaded mode and
+	// the Block in --block-list is larger than the --block-size=SIZE.
+	uint64_t next_block_remaining = 0;
+
+	// Position in opt_block_list. Unused if --block-list wasn't used.
+	size_t list_pos = 0;
+
+	// Handle --block-size for single-threaded mode and the first step
+	// of --block-list.
+	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
+		// --block-size doesn't do anything here in threaded mode,
+		// because the threaded encoder will take care of splitting
+		// to fixed-sized Blocks.
+		if (hardware_threads_get() == 1 && opt_block_size > 0)
+			block_remaining = opt_block_size;
+
+		// If --block-list was used, start with the first size.
+		//
+		// For threaded case, --block-size specifies how big Blocks
+		// the encoder needs to be prepared to create at maximum
+		// and --block-list will simultaneously cause new Blocks
+		// to be started at specified intervals. To keep things
+		// logical, the same is done in single-threaded mode. The
+		// output is still not identical because in single-threaded
+		// mode the size info isn't written into Block Headers.
+		if (opt_block_list != NULL) {
+			if (block_remaining < opt_block_list[list_pos]) {
+				assert(hardware_threads_get() == 1);
+				next_block_remaining = opt_block_list[list_pos]
+						- block_remaining;
+			} else {
+				block_remaining = opt_block_list[list_pos];
+			}
+		}
+	}
+
 	strm.next_out = out_buf.u8;
 	strm.avail_out = IO_BUFFER_SIZE;
 
 	while (!user_abort) {
-		// Fill the input buffer if it is empty and we haven't reached
-		// end of file yet.
-		if (strm.avail_in == 0 && !pair->src_eof) {
+		// Fill the input buffer if it is empty and we aren't
+		// flushing or finishing.
+		if (strm.avail_in == 0 && action == LZMA_RUN) {
 			strm.next_in = in_buf.u8;
-			strm.avail_in = io_read(
-					pair, &in_buf, IO_BUFFER_SIZE);
+			strm.avail_in = io_read(pair, &in_buf,
+					my_min(block_remaining,
+						IO_BUFFER_SIZE));
 
 			if (strm.avail_in == SIZE_MAX)
 				break;
 
-			if (pair->src_eof)
+			if (pair->src_eof) {
 				action = LZMA_FINISH;
+
+			} else if (block_remaining != UINT64_MAX) {
+				// Start a new Block after every
+				// opt_block_size bytes of input.
+				block_remaining -= strm.avail_in;
+				if (block_remaining == 0)
+					action = LZMA_FULL_BARRIER;
+			}
+
+			if (action == LZMA_RUN && flush_needed)
+				action = LZMA_SYNC_FLUSH;
 		}
 
 		// Let liblzma do the actual work.
@@ -509,7 +697,39 @@ coder_normal(file_pair *pair)
 			strm.avail_out = IO_BUFFER_SIZE;
 		}
 
-		if (ret != LZMA_OK) {
+		if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
+				|| action == LZMA_FULL_BARRIER)) {
+			if (action == LZMA_SYNC_FLUSH) {
+				// Flushing completed. Write the pending data
+				// out immediatelly so that the reading side
+				// can decompress everything compressed so far.
+				if (io_write(pair, &out_buf, IO_BUFFER_SIZE
+						- strm.avail_out))
+					break;
+
+				strm.next_out = out_buf.u8;
+				strm.avail_out = IO_BUFFER_SIZE;
+
+				// Set the time of the most recent flushing.
+				mytime_set_flush_time();
+			} else {
+				// Start a new Block after LZMA_FULL_BARRIER.
+				if (opt_block_list == NULL) {
+					assert(hardware_threads_get() == 1);
+					assert(opt_block_size > 0);
+					block_remaining = opt_block_size;
+				} else {
+					split_block(&block_remaining,
+							&next_block_remaining,
+							&list_pos);
+				}
+			}
+
+			// Start a new Block after LZMA_FULL_FLUSH or continue
+			// the same block after LZMA_SYNC_FLUSH.
+			action = LZMA_RUN;
+
+		} else if (ret != LZMA_OK) {
 			// Determine if the return value indicates that we
 			// won't continue coding.
 			const bool stop = ret != LZMA_NO_CHECK
@@ -528,6 +748,12 @@ coder_normal(file_pair *pair)
 			}
 
 			if (ret == LZMA_STREAM_END) {
+				if (opt_single_stream) {
+					io_fix_src_pos(pair, strm.avail_in);
+					success = true;
+					break;
+				}
+
 				// Check that there is no trailing garbage.
 				// This is needed for LZMA_Alone and raw
 				// streams.
@@ -630,10 +856,15 @@ coder_run(const char *filename)
 	// Assume that something goes wrong.
 	bool success = false;
 
-	// Read the first chunk of input data. This is needed to detect
-	// the input file type (for now, only for decompression).
-	strm.next_in = in_buf.u8;
-	strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
+	if (opt_mode == MODE_COMPRESS) {
+		strm.next_in = NULL;
+		strm.avail_in = 0;
+	} else {
+		// Read the first chunk of input data. This is needed
+		// to detect the input file type.
+		strm.next_in = in_buf.u8;
+		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
+	}
 
 	if (strm.avail_in != SIZE_MAX) {
 		// Initialize the coder. This will detect the file format
@@ -648,6 +879,11 @@ coder_run(const char *filename)
 			// Don't open the destination file when --test
 			// is used.
 			if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
+				// Remember the current time. It is needed
+				// for progress indicator and for timed
+				// flushing.
+				mytime_set_start_time();
+
 				// Initialize the progress indicator.
 				const uint64_t in_size
 						= pair->src_st.st_size <= 0
@@ -671,3 +907,13 @@ coder_run(const char *filename)
 
 	return;
 }
+
+
+#ifndef NDEBUG
+extern void
+coder_free(void)
+{
+	lzma_end(&strm);
+	return;
+}
+#endif
diff --git a/src/xz/coder.h b/src/xz/coder.h
index 2d3add972745..583da8f68d50 100644
--- a/src/xz/coder.h
+++ b/src/xz/coder.h
@@ -41,6 +41,16 @@ extern enum format_type opt_format;
 /// they exceed the memory usage limit.
 extern bool opt_auto_adjust;
 
+/// If true, stop after decoding the first stream.
+extern bool opt_single_stream;
+
+/// If non-zero, start a new .xz Block after every opt_block_size bytes
+/// of input. This has an effect only when compressing to the .xz format.
+extern uint64_t opt_block_size;
+
+/// This is non-NULL if --block-list was used. This contains the Block sizes
+/// as an array that is terminated with 0.
+extern uint64_t *opt_block_list;
 
 /// Set the integrity check type used when compressing
 extern void coder_set_check(lzma_check check);
@@ -59,3 +69,8 @@ extern void coder_set_compression_settings(void);
 
 /// Compress or decompress the given file
 extern void coder_run(const char *filename);
+
+#ifndef NDEBUG
+/// Free the memory allocated for the coder and kill the worker threads.
+extern void coder_free(void);
+#endif
diff --git a/src/xz/file_io.c b/src/xz/file_io.c
index 871a099b6193..f135cf7cb6bd 100644
--- a/src/xz/file_io.c
+++ b/src/xz/file_io.c
@@ -17,6 +17,7 @@
 #ifdef TUKLIB_DOSLIKE
 #	include <io.h>
 #else
+#	include <poll.h>
 static bool warn_fchown;
 #endif
 
@@ -37,14 +38,30 @@ static bool warn_fchown;
 #endif
 
 
+typedef enum {
+	IO_WAIT_MORE,    // Reading or writing is possible.
+	IO_WAIT_ERROR,   // Error or user_abort
+	IO_WAIT_TIMEOUT, // poll() timed out
+} io_wait_ret;
+
+
 /// If true, try to create sparse files when decompressing.
 static bool try_sparse = true;
 
 #ifndef TUKLIB_DOSLIKE
+/// File status flags of standard input. This is used by io_open_src()
+/// and io_close_src().
+static int stdin_flags;
+static bool restore_stdin_flags = false;
+
 /// Original file status flags of standard output. This is used by
 /// io_open_dest() and io_close_dest() to save and restore the flags.
 static int stdout_flags;
 static bool restore_stdout_flags = false;
+
+/// Self-pipe used together with the user_abort variable to avoid
+/// race conditions with signal handling.
+static int user_abort_pipe[2];
 #endif
 
 
@@ -64,19 +81,43 @@ io_init(void)
 	// If fchown() fails setting the owner, we warn about it only if
 	// we are root.
 	warn_fchown = geteuid() == 0;
+
+	if (pipe(user_abort_pipe)
+			|| fcntl(user_abort_pipe[0], F_SETFL, O_NONBLOCK)
+				== -1
+			|| fcntl(user_abort_pipe[1], F_SETFL, O_NONBLOCK)
+				== -1)
+		message_fatal(_("Error creating a pipe: %s"),
+				strerror(errno));
 #endif
 
 #ifdef __DJGPP__
 	// Avoid doing useless things when statting files.
 	// This isn't important but doesn't hurt.
-	_djstat_flags = _STAT_INODE | _STAT_EXEC_EXT
-			| _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
+	_djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
 #endif
 
 	return;
 }
 
 
+#ifndef TUKLIB_DOSLIKE
+extern void
+io_write_to_user_abort_pipe(void)
+{
+	// If the write() fails, it's probably due to the pipe being full.
+	// Failing in that case is fine. If the reason is something else,
+	// there's not much we can do since this is called in a signal
+	// handler. So ignore the errors and try to avoid warnings with
+	// GCC and glibc when _FORTIFY_SOURCE=2 is used.
+	uint8_t b = '\0';
+	const int ret = write(user_abort_pipe[1], &b, 1);
+	(void)ret;
+	return;
+}
+#endif
+
+
 extern void
 io_no_sparse(void)
 {
@@ -85,6 +126,63 @@ io_no_sparse(void)
 }
 
 
+#ifndef TUKLIB_DOSLIKE
+/// \brief      Waits for input or output to become available or for a signal
+///
+/// This uses the self-pipe trick to avoid a race condition that can occur
+/// if a signal is caught after user_abort has been checked but before e.g.
+/// read() has been called. In that situation read() could block unless
+/// non-blocking I/O is used. With non-blocking I/O something like select()
+/// or poll() is needed to avoid a busy-wait loop, and the same race condition
+/// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
+/// POSIX) but neither is portable enough in 2013. The self-pipe trick is
+/// old and very portable.
+static io_wait_ret
+io_wait(file_pair *pair, int timeout, bool is_reading)
+{
+	struct pollfd pfd[2];
+
+	if (is_reading) {
+		pfd[0].fd = pair->src_fd;
+		pfd[0].events = POLLIN;
+	} else {
+		pfd[0].fd = pair->dest_fd;
+		pfd[0].events = POLLOUT;
+	}
+
+	pfd[1].fd = user_abort_pipe[0];
+	pfd[1].events = POLLIN;
+
+	while (true) {
+		const int ret = poll(pfd, 2, timeout);
+
+		if (user_abort)
+			return IO_WAIT_ERROR;
+
+		if (ret == -1) {
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+
+			message_error(_("%s: poll() failed: %s"),
+					is_reading ? pair->src_name
+						: pair->dest_name,
+					strerror(errno));
+			return IO_WAIT_ERROR;
+		}
+
+		if (ret == 0) {
+			assert(opt_flush_timeout != 0);
+			flush_needed = true;
+			return IO_WAIT_TIMEOUT;
+		}
+
+		if (pfd[0].revents != 0)
+			return IO_WAIT_MORE;
+	}
+}
+#endif
+
+
 /// \brief      Unlink a file
 ///
 /// This tries to verify that the file being unlinked really is the file that
@@ -294,6 +392,31 @@ io_open_src_real(file_pair *pair)
 		pair->src_fd = STDIN_FILENO;
 #ifdef TUKLIB_DOSLIKE
 		setmode(STDIN_FILENO, O_BINARY);
+#else
+		// Enable O_NONBLOCK for stdin.
+		stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
+		if (stdin_flags == -1) {
+			message_error(_("Error getting the file status flags "
+					"from standard input: %s"),
+					strerror(errno));
+			return true;
+		}
+
+		if ((stdin_flags & O_NONBLOCK) == 0) {
+			if (fcntl(STDIN_FILENO, F_SETFL,
+					stdin_flags | O_NONBLOCK) == -1) {
+				message_error(_("Error setting O_NONBLOCK "
+						"on standard input: %s"),
+						strerror(errno));
+				return true;
+			}
+
+			restore_stdin_flags = true;
+		}
+#endif
+#ifdef HAVE_POSIX_FADVISE
+		// It will fail if stdin is a pipe and that's fine.
+		(void)posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL);
 #endif
 		return false;
 	}
@@ -311,13 +434,12 @@ io_open_src_real(file_pair *pair)
 	int flags = O_RDONLY | O_BINARY | O_NOCTTY;
 
 #ifndef TUKLIB_DOSLIKE
-	// If we accept only regular files, we need to be careful to avoid
-	// problems with special files like devices and FIFOs. O_NONBLOCK
-	// prevents blocking when opening such files. When we want to accept
-	// special files, we must not use O_NONBLOCK, or otherwise we won't
-	// block waiting e.g. FIFOs to become readable.
-	if (reg_files_only)
-		flags |= O_NONBLOCK;
+	// Use non-blocking I/O:
+	//   - It prevents blocking when opening FIFOs and some other
+	//     special files, which is good if we want to accept only
+	//     regular files.
+	//   - It can help avoiding some race conditions with signal handling.
+	flags |= O_NONBLOCK;
 #endif
 
 #if defined(O_NOFOLLOW)
@@ -345,30 +467,13 @@ io_open_src_real(file_pair *pair)
 	(void)follow_symlinks;
 #endif
 
-	// Try to open the file. If we are accepting non-regular files,
-	// unblock the caught signals so that open() can be interrupted
-	// if it blocks e.g. due to a FIFO file.
-	if (!reg_files_only)
-		signals_unblock();
-
-	// Maybe this wouldn't need a loop, since all the signal handlers for
-	// which we don't use SA_RESTART set user_abort to true. But it
-	// doesn't hurt to have it just in case.
-	do {
-		pair->src_fd = open(pair->src_name, flags);
-	} while (pair->src_fd == -1 && errno == EINTR && !user_abort);
-
-	if (!reg_files_only)
-		signals_block();
+	// Try to open the file. Signals have been blocked so EINTR shouldn't
+	// be possible.
+	pair->src_fd = open(pair->src_name, flags);
 
 	if (pair->src_fd == -1) {
-		// If we were interrupted, don't display any error message.
-		if (errno == EINTR) {
-			// All the signals that don't have SA_RESTART
-			// set user_abort.
-			assert(user_abort);
-			return true;
-		}
+		// Signals (that have a signal handler) have been blocked.
+		assert(errno != EINTR);
 
 #ifdef O_NOFOLLOW
 		// Give an understandable error message if the reason
@@ -427,26 +532,20 @@ io_open_src_real(file_pair *pair)
 		return true;
 	}
 
-#ifndef TUKLIB_DOSLIKE
-	// Drop O_NONBLOCK, which is used only when we are accepting only
-	// regular files. After the open() call, we want things to block
-	// instead of giving EAGAIN.
-	if (reg_files_only) {
-		flags = fcntl(pair->src_fd, F_GETFL);
-		if (flags == -1)
-			goto error_msg;
-
-		flags &= ~O_NONBLOCK;
-
-		if (fcntl(pair->src_fd, F_SETFL, flags) == -1)
-			goto error_msg;
-	}
-#endif
-
 	// Stat the source file. We need the result also when we copy
 	// the permissions, and when unlinking.
+	//
+	// NOTE: Use stat() instead of fstat() with DJGPP, because
+	// then we have a better chance to get st_ino value that can
+	// be used in io_open_dest_real() to prevent overwriting the
+	// source file.
+#ifdef __DJGPP__
+	if (stat(pair->src_name, &pair->src_st))
+		goto error_msg;
+#else
 	if (fstat(pair->src_fd, &pair->src_st))
 		goto error_msg;
+#endif
 
 	if (S_ISDIR(pair->src_st.st_mode)) {
 		message_warning(_("%s: Is a directory, skipping"),
@@ -492,6 +591,23 @@ io_open_src_real(file_pair *pair)
 			goto error;
 		}
 	}
+
+	// If it is something else than a regular file, wait until
+	// there is input available. This way reading from FIFOs
+	// will work when open() is used with O_NONBLOCK.
+	if (!S_ISREG(pair->src_st.st_mode)) {
+		signals_unblock();
+		const io_wait_ret ret = io_wait(pair, -1, true);
+		signals_block();
+
+		if (ret != IO_WAIT_MORE)
+			goto error;
+	}
+#endif
+
+#ifdef HAVE_POSIX_FADVISE
+	// It will fail with some special files like FIFOs but that is fine.
+	(void)posix_fadvise(pair->src_fd, 0, 0, POSIX_FADV_SEQUENTIAL);
 #endif
 
 	return false;
@@ -542,6 +658,19 @@ io_open_src(const char *src_name)
 static void
 io_close_src(file_pair *pair, bool success)
 {
+#ifndef TUKLIB_DOSLIKE
+	if (restore_stdin_flags) {
+		assert(pair->src_fd == STDIN_FILENO);
+
+		restore_stdin_flags = false;
+
+		if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
+			message_error(_("Error restoring the status flags "
+					"to standard input: %s"),
+					strerror(errno));
+	}
+#endif
+
 	if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
 #ifdef TUKLIB_DOSLIKE
 		(void)close(pair->src_fd);
@@ -575,12 +704,58 @@ io_open_dest_real(file_pair *pair)
 		pair->dest_fd = STDOUT_FILENO;
 #ifdef TUKLIB_DOSLIKE
 		setmode(STDOUT_FILENO, O_BINARY);
+#else
+		// Set O_NONBLOCK if it isn't already set.
+		//
+		// NOTE: O_APPEND may be unset later in this function
+		// and it relies on stdout_flags being set here.
+		stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
+		if (stdout_flags == -1) {
+			message_error(_("Error getting the file status flags "
+					"from standard output: %s"),
+					strerror(errno));
+			return true;
+		}
+
+		if ((stdout_flags & O_NONBLOCK) == 0) {
+			if (fcntl(STDOUT_FILENO, F_SETFL,
+					stdout_flags | O_NONBLOCK) == -1) {
+				message_error(_("Error setting O_NONBLOCK "
+						"on standard output: %s"),
+						strerror(errno));
+				return true;
+			}
+
+			restore_stdout_flags = true;
+		}
 #endif
 	} else {
 		pair->dest_name = suffix_get_dest_name(pair->src_name);
 		if (pair->dest_name == NULL)
 			return true;
 
+#ifdef __DJGPP__
+		struct stat st;
+		if (stat(pair->dest_name, &st) == 0) {
+			// Check that it isn't a special file like "prn".
+			if (st.st_dev == -1) {
+				message_error("%s: Refusing to write to "
+						"a DOS special file",
+						pair->dest_name);
+				return true;
+			}
+
+			// Check that we aren't overwriting the source file.
+			if (st.st_dev == pair->src_st.st_dev
+					&& st.st_ino == pair->src_st.st_ino) {
+				message_error("%s: Output file is the same "
+						"as the input file",
+						pair->dest_name);
+				return true;
+			}
+		}
+#endif
+
 		// If --force was used, unlink the target file first.
 		if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
 			message_error(_("%s: Cannot remove: %s"),
@@ -590,8 +765,11 @@ io_open_dest_real(file_pair *pair)
 		}
 
 		// Open the file.
-		const int flags = O_WRONLY | O_BINARY | O_NOCTTY
+		int flags = O_WRONLY | O_BINARY | O_NOCTTY
 				| O_CREAT | O_EXCL;
+#ifndef TUKLIB_DOSLIKE
+		flags |= O_NONBLOCK;
+#endif
 		const mode_t mode = S_IRUSR | S_IWUSR;
 		pair->dest_fd = open(pair->dest_name, flags, mode);
 
@@ -603,17 +781,19 @@ io_open_dest_real(file_pair *pair)
 		}
 	}
 
-	// If this really fails... well, we have a safe fallback.
+#ifndef TUKLIB_DOSLIKE
+	// dest_st isn't used on DOS-like systems except as a dummy
+	// argument to io_unlink(), so don't fstat() on such systems.
 	if (fstat(pair->dest_fd, &pair->dest_st)) {
-#if defined(__VMS)
+		// If fstat() really fails, we have a safe fallback here.
+#	if defined(__VMS)
 		pair->dest_st.st_ino[0] = 0;
 		pair->dest_st.st_ino[1] = 0;
 		pair->dest_st.st_ino[2] = 0;
-#elif !defined(TUKLIB_DOSLIKE)
+#	else
 		pair->dest_st.st_dev = 0;
 		pair->dest_st.st_ino = 0;
-#endif
-#ifndef TUKLIB_DOSLIKE
+#	endif
 	} else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
 		// When writing to standard output, we need to be extra
 		// careful:
@@ -631,10 +811,6 @@ io_open_dest_real(file_pair *pair)
 			if (!S_ISREG(pair->dest_st.st_mode))
 				return false;
 
-			stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
-			if (stdout_flags == -1)
-				return false;
-
 			if (stdout_flags & O_APPEND) {
 				// Creating a sparse file is not possible
 				// when O_APPEND is active (it's used by
@@ -653,14 +829,23 @@ io_open_dest_real(file_pair *pair)
 				if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
 					return false;
 
+				// O_NONBLOCK was set earlier in this function
+				// so it must be kept here too. If this
+				// fcntl() call fails, we continue but won't
+				// try to create sparse output. The original
+				// flags will still be restored if needed (to
+				// unset O_NONBLOCK) when the file is finished.
 				if (fcntl(STDOUT_FILENO, F_SETFL,
-						stdout_flags & ~O_APPEND)
-						== -1)
+						(stdout_flags | O_NONBLOCK)
+						& ~O_APPEND) == -1)
 					return false;
 
 				// Disabling O_APPEND succeeded. Mark
 				// that the flags should be restored
-				// in io_close_dest().
+				// in io_close_dest(). This quite likely was
+				// already set when enabling O_NONBLOCK but
+				// just in case O_NONBLOCK was already set,
+				// set this again here.
 				restore_stdout_flags = true;
 
 			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
@@ -673,8 +858,8 @@ io_open_dest_real(file_pair *pair)
 		}
 
 		pair->dest_try_sparse = true;
-#endif
 	}
+#endif
 
 	return false;
 }
@@ -790,6 +975,21 @@ io_close(file_pair *pair, bool success)
 }
 
 
+extern void
+io_fix_src_pos(file_pair *pair, size_t rewind_size)
+{
+	assert(rewind_size <= IO_BUFFER_SIZE);
+
+	if (rewind_size > 0) {
+		// This doesn't need to work on unseekable file descriptors,
+		// so just ignore possible errors.
+		(void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
+	}
+
+	return;
+}
+
+
 extern size_t
 io_read(file_pair *pair, io_buf *buf_union, size_t size)
 {
@@ -815,12 +1015,30 @@ io_read(file_pair *pair, io_buf *buf_union, size_t size)
 				continue;
 			}
 
+#ifndef TUKLIB_DOSLIKE
+			if (errno == EAGAIN || errno == EWOULDBLOCK) {
+				const io_wait_ret ret = io_wait(pair,
+						mytime_get_flush_timeout(),
+						true);
+				switch (ret) {
+				case IO_WAIT_MORE:
+					continue;
+
+				case IO_WAIT_ERROR:
+					return SIZE_MAX;
+
+				case IO_WAIT_TIMEOUT:
+					return size - left;
+
+				default:
+					message_bug();
+				}
+			}
+#endif
+
 			message_error(_("%s: Read error: %s"),
 					pair->src_name, strerror(errno));
 
-			// FIXME Is this needed?
-			pair->src_eof = true;
-
 			return SIZE_MAX;
 		}
 
@@ -885,6 +1103,15 @@ io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
 				continue;
 			}
 
+#ifndef TUKLIB_DOSLIKE
+			if (errno == EAGAIN || errno == EWOULDBLOCK) {
+				if (io_wait(pair, -1, false) == IO_WAIT_MORE)
+					continue;
+
+				return true;
+			}
+#endif
+
 			// Handle broken pipe specially. gzip and bzip2
 			// don't print anything on SIGPIPE. In addition,
 			// gzip --quiet uses exit status 2 (warning) on
diff --git a/src/xz/file_io.h b/src/xz/file_io.h
index 967da868b079..2de3379238d6 100644
--- a/src/xz/file_io.h
+++ b/src/xz/file_io.h
@@ -68,6 +68,14 @@ typedef struct {
 extern void io_init(void);
 
 
+#ifndef TUKLIB_DOSLIKE
+/// \brief      Write a byte to user_abort_pipe[1]
+///
+/// This is called from a signal handler.
+extern void io_write_to_user_abort_pipe(void);
+#endif
+
+
 /// \brief      Disable creation of sparse files when decompressing
 extern void io_no_sparse(void);
 
@@ -102,6 +110,19 @@ extern void io_close(file_pair *pair, bool success);
 extern size_t io_read(file_pair *pair, io_buf *buf, size_t size);
 
 
+/// \brief      Fix the position in src_fd
+///
+/// This is used when --single-thream has been specified and decompression
+/// is successful. If the input file descriptor supports seeking, this
+/// function fixes the input position to point to the next byte after the
+/// decompressed stream.
+///
+/// \param      pair        File pair having the source file open for reading
+/// \param      rewind_size How many bytes of extra have been read i.e.
+///                         how much to seek backwards.
+extern void io_fix_src_pos(file_pair *pair, size_t rewind_size);
+
+
 /// \brief      Read from source file from given offset to a buffer
 ///
 /// This is remotely similar to standard pread(). This uses lseek() though,
diff --git a/src/xz/hardware.c b/src/xz/hardware.c
index a4733c27e118..ff32f6d30148 100644
--- a/src/xz/hardware.c
+++ b/src/xz/hardware.c
@@ -11,12 +11,11 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #include "private.h"
-#include "tuklib_cpucores.h"
 
 
-/// Maximum number of free *coder* threads. This can be set with
+/// Maximum number of worker threads. This can be set with
 /// the --threads=NUM command line option.
-static uint32_t threadlimit;
+static uint32_t threads_max = 1;
 
 /// Memory usage limit for compression
 static uint64_t memlimit_compress;
@@ -29,15 +28,23 @@ static uint64_t total_ram;
 
 
 extern void
-hardware_threadlimit_set(uint32_t new_threadlimit)
+hardware_threads_set(uint32_t n)
 {
-	if (new_threadlimit == 0) {
-		// The default is the number of available CPU cores.
-		threadlimit = tuklib_cpucores();
-		if (threadlimit == 0)
-			threadlimit = 1;
+	if (n == 0) {
+		// Automatic number of threads was requested.
+		// If threading support was enabled at build time,
+		// use the number of available CPU cores. Otherwise
+		// use one thread since disabling threading support
+		// omits lzma_cputhreads() from liblzma.
+#ifdef MYTHREAD_ENABLED
+		threads_max = lzma_cputhreads();
+		if (threads_max == 0)
+			threads_max = 1;
+#else
+		threads_max = 1;
+#endif
 	} else {
-		threadlimit = new_threadlimit;
+		threads_max = n;
 	}
 
 	return;
@@ -45,9 +52,9 @@ hardware_threadlimit_set(uint32_t new_threadlimit)
 
 
 extern uint32_t
-hardware_threadlimit_get(void)
+hardware_threads_get(void)
 {
-	return threadlimit;
+	return threads_max;
 }
 
 
@@ -139,6 +146,5 @@ hardware_init(void)
 
 	// Set the defaults.
 	hardware_memlimit_set(0, true, true, false);
-	hardware_threadlimit_set(0);
 	return;
 }
diff --git a/src/xz/hardware.h b/src/xz/hardware.h
index ad526f260bc1..4fae61815656 100644
--- a/src/xz/hardware.h
+++ b/src/xz/hardware.h
@@ -15,12 +15,11 @@
 extern void hardware_init(void);
 
 
-/// Set custom value for maximum number of coder threads.
-extern void hardware_threadlimit_set(uint32_t threadlimit);
+/// Set the maximum number of worker threads.
+extern void hardware_threads_set(uint32_t threadlimit);
 
-/// Get the maximum number of coder threads. Some additional helper threads
-/// are allowed on top of this).
-extern uint32_t hardware_threadlimit_get(void);
+/// Get the maximum number of worker threads.
+extern uint32_t hardware_threads_get(void);
 
 
 /// Set the memory usage limit. There are separate limits for compression
diff --git a/src/xz/list.c b/src/xz/list.c
index 0e73d519ea4a..449c2bc4e02f 100644
--- a/src/xz/list.c
+++ b/src/xz/list.c
@@ -29,9 +29,12 @@ typedef struct {
 	/// Uncompressed Size fields
 	bool all_have_sizes;
 
+	/// Oldest XZ Utils version that will decompress the file
+	uint32_t min_version;
+
 } xz_file_info;
 
-#define XZ_FILE_INFO_INIT { NULL, 0, 0, true }
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
 
 
 /// Information about a .xz Block
@@ -104,8 +107,32 @@ static struct {
 	uint64_t stream_padding;
 	uint64_t memusage_max;
 	uint32_t checks;
+	uint32_t min_version;
 	bool all_have_sizes;
-} totals = { 0, 0, 0, 0, 0, 0, 0, 0, true };
+} totals = { 0, 0, 0, 0, 0, 0, 0, 0, 0, true };
+
+
+/// Convert XZ Utils version number to a string.
+static const char *
+xz_ver_to_str(uint32_t ver)
+{
+	static char buf[32];
+
+	unsigned int major = ver / 10000000U;
+	ver -= major * 10000000U;
+
+	unsigned int minor = ver / 10000U;
+	ver -= minor * 10000U;
+
+	unsigned int patch = ver / 10U;
+	ver -= patch * 10U;
+
+	const char *stability = ver == 0 ? "alpha" : ver == 1 ? "beta" : "";
+
+	snprintf(buf, sizeof(buf), "%u.%u.%u%s",
+			major, minor, patch, stability);
+	return buf;
+}
 
 
 /// \brief      Parse the Index(es) from the given .xz file
@@ -478,6 +505,21 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
 	if (xfi->memusage_max < bhi->memusage)
 		xfi->memusage_max = bhi->memusage;
 
+	// Determine the minimum XZ Utils version that supports this Block.
+	//
+	// Currently the only thing that 5.0.0 doesn't support is empty
+	// LZMA2 Block. This decoder bug was fixed in 5.0.2.
+	{
+		size_t i = 0;
+		while (filters[i + 1].id != LZMA_VLI_UNKNOWN)
+			++i;
+
+		if (filters[i].id == LZMA_FILTER_LZMA2
+				&& iter->block.uncompressed_size == 0
+				&& xfi->min_version < 50000022U)
+			xfi->min_version = 50000022U;
+	}
+
 	// Convert the filter chain to human readable form.
 	message_filters_to_str(bhi->filter_chain, filters, false);
 
@@ -856,6 +898,8 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
 				round_up_to_mib(xfi->memusage_max), 0));
 		printf(_("  Sizes in headers:   %s\n"),
 				xfi->all_have_sizes ? _("Yes") : _("No"));
+		printf(_("  Minimum XZ Utils version: %s\n"),
+				xz_ver_to_str(xfi->min_version));
 	}
 
 	return false;
@@ -938,9 +982,10 @@ print_info_robot(xz_file_info *xfi, file_pair *pair)
 	}
 
 	if (message_verbosity_get() >= V_DEBUG)
-		printf("summary\t%" PRIu64 "\t%s\n",
+		printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n",
 				xfi->memusage_max,
-				xfi->all_have_sizes ? "yes" : "no");
+				xfi->all_have_sizes ? "yes" : "no",
+				xfi->min_version);
 
 	return false;
 }
@@ -961,6 +1006,9 @@ update_totals(const xz_file_info *xfi)
 	if (totals.memusage_max < xfi->memusage_max)
 		totals.memusage_max = xfi->memusage_max;
 
+	if (totals.min_version < xfi->min_version)
+		totals.min_version = xfi->min_version;
+
 	totals.all_have_sizes &= xfi->all_have_sizes;
 
 	return;
@@ -1025,6 +1073,8 @@ print_totals_adv(void)
 				round_up_to_mib(totals.memusage_max), 0));
 		printf(_("  Sizes in headers:   %s\n"),
 				totals.all_have_sizes ? _("Yes") : _("No"));
+		printf(_("  Minimum XZ Utils version: %s\n"),
+				xz_ver_to_str(totals.min_version));
 	}
 
 	return;
@@ -1050,9 +1100,10 @@ print_totals_robot(void)
 			totals.files);
 
 	if (message_verbosity_get() >= V_DEBUG)
-		printf("\t%" PRIu64 "\t%s",
+		printf("\t%" PRIu64 "\t%s\t%" PRIu32,
 				totals.memusage_max,
-				totals.all_have_sizes ? "yes" : "no");
+				totals.all_have_sizes ? "yes" : "no",
+				totals.min_version);
 
 	putchar('\n');
 
diff --git a/src/xz/main.c b/src/xz/main.c
index 8196c6e7e774..a8f0683a47bd 100644
--- a/src/xz/main.c
+++ b/src/xz/main.c
@@ -275,6 +275,11 @@ main(int argc, char **argv)
 		list_totals();
 	}
 
+#ifndef NDEBUG
+	coder_free();
+	args_free();
+#endif
+
 	// If we have got a signal, raise it to kill the program instead
 	// of calling tuklib_exit().
 	signals_exit();
diff --git a/src/xz/message.c b/src/xz/message.c
index 0a7a522f7afa..8a31b00ed89c 100644
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -12,10 +12,6 @@
 
 #include "private.h"
 
-#ifdef HAVE_SYS_TIME_H
-#	include <sys/time.h>
-#endif
-
 #include <stdarg.h>
 
 
@@ -64,9 +60,6 @@ static lzma_stream *progress_strm;
 /// and estimate remaining time.
 static uint64_t expected_in_size;
 
-/// Time when we started processing the file
-static uint64_t start_time;
-
 
 // Use alarm() and SIGALRM when they are supported. This has two minor
 // advantages over the alternative of polling gettimeofday():
@@ -112,16 +105,6 @@ static uint64_t progress_next_update;
 #endif
 
 
-/// Get the current time as microseconds since epoch
-static uint64_t
-my_time(void)
-{
-	struct timeval tv;
-	gettimeofday(&tv, NULL);
-	return (uint64_t)(tv.tv_sec) * UINT64_C(1000000) + tv.tv_usec;
-}
-
-
 extern void
 message_init(void)
 {
@@ -264,11 +247,10 @@ message_progress_start(lzma_stream *strm, uint64_t in_size)
 	// It is needed to find out the position in the stream.
 	progress_strm = strm;
 
-	// Store the processing start time of the file and its expected size.
-	// If we aren't printing any statistics, then these are unused. But
-	// since it is possible that the user sends us a signal to show
-	// statistics, we need to have these available anyway.
-	start_time = my_time();
+	// Store the expected size of the file. If we aren't printing any
+	// statistics, then is will be unused. But since it is possible
+	// that the user sends us a signal to show statistics, we need
+	// to have it available anyway.
 	expected_in_size = in_size;
 
 	// Indicate that progress info may need to be printed before
@@ -290,7 +272,7 @@ message_progress_start(lzma_stream *strm, uint64_t in_size)
 		alarm(1);
 #else
 		progress_needs_updating = true;
-		progress_next_update = 1000000;
+		progress_next_update = 1000;
 #endif
 	}
 
@@ -364,7 +346,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 {
 	// Don't print the speed immediately, since the early values look
 	// somewhat random.
-	if (elapsed < 3000000)
+	if (elapsed < 3000)
 		return "";
 
 	static const char unit[][8] = {
@@ -377,7 +359,7 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 
 	// Calculate the speed as KiB/s.
 	double speed = (double)(uncompressed_pos)
-			/ ((double)(elapsed) * (1024.0 / 1e6));
+			/ ((double)(elapsed) * (1024.0 / 1000.0));
 
 	// Adjust the unit of the speed if needed.
 	while (speed > 999.0) {
@@ -402,12 +384,12 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 /// Make a string indicating elapsed or remaining time. The format is either
 /// M:SS or H:MM:SS depending on if the time is an hour or more.
 static const char *
-progress_time(uint64_t useconds)
+progress_time(uint64_t mseconds)
 {
 	// 9999 hours = 416 days
 	static char buf[sizeof("9999:59:59")];
 
-	uint32_t seconds = useconds / 1000000;
+	uint32_t seconds = mseconds / 1000;
 
 	// Don't show anything if the time is zero or ridiculously big.
 	if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59)
@@ -445,14 +427,14 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed)
 	//  - Only a few seconds has passed since we started (de)compressing,
 	//    so estimate would be too inaccurate.
 	if (expected_in_size == 0 || in_pos > expected_in_size
-			|| in_pos < (UINT64_C(1) << 19) || elapsed < 8000000)
+			|| in_pos < (UINT64_C(1) << 19) || elapsed < 8000)
 		return "";
 
 	// Calculate the estimate. Don't give an estimate of zero seconds,
 	// since it is possible that all the input has been already passed
 	// to the library, but there is still quite a bit of output pending.
 	uint32_t remaining = (double)(expected_in_size - in_pos)
-			* ((double)(elapsed) / 1e6) / (double)(in_pos);
+			* ((double)(elapsed) / 1000.0) / (double)(in_pos);
 	if (remaining < 1)
 		remaining = 1;
 
@@ -518,28 +500,26 @@ progress_remaining(uint64_t in_pos, uint64_t elapsed)
 }
 
 
-/// Calculate the elapsed time as microseconds.
-static uint64_t
-progress_elapsed(void)
-{
-	return my_time() - start_time;
-}
-
-
-/// Get information about position in the stream. This is currently simple,
-/// but it will become more complicated once we have multithreading support.
+/// Get how much uncompressed and compressed data has been processed.
 static void
 progress_pos(uint64_t *in_pos,
 		uint64_t *compressed_pos, uint64_t *uncompressed_pos)
 {
-	*in_pos = progress_strm->total_in;
+	uint64_t out_pos;
+	lzma_get_progress(progress_strm, in_pos, &out_pos);
+
+	// It cannot have processed more input than it has been given.
+	assert(*in_pos <= progress_strm->total_in);
+
+	// It cannot have produced more output than it claims to have ready.
+	assert(out_pos >= progress_strm->total_out);
 
 	if (opt_mode == MODE_COMPRESS) {
-		*compressed_pos = progress_strm->total_out;
-		*uncompressed_pos = progress_strm->total_in;
+		*compressed_pos = out_pos;
+		*uncompressed_pos = *in_pos;
 	} else {
-		*compressed_pos = progress_strm->total_in;
-		*uncompressed_pos = progress_strm->total_out;
+		*compressed_pos = *in_pos;
+		*uncompressed_pos = out_pos;
 	}
 
 	return;
@@ -553,13 +533,13 @@ message_progress_update(void)
 		return;
 
 	// Calculate how long we have been processing this file.
-	const uint64_t elapsed = progress_elapsed();
+	const uint64_t elapsed = mytime_get_elapsed();
 
 #ifndef SIGALRM
 	if (progress_next_update > elapsed)
 		return;
 
-	progress_next_update = elapsed + 1000000;
+	progress_next_update = elapsed + 1000;
 #endif
 
 	// Get our current position in the stream.
@@ -652,7 +632,7 @@ progress_flush(bool finished)
 
 	progress_active = false;
 
-	const uint64_t elapsed = progress_elapsed();
+	const uint64_t elapsed = mytime_get_elapsed();
 
 	signals_block();
 
@@ -1122,7 +1102,10 @@ message_help(bool long_help)
 "  -f, --force         force overwrite of output file and (de)compress links\n"
 "  -c, --stdout        write to standard output and don't delete input files"));
 
-	if (long_help)
+	if (long_help) {
+		puts(_(
+"      --single-stream decompress only the first stream, and silently\n"
+"                      ignore possible remaining input data"));
 		puts(_(
 "      --no-sparse     do not create sparse files when decompressing\n"
 "  -S, --suffix=.SUF   use the suffix `.SUF' on compressed files\n"
@@ -1130,6 +1113,7 @@ message_help(bool long_help)
 "                      omitted, filenames are read from the standard input;\n"
 "                      filenames must be terminated with the newline character\n"
 "      --files0[=FILE] like --files but use the null character as terminator"));
+	}
 
 	if (long_help) {
 		puts(_("\n Basic file format and compression options:\n"));
@@ -1138,6 +1122,8 @@ message_help(bool long_help)
 "                      `auto' (default), `xz', `lzma', and `raw'\n"
 "  -C, --check=CHECK   integrity check type: `none' (use with caution),\n"
 "                      `crc32', `crc64' (default), or `sha256'"));
+		puts(_(
+"      --ignore-check  don't verify the integrity check when decompressing"));
 	}
 
 	puts(_(
@@ -1148,7 +1134,25 @@ message_help(bool long_help)
 "  -e, --extreme       try to improve compression ratio by using more CPU time;\n"
 "                      does not affect decompressor memory requirements"));
 
+	puts(_(
+"  -T, --threads=NUM   use at most NUM threads; the default is 1; set to 0\n"
+"                      to use as many threads as there are processor cores"));
+
 	if (long_help) {
+		puts(_(
+"      --block-size=SIZE\n"
+"                      start a new .xz block after every SIZE bytes of input;\n"
+"                      use this to set the block size for threaded compression"));
+		puts(_(
+"      --block-list=SIZES\n"
+"                      start a new .xz block after the given comma-separated\n"
+"                      intervals of uncompressed data"));
+		puts(_(
+"      --flush-timeout=TIMEOUT\n"
+"                      when compressing, if more than TIMEOUT milliseconds has\n"
+"                      passed since the previous flush and reading more input\n"
+"                      would block, all pending data is flushed out"
+		));
 		puts(_( // xgettext:no-c-format
 "      --memlimit-compress=LIMIT\n"
 "      --memlimit-decompress=LIMIT\n"
@@ -1244,5 +1248,10 @@ message_help(bool long_help)
 			PACKAGE_BUGREPORT);
 	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
 
+#if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE
+	puts(_(
+"THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE."));
+#endif
+
 	tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT);
 }
diff --git a/src/xz/mytime.c b/src/xz/mytime.c
new file mode 100644
index 000000000000..4be184fd19da
--- /dev/null
+++ b/src/xz/mytime.c
@@ -0,0 +1,89 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       mytime.c
+/// \brief      Time handling functions
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+#if !(defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC)
+#	include <sys/time.h>
+#endif
+
+uint64_t opt_flush_timeout = 0;
+bool flush_needed;
+
+static uint64_t start_time;
+static uint64_t next_flush;
+
+
+/// \brief      Get the current time as milliseconds
+///
+/// It's relative to some point but not necessarily to the UNIX Epoch.
+static uint64_t
+mytime_now(void)
+{
+	// NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1.
+#if defined(HAVE_CLOCK_GETTIME) && HAVE_DECL_CLOCK_MONOTONIC
+	// If CLOCK_MONOTONIC was available at compile time but for some
+	// reason isn't at runtime, fallback to CLOCK_REALTIME which
+	// according to POSIX is mandatory for all implementations.
+	static clockid_t clk_id = CLOCK_MONOTONIC;
+	struct timespec tv;
+	while (clock_gettime(clk_id, &tv))
+		clk_id = CLOCK_REALTIME;
+
+	return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_nsec / 1000000;
+#else
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	return (uint64_t)(tv.tv_sec) * UINT64_C(1000) + tv.tv_usec / 1000;
+#endif
+}
+
+
+extern void
+mytime_set_start_time(void)
+{
+	start_time = mytime_now();
+	next_flush = start_time + opt_flush_timeout;
+	flush_needed = false;
+	return;
+}
+
+
+extern uint64_t
+mytime_get_elapsed(void)
+{
+	return mytime_now() - start_time;
+}
+
+
+extern void
+mytime_set_flush_time(void)
+{
+	next_flush = mytime_now() + opt_flush_timeout;
+	flush_needed = false;
+	return;
+}
+
+
+extern int
+mytime_get_flush_timeout(void)
+{
+	if (opt_flush_timeout == 0 || opt_mode != MODE_COMPRESS)
+		return -1;
+
+	const uint64_t now = mytime_now();
+	if (now >= next_flush)
+		return 0;
+
+	const uint64_t remaining = next_flush - now;
+	return remaining > INT_MAX ? INT_MAX : (int)remaining;
+}
diff --git a/src/xz/mytime.h b/src/xz/mytime.h
new file mode 100644
index 000000000000..ea291eed81c7
--- /dev/null
+++ b/src/xz/mytime.h
@@ -0,0 +1,47 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       mytime.h
+/// \brief      Time handling functions
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// \brief      Number of milliseconds to between LZMA_SYNC_FLUSHes
+///
+/// If 0, timed flushing is disabled. Otherwise if no more input is available
+/// and not at the end of the file and at least opt_flush_timeout milliseconds
+/// has elapsed since the start of compression or the previous flushing
+/// (LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH), set LZMA_SYNC_FLUSH to flush
+/// the pending data.
+extern uint64_t opt_flush_timeout;
+
+
+/// \brief      True when flushing is needed due to expired timeout
+extern bool flush_needed;
+
+
+/// \brief      Store the time when (de)compression was started
+///
+/// The start time is also stored as the time of the first flush.
+extern void mytime_set_start_time(void);
+
+
+/// \brief      Get the number of milliseconds since the operation started
+extern uint64_t mytime_get_elapsed(void);
+
+
+/// \brief      Store the time of when compressor was flushed
+extern void mytime_set_flush_time(void);
+
+
+/// \brief      Get the number of milliseconds until the next flush
+///
+/// This returns -1 if no timed flushing is used.
+///
+/// The return value is inteded for use with poll().
+extern int mytime_get_flush_timeout(void);
diff --git a/src/xz/options.c b/src/xz/options.c
index f21a0ba51065..f9c7ab9e8535 100644
--- a/src/xz/options.c
+++ b/src/xz/options.c
@@ -31,8 +31,8 @@ typedef struct {
 } option_map;
 
 
-/// Parses option=value pairs that are separated with colons, semicolons,
-/// or commas: opt=val:opt=val;opt=val,opt=val
+/// Parses option=value pairs that are separated with commas:
+/// opt=val,opt=val,opt=val
 ///
 /// Each option is a string, that is converted to an integer using the
 /// index where the option string is in the array.
diff --git a/src/xz/private.h b/src/xz/private.h
index 6b01e51354e1..4acfa8dc4558 100644
--- a/src/xz/private.h
+++ b/src/xz/private.h
@@ -12,6 +12,7 @@
 
 #include "sysdefs.h"
 #include "mythread.h"
+
 #include "lzma.h"
 
 #include <sys/types.h>
@@ -45,6 +46,7 @@
 #endif
 
 #include "main.h"
+#include "mytime.h"
 #include "coder.h"
 #include "message.h"
 #include "args.h"
diff --git a/src/xz/signals.c b/src/xz/signals.c
index 322811f472b5..5387c424e1a5 100644
--- a/src/xz/signals.c
+++ b/src/xz/signals.c
@@ -41,6 +41,11 @@ signal_handler(int sig)
 {
 	exit_signal = sig;
 	user_abort = true;
+
+#ifndef TUKLIB_DOSLIKE
+	io_write_to_user_abort_pipe();
+#endif
+
 	return;
 }
 
diff --git a/src/xz/suffix.c b/src/xz/suffix.c
index 8e331a7022a3..9d4fcd139b8f 100644
--- a/src/xz/suffix.c
+++ b/src/xz/suffix.c
@@ -12,6 +12,10 @@
 
 #include "private.h"
 
+#ifdef __DJGPP__
+#	include <fcntl.h>
+#endif
+
 // For case-insensitive filename suffix on case-insensitive systems
 #if defined(TUKLIB_DOSLIKE) || defined(__VMS)
 #	define strcmp strcasecmp
@@ -45,6 +49,31 @@ has_dir_sep(const char *str)
 }
 
 
+#ifdef __DJGPP__
+/// \brief      Test for special suffix used for 8.3 short filenames (SFN)
+///
+/// \return     If str matches *.?- or *.??-, true is returned. Otherwise
+///             false is returned.
+static bool
+has_sfn_suffix(const char *str, size_t len)
+{
+	if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.'
+			&& !is_dir_sep(str[len - 2])) {
+		// *.?-
+		if (str[len - 3] == '.')
+			return !is_dir_sep(str[len - 4]);
+
+		// *.??-
+		if (len >= 5 && !is_dir_sep(str[len - 3])
+				&& str[len - 4] == '.')
+			return !is_dir_sep(str[len - 5]);
+	}
+
+	return false;
+}
+#endif
+
+
 /// \brief      Checks if src_name has given compressed_suffix
 ///
 /// \param      suffix      Filename suffix to look for
@@ -87,6 +116,9 @@ uncompressed_name(const char *src_name, const size_t src_len)
 		{ ".xz",    "" },
 		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
 		{ ".lzma",  "" },
+#ifdef __DJGPP__
+		{ ".lzm",   "" },
+#endif
 		{ ".tlz",   ".tar" },
 		// { ".gz",    "" },
 		// { ".tgz",   ".tar" },
@@ -112,6 +144,17 @@ uncompressed_name(const char *src_name, const size_t src_len)
 				break;
 			}
 		}
+
+#ifdef __DJGPP__
+		// Support also *.?- -> *.? and *.??- -> *.?? on DOS.
+		// This is done also when long filenames are available
+		// to keep it easy to decompress files created when
+		// long filename support wasn't available.
+		if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
+			new_suffix = "";
+			new_len = src_len - 1;
+		}
+#endif
 	}
 
 	if (new_len == 0 && custom_suffix != NULL)
@@ -134,21 +177,35 @@ uncompressed_name(const char *src_name, const size_t src_len)
 }
 
 
+/// This message is needed in multiple places in compressed_name(),
+/// so the message has been put into its own function.
+static void
+msg_suffix(const char *src_name, const char *suffix)
+{
+	message_warning(_("%s: File already has `%s' suffix, skipping"),
+			src_name, suffix);
+	return;
+}
+
+
 /// \brief      Appends suffix to src_name
 ///
 /// In contrast to uncompressed_name(), we check only suffixes that are valid
 /// for the specified file format.
 static char *
-compressed_name(const char *src_name, const size_t src_len)
+compressed_name(const char *src_name, size_t src_len)
 {
 	// The order of these must match the order in args.h.
-	static const char *const all_suffixes[][3] = {
+	static const char *const all_suffixes[][4] = {
 		{
 			".xz",
 			".txz",
 			NULL
 		}, {
 			".lzma",
+#ifdef __DJGPP__
+			".lzm",
+#endif
 			".tlz",
 			NULL
 /*
@@ -170,20 +227,27 @@ compressed_name(const char *src_name, const size_t src_len)
 	const size_t format = opt_format - 1;
 	const char *const *suffixes = all_suffixes[format];
 
+	// Look for known filename suffixes and refuse to compress them.
 	for (size_t i = 0; suffixes[i] != NULL; ++i) {
 		if (test_suffix(suffixes[i], src_name, src_len) != 0) {
-			message_warning(_("%s: File already has `%s' "
-					"suffix, skipping"), src_name,
-					suffixes[i]);
+			msg_suffix(src_name, suffixes[i]);
 			return NULL;
 		}
 	}
 
+#ifdef __DJGPP__
+	// Recognize also the special suffix that is used when long
+	// filename (LFN) support isn't available. This suffix is
+	// recognized on LFN systems too.
+	if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
+		msg_suffix(src_name, "-");
+		return NULL;
+	}
+#endif
+
 	if (custom_suffix != NULL) {
 		if (test_suffix(custom_suffix, src_name, src_len) != 0) {
-			message_warning(_("%s: File already has `%s' "
-					"suffix, skipping"), src_name,
-					custom_suffix);
+			msg_suffix(src_name, custom_suffix);
 			return NULL;
 		}
 	}
@@ -199,7 +263,101 @@ compressed_name(const char *src_name, const size_t src_len)
 
 	const char *suffix = custom_suffix != NULL
 			? custom_suffix : suffixes[0];
-	const size_t suffix_len = strlen(suffix);
+	size_t suffix_len = strlen(suffix);
+
+#ifdef __DJGPP__
+	if (!_use_lfn(src_name)) {
+		// Long filename (LFN) support isn't available and we are
+		// limited to 8.3 short filenames (SFN).
+		//
+		// Look for suffix separator from the filename, and make sure
+		// that it is in the filename, not in a directory name.
+		const char *sufsep = strrchr(src_name, '.');
+		if (sufsep == NULL || sufsep[1] == '\0'
+				|| has_dir_sep(sufsep)) {
+			// src_name has no filename extension.
+			//
+			// Examples:
+			// xz foo         -> foo.xz
+			// xz -F lzma foo -> foo.lzm
+			// xz -S x foo    -> foox
+			// xz -S x foo.   -> foo.x
+			// xz -S x.y foo  -> foox.y
+			// xz -S .x foo   -> foo.x
+			// xz -S .x foo.  -> foo.x
+			//
+			// Avoid double dots:
+			if (sufsep != NULL && sufsep[1] == '\0'
+					&& suffix[0] == '.')
+				--src_len;
+
+		} else if (custom_suffix == NULL
+				&& strcasecmp(sufsep, ".tar") == 0) {
+			// ".tar" is handled specially.
+			//
+			// Examples:
+			// xz foo.tar          -> foo.txz
+			// xz -F lzma foo.tar  -> foo.tlz
+			static const char *const tar_suffixes[] = {
+				".txz",
+				".tlz",
+				// ".tgz",
+			};
+			suffix = tar_suffixes[format];
+			suffix_len = 4;
+			src_len -= 4;
+
+		} else {
+			if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
+				// Instead of the .xz suffix, use a single
+				// character at the end of the filename
+				// extension. This is to minimize name
+				// conflicts when compressing multiple files
+				// with the same basename. E.g. foo.txt and
+				// foo.exe become foo.tx- and foo.ex-. Dash
+				// is rare as the last character of the
+				// filename extension, so it seems to be
+				// quite safe choice and it stands out better
+				// in directory listings than e.g. x. For
+				// comparison, gzip uses z.
+				suffix = "-";
+				suffix_len = 1;
+			}
+
+			if (suffix[0] == '.') {
+				// The first character of the suffix is a dot.
+				// Throw away the original filename extension
+				// and replace it with the new suffix.
+				//
+				// Examples:
+				// xz -F lzma foo.txt  -> foo.lzm
+				// xz -S .x  foo.txt   -> foo.x
+				src_len = sufsep - src_name;
+
+			} else {
+				// The first character of the suffix is not
+				// a dot. Preserve the first 0-2 characters
+				// of the original filename extension.
+				//
+				// Examples:
+				// xz foo.txt         -> foo.tx-
+				// xz -S x  foo.c     -> foo.cx
+				// xz -S ab foo.c     -> foo.cab
+				// xz -S ab foo.txt   -> foo.tab
+				// xz -S abc foo.txt  -> foo.abc
+				//
+				// Truncate the suffix to three chars:
+				if (suffix_len > 3)
+					suffix_len = 3;
+
+				// If needed, overwrite 1-3 characters.
+				if (strlen(sufsep) > 4 - suffix_len)
+					src_len = sufsep - src_name
+							+ 4 - suffix_len;
+			}
+		}
+	}
+#endif
 
 	char *dest_name = xmalloc(src_len + suffix_len + 1);
 
diff --git a/src/xz/xz.1 b/src/xz/xz.1
index 363b90cb4e55..75aead3d0439 100644
--- a/src/xz/xz.1
+++ b/src/xz/xz.1
@@ -5,7 +5,7 @@
 .\" This file has been put into the public domain.
 .\" You can do whatever you want with this file.
 .\"
-.TH XZ 1 "2013-06-21" "Tukaani" "XZ Utils"
+.TH XZ 1 "2014-12-16" "Tukaani" "XZ Utils"
 .
 .SH NAME
 xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@@ -435,6 +435,29 @@ standard output instead of a file.
 This implies
 .BR \-\-keep .
 .TP
+.B \-\-single\-stream
+Decompress only the first
+.B .xz
+stream, and
+silently ignore possible remaining input data following the stream.
+Normally such trailing garbage makes
+.B xz
+display an error.
+.IP ""
+.B xz
+never decompresses more than one stream from
+.B .lzma
+files or raw streams, but this option still makes
+.B xz
+ignore the possible trailing data after the
+.B .lzma
+file or raw stream.
+.IP ""
+This option has no effect if the operation mode is not
+.B \-\-decompress
+or
+.BR \-\-test .
+.TP
 .B \-\-no\-sparse
 Disable creation of sparse files.
 By default, if decompressing into a regular file,
@@ -586,6 +609,25 @@ Integrity of the
 headers is always verified with CRC32.
 It is not possible to change or disable it.
 .TP
+.B \-\-ignore\-check
+Don't verify the integrity check of the compressed data when decompressing.
+The CRC32 values in the
+.B .xz
+headers will still be verified normally.
+.IP ""
+.B "Do not use this option unless you know what you are doing."
+Possible reasons to use this option:
+.RS
+.IP \(bu 3
+Trying to recover data from a corrupt .xz file.
+.IP \(bu 3
+Speeding up decompression.
+This matters mostly with SHA-256 or
+with files that have compressed extremely well.
+It's recommended to not use this option for this purpose
+unless the file integrity is verified externally in some other way.
+.RE
+.TP
 .BR \-0 " ... " \-9
 Select a compression preset level.
 The default is
@@ -778,6 +820,124 @@ These are provided only for backwards compatibility
 with LZMA Utils.
 Avoid using these options.
 .TP
+.BI \-\-block\-size= size
+When compressing to the
+.B .xz
+format, split the input data into blocks of
+.I size
+bytes.
+The blocks are compressed independently from each other,
+which helps with multi-threading and
+makes limited random-access decompression possible.
+This option is typically used to override the default
+block size in multi-threaded mode,
+but this option can be used in single-threaded mode too.
+.IP ""
+In multi-threaded mode about three times
+.I size
+bytes will be allocated in each thread for buffering input and output.
+The default
+.I size
+is three times the LZMA2 dictionary size or 1 MiB,
+whichever is more.
+Typically a good value is 2\-4 times
+the size of the LZMA2 dictionary or at least 1 MiB.
+Using
+.I size
+less than the LZMA2 dictionary size is waste of RAM
+because then the LZMA2 dictionary buffer will never get fully used.
+The sizes of the blocks are stored in the block headers,
+which a future version of
+.B xz
+will use for multi-threaded decompression.
+.IP ""
+In single-threaded mode no block splitting is done by default.
+Setting this option doesn't affect memory usage.
+No size information is stored in block headers,
+thus files created in single-threaded mode
+won't be identical to files created in multi-threaded mode.
+The lack of size information also means that a future version of
+.B xz
+won't be able decompress the files in multi-threaded mode.
+.TP
+.BI \-\-block\-list= sizes
+When compressing to the
+.B .xz
+format, start a new block after
+the given intervals of uncompressed data.
+.IP ""
+The uncompressed
+.I sizes
+of the blocks are specified as a comma-separated list.
+Omitting a size (two or more consecutive commas) is a shorthand
+to use the size of the previous block.
+.IP ""
+If the input file is bigger than the sum of
+.IR sizes ,
+the last value in
+.I sizes
+is repeated until the end of the file.
+A special value of
+.B 0
+may be used as the last value to indicate that
+the rest of the file should be encoded as a single block.
+.IP ""
+If one specifies
+.I sizes
+that exceed the encoder's block size
+(either the default value in threaded mode or
+the value specified with \fB\-\-block\-size=\fIsize\fR),
+the encoder will create additional blocks while
+keeping the boundaries specified in
+.IR sizes .
+For example, if one specifies
+.B \-\-block\-size=10MiB
+.B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB
+and the input file is 80 MiB,
+one will get 11 blocks:
+5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB.
+.IP ""
+In multi-threaded mode the sizes of the blocks
+are stored in the block headers.
+This isn't done in single-threaded mode,
+so the encoded output won't be
+identical to that of the multi-threaded mode.
+.TP
+.BI \-\-flush\-timeout= timeout
+When compressing, if more than
+.I timeout
+milliseconds (a positive integer) has passed since the previous flush and
+reading more input would block,
+all the pending input data is flushed from the encoder and
+made available in the output stream.
+This can be useful if
+.B xz
+is used to compress data that is streamed over a network.
+Small
+.I timeout
+values make the data available at the receiving end
+with a small delay, but large
+.I timeout
+values give better compression ratio.
+.IP ""
+This feature is disabled by default.
+If this option is specified more than once, the last one takes effect.
+The special
+.I timeout
+value of
+.B 0
+can be used to explicitly disable this feature.
+.IP ""
+This feature is not available on non-POSIX systems.
+.IP ""
+.\" FIXME
+.B "This feature is still experimental."
+Currently
+.B xz
+is unsuitable for decompressing the stream in real time due to how
+.B xz
+does buffering.
+.TP
 .BI \-\-memlimit\-compress= limit
 Set a memory usage limit for compression.
 If this option is specified multiple times,
@@ -876,24 +1036,25 @@ Automatic adjusting is always disabled when creating raw streams
 .TP
 \fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads
 Specify the number of worker threads to use.
+Setting
+.I threads
+to a special value
+.B 0
+makes
+.B xz
+use as many threads as there are CPU cores on the system.
 The actual number of threads can be less than
 .I threads
+if the input file is not big enough
+for threading with the given settings or
 if using more threads would exceed the memory usage limit.
 .IP ""
-.B "Multithreaded compression and decompression are not"
-.B "implemented yet, so this option has no effect for now."
-.IP ""
-.B "As of writing (2010-09-27), it hasn't been decided"
-.B "if threads will be used by default on multicore systems"
-.B "once support for threading has been implemented."
-.B "Comments are welcome."
-The complicating factor is that using many threads
-will increase the memory usage dramatically.
-Note that if multithreading will be the default,
-it will probably be done so that single-threaded and
-multithreaded modes produce the same output,
-so compression ratio won't be significantly affected
-if threading will be enabled by default.
+Currently the only threading method is to split the input into
+blocks and compress them independently from each other.
+The default block size depends on the compression level and
+can be overriden with the
+.BI \-\-block\-size= size
+option.
 .
 .SS "Custom compressor filter chains"
 A custom filter chain allows specifying
@@ -1863,6 +2024,14 @@ or
 .B no
 indicating if all block headers have both compressed size and
 uncompressed size stored in them
+.PP
+.I Since
+.B xz
+.I 5.1.2alpha:
+.IP 4. 4
+Minimum
+.B xz
+version required to decompress the file
 .RE
 .PD
 .PP
@@ -1913,6 +2082,14 @@ or
 .B no
 indicating if all block headers have both compressed size and
 uncompressed size stored in them
+.PP
+.I Since
+.B xz
+.I 5.1.2alpha:
+.IP 12. 4
+Minimum
+.B xz
+version required to decompress the file
 .RE
 .PD
 .PP
@@ -2173,7 +2350,9 @@ If there is data left after the first
 .B .lzma
 stream,
 .B xz
-considers the file to be corrupt.
+considers the file to be corrupt unless
+.B \-\-single\-stream
+was used.
 This may break obscure scripts which have
 assumed that trailing garbage is ignored.
 .