18 files changed, 2444 insertions, 975 deletions
diff --git a/programs/Makefile b/programs/Makefile
index be666b4ef0547..32dbc67eff5c5 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -27,9 +27,11 @@ LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
 LIBVER  := $(shell echo $(LIBVER_SCRIPT))
 
-ZSTD_VERSION=$(LIBVER)
+ZSTD_VERSION = $(LIBVER)
 
-ifeq ($(shell $(CC) -v 2>&1 | grep -c "gcc version "), 1)
+GREP = grep --color=never
+
+ifeq ($(shell $(CC) -v 2>&1 | $(GREP) -c "gcc version "), 1)
 ALIGN_LOOP = -falign-loops=32
 else
 ALIGN_LOOP =
@@ -38,12 +40,15 @@ endif
 CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
            -I$(ZSTDDIR)/dictBuilder \
            -DXXH_NAMESPACE=ZSTD_
+ifeq ($(OS),Windows_NT)   # MinGW assumed
+CPPFLAGS   += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
+endif
 CFLAGS  ?= -O3
 DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
             -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
             -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-            -Wredundant-decls
+            -Wredundant-decls -Wmissing-prototypes
 CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)
 FLAGS    = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
 
@@ -55,11 +60,11 @@ ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
 ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c
 ZSTDDECOMP_O = $(ZSTDDIR)/decompress/zstd_decompress.o
 
-ZSTD_LEGACY_SUPPORT ?= 4
+ZSTD_LEGACY_SUPPORT ?= 5
 ZSTDLEGACY_FILES :=
 ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
 ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
-	ZSTDLEGACY_FILES += $(shell ls $(ZSTDDIR)/legacy/*.c | grep 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
+	ZSTDLEGACY_FILES += $(shell ls $(ZSTDDIR)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
 endif
 	CPPFLAGS += -I$(ZSTDDIR)/legacy
 else
@@ -129,6 +134,18 @@ else
 LZ4_MSG := $(NO_LZ4_MSG)
 endif
 
+# explicit backtrace enable/disable for Linux & Darwin
+ifeq ($(BACKTRACE), 0)
+DEBUGFLAGS += -DBACKTRACE_ENABLE=0
+endif
+ifeq (,$(filter Windows%, $(OS)))
+ifeq ($(BACKTRACE), 1)
+DEBUGFLAGS += -DBACKTRACE_ENABLE=1
+DEBUGFLAGS_LD += -rdynamic
+endif
+endif
+
+
 .PHONY: default
 default: zstd-release
 
@@ -141,7 +158,7 @@ allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy
 $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
 
 zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
-zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD)
+zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
 zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
 zstd : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o
 	@echo "$(THREAD_MSG)"
@@ -154,11 +171,12 @@ endif
 	$(CC) $(FLAGS) $^ $(RES_FILE) -o $@$(EXT) $(LDFLAGS)
 
 .PHONY: zstd-release
-zstd-release: DEBUGFLAGS :=
+zstd-release: DEBUGFLAGS := -DBACKTRACE_ENABLE=0
+zstd-release: DEBUGFLAGS_LD :=
 zstd-release: zstd
 
 zstd32 : CPPFLAGS += $(THREAD_CPP)
-zstd32 : LDFLAGS += $(THREAD_LD) 
+zstd32 : LDFLAGS  += $(THREAD_LD)
 zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
 zstd32 : $(ZSTDLIB_FILES) zstdcli.c fileio.c bench.c datagen.c dibio.c
 ifneq (,$(filter Windows%,$(OS)))
@@ -170,17 +188,17 @@ zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o fileio.c bench.o datagen.
 	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
 zstd-nomt : THREAD_CPP :=
-zstd-nomt : THREAD_LD :=
+zstd-nomt : THREAD_LD  :=
 zstd-nomt : THREAD_MSG := - multi-threading disabled
 zstd-nomt : zstd
 
 zstd-nogz : ZLIBCPP :=
-zstd-nogz : ZLIBLD :=
+zstd-nogz : ZLIBLD  :=
 zstd-nogz : ZLIB_MSG := - gzip support is disabled
 zstd-nogz : zstd
 
 zstd-noxz : LZMACPP :=
-zstd-noxz : LZMALD :=
+zstd-noxz : LZMALD  :=
 zstd-noxz : LZMA_MSG := - xz/lzma support is disabled
 zstd-noxz : zstd
 
@@ -231,25 +249,53 @@ MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="zstd
 zstd.1: zstd.1.md ../lib/zstd.h
 	cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
 
+zstdgrep.1: zstdgrep.1.md ../lib/zstd.h
+	cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
+
+zstdless.1: zstdless.1.md ../lib/zstd.h
+	cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
+
 .PHONY: man
-man: zstd.1
+man: zstd.1 zstdgrep.1 zstdless.1
 
 .PHONY: clean-man
 clean-man:
 	rm zstd.1
+	rm zstdgrep.1
+	rm zstdless.1
 
 .PHONY: preview-man
 preview-man: clean-man man
 	man ./zstd.1
+	man ./zstdgrep.1
+	man ./zstdless.1
 
 #-----------------------------------------------------------------------------
-# make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
+# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
 #-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
 
+EGREP = egrep --color=never
+
+# Print a two-column table of targets and their descriptions. The recipe uses brace expansion and
+# ANSI-C quoting, so it needs a bash-like shell. To describe a target, add a comment of the form "## <TARGET>: <DESCRIPTION>".  For example:
+#
+## list: Print all targets and their descriptions (if provided)
 .PHONY: list
 list:
-	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
+	@TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \
+		| awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \
+		| $(EGREP) -v  -e '^[^[:alnum:]]' | sort); \
+	{ \
+	    printf "Target Name\tDescription\n"; \
+	    printf "%0.s-" {1..16}; printf "\t"; printf "%0.s-" {1..40}; printf "\n"; \
+	    for target in $$TARGETS; do \
+	        line=$$($(EGREP) "^##[[:space:]]+$$target:" $(lastword $(MAKEFILE_LIST))); \
+	        description=$$(echo $$line | awk '{i=index($$0,":"); print substr($$0,i+1)}' | xargs); \
+	        printf "$$target\t$$description\n"; \
+	    done \
+	} | column -t -s $$'\t'
+
 
 DESTDIR     ?=
 # directory variables : GNU conventions prefer lowercase
@@ -296,6 +342,8 @@ install: zstd
 	@$(INSTALL_MAN) zstd.1 $(DESTDIR)$(MAN1DIR)/zstd.1
 	@ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/zstdcat.1
 	@ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/unzstd.1
+	@$(INSTALL_MAN) zstdgrep.1 $(DESTDIR)$(MAN1DIR)/zstdgrep.1
+	@$(INSTALL_MAN) zstdless.1 $(DESTDIR)$(MAN1DIR)/zstdless.1
 	@echo zstd installation completed
 
 .PHONY: uninstall
@@ -305,6 +353,8 @@ uninstall:
 	@$(RM) $(DESTDIR)$(BINDIR)/zstdcat
 	@$(RM) $(DESTDIR)$(BINDIR)/unzstd
 	@$(RM) $(DESTDIR)$(BINDIR)/zstd
+	@$(RM) $(DESTDIR)$(MAN1DIR)/zstdless.1
+	@$(RM) $(DESTDIR)$(MAN1DIR)/zstdgrep.1
 	@$(RM) $(DESTDIR)$(MAN1DIR)/zstdcat.1
 	@$(RM) $(DESTDIR)$(MAN1DIR)/unzstd.1
 	@$(RM) $(DESTDIR)$(MAN1DIR)/zstd.1
diff --git a/programs/README.md b/programs/README.md
index a308fccf9ea3b..ca9056eaaa437 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -61,6 +61,13 @@ There are however other Makefile targets that create different variations of CLI
   In which case, linking stage will fail if `lz4` library cannot be found.
   This is useful to prevent silent feature disabling.
 
+- __BACKTRACE__ : `zstd` can display a stack backtrace when execution
+  generates a runtime exception. By default, this feature may be
+  degraded/disabled on some platforms unless additional compiler directives are
+  applied. When triaging a runtime issue, enabling this feature can provide
+  more context to determine the location of the fault.
+  Example : `make zstd BACKTRACE=1`
+
 
 #### Aggregation of parameters
 CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
@@ -150,7 +157,8 @@ Advanced arguments :
 
 Dictionary builder :
 --train ## : create a dictionary from a training set of files
---train-cover[=k=#,d=#,steps=#] : use the cover algorithm with optional args
+--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args
+--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fastcover algorithm with optional args
 --train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9)
  -o file : `file` is dictionary name (default: dictionary)
 --maxdict=# : limit dictionary to specified size (default: 112640)
@@ -185,7 +193,7 @@ version is less than `128 MiB`).
 
 Compression Speed vs Ratio | Decompression Speed
 ---------------------------|---------------------
-![Compression Speed vs Ratio](../doc/images/ldmCspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](../doc/images/ldmDspeed.png "Decompression Speed")
+![Compression Speed vs Ratio](https://raw.githubusercontent.com/facebook/zstd/v1.3.3/doc/images/ldmCspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](https://raw.githubusercontent.com/facebook/zstd/v1.3.3/doc/images/ldmDspeed.png "Decompression Speed")
 
 | Method | Compression ratio | Compression speed | Decompression speed  |
 |:-------|------------------:|-------------------------:|---------------------------:|
@@ -208,10 +216,24 @@ The below table illustrates this on the [Silesia compression corpus].
 [Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
 
 | Method | Compression ratio | Compression speed | Decompression speed  |
-|:-------|------------------:|-------------------------:|---------------------------:|
-| `zstd -1`   | `2.878`   | `231.7 MB/s`  | `594.4 MB/s`  |
-| `zstd -1 --long` | `2.929` | `106.5 MB/s` | `517.9 MB/s` |
-| `zstd -5`  | `3.274`    | `77.1 MB/s`  | `464.2 MB/s`  |
-| `zstd -5 --long` | `3.319` | `51.7 MB/s` | `371.9 MB/s` |
-| `zstd -10` | `3.523`    | `16.4 MB/s`   | `489.2 MB/s`  |
-| `zstd -10 --long`| `3.566` | `16.2 MB/s` | `415.7 MB/s`  |
+|:-------|------------------:|------------------:|---------------------:|
+| `zstd -1`        | `2.878` | `231.7 MB/s`      | `594.4 MB/s`   |
+| `zstd -1 --long` | `2.929` | `106.5 MB/s`      | `517.9 MB/s`   |
+| `zstd -5`        | `3.274` | `77.1 MB/s`       | `464.2 MB/s`   |
+| `zstd -5 --long` | `3.319` | `51.7 MB/s`       | `371.9 MB/s`   |
+| `zstd -10`       | `3.523` | `16.4 MB/s`       | `489.2 MB/s`   |
+| `zstd -10 --long`| `3.566` | `16.2 MB/s`       | `415.7 MB/s`   |
+
+
+#### zstdgrep
+
+`zstdgrep` is a utility which makes it possible to `grep` a `.zst` compressed file directly.
+It's used the same way as normal `grep`, for example :
+`zstdgrep pattern file.zst`
+
+`zstdgrep` is _not_ compatible with dictionary compression.
+
+To search a file compressed with a dictionary,
+it's necessary to decompress it using `zstd` or `zstdcat`,
+and then pipe the result to `grep`. For example :
+`zstdcat -D dictionary -qc -- file.zst | grep pattern`
diff --git a/programs/bench.c b/programs/bench.c
index 014a4fd41b1cf..326c1c1c56e59 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -41,6 +41,8 @@
 #include "zstd.h"
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"
+#include "bench.h"
+#include "zstd_errors.h"
 
 
 /* *************************************
@@ -61,25 +63,27 @@
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
-static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
+#define BMK_RUNTEST_DEFAULT_MS 1000
 
-static U32 g_compressibilityDefault = 50;
+static const size_t maxMemory = (sizeof(size_t)==4)  ?
+                    /* 32-bit */ (2 GB - 64 MB) :
+                    /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31));
 
 
 /* *************************************
 *  console display
 ***************************************/
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static int g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
+#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+/* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
 
 static const U64 g_refreshRate = SEC_TO_MICRO / 6;
 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 
-#define DISPLAYUPDATE(l, ...) { if (g_displayLevel>=l) { \
-            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
+#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
+            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
             { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } } }
+            if (displayLevel>=4) fflush(stderr); } } }
 
 
 /* *************************************
@@ -89,467 +93,867 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 #  define DEBUG 0
 #endif
 #define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
-#define EXM_THROW(error, ...)  {                      \
+
+#define EXM_THROW_INT(errorNum, ...)  {               \
     DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
-    DISPLAYLEVEL(1, "Error %i : ", error);            \
+    DISPLAYLEVEL(1, "Error %i : ", errorNum);         \
     DISPLAYLEVEL(1, __VA_ARGS__);                     \
     DISPLAYLEVEL(1, " \n");                           \
-    exit(error);                                      \
+    return errorNum;                                  \
 }
 
+#define RETURN_ERROR(errorNum, retType, ...)  {       \
+    retType r;                                        \
+    memset(&r, 0, sizeof(retType));                   \
+    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
+    DISPLAYLEVEL(1, "Error %i : ", errorNum);         \
+    DISPLAYLEVEL(1, __VA_ARGS__);                     \
+    DISPLAYLEVEL(1, " \n");                           \
+    r.tag = errorNum;                                 \
+    return r;                                         \
+}
+
+/* error without displaying */
+#define RETURN_QUIET_ERROR(errorNum, retType, ...)  { \
+    retType r;                                        \
+    memset(&r, 0, sizeof(retType));                   \
+    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
+    DEBUGOUTPUT("Error %i : ", errorNum);             \
+    DEBUGOUTPUT(__VA_ARGS__);                         \
+    DEBUGOUTPUT(" \n");                               \
+    r.tag = errorNum;                                 \
+    return r;                                         \
+}
 
 /* *************************************
 *  Benchmark Parameters
 ***************************************/
-static int g_additionalParam = 0;
-static U32 g_decodeOnly = 0;
 
-void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
+BMK_advancedParams_t BMK_initAdvancedParams(void) {
+    BMK_advancedParams_t const res = {
+        BMK_both, /* mode */
+        BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
+        0, /* blockSize */
+        0, /* nbWorkers */
+        0, /* realTime */
+        0, /* additionalParam */
+        0, /* ldmFlag */
+        0, /* ldmMinMatch */
+        0, /* ldmHashLog */
+        0, /* ldmBuckSizeLog */
+        0  /* ldmHashEveryLog */
+    };
+    return res;
+}
+
+
+/* ********************************************************
+*  Bench functions
+**********************************************************/
+typedef struct {
+    const void* srcPtr;
+    size_t srcSize;
+    void*  cPtr;
+    size_t cRoom;
+    size_t cSize;
+    void*  resPtr;
+    size_t resSize;
+} blockParam_t;
+
+#undef MIN
+#undef MAX
+#define MIN(a,b)    ((a) < (b) ? (a) : (b))
+#define MAX(a,b)    ((a) > (b) ? (a) : (b))
+
+static void BMK_initCCtx(ZSTD_CCtx* ctx,
+    const void* dictBuffer, size_t dictBufferSize, int cLevel,
+    const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) {
+    ZSTD_CCtx_reset(ctx);
+    ZSTD_CCtx_resetParameters(ctx);
+    if (adv->nbWorkers==1) {
+        ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, 0);
+    } else {
+        ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, adv->nbWorkers);
+    }
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, adv->ldmFlag);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, adv->ldmMinMatch);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, adv->ldmHashLog);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, adv->ldmBucketSizeLog);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, adv->ldmHashEveryLog);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
+    ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);
+    ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
+}
+
+static void BMK_initDCtx(ZSTD_DCtx* dctx,
+    const void* dictBuffer, size_t dictBufferSize) {
+    ZSTD_DCtx_reset(dctx);
+    ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize);
+}
+
 
-void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
+typedef struct {
+    ZSTD_CCtx* cctx;
+    const void* dictBuffer;
+    size_t dictBufferSize;
+    int cLevel;
+    const ZSTD_compressionParameters* comprParams;
+    const BMK_advancedParams_t* adv;
+} BMK_initCCtxArgs;
+
+static size_t local_initCCtx(void* payload) {
+    BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
+    BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv);
+    return 0;
+}
 
-static U32 g_nbSeconds = BMK_TIMETEST_DEFAULT_S;
-void BMK_setNbSeconds(unsigned nbSeconds)
+typedef struct {
+    ZSTD_DCtx* dctx;
+    const void* dictBuffer;
+    size_t dictBufferSize;
+} BMK_initDCtxArgs;
+
+static size_t local_initDCtx(void* payload) {
+    BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
+    BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
+    return 0;
+}
+
+
+/* `addArgs` is the context */
+static size_t local_defaultCompress(
+                    const void* srcBuffer, size_t srcSize,
+                    void* dstBuffer, size_t dstSize,
+                    void* addArgs)
 {
-    g_nbSeconds = nbSeconds;
-    DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression - \n", g_nbSeconds);
+    size_t moreToFlush = 1;
+    ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
+    ZSTD_inBuffer in;
+    ZSTD_outBuffer out;
+    in.src = srcBuffer; in.size = srcSize; in.pos = 0;
+    out.dst = dstBuffer; out.size = dstSize; out.pos = 0;
+    while (moreToFlush) {
+        if(out.pos == out.size) {
+            return (size_t)-ZSTD_error_dstSize_tooSmall;
+        }
+        moreToFlush = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
+        if (ZSTD_isError(moreToFlush)) {
+            return moreToFlush;
+        }
+    }
+    return out.pos;
 }
 
-static size_t g_blockSize = 0;
-void BMK_setBlockSize(size_t blockSize)
+/* `addArgs` is the context */
+static size_t local_defaultDecompress(
+                    const void* srcBuffer, size_t srcSize,
+                    void* dstBuffer, size_t dstCapacity,
+                    void* addArgs)
 {
-    g_blockSize = blockSize;
-    if (g_blockSize) DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
+    size_t moreToFlush = 1;
+    ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
+    ZSTD_inBuffer in;
+    ZSTD_outBuffer out;
+    in.src = srcBuffer; in.size = srcSize; in.pos = 0;
+    out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0;
+    while (moreToFlush) {
+        if(out.pos == out.size) {
+            return (size_t)-ZSTD_error_dstSize_tooSmall;
+        }
+        moreToFlush = ZSTD_decompress_generic(dctx, &out, &in);
+        if (ZSTD_isError(moreToFlush)) {
+            return moreToFlush;
+        }
+    }
+    return out.pos;
+
 }
 
-void BMK_setDecodeOnlyMode(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); }
 
-static U32 g_nbWorkers = 0;
-void BMK_setNbWorkers(unsigned nbWorkers) {
-#ifndef ZSTD_MULTITHREAD
-    if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
-#endif
-    g_nbWorkers = nbWorkers;
+/*===  Benchmarking an arbitrary function  ===*/
+
+int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
+{
+    return outcome.tag == 0;
 }
 
-static U32 g_realTime = 0;
-void BMK_setRealTime(unsigned priority) {
-    g_realTime = (priority>0);
+/* warning : this function will stop program execution if outcome is invalid !
+ *           check outcome validity first, using BMK_isSuccessful_runOutcome() */
+BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
+{
+    assert(outcome.tag == 0);
+    return outcome.internal_never_use_directly;
 }
 
-static U32 g_separateFiles = 0;
-void BMK_setSeparateFiles(unsigned separate) {
-    g_separateFiles = (separate>0);
+static BMK_runOutcome_t BMK_runOutcome_error(void)
+{
+    BMK_runOutcome_t b;
+    memset(&b, 0, sizeof(b));
+    b.tag = 1;
+    return b;
+}
+
+static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
+{
+    BMK_runOutcome_t outcome;
+    outcome.tag = 0;
+    outcome.internal_never_use_directly = runTime;
+    return outcome;
+}
+
+
+/* initFn will be measured once, benchFn will be measured `nbLoops` times */
+/* initFn is optional, provide NULL if none */
+/* benchFn must return size_t field compliant with ZSTD_isError for error values */
+/* takes # of blocks and list of size & stuff for each. */
+/* can report result of benchFn for each block into blockResult. */
+/* blockResult is optional, provide NULL if this information is not required */
+/* note : time per loop could be zero if run time < timer resolution */
+BMK_runOutcome_t BMK_benchFunction(
+            BMK_benchFn_t benchFn, void* benchPayload,
+            BMK_initFn_t initFn, void* initPayload,
+            size_t blockCount,
+            const void* const * srcBlockBuffers, const size_t* srcBlockSizes,
+            void* const * dstBlockBuffers, const size_t* dstBlockCapacities,
+            size_t* blockResults,
+            unsigned nbLoops)
+{
+    size_t dstSize = 0;
+
+    if(!nbLoops) {
+        RETURN_QUIET_ERROR(2, BMK_runOutcome_t, "nbLoops must be nonzero ");
+    }
+
+    /* init */
+    {   size_t i;
+        for(i = 0; i < blockCount; i++) {
+            memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]);  /* warm up and erase result buffer */
+        }
+#if 0
+        /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
+         * (Makes former slower)
+         */
+        UTIL_sleepMilli(5);  /* give processor time to other processes */
+        UTIL_waitForNextTick();
+#endif
+    }
+
+    /* benchmark */
+    {   UTIL_time_t const clockStart = UTIL_getTime();
+        unsigned loopNb, blockNb;
+        if (initFn != NULL) initFn(initPayload);
+        for (loopNb = 0; loopNb < nbLoops; loopNb++) {
+            for (blockNb = 0; blockNb < blockCount; blockNb++) {
+                size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb],
+                                    dstBlockBuffers[blockNb], dstBlockCapacities[blockNb],
+                                    benchPayload);
+                if(ZSTD_isError(res)) {
+                    RETURN_QUIET_ERROR(2, BMK_runOutcome_t,
+                        "Function benchmark failed on block %u of size %u : %s",
+                        blockNb, (U32)dstBlockCapacities[blockNb], ZSTD_getErrorName(res));
+                } else if (loopNb == 0) {
+                    dstSize += res;
+                    if (blockResults != NULL) blockResults[blockNb] = res;
+            }   }
+        }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
+
+        {   U64 const totalTime = UTIL_clockSpanNano(clockStart);
+            BMK_runTime_t rt;
+            rt.nanoSecPerRun = totalTime / nbLoops;
+            rt.sumOfReturn = dstSize;
+            return BMK_setValid_runTime(rt);
+    }   }
 }
 
-static U32 g_ldmFlag = 0;
-void BMK_setLdmFlag(unsigned ldmFlag) {
-    g_ldmFlag = ldmFlag;
+
+/* ====  Benchmarking any function, providing intermediate results  ==== */
+
+struct BMK_timedFnState_s {
+    U64 timeSpent_ns;
+    U64 timeBudget_ns;
+    U64 runBudget_ns;
+    BMK_runTime_t fastestRun;
+    unsigned nbLoops;
+    UTIL_time_t coolTime;
+};  /* typedef'd to BMK_timedFnState_t within bench.h */
+
+BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
+{
+    BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
+    if (r == NULL) return NULL;   /* malloc() error */
+    BMK_resetTimedFnState(r, total_ms, run_ms);
+    return r;
 }
 
-static U32 g_ldmMinMatch = 0;
-void BMK_setLdmMinMatch(unsigned ldmMinMatch) {
-    g_ldmMinMatch = ldmMinMatch;
+void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
+    free(state);
 }
 
-static U32 g_ldmHashLog = 0;
-void BMK_setLdmHashLog(unsigned ldmHashLog) {
-    g_ldmHashLog = ldmHashLog;
+void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
+{
+    if (!total_ms) total_ms = 1 ;
+    if (!run_ms) run_ms = 1;
+    if (run_ms > total_ms) run_ms = total_ms;
+    timedFnState->timeSpent_ns = 0;
+    timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
+    timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
+    timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
+    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
+    timedFnState->nbLoops = 1;
+    timedFnState->coolTime = UTIL_getTime();
 }
 
-#define BMK_LDM_PARAM_NOTSET 9999
-static U32 g_ldmBucketSizeLog = BMK_LDM_PARAM_NOTSET;
-void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) {
-    g_ldmBucketSizeLog = ldmBucketSizeLog;
+/* Tells if nb of seconds set in timedFnState for all runs is spent.
+ * note : only elapsed time is compared to the budget here; a failed BMK_benchTimedFn() run adds no time, so check its outcome separately. */
+int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
+{
+    return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
 }
 
-static U32 g_ldmHashEveryLog = BMK_LDM_PARAM_NOTSET;
-void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
-    g_ldmHashEveryLog = ldmHashEveryLog;
+
+#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
+
+BMK_runOutcome_t BMK_benchTimedFn(
+            BMK_timedFnState_t* cont,
+            BMK_benchFn_t benchFn, void* benchPayload,
+            BMK_initFn_t initFn, void* initPayload,
+            size_t blockCount,
+            const void* const* srcBlockBuffers, const size_t* srcBlockSizes,
+            void * const * dstBlockBuffers, const size_t * dstBlockCapacities,
+            size_t* blockResults)
+{
+    U64 const runBudget_ns = cont->runBudget_ns;
+    U64 const runTimeMin_ns = runBudget_ns / 2;
+    int completed = 0;
+    BMK_runTime_t bestRunTime = cont->fastestRun;
+
+    while (!completed) {
+        BMK_runOutcome_t runResult;
+
+        /* Overheat protection */
+        if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
+            DEBUGOUTPUT("\rcooling down ...    \r");
+            UTIL_sleep(COOLPERIOD_SEC);
+            cont->coolTime = UTIL_getTime();
+        }
+
+        /* run benchFn cont->nbLoops times over every block */
+        runResult = BMK_benchFunction(benchFn, benchPayload,
+                                    initFn, initPayload,
+                                    blockCount,
+                                    srcBlockBuffers, srcBlockSizes,
+                                    dstBlockBuffers, dstBlockCapacities,
+                                    blockResults,
+                                    cont->nbLoops);
+
+        if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
+            return BMK_runOutcome_error();
+        }
+
+        {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
+            U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
+
+            cont->timeSpent_ns += loopDuration_ns;
+
+            /* estimate nbLoops for next run to last approximately runBudget_ns */
+            if (loopDuration_ns > (runBudget_ns / 50)) {
+                U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
+                cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
+            } else {
+                /* previous run was too short : blindly increase workload by x multiplier */
+                const unsigned multiplier = 10;
+                assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
+                cont->nbLoops *= multiplier;
+            }
+
+            if(loopDuration_ns < runTimeMin_ns) {
+                /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
+                assert(completed == 0);
+                continue;
+            } else {
+                if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
+                    bestRunTime = newRunTime;
+                }
+                completed = 1;
+            }
+        }
+    }   /* while (!completed) */
+
+    return BMK_setValid_runTime(bestRunTime);
 }
 
 
-/* ********************************************************
-*  Bench functions
-**********************************************************/
-typedef struct {
-    const void* srcPtr;
-    size_t srcSize;
-    void*  cPtr;
-    size_t cRoom;
-    size_t cSize;
-    void*  resPtr;
-    size_t resSize;
-} blockParam_t;
+/* ================================================================= */
+/*      Benchmark Zstandard, mem-to-mem scenarios                    */
+/* ================================================================= */
 
+int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
+{
+    return outcome.tag == 0;
+}
 
+BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
+{
+    assert(outcome.tag == 0);
+    return outcome.internal_never_use_directly;
+}
 
-#undef MIN
-#undef MAX
-#define MIN(a,b)    ((a) < (b) ? (a) : (b))
-#define MAX(a,b)    ((a) > (b) ? (a) : (b))
+static BMK_benchOutcome_t BMK_benchOutcome_error(void)
+{
+    BMK_benchOutcome_t b;
+    memset(&b, 0, sizeof(b));
+    b.tag = 1;
+    return b;
+}
 
-static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
-                        const char* displayName, int cLevel,
-                        const size_t* fileSizes, U32 nbFiles,
-                        const void* dictBuffer, size_t dictBufferSize,
-                        const ZSTD_compressionParameters* const comprParams)
+static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result)
 {
-    size_t const blockSize = ((g_blockSize>=32 && !g_decodeOnly) ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
-    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
-    blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
-    size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
-    void* const compressedBuffer = malloc(maxCompressedSize);
-    void* resultBuffer = malloc(srcSize);
-    ZSTD_CCtx* const ctx = ZSTD_createCCtx();
-    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+    BMK_benchOutcome_t b;   /* wraps `result` into an outcome flagged as successful */
+    b.tag = 0;              /* 0 == valid result, see BMK_isSuccessful_benchOutcome() */
+    b.internal_never_use_directly = result;
+    return b;
+}
+
+
+/* benchMem with no allocation */
+static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
+            const void** srcPtrs, size_t* srcSizes,
+            void** cPtrs, size_t* cCapacities, size_t* cSizes,
+            void** resPtrs, size_t* resSizes,
+            void** resultBufferPtr, void* compressedBuffer,
+            size_t maxCompressedSize,
+            BMK_timedFnState_t* timeStateCompress,
+            BMK_timedFnState_t* timeStateDecompress,
+
+            const void* srcBuffer, size_t srcSize,
+            const size_t* fileSizes, unsigned nbFiles,
+            const int cLevel, const ZSTD_compressionParameters* comprParams,
+            const void* dictBuffer, size_t dictBufferSize,
+            ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
+            int displayLevel, const char* displayName,
+            const BMK_advancedParams_t* adv)
+{
+    size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize);  /* avoid div by 0 */
+    BMK_benchResult_t benchResult;
     size_t const loadedCompressedSize = srcSize;
     size_t cSize = 0;
     double ratio = 0.;
     U32 nbBlocks;
 
-    /* checks */
-    if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx)
-        EXM_THROW(31, "allocation error : not enough memory");
+    assert(cctx != NULL); assert(dctx != NULL);
 
     /* init */
-    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* display last 17 characters */
-    if (g_nbWorkers==1) g_nbWorkers=0;   /* prefer synchronous mode */
-
-    if (g_decodeOnly) {  /* benchmark only decompression : source must be already compressed */
+    memset(&benchResult, 0, sizeof(benchResult));
+    if (strlen(displayName)>17) displayName += strlen(displayName) - 17;   /* display last 17 characters */
+    if (adv->mode == BMK_decodeOnly) {  /* benchmark only decompression : source must be already compressed */
         const char* srcPtr = (const char*)srcBuffer;
         U64 totalDSize64 = 0;
         U32 fileNb;
         for (fileNb=0; fileNb<nbFiles; fileNb++) {
             U64 const fSize64 = ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
-            if (fSize64==0) EXM_THROW(32, "Impossible to determine original size ");
+            if (fSize64==0) RETURN_ERROR(32, BMK_benchOutcome_t, "Impossible to determine original size ");
             totalDSize64 += fSize64;
             srcPtr += fileSizes[fileNb];
         }
         {   size_t const decodedSize = (size_t)totalDSize64;
-            if (totalDSize64 > decodedSize) EXM_THROW(32, "original size is too large");   /* size_t overflow */
-            free(resultBuffer);
-            resultBuffer = malloc(decodedSize);
-            if (!resultBuffer) EXM_THROW(33, "not enough memory");
+            if (totalDSize64 > decodedSize) {  /* size_t overflow : reject before touching *resultBufferPtr */
+                RETURN_ERROR(32, BMK_benchOutcome_t, "original size is too large");
+            }
+            /* the caller still owns and frees *resultBufferPtr : from here on it must hold
+             * either NULL (malloc failure) or a live allocation, never an already-freed pointer */
+            free(*resultBufferPtr);
+            *resultBufferPtr = malloc(decodedSize);
+            if (!(*resultBufferPtr)) {
+                RETURN_ERROR(33, BMK_benchOutcome_t, "not enough memory");
+            }
             cSize = srcSize;
             srcSize = decodedSize;
             ratio = (double)srcSize / (double)cSize;
-    }   }
+        }
+    }
 
-    /* Init blockTable data */
+    /* Init data blocks  */
     {   const char* srcPtr = (const char*)srcBuffer;
         char* cPtr = (char*)compressedBuffer;
-        char* resPtr = (char*)resultBuffer;
+        char* resPtr = (char*)(*resultBufferPtr);
         U32 fileNb;
         for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
             size_t remaining = fileSizes[fileNb];
-            U32 const nbBlocksforThisFile = g_decodeOnly ? 1 : (U32)((remaining + (blockSize-1)) / blockSize);
+            U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize);
             U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
             for ( ; nbBlocks<blockEnd; nbBlocks++) {
                 size_t const thisBlockSize = MIN(remaining, blockSize);
-                blockTable[nbBlocks].srcPtr = (const void*)srcPtr;
-                blockTable[nbBlocks].srcSize = thisBlockSize;
-                blockTable[nbBlocks].cPtr = (void*)cPtr;
-                blockTable[nbBlocks].cRoom = g_decodeOnly ? thisBlockSize : ZSTD_compressBound(thisBlockSize);
-                blockTable[nbBlocks].cSize = blockTable[nbBlocks].cRoom;
-                blockTable[nbBlocks].resPtr = (void*)resPtr;
-                blockTable[nbBlocks].resSize = g_decodeOnly ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize;
+                srcPtrs[nbBlocks] = srcPtr;
+                srcSizes[nbBlocks] = thisBlockSize;
+                cPtrs[nbBlocks] = cPtr;
+                cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize);
+                resPtrs[nbBlocks] = resPtr;
+                resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize;
                 srcPtr += thisBlockSize;
-                cPtr += blockTable[nbBlocks].cRoom;
+                cPtr += cCapacities[nbBlocks];
                 resPtr += thisBlockSize;
                 remaining -= thisBlockSize;
-    }   }   }
+            }
+        }
+    }
 
-    /* warmimg up memory */
-    if (g_decodeOnly) {
+    /* warming up `compressedBuffer` */
+    if (adv->mode == BMK_decodeOnly) {
         memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
     } else {
         RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
     }
 
     /* Bench */
-    {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
-        U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0);
-        UTIL_time_t coolTime;
-        U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 1;
-        U32 nbDecodeLoops = (U32)((100 MB) / (srcSize+1)) + 1;  /* initial conservative speed estimate */
-        U32 nbCompressionLoops = (U32)((2 MB) / (srcSize+1)) + 1;  /* initial conservative speed estimate */
-        U64 totalCTime=0, totalDTime=0;
-        U32 cCompleted=g_decodeOnly, dCompleted=0;
+    {   U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0);
 #       define NB_MARKS 4
-        const char* const marks[NB_MARKS] = { " |", " /", " =",  "\\" };
+        const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
         U32 markNb = 0;
+        int compressionCompleted = (adv->mode == BMK_decodeOnly);
+        int decompressionCompleted = (adv->mode == BMK_compressOnly);
+        BMK_initCCtxArgs cctxprep;
+        BMK_initDCtxArgs dctxprep;
+        cctxprep.cctx = cctx;
+        cctxprep.dictBuffer = dictBuffer;
+        cctxprep.dictBufferSize = dictBufferSize;
+        cctxprep.cLevel = cLevel;
+        cctxprep.comprParams = comprParams;
+        cctxprep.adv = adv;
+        dctxprep.dctx = dctx;
+        dctxprep.dictBuffer = dictBuffer;
+        dctxprep.dictBufferSize = dictBufferSize;
+
+        DISPLAYLEVEL(2, "\r%70s\r", "");   /* blank line */
+        DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
+
+        while (!(compressionCompleted && decompressionCompleted)) {
+
+            if (!compressionCompleted) {
+                BMK_runOutcome_t const cOutcome =
+                        BMK_benchTimedFn( timeStateCompress,
+                                        &local_defaultCompress, cctx,
+                                        &local_initCCtx, &cctxprep,
+                                        nbBlocks,
+                                        srcPtrs, srcSizes,
+                                        cPtrs, cCapacities,
+                                        cSizes);
+
+                if (!BMK_isSuccessful_runOutcome(cOutcome)) {
+                    return BMK_benchOutcome_error();
+                }
 
-        coolTime = UTIL_getTime();
-        DISPLAYLEVEL(2, "\r%79s\r", "");
-        while (!cCompleted || !dCompleted) {
-
-            /* overheat protection */
-            if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
-                DISPLAYLEVEL(2, "\rcooling down ...    \r");
-                UTIL_sleep(COOLPERIOD_SEC);
-                coolTime = UTIL_getTime();
-            }
-
-            if (!g_decodeOnly) {
-                /* Compression */
-                DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
-                if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
-
-                UTIL_sleepMilli(5);  /* give processor time to other processes */
-                UTIL_waitForNextTick();
-
-                if (!cCompleted) {   /* still some time to do compression tests */
-                    U32 nbLoops = 0;
-                    UTIL_time_t const clockStart = UTIL_getTime();
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, g_nbWorkers);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog);
-                    if (g_ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) {
-                      ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog);
-                    }
-                    if (g_ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) {
-                      ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog);
-                    }
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
-                    ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);
-                    ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
-
-                    if (!g_nbSeconds) nbCompressionLoops=1;
-                    for (nbLoops=0; nbLoops<nbCompressionLoops; nbLoops++) {
-                        U32 blockNb;
-                        for (blockNb=0; blockNb<nbBlocks; blockNb++) {
-#if 0   /* direct compression function, for occasional comparison */
-                            ZSTD_parameters const params = ZSTD_getParams(cLevel, blockTable[blockNb].srcSize, dictBufferSize);
-                            blockTable[blockNb].cSize = ZSTD_compress_advanced(ctx,
-                                                            blockTable[blockNb].cPtr, blockTable[blockNb].cRoom,
-                                                            blockTable[blockNb].srcPtr, blockTable[blockNb].srcSize,
-                                                            dictBuffer, dictBufferSize,
-                                                            params);
-#else
-                            size_t moreToFlush = 1;
-                            ZSTD_outBuffer out;
-                            ZSTD_inBuffer in;
-                            in.src = blockTable[blockNb].srcPtr;
-                            in.size = blockTable[blockNb].srcSize;
-                            in.pos = 0;
-                            out.dst = blockTable[blockNb].cPtr;
-                            out.size = blockTable[blockNb].cRoom;
-                            out.pos = 0;
-                            while (moreToFlush) {
-                                moreToFlush = ZSTD_compress_generic(ctx,
-                                                    &out, &in, ZSTD_e_end);
-                                if (ZSTD_isError(moreToFlush))
-                                    EXM_THROW(1, "ZSTD_compress_generic() error : %s",
-                                                ZSTD_getErrorName(moreToFlush));
-                            }
-                            blockTable[blockNb].cSize = out.pos;
-#endif
-                    }   }
-                    {   U64 const loopDuration = UTIL_clockSpanNano(clockStart);
-                        if (loopDuration > 0) {
-                            if (loopDuration < fastestC * nbCompressionLoops)
-                                fastestC = loopDuration / nbCompressionLoops;
-                            nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1;
-                        } else {
-                            assert(nbCompressionLoops < 40000000);  /* avoid overflow */
-                            nbCompressionLoops *= 100;
-                        }
-                        totalCTime += loopDuration;
-                        cCompleted = (totalCTime >= maxTime);  /* end compression tests */
+                {   BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
+                    cSize = cResult.sumOfReturn;
+                    ratio = (double)srcSize / cSize;
+                    {   BMK_benchResult_t newResult;
+                        newResult.cSpeed = ((U64)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
+                        benchResult.cSize = cSize;
+                        if (newResult.cSpeed > benchResult.cSpeed)
+                            benchResult.cSpeed = newResult.cSpeed;
                 }   }
 
-                cSize = 0;
-                { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; }
-                ratio = (double)srcSize / (double)cSize;
-                markNb = (markNb+1) % NB_MARKS;
                 {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                    double const compressionSpeed = ((double)srcSize / fastestC) * 1000;
-                    int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
+                    markNb = (markNb+1) % NB_MARKS;
                     DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
-                            marks[markNb], displayName, (U32)srcSize, (U32)cSize,
+                            marks[markNb], displayName,
+                            (U32)srcSize, (U32)cSize,
                             ratioAccuracy, ratio,
-                            cSpeedAccuracy, compressionSpeed );
+                            benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT);
                 }
-            }  /* if (!g_decodeOnly) */
+                compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
+            }
 
-#if 0       /* disable decompression test */
-            dCompleted=1;
-            (void)totalDTime; (void)fastestD; (void)crcOrig;   /* unused when decompression disabled */
-#else
-            /* Decompression */
-            if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
-
-            UTIL_sleepMilli(5); /* give processor time to other processes */
-            UTIL_waitForNextTick();
-
-            if (!dCompleted) {
-                U32 nbLoops = 0;
-                ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize);
-                UTIL_time_t const clockStart = UTIL_getTime();
-                if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure");
-                if (!g_nbSeconds) nbDecodeLoops = 1;
-                for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
-                    U32 blockNb;
-                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
-                        size_t const regenSize = ZSTD_decompress_usingDDict(dctx,
-                            blockTable[blockNb].resPtr, blockTable[blockNb].resSize,
-                            blockTable[blockNb].cPtr, blockTable[blockNb].cSize,
-                            ddict);
-                        if (ZSTD_isError(regenSize)) {
-                            EXM_THROW(2, "ZSTD_decompress_usingDDict() failed on block %u of size %u : %s  \n",
-                                      blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize));
-                        }
-                        blockTable[blockNb].resSize = regenSize;
-                }   }
-                ZSTD_freeDDict(ddict);
-                {   U64 const loopDuration = UTIL_clockSpanNano(clockStart);
-                    if (loopDuration > 0) {
-                        if (loopDuration < fastestD * nbDecodeLoops)
-                            fastestD = loopDuration / nbDecodeLoops;
-                        nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1;
-                    } else {
-                        assert(nbDecodeLoops < 40000000);  /* avoid overflow */
-                        nbDecodeLoops *= 100;
-                    }
-                    totalDTime += loopDuration;
-                    dCompleted = (totalDTime >= maxTime);
-            }   }
+            if(!decompressionCompleted) {
+                BMK_runOutcome_t const dOutcome =
+                        BMK_benchTimedFn(timeStateDecompress,
+                                        &local_defaultDecompress, dctx,
+                                        &local_initDCtx, &dctxprep,
+                                        nbBlocks,
+                                        (const void *const *)cPtrs, cSizes,
+                                        resPtrs, resSizes,
+                                        NULL);
+
+                if(!BMK_isSuccessful_runOutcome(dOutcome)) {
+                    return BMK_benchOutcome_error();
+                }
 
-            markNb = (markNb+1) % NB_MARKS;
-            {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                double const compressionSpeed = ((double)srcSize / fastestC) * 1000;
-                int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
-                double const decompressionSpeed = ((double)srcSize / fastestD) * 1000;
-                DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
-                        marks[markNb], displayName, (U32)srcSize, (U32)cSize,
-                        ratioAccuracy, ratio,
-                        cSpeedAccuracy, compressionSpeed,
-                        decompressionSpeed);
-            }
+                {   BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
+                    U64 const newDSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
+                    if (newDSpeed > benchResult.dSpeed)
+                        benchResult.dSpeed = newDSpeed;
+                }
 
-            /* CRC Checking */
-            {   U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
-                if (!g_decodeOnly && (crcOrig!=crcCheck)) {
-                    size_t u;
-                    DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
-                    for (u=0; u<srcSize; u++) {
-                        if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
-                            U32 segNb, bNb, pos;
-                            size_t bacc = 0;
-                            DISPLAY("Decoding error at pos %u ", (U32)u);
-                            for (segNb = 0; segNb < nbBlocks; segNb++) {
-                                if (bacc + blockTable[segNb].srcSize > u) break;
-                                bacc += blockTable[segNb].srcSize;
-                            }
-                            pos = (U32)(u - bacc);
-                            bNb = pos / (128 KB);
-                            DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
-                            if (u>5) {
-                                int n;
-                                DISPLAY("origin: ");
-                                for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
-                                DISPLAY(" :%02X:  ", ((const BYTE*)srcBuffer)[u]);
-                                for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
-                                DISPLAY(" \n");
-                                DISPLAY("decode: ");
-                                for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]);
-                                DISPLAY(" :%02X:  ", ((const BYTE*)resultBuffer)[u]);
-                                for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]);
-                                DISPLAY(" \n");
-                            }
-                            break;
+                {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
+                    markNb = (markNb+1) % NB_MARKS;
+                    DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
+                            marks[markNb], displayName,
+                            (U32)srcSize, (U32)benchResult.cSize,
+                            ratioAccuracy, ratio,
+                            benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT,
+                            (double)benchResult.dSpeed / MB_UNIT);
+                }
+                decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
+            }
+        }   /* while (!(compressionCompleted && decompressionCompleted)) */
+
+        /* CRC Checking */
+        {   const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
+            U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
+            if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) {
+                size_t u;
+                DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
+                for (u=0; u<srcSize; u++) {
+                    if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
+                        U32 segNb, bNb, pos;
+                        size_t bacc = 0;
+                        DISPLAY("Decoding error at pos %u ", (U32)u);
+                        for (segNb = 0; segNb < nbBlocks; segNb++) {
+                            if (bacc + srcSizes[segNb] > u) break;
+                            bacc += srcSizes[segNb];
                         }
-                        if (u==srcSize-1) {  /* should never happen */
-                            DISPLAY("no difference detected\n");
-                    }   }
-                    break;
-            }   }   /* CRC Checking */
-#endif
-        }   /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */
-
-        if (g_displayLevel == 1) {   /* hidden display mode -q, used by python speed benchmark */
-            double const cSpeed = ((double)srcSize / fastestC) * 1000;
-            double const dSpeed = ((double)srcSize / fastestD) * 1000;
-            if (g_additionalParam)
-                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
-            else
+                        pos = (U32)(u - bacc);
+                        bNb = pos / (128 KB);
+                        DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
+                        if (u>5) {
+                            int n;
+                            DISPLAY("origin: ");
+                            for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
+                            DISPLAY(" :%02X:  ", ((const BYTE*)srcBuffer)[u]);
+                            for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
+                            DISPLAY(" \n");
+                            DISPLAY("decode: ");
+                            for (n=-5; n<0; n++) DISPLAY("%02X ", resultBuffer[u+n]);
+                            DISPLAY(" :%02X:  ", resultBuffer[u]);
+                            for (n=1; n<3; n++) DISPLAY("%02X ", resultBuffer[u+n]);
+                            DISPLAY(" \n");
+                        }
+                        break;
+                    }
+                    if (u==srcSize-1) {  /* should never happen */
+                        DISPLAY("no difference detected\n");
+                    }
+                }
+            }
+        }   /* CRC Checking */
+
+        if (displayLevel == 1) {   /* hidden display mode -q, used by python speed benchmark */
+            double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
+            double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
+            if (adv->additionalParam) {
+                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
+            } else {
                 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
+            }
         }
+
         DISPLAYLEVEL(2, "%2i#\n", cLevel);
     }   /* Bench */
 
-    /* clean up */
-    free(blockTable);
-    free(compressedBuffer);
-    free(resultBuffer);
-    ZSTD_freeCCtx(ctx);
-    ZSTD_freeDCtx(dctx);
-    return 0;
+    benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
+    return BMK_benchOutcome_setValidResult(benchResult);
 }
 
+BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,   /* allocates all work buffers, runs BMK_benchMemAdvancedNoAlloc(), then releases them */
+                        void* dstBuffer, size_t dstCapacity,
+                        const size_t* fileSizes, unsigned nbFiles,
+                        int cLevel, const ZSTD_compressionParameters* comprParams,
+                        const void* dictBuffer, size_t dictBufferSize,
+                        int displayLevel, const char* displayName, const BMK_advancedParams_t* adv)
 
-static size_t BMK_findMaxMem(U64 requiredMem)
 {
-    size_t const step = 64 MB;
-    BYTE* testmem = NULL;
+    int const dstParamsError = !dstBuffer ^ !dstCapacity;  /* dstBuffer and dstCapacity must be both provided, or both NULL / 0 */
 
-    requiredMem = (((requiredMem >> 26) + 1) << 26);
-    requiredMem += step;
-    if (requiredMem > maxMemory) requiredMem = maxMemory;
+    size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
+    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
 
-    do {
-        testmem = (BYTE*)malloc((size_t)requiredMem);
-        requiredMem -= step;
-    } while (!testmem);
+    /* these are the blockTable parameters, just split up : one array entry per block */
+    const void ** const srcPtrs = (const void**)malloc(maxNbBlocks * sizeof(void*));
+    size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
 
-    free(testmem);
-    return (size_t)(requiredMem);
+    /* per-block compressed output : pointers, produced sizes, capacities */
+    void ** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
+    size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
+    size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
+    /* per-block regenerated (decompressed) output : pointers and sizes */
+    void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
+    size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
+    /* one timing state per direction; first argument is adv->nbSeconds scaled by 1000 (presumably milliseconds) */
+    BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
+    BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
+
+    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+
+    const size_t maxCompressedSize = dstCapacity ? dstCapacity : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
+    /* use the caller's dstBuffer when provided, otherwise allocate (and later free) an internal one */
+    void* const internalDstBuffer = dstBuffer ? NULL : malloc(maxCompressedSize);
+    void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
+
+    BMK_benchOutcome_t outcome = BMK_benchOutcome_error();  /* error by default */
+
+    void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
+    /* any failed allocation (or srcSize == 0, which leaves resultBuffer NULL) prevents the benchmark from running */
+    int allocationincomplete = !srcPtrs || !srcSizes || !cPtrs ||
+        !cSizes || !cCapacities || !resPtrs || !resSizes ||
+        !timeStateCompress || !timeStateDecompress ||
+        !cctx || !dctx ||
+        !compressedBuffer || !resultBuffer;
+
+
+    if (!allocationincomplete && !dstParamsError) {
+        outcome = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes,
+                                            cPtrs, cCapacities, cSizes,
+                                            resPtrs, resSizes,
+                                            &resultBuffer,
+                                            compressedBuffer, maxCompressedSize,
+                                            timeStateCompress, timeStateDecompress,
+                                            srcBuffer, srcSize,
+                                            fileSizes, nbFiles,
+                                            cLevel, comprParams,
+                                            dictBuffer, dictBufferSize,
+                                            cctx, dctx,
+                                            displayLevel, displayName, adv);
+    }
+
+    /* clean up : everything is released unconditionally, before any error gets reported */
+    BMK_freeTimedFnState(timeStateCompress);
+    BMK_freeTimedFnState(timeStateDecompress);
+
+    ZSTD_freeCCtx(cctx);
+    ZSTD_freeDCtx(dctx);
+
+    free(internalDstBuffer);   /* NULL when the caller supplied dstBuffer, which is never freed here */
+    free(resultBuffer);        /* may have been replaced by BMK_benchMemAdvancedNoAlloc() through &resultBuffer */
+
+    free((void*)srcPtrs);
+    free(srcSizes);
+    free(cPtrs);
+    free(cSizes);
+    free(cCapacities);
+    free(resPtrs);
+    free(resSizes);
+
+    if(allocationincomplete) {
+        RETURN_ERROR(31, BMK_benchOutcome_t, "allocation error : not enough memory");
+    }
+
+    if(dstParamsError) {
+        RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
+    }
+    return outcome;
+}
+
+BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
+                        const size_t* fileSizes, unsigned nbFiles,
+                        int cLevel, const ZSTD_compressionParameters* comprParams,
+                        const void* dictBuffer, size_t dictBufferSize,
+                        int displayLevel, const char* displayName) {
+    /* same as BMK_benchMemAdvanced(), using the settings returned by BMK_initAdvancedParams() */
+    BMK_advancedParams_t const adv = BMK_initAdvancedParams();
+    return BMK_benchMemAdvanced(srcBuffer, srcSize,
+                                NULL, 0,   /* dstBuffer, dstCapacity : none provided, destination is allocated internally */
+                                fileSizes, nbFiles,
+                                cLevel, comprParams,
+                                dictBuffer, dictBufferSize,
+                                displayLevel, displayName, &adv);
 }
 
-static void BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
-                            const char* displayName, int cLevel, int cLevelLast,
+static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
                             const size_t* fileSizes, unsigned nbFiles,
+                            int cLevel, const ZSTD_compressionParameters* comprParams,
                             const void* dictBuffer, size_t dictBufferSize,
-                            const ZSTD_compressionParameters* const compressionParams)
+                            int displayLevel, const char* displayName,
+                            BMK_advancedParams_t const * const adv)   /* benches a single level : prints the run header, then defers to BMK_benchMemAdvanced() */
 {
-    int l;
-
     const char* pch = strrchr(displayName, '\\'); /* Windows */
-    if (!pch) pch = strrchr(displayName, '/'); /* Linux */
+    if (!pch) pch = strrchr(displayName, '/');    /* no backslash found : fall back to the last '/' (Linux) */
     if (pch) displayName = pch+1;
 
-    if (g_realTime) {
+    if (adv->realTime) {
         DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
         SET_REALTIME_PRIORITY;
     }
 
-    if (g_displayLevel == 1 && !g_additionalParam)
-        DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
+    if (displayLevel == 1 && !adv->additionalParam)   /* --quiet mode */
+        DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
+                ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING,
+                (U32)benchedSize, adv->nbSeconds, (U32)(adv->blockSize>>10));
+    /* no caller-provided destination (NULL, 0) : BMK_benchMemAdvanced() allocates its own */
+    return BMK_benchMemAdvanced(srcBuffer, benchedSize,
+                                NULL, 0,
+                                fileSizes, nbFiles,
+                                cLevel, comprParams,
+                                dictBuffer, dictBufferSize,
+                                displayLevel, displayName, adv);
+}
 
-    for (l=cLevel; l <= cLevelLast; l++) {
-        if (l==0) continue;  /* skip level 0 */
-        BMK_benchMem(srcBuffer, benchedSize,
-                     displayName, l,
-                     fileSizes, nbFiles,
-                     dictBuffer, dictBufferSize, compressionParams);
+BMK_benchOutcome_t BMK_syntheticTest(int cLevel, double compressibility,
+                          const ZSTD_compressionParameters* compressionParams,
+                          int displayLevel, const BMK_advancedParams_t* adv)
+{   /* benchmarks level `cLevel` on a buffer generated by RDG_genBuffer() with the requested `compressibility` */
+    char name[20] = {0};
+    size_t const benchedSize = 10000000;   /* size of the generated sample, in bytes */
+    void* srcBuffer;
+    BMK_benchOutcome_t res;
+
+    if (cLevel > ZSTD_maxCLevel()) {
+        RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level");
     }
+
+    /* Memory allocation */
+    srcBuffer = malloc(benchedSize);
+    if (!srcBuffer) RETURN_ERROR(21, BMK_benchOutcome_t, "not enough memory");
+
+    /* Fill input buffer */
+    RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
+
+    /* Bench : the display name embeds the compressibility as a percentage */
+    snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
+    res = BMK_benchCLevel(srcBuffer, benchedSize,
+                    &benchedSize /* fileSizes : the whole buffer is one sample */, 1 /* nbFiles */,
+                    cLevel, compressionParams,
+                    NULL, 0,  /* dictionary */
+                    displayLevel, name, adv);
+
+    /* clean up */
+    free(srcBuffer);
+
+    return res;
 }
 
 
+
+static size_t BMK_findMaxMem(U64 requiredMem)
+{   /* probes malloc() to find an amount of memory, derived from `requiredMem`, that can actually be allocated */
+    size_t const step = 64 MB;
+    BYTE* testmem = NULL;
+
+    requiredMem = (((requiredMem >> 26) + 1) << 26);   /* round up to the next multiple of 1<<26 bytes */
+    requiredMem += step;                               /* start one `step` higher : the loop below always subtracts one */
+    if (requiredMem > maxMemory) requiredMem = maxMemory;
+
+    do {   /* retry with `step` less each time, until an allocation succeeds or nothing is left to try */
+        testmem = (BYTE*)malloc((size_t)requiredMem);
+        requiredMem -= step;
+    } while (!testmem && requiredMem > 0);
+
+    free(testmem);
+    return (size_t)(requiredMem);   /* one `step` below the last size attempted */
+}
+
 /*! BMK_loadFiles() :
  *  Loads `buffer` with content of files listed within `fileNamesTable`.
  *  At most, fills `buffer` entirely. */
-static void BMK_loadFiles(void* buffer, size_t bufferSize,
-                          size_t* fileSizes,
-                          const char* const * const fileNamesTable, unsigned nbFiles)
+static int BMK_loadFiles(void* buffer, size_t bufferSize,
+                         size_t* fileSizes,
+                         const char* const * fileNamesTable, unsigned nbFiles,
+                         int displayLevel)
 {
     size_t pos = 0, totalSize = 0;
     unsigned n;
@@ -567,45 +971,69 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize,
             continue;
         }
         f = fopen(fileNamesTable[n], "rb");
-        if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
+        if (f==NULL) EXM_THROW_INT(10, "impossible to open file %s", fileNamesTable[n]);
         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
         if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
-        { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
-          if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
-          pos += readSize; }
+        {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
+            if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]);
+            pos += readSize;
+        }
         fileSizes[n] = (size_t)fileSize;
         totalSize += (size_t)fileSize;
         fclose(f);
     }
 
-    if (totalSize == 0) EXM_THROW(12, "no data to bench");
+    if (totalSize == 0) EXM_THROW_INT(12, "no data to bench");
+    return 0;
 }
 
-static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigned const nbFiles,
-                               const char* const dictFileName,
-                               int const cLevel, int const cLevelLast,
-                               const ZSTD_compressionParameters* const compressionParams)
+BMK_benchOutcome_t BMK_benchFilesAdvanced(
+                        const char* const * fileNamesTable, unsigned nbFiles,
+                        const char* dictFileName, int cLevel,
+                        const ZSTD_compressionParameters* compressionParams,
+                        int displayLevel, const BMK_advancedParams_t* adv)
 {
-    void* srcBuffer;
+    void* srcBuffer = NULL;
     size_t benchedSize;
     void* dictBuffer = NULL;
     size_t dictBufferSize = 0;
-    size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
+    size_t* fileSizes = NULL;
+    BMK_benchOutcome_t res;
     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
 
-    if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes");
+    if (!nbFiles) {
+        RETURN_ERROR(14, BMK_benchOutcome_t, "No Files to Benchmark");
+    }
+
+    if (cLevel > ZSTD_maxCLevel()) {
+        RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level");
+    }
+
+    fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
+    if (!fileSizes) RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory for fileSizes");
 
     /* Load dictionary */
     if (dictFileName != NULL) {
         U64 const dictFileSize = UTIL_getFileSize(dictFileName);
-        if (dictFileSize > 64 MB)
-            EXM_THROW(10, "dictionary file %s too large", dictFileName);
+        if (dictFileSize > 64 MB) {
+            free(fileSizes);
+            RETURN_ERROR(10, BMK_benchOutcome_t, "dictionary file %s too large", dictFileName);
+        }
         dictBufferSize = (size_t)dictFileSize;
         dictBuffer = malloc(dictBufferSize);
-        if (dictBuffer==NULL)
-            EXM_THROW(11, "not enough memory for dictionary (%u bytes)",
+        if (dictBuffer==NULL) {
+            free(fileSizes);
+            RETURN_ERROR(11, BMK_benchOutcome_t, "not enough memory for dictionary (%u bytes)",
                             (U32)dictBufferSize);
-        BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1);
+        }
+
+        {   int const errorCode = BMK_loadFiles(dictBuffer, dictBufferSize,
+                                                fileSizes, &dictFileName /*?*/,
+                                                1 /*?*/, displayLevel);
+            if (errorCode) {
+                res = BMK_benchOutcome_error();
+                goto _cleanUp;
+        }   }
     }
 
     /* Memory allocation & restrictions */
@@ -613,79 +1041,49 @@ static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigne
     if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
     if (benchedSize < totalSizeToLoad)
         DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
-    srcBuffer = malloc(benchedSize);
-    if (!srcBuffer) EXM_THROW(12, "not enough memory");
+
+    srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
+    if (!srcBuffer) {
+        free(dictBuffer);
+        free(fileSizes);
+        RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory");
+    }
 
     /* Load input buffer */
-    BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
+    {   int const errorCode = BMK_loadFiles(srcBuffer, benchedSize,
+                                        fileSizes, fileNamesTable, nbFiles,
+                                        displayLevel);
+        if (errorCode) {
+            res = BMK_benchOutcome_error();
+            goto _cleanUp;
+    }   }
 
     /* Bench */
-    if (g_separateFiles) {
-        const BYTE* srcPtr = (const BYTE*)srcBuffer;
-        U32 fileNb;
-        for (fileNb=0; fileNb<nbFiles; fileNb++) {
-            size_t const fileSize = fileSizes[fileNb];
-            BMK_benchCLevel(srcPtr, fileSize,
-                            fileNamesTable[fileNb], cLevel, cLevelLast,
-                            fileSizes+fileNb, 1,
-                            dictBuffer, dictBufferSize, compressionParams);
-            srcPtr += fileSize;
-        }
-    } else {
-        char mfName[20] = {0};
+    {   char mfName[20] = {0};
         snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
         {   const char* const displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
-            BMK_benchCLevel(srcBuffer, benchedSize,
-                            displayName, cLevel, cLevelLast,
-                            fileSizes, nbFiles,
-                            dictBuffer, dictBufferSize, compressionParams);
+            res = BMK_benchCLevel(srcBuffer, benchedSize,
+                                fileSizes, nbFiles,
+                                cLevel, compressionParams,
+                                dictBuffer, dictBufferSize,
+                                displayLevel, displayName,
+                                adv);
     }   }
 
-    /* clean up */
+_cleanUp:
     free(srcBuffer);
     free(dictBuffer);
     free(fileSizes);
+    return res;
 }
 
 
-static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility,
-                              const ZSTD_compressionParameters* compressionParams)
+BMK_benchOutcome_t BMK_benchFiles(
+                    const char* const * fileNamesTable, unsigned nbFiles,
+                    const char* dictFileName,
+                    int cLevel, const ZSTD_compressionParameters* compressionParams,
+                    int displayLevel)
 {
-    char name[20] = {0};
-    size_t benchedSize = 10000000;
-    void* const srcBuffer = malloc(benchedSize);
-
-    /* Memory allocation */
-    if (!srcBuffer) EXM_THROW(21, "not enough memory");
-
-    /* Fill input buffer */
-    RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
-
-    /* Bench */
-    snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
-    BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1, NULL, 0, compressionParams);
-
-    /* clean up */
-    free(srcBuffer);
-}
-
-
-int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
-                   const char* dictFileName,
-                   int cLevel, int cLevelLast,
-                   const ZSTD_compressionParameters* compressionParams)
-{
-    double const compressibility = (double)g_compressibilityDefault / 100;
-
-    if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
-    if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
-    if (cLevelLast < cLevel) cLevelLast = cLevel;
-    if (cLevelLast > cLevel)
-        DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
-
-    if (nbFiles == 0)
-        BMK_syntheticTest(cLevel, cLevelLast, compressibility, compressionParams);
-    else
-        BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams);
-    return 0;
+    BMK_advancedParams_t const adv = BMK_initAdvancedParams();
+    return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv);
 }
diff --git a/programs/bench.h b/programs/bench.h
index bf1087013feb1..13ca5b50b4612 100644
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -8,30 +8,296 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
+#if defined (__cplusplus)
+extern "C" {
+#endif
 
 #ifndef BENCH_H_121279284357
 #define BENCH_H_121279284357
 
+/* ===  Dependencies  === */
 #include <stddef.h>   /* size_t */
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_compressionParameters */
 #include "zstd.h"     /* ZSTD_compressionParameters */
 
-int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName,
-                   int cLevel, int cLevelLast, const ZSTD_compressionParameters* compressionParams);
-
-/* Set Parameters */
-void BMK_setNbSeconds(unsigned nbLoops);
-void BMK_setBlockSize(size_t blockSize);
-void BMK_setNbWorkers(unsigned nbWorkers);
-void BMK_setRealTime(unsigned priority);
-void BMK_setNotificationLevel(unsigned level);
-void BMK_setSeparateFiles(unsigned separate);
-void BMK_setAdditionalParam(int additionalParam);
-void BMK_setDecodeOnlyMode(unsigned decodeFlag);
-void BMK_setLdmFlag(unsigned ldmFlag);
-void BMK_setLdmMinMatch(unsigned ldmMinMatch);
-void BMK_setLdmHashLog(unsigned ldmHashLog);
-void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
-void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog);
+
+/* ===  Constants  === */
+
+#define MB_UNIT 1000000
+
+
+/* ===  Benchmark functions  === */
+
+/* Creates a variant `typeName`, able to express "error or valid result".
+ * Functions with return type `typeName`
+ * must first check if result is valid, using BMK_isSuccessful_*(),
+ * and only then can extract `baseType`.
+ */
+#define VARIANT_ERROR_RESULT(baseType, variantName)  \
+                                             \
+typedef struct {                             \
+    baseType internal_never_use_directly;    \
+    int tag;                                 \
+} variantName
+
+
+typedef struct {
+    size_t cSize;                /* compressed size, in bytes */
+    unsigned long long cSpeed;   /* bytes / sec */
+    unsigned long long dSpeed;   /* bytes / sec */
+    size_t cMem;                 /* memory usage during compression */
+} BMK_benchResult_t;
+
+VARIANT_ERROR_RESULT(BMK_benchResult_t, BMK_benchOutcome_t);
+
+/* check first if the return structure represents an error or a valid result */
+int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome);
+
+/* extract result from variant type.
+ * note : this function will abort() program execution if result is not valid
+ *        check result validity first, by using BMK_isSuccessful_benchOutcome()
+ */
+BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome);
+
+
+/*! BMK_benchFiles() -- called by zstdcli */
+/*  Loads files from fileNamesTable into memory,
+ *  and an optional dictionary from dictFileName (can be NULL),
+ *  then uses benchMem().
+ *  fileNamesTable - name of files to benchmark.
+ *  nbFiles - number of files (size of fileNamesTable), must be > 0.
+ *  dictFileName - name of dictionary file to load.
+ *  cLevel - compression level to benchmark, errors if invalid.
+ *  compressionParams - advanced compression Parameters.
+ *  displayLevel - what gets printed:
+ *      0 : no display;
+ *      1 : errors;
+ *      2 : + result + interaction + warnings;
+ *      3 : + information;
+ *      4 : + debug
+ * @return:
+ *      a variant, which expresses either an error, or a valid result.
+ *      Use BMK_isSuccessful_benchOutcome() to check if function was successful.
+ *      If yes, extract the valid result with BMK_extract_benchResult(),
+ *      it will contain :
+ *          .cSpeed: compression speed in bytes per second,
+ *          .dSpeed: decompression speed in bytes per second,
+ *          .cSize : compressed size, in bytes
+ *          .cMem  : memory budget required for the compression context
+ */
+BMK_benchOutcome_t BMK_benchFiles(
+                   const char* const * fileNamesTable, unsigned nbFiles,
+                   const char* dictFileName,
+                   int cLevel, const ZSTD_compressionParameters* compressionParams,
+                   int displayLevel);
+
+
+typedef enum {
+    BMK_both = 0,
+    BMK_decodeOnly = 1,
+    BMK_compressOnly = 2
+} BMK_mode_t;
+
+typedef struct {
+    BMK_mode_t mode;            /* 0: both, 1: decode only, 2: compress only (see BMK_mode_t) */
+    unsigned nbSeconds;         /* default timing is in nbSeconds */
+    size_t blockSize;           /* Maximum size of each block */
+    unsigned nbWorkers;         /* multithreading */
+    unsigned realTime;          /* real time priority */
+    int additionalParam;        /* used by python speed benchmark */
+    unsigned ldmFlag;           /* enables long distance matching */
+    unsigned ldmMinMatch;       /* below: parameters for long distance matching, see zstd.1.md */
+    unsigned ldmHashLog;
+    unsigned ldmBucketSizeLog;
+    unsigned ldmHashEveryLog;
+} BMK_advancedParams_t;
+
+/* returns default parameters used by nonAdvanced functions */
+BMK_advancedParams_t BMK_initAdvancedParams(void);
+
+/*! BMK_benchFilesAdvanced():
+ *  Same as BMK_benchFiles(),
+ *  with more controls, provided through a BMK_advancedParams_t structure */
+BMK_benchOutcome_t BMK_benchFilesAdvanced(
+                   const char* const * fileNamesTable, unsigned nbFiles,
+                   const char* dictFileName,
+                   int cLevel, const ZSTD_compressionParameters* compressionParams,
+                   int displayLevel, const BMK_advancedParams_t* adv);
+
+/*! BMK_syntheticTest() -- called from zstdcli */
+/*  Generates a sample with datagen, using compressibility argument */
+/*  cLevel - compression level to benchmark, errors if invalid
+ *  compressibility - determines compressibility of sample
+ *  compressionParams - basic compression Parameters
+ *  displayLevel - see BMK_benchFiles()
+ *  adv - see BMK_advancedParams_t
+ * @return:
+ *      a variant, which expresses either an error, or a valid result.
+ *      Use BMK_isSuccessful_benchOutcome() to check if function was successful.
+ *      If yes, extract the valid result with BMK_extract_benchResult(),
+ *      it will contain :
+ *          .cSpeed: compression speed in bytes per second,
+ *          .dSpeed: decompression speed in bytes per second,
+ *          .cSize : compressed size, in bytes
+ *          .cMem  : memory budget required for the compression context
+ */
+BMK_benchOutcome_t BMK_syntheticTest(
+                              int cLevel, double compressibility,
+                              const ZSTD_compressionParameters* compressionParams,
+                              int displayLevel, const BMK_advancedParams_t* adv);
+
+
+
+/* ===  Benchmark Zstandard in a memory-to-memory scenario  === */
+
+/** BMK_benchMem() -- core benchmarking function, called in paramgrill
+ *  applies ZSTD_compress_generic() and ZSTD_decompress_generic() on data in srcBuffer
+ *  with specific compression parameters provided by other arguments using benchFunction
+ *  (cLevel, comprParams + adv in advanced Mode) */
+/*  srcBuffer - data source, expected to be valid compressed data if in Decode Only Mode
+ *  srcSize - size of data in srcBuffer
+ *  fileSizes - srcBuffer is considered cut into 1+ segments, to compress separately.
+ *              note : sum(fileSizes) must be == srcSize.  (<== ensure it's properly checked)
+ *  nbFiles - nb of segments
+ *  cLevel - compression level
+ *  comprParams - basic compression parameters
+ *  dictBuffer - a dictionary if used, null otherwise
+ *  dictBufferSize - size of dictBuffer, 0 otherwise
+ *  displayLevel - see BMK_benchFiles
+ *  displayName - name used by display
+ * @return:
+ *      a variant, which expresses either an error, or a valid result.
+ *      Use BMK_isSuccessful_benchOutcome() to check if function was successful.
+ *      If yes, extract the valid result with BMK_extract_benchResult(),
+ *      it will contain :
+ *          .cSpeed: compression speed in bytes per second,
+ *          .dSpeed: decompression speed in bytes per second,
+ *          .cSize : compressed size, in bytes
+ *          .cMem  : memory budget required for the compression context
+ */
+BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
+                        const size_t* fileSizes, unsigned nbFiles,
+                        int cLevel, const ZSTD_compressionParameters* comprParams,
+                        const void* dictBuffer, size_t dictBufferSize,
+                        int displayLevel, const char* displayName);
+
+/* BMK_benchMemAdvanced() : same as BMK_benchMem()
+ * with following additional options :
+ * dstBuffer - destination buffer to write compressed output in, NULL if none provided.
+ * dstCapacity - capacity of destination buffer, give 0 if dstBuffer = NULL
+ * adv - see BMK_advancedParams_t
+ */
+BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
+                        void* dstBuffer, size_t dstCapacity,
+                        const size_t* fileSizes, unsigned nbFiles,
+                        int cLevel, const ZSTD_compressionParameters* comprParams,
+                        const void* dictBuffer, size_t dictBufferSize,
+                        int displayLevel, const char* displayName,
+                        const BMK_advancedParams_t* adv);
+
+
+
+/* ====  Benchmarking any function, iterated on a set of blocks  ==== */
+
+typedef struct {
+    unsigned long long nanoSecPerRun;  /* time per iteration */
+    size_t sumOfReturn;       /* sum of return values */
+} BMK_runTime_t;
+
+VARIANT_ERROR_RESULT(BMK_runTime_t, BMK_runOutcome_t);
+
+/* check first if the return structure represents an error or a valid result */
+int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome);
+
+/* extract result from variant type.
+ * note : this function will abort() program execution if result is not valid
+ *        check result validity first, by using BMK_isSuccessful_runOutcome()
+ */
+BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome);
+
+
+
+typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload);
+typedef size_t (*BMK_initFn_t)(void* initPayload);
+
+
+/* BMK_benchFunction() :
+ * This function times the execution of 2 argument functions, benchFn and initFn  */
+
+/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload)
+ *      is run nbLoops times
+ * initFn - (*initFn)(initPayload) is run once per benchmark, at the beginning.
+ *      This argument can be NULL, in which case nothing is run.
+ * blockCount - number of blocks. Size of all array parameters : srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults
+ * srcBuffers - an array of buffers to be operated on by benchFn
+ * srcSizes - an array of the sizes of above buffers
+ * dstBuffers - an array of buffers to be written into by benchFn
+ * dstCapacities - an array of the capacities of above buffers
+ * blockResults - Optional: store the return value of benchFn for each block. Use NULL if this result is not requested.
+ * nbLoops - defines number of times benchFn is run.
+ * @return: a variant, which expresses either an error, or can generate a valid BMK_runTime_t result.
+ *          Use BMK_isSuccessful_runOutcome() to check if function was successful.
+ *          If yes, extract the result with BMK_extract_runTime(),
+ *          it will contain :
+ *              .sumOfReturn : the sum of all return values of benchFn through all of blocks
+ *              .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops)
+ *          .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer,
+ *              in which case, this value will be the total amount of bytes written into dstBuffer.
+ */
+BMK_runOutcome_t BMK_benchFunction(
+                        BMK_benchFn_t benchFn, void* benchPayload,
+                        BMK_initFn_t initFn, void* initPayload,
+                        size_t blockCount,
+                        const void *const * srcBuffers, const size_t* srcSizes,
+                        void *const * dstBuffers, const size_t* dstCapacities,
+                        size_t* blockResults,
+                        unsigned nbLoops);
+
+
+
+/* ====  Benchmark any function, providing intermediate results  ==== */
+
+/* state information tracking benchmark session */
+typedef struct BMK_timedFnState_s BMK_timedFnState_t;
+
+/* BMK_createTimedFnState() and BMK_resetTimedFnState() :
+ * Create/Set BMK_timedFnState_t for next benchmark session,
+ * which shall last a minimum of total_ms milliseconds,
+ * producing intermediate results, paced at interval of (approximately) run_ms.
+ */
+BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms);
+void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms);
+void BMK_freeTimedFnState(BMK_timedFnState_t* state);
+
+
+/* Tells if duration of all benchmark runs has exceeded total_ms
+ */
+int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState);
+
+
+/* BMK_benchTimedFn() :
+ * Similar to BMK_benchFunction(), most arguments being identical.
+ * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms.
+ * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms.
+ * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms)
+ *         call BMK_benchTimedFn() repeatedly, each measurement is supposed to last about run_ms
+ *         Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn()
+ */
+BMK_runOutcome_t BMK_benchTimedFn(
+                    BMK_timedFnState_t* timedFnState,
+                    BMK_benchFn_t benchFn, void* benchPayload,
+                    BMK_initFn_t initFn, void* initPayload,
+                    size_t blockCount,
+                    const void *const * srcBlockBuffers, const size_t* srcBlockSizes,
+                    void *const * dstBlockBuffers, const size_t* dstBlockCapacities,
+                    size_t* blockResults);
+
+
+
+
 
 #endif   /* BENCH_H_121279284357 */
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/programs/datagen.c b/programs/datagen.c
index a489d6af08d78..c8383658488b1 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -13,6 +13,7 @@
 /*-************************************
 *  Dependencies
 **************************************/
+#include "datagen.h"
 #include "platform.h"  /* SET_BINARY_MODE */
 #include <stdlib.h>    /* malloc, free */
 #include <stdio.h>     /* FILE, fwrite, fprintf */
@@ -91,7 +92,7 @@ static U32 RDG_randLength(unsigned* seedPtr)
     return (RDG_rand(seedPtr) & 0x1FF) + 0xF;
 }
 
-void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, const BYTE* ldt, unsigned* seedPtr)
+static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, const BYTE* ldt, unsigned* seedPtr)
 {
     BYTE* const buffPtr = (BYTE*)buffer;
     U32 const matchProba32 = (U32)(32768 * matchProba);
diff --git a/programs/dibio.c b/programs/dibio.c
index 112259ddcd054..d3fd8cc053de0 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -27,6 +27,7 @@
 #include <string.h>         /* memset */
 #include <stdio.h>          /* fprintf, fopen, ftello64 */
 #include <errno.h>          /* errno */
+#include <assert.h>
 
 #include "mem.h"            /* read */
 #include "error_private.h"
@@ -43,6 +44,7 @@
 #define SAMPLESIZE_MAX (128 KB)
 #define MEMMULT 11    /* rough estimation : memory cost to analyze 1 byte of sample */
 #define COVER_MEMMULT 9    /* rough estimation : memory cost to analyze 1 byte of sample */
+#define FASTCOVER_MEMMULT 1    /* rough estimation : memory cost to analyze 1 byte of sample */
 static const size_t g_maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
 
 #define NOISELENGTH 32
@@ -82,10 +84,6 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 /* ********************************************************
 *  Helper functions
 **********************************************************/
-unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); }
-
-const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-
 #undef MIN
 #define MIN(a,b)    ((a) < (b) ? (a) : (b))
 
@@ -165,6 +163,7 @@ static U32 DiB_rand(U32* src)
 static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
     U32 seed = 0xFD2FB528;
     unsigned i;
+    assert(nbFiles >= 1);
     for (i = nbFiles - 1; i > 0; --i) {
         unsigned const j = DiB_rand(&seed) % (i + 1);
         const char* const tmp = fileNamesTable[j];
@@ -269,16 +268,19 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCa
 
 int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                        const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
-                       ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams,
-                       int optimizeCover)
+                       ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams,
+                       ZDICT_fastCover_params_t* fastCoverParams, int optimize)
 {
     unsigned const displayLevel = params ? params->zParams.notificationLevel :
                         coverParams ? coverParams->zParams.notificationLevel :
+                        fastCoverParams ? fastCoverParams->zParams.notificationLevel :
                         0;   /* should never happen */
     void* const dictBuffer = malloc(maxDictSize);
     fileStats const fs = DiB_fileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
     size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
-    size_t const memMult = params ? MEMMULT : COVER_MEMMULT;
+    size_t const memMult = params ? MEMMULT :
+                           coverParams ? COVER_MEMMULT:
+                           FASTCOVER_MEMMULT;
     size_t const maxMem =  DiB_findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
     size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
     void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
@@ -310,7 +312,8 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
     /* Load input buffer */
     DISPLAYLEVEL(3, "Shuffling input files\n");
     DiB_shuffle(fileNamesTable, nbFiles);
-    nbFiles = DiB_loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel);
+
+    DiB_loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel);
 
     {   size_t dictSize;
         if (params) {
@@ -318,16 +321,36 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
             dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize,
                                                            srcBuffer, sampleSizes, fs.nbSamples,
                                                            *params);
-        } else if (optimizeCover) {
-            dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize,
-                                                           srcBuffer, sampleSizes, fs.nbSamples,
-                                                           coverParams);
-            if (!ZDICT_isError(dictSize)) {
-                DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
+        } else if (coverParams) {
+            if (optimize) {
+              dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize,
+                                                             srcBuffer, sampleSizes, fs.nbSamples,
+                                                             coverParams);
+              if (!ZDICT_isError(dictSize)) {
+                  unsigned splitPercentage = (unsigned)(coverParams->splitPoint * 100);
+                  DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParams->k, coverParams->d,
+                              coverParams->steps, splitPercentage);
+              }
+            } else {
+              dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
+                                                     sampleSizes, fs.nbSamples, *coverParams);
             }
         } else {
-            dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
-                                                   sampleSizes, fs.nbSamples, *coverParams);
+            assert(fastCoverParams != NULL);
+            if (optimize) {
+              dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize,
+                                                              srcBuffer, sampleSizes, fs.nbSamples,
+                                                              fastCoverParams);
+              if (!ZDICT_isError(dictSize)) {
+                unsigned splitPercentage = (unsigned)(fastCoverParams->splitPoint * 100);
+                DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastCoverParams->k,
+                            fastCoverParams->d, fastCoverParams->f, fastCoverParams->steps, splitPercentage,
+                            fastCoverParams->accel);
+              }
+            } else {
+              dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer,
+                                                        sampleSizes, fs.nbSamples, *fastCoverParams);
+            }
         }
         if (ZDICT_isError(dictSize)) {
             DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */
diff --git a/programs/dibio.h b/programs/dibio.h
index 499e3036520cd..ea163fe6afd9c 100644
--- a/programs/dibio.h
+++ b/programs/dibio.h
@@ -33,7 +33,7 @@
 */
 int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                        const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
-                       ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams,
-                       int optimizeCover);
+                       ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams,
+                       ZDICT_fastCover_params_t* fastCoverParams, int optimize);
 
 #endif
diff --git a/programs/fileio.c b/programs/fileio.c
index 14569bb47528f..c24f4defbb9ad 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -20,7 +20,6 @@
 #  define _POSIX_SOURCE 1          /* disable %llu warnings with MinGW on Windows */
 #endif
 
-
 /*-*************************************
 *  Includes
 ***************************************/
@@ -29,16 +28,17 @@
 #include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
 #include <stdlib.h>     /* malloc, free */
 #include <string.h>     /* strcmp, strlen */
+#include <assert.h>
 #include <errno.h>      /* errno */
+#include <signal.h>
 
 #if defined (_MSC_VER)
 #  include <sys/stat.h>
 #  include <io.h>
 #endif
 
-#include "mem.h"
+#include "mem.h"       /* U32, U64 */
 #include "fileio.h"
-#include "util.h"
 
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
 #include "zstd.h"
@@ -70,6 +70,7 @@
 #define MB *(1<<20)
 #define GB *(1U<<30)
 
+#define ADAPT_WINDOWLOG_DEFAULT 23   /* 8 MB */
 #define DICTSIZE_MAX (32 MB)   /* protection against large input (attack scenario) */
 
 #define FNSPACE 30
@@ -101,25 +102,10 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 #define MIN(a,b)    ((a) < (b) ? (a) : (b))
 
 
-/*-*************************************
-*  Debug
-***************************************/
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
-#  include <assert.h>
-#else
-#  ifndef assert
-#    define assert(condition) ((void)0)
-#  endif
-#endif
-
-#ifndef ZSTD_DEBUG
-#  define ZSTD_DEBUG 0
-#endif
-#define DEBUGLOG(l,...) if (l<=ZSTD_DEBUG) DISPLAY(__VA_ARGS__);
 #define EXM_THROW(error, ...)                                             \
 {                                                                         \
     DISPLAYLEVEL(1, "zstd: ");                                            \
-    DEBUGLOG(1, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \
     DISPLAYLEVEL(1, "error %i : ", error);                                \
     DISPLAYLEVEL(1, __VA_ARGS__);                                         \
     DISPLAYLEVEL(1, " \n");                                               \
@@ -129,7 +115,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 #define CHECK_V(v, f)                                \
     v = f;                                           \
     if (ZSTD_isError(v)) {                           \
-        DEBUGLOG(1, "%s \n", #f);                    \
+        DISPLAYLEVEL(5, "%s \n", #f);                \
         EXM_THROW(11, "%s", ZSTD_getErrorName(v));   \
     }
 #define CHECK(f) { size_t err; CHECK_V(err, f); }
@@ -138,8 +124,6 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 /*-************************************
 *  Signal (Ctrl-C trapping)
 **************************************/
-#include  <signal.h>
-
 static const char* g_artefact = NULL;
 static void INThandler(int sig)
 {
@@ -171,8 +155,85 @@ static void clearHandler(void)
 }
 
 
-/* ************************************************************
-* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
+/*-*********************************************************
+*  Termination signal trapping (Print debug stack trace)
+***********************************************************/
+#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
+#  if (__has_feature(address_sanitizer))
+#    define BACKTRACE_ENABLE 0
+#  endif /* __has_feature(address_sanitizer) */
+#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
+#  define BACKTRACE_ENABLE 0
+#endif
+
+#if !defined(BACKTRACE_ENABLE)
+/* automatic detector : backtrace enabled by default on linux+glibc and osx */
+#  if (defined(__linux__) && defined(__GLIBC__)) \
+     || (defined(__APPLE__) && defined(__MACH__))
+#    define BACKTRACE_ENABLE 1
+#  else
+#    define BACKTRACE_ENABLE 0
+#  endif
+#endif
+
+/* note : after this point, BACKTRACE_ENABLE is necessarily defined */
+
+
+#if BACKTRACE_ENABLE
+
+#include <execinfo.h>   /* backtrace, backtrace_symbols */
+
+#define MAX_STACK_FRAMES    50
+
+static void ABRThandler(int sig) {   /* prints a stack trace for `sig`, then re-raises it with its default action */
+    const char* name;
+    void* addrlist[MAX_STACK_FRAMES];
+    char** symbollist;
+    int addrlen, i;   /* int : matches backtrace() return type and backtrace_symbols() size argument */
+
+    switch (sig) {
+        case SIGABRT: name = "SIGABRT"; break;
+        case SIGBUS: name = "SIGBUS"; break;
+        case SIGFPE: name = "SIGFPE"; break;
+        case SIGILL: name = "SIGILL"; break;
+        case SIGINT: name = "SIGINT"; break;
+        case SIGSEGV: name = "SIGSEGV"; break;
+        default: name = "UNKNOWN";
+    }
+    /* NOTE(review): backtrace_symbols() allocates, so it is not async-signal-safe; presumably tolerated since the process is dying */
+    DISPLAY("Caught %s signal, printing stack:\n", name);
+    addrlen = backtrace(addrlist, MAX_STACK_FRAMES);   /* retrieve current stack addresses */
+    /* backtrace_symbols() returns NULL when it cannot allocate : skip the listing in that case */
+    symbollist = (addrlen > 0) ? backtrace_symbols(addrlist, addrlen) : NULL;
+    if (symbollist == NULL) {
+        DISPLAY("\n");
+    } else {
+        /* Print the stack trace, excluding calls handling the signal. */
+        for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
+            DISPLAY("%s\n", symbollist[i]);
+        }
+        free(symbollist);
+    }
+    /* Always reset and re-raise : merely returning from a SIGSEGV/SIGFPE/SIGILL/SIGBUS handler re-triggers the fault. */
+    signal(sig, SIG_DFL);
+    raise(sig);
+}
+#endif
+
+void FIO_addAbortHandler(void)   /* (void), not () : a real prototype, as expected by -Wstrict-prototypes */
+{   /* Routes fatal signals to ABRThandler when BACKTRACE_ENABLE is non-zero; does nothing otherwise. */
+#if BACKTRACE_ENABLE
+    signal(SIGABRT, ABRThandler);
+    signal(SIGFPE, ABRThandler);
+    signal(SIGILL, ABRThandler);
+    signal(SIGSEGV, ABRThandler);
+    signal(SIGBUS, ABRThandler);
+#endif
+}
+
+
+/*-************************************************************
+* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
 ***************************************************************/
 #if defined(_MSC_VER) && _MSC_VER >= 1400
 #   define LONG_SEEK _fseeki64
@@ -240,6 +301,26 @@ void FIO_setOverlapLog(unsigned overlapLog){
         DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
     g_overlapLog = overlapLog;
 }
+static U32 g_adaptiveMode = 0;   /* non-zero : adaptive compression level is enabled */
+void FIO_setAdaptiveMode(unsigned adapt) {   /* stores `adapt`; enabling it without any worker is reported via EXM_THROW */
+    int const singleThread = (g_nbWorkers == 0);
+    if (adapt && singleThread) { EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n"); }
+    g_adaptiveMode = adapt;
+}
+static int g_minAdaptLevel = -50;   /* a static initializer must be a constant expression, so ZSTD_minCLevel() cannot be used here */
+void FIO_setAdaptMin(int minCLevel)
+{   /* records the lowest compression level that adaptive mode is allowed to step down to */
+#ifndef ZSTD_NOCOMPRESS
+    assert(ZSTD_minCLevel() <= minCLevel);   /* sanity check, skipped when ZSTD_NOCOMPRESS is defined */
+#endif
+    g_minAdaptLevel = minCLevel;
+}
+static int g_maxAdaptLevel = 22;   /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
+void FIO_setAdaptMax(int maxCLevel)
+{   /* records the highest compression level that adaptive mode is allowed to step up to; value is stored unchecked */
+    g_maxAdaptLevel = maxCLevel;
+}
+
 static U32 g_ldmFlag = 0;
 void FIO_setLdmFlag(unsigned ldmFlag) {
     g_ldmFlag = (ldmFlag>0);
@@ -418,7 +499,7 @@ typedef struct {
 
 static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
                                     U64 srcSize,
-                                    ZSTD_compressionParameters* comprParams) {
+                                    ZSTD_compressionParameters comprParams) {
     cRess_t ress;
     memset(&ress, 0, sizeof(ress));
 
@@ -439,6 +520,9 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
         if (dictFileName && (dictBuffer==NULL))
             EXM_THROW(32, "allocation error : can't create dictBuffer");
 
+        if (g_adaptiveMode && !g_ldmFlag && !comprParams.windowLog)
+            comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
+
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_contentSizeFlag, 1) );  /* always enable content size when available (note: supposed to be default) */
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) );
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) );
@@ -455,17 +539,24 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
             CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) );
         }
         /* compression parameters */
-        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) );
-        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) );
-        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) );
-        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_searchLog, comprParams->searchLog) );
-        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams->searchLength) );
-        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) );
-        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) );
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams.windowLog) );
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams.chainLog) );
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams.hashLog) );
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_searchLog, comprParams.searchLog) );
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams.searchLength) );
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams.targetLength) );
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams.strategy) );
         /* multi-threading */
 #ifdef ZSTD_MULTITHREAD
         DISPLAYLEVEL(5,"set nb workers = %u \n", g_nbWorkers);
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbWorkers, g_nbWorkers) );
+        if ( (g_overlapLog == FIO_OVERLAP_LOG_NOTSET)
+          && (cLevel == ZSTD_maxCLevel()) )
+            g_overlapLog = 9;   /* full overlap */
+        if (g_overlapLog != FIO_OVERLAP_LOG_NOTSET) {
+            DISPLAYLEVEL(3,"set overlapLog = %u \n", g_overlapLog);
+            CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_overlapSizeLog, g_overlapLog) );
+        }
 #endif
         /* dictionary */
         CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );  /* set the value temporarily for dictionary loading, to adapt compression parameters */
@@ -487,7 +578,8 @@ static void FIO_freeCResources(cRess_t ress)
 
 
 #ifdef ZSTD_GZCOMPRESS
-static unsigned long long FIO_compressGzFrame(cRess_t* ress,
+static unsigned long long
+FIO_compressGzFrame(cRess_t* ress,
                     const char* srcFileName, U64 const srcFileSize,
                     int compressionLevel, U64* readsize)
 {
@@ -569,9 +661,10 @@ static unsigned long long FIO_compressGzFrame(cRess_t* ress,
 
 
 #ifdef ZSTD_LZMACOMPRESS
-static unsigned long long FIO_compressLzmaFrame(cRess_t* ress,
-                            const char* srcFileName, U64 const srcFileSize,
-                            int compressionLevel, U64* readsize, int plain_lzma)
+static unsigned long long
+FIO_compressLzmaFrame(cRess_t* ress,
+                      const char* srcFileName, U64 const srcFileSize,
+                      int compressionLevel, U64* readsize, int plain_lzma)
 {
     unsigned long long inFileSize = 0, outFileSize = 0;
     lzma_stream strm = LZMA_STREAM_INIT;
@@ -644,9 +737,10 @@ static unsigned long long FIO_compressLzmaFrame(cRess_t* ress,
 #define LZ4F_max64KB max64KB
 #endif
 static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
-static unsigned long long FIO_compressLz4Frame(cRess_t* ress,
-                            const char* srcFileName, U64 const srcFileSize,
-                            int compressionLevel, U64* readsize)
+static unsigned long long
+FIO_compressLz4Frame(cRess_t* ress,
+                     const char* srcFileName, U64 const srcFileSize,
+                     int compressionLevel, U64* readsize)
 {
     const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
     unsigned long long inFileSize = 0, outFileSize = 0;
@@ -734,11 +828,6 @@ static unsigned long long FIO_compressLz4Frame(cRess_t* ress,
 #endif
 
 
-/*! FIO_compressFilename_internal() :
- *  same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
- *  @return : 0 : compression completed correctly,
- *            1 : missing or pb opening srcFileName
- */
 static unsigned long long
 FIO_compressZstdFrame(const cRess_t* ressPtr,
                       const char* srcFileName, U64 fileSize,
@@ -749,16 +838,28 @@ FIO_compressZstdFrame(const cRess_t* ressPtr,
     FILE* const dstFile = ress.dstFile;
     U64 compressedfilesize = 0;
     ZSTD_EndDirective directive = ZSTD_e_continue;
+
+    /* stats */
+    ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
+    ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
+    typedef enum { noChange, slower, faster } speedChange_e;
+    speedChange_e speedChange = noChange;
+    unsigned flushWaiting = 0;
+    unsigned inputPresented = 0;
+    unsigned inputBlocked = 0;
+    unsigned lastJobID = 0;
+
     DISPLAYLEVEL(6, "compression using zstd format \n");
 
     /* init */
-    if (fileSize != UTIL_FILESIZE_UNKNOWN)
-        ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize);
-    (void)compressionLevel; (void)srcFileName;
+    if (fileSize != UTIL_FILESIZE_UNKNOWN) {
+        CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
+    }
+    (void)srcFileName;
 
     /* Main compression loop */
     do {
-        size_t result;
+        size_t stillToFlush;
         /* Fill input Buffer */
         size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
         ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
@@ -768,41 +869,149 @@ FIO_compressZstdFrame(const cRess_t* ressPtr,
         if ((inSize == 0) || (*readsize == fileSize))
             directive = ZSTD_e_end;
 
-        result = 1;
-        while (inBuff.pos != inBuff.size || (directive == ZSTD_e_end && result != 0)) {
+        stillToFlush = 1;
+        while ((inBuff.pos != inBuff.size)   /* input buffer must be entirely ingested */
+            || (directive == ZSTD_e_end && stillToFlush != 0) ) {
+
+            size_t const oldIPos = inBuff.pos;
             ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
-            CHECK_V(result, ZSTD_compress_generic(ress.cctx, &outBuff, &inBuff, directive));
+            size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
+            CHECK_V(stillToFlush, ZSTD_compress_generic(ress.cctx, &outBuff, &inBuff, directive));
+
+            /* count stats */
+            inputPresented++;
+            if (oldIPos == inBuff.pos) inputBlocked++;  /* input buffer is full and can't take any more : input speed is faster than consumption rate */
+            if (!toFlushNow) flushWaiting = 1;
 
             /* Write compressed stream */
-            DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => intput pos(%u)<=(%u)size ; output generated %u bytes \n",
+            DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
                             (U32)directive, (U32)inBuff.pos, (U32)inBuff.size, (U32)outBuff.pos);
             if (outBuff.pos) {
                 size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
-                if (sizeCheck!=outBuff.pos)
+                if (sizeCheck != outBuff.pos)
                     EXM_THROW(25, "Write error : cannot write compressed block");
                 compressedfilesize += outBuff.pos;
             }
+
+            /* display notification; and adapt compression level */
             if (READY_FOR_UPDATE()) {
                 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
                 double const cShare = (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
+
+                /* display progress notifications */
                 if (g_displayLevel >= 3) {
-                    DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%%",
+                    DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ",
                                 compressionLevel,
                                 (U32)((zfp.ingested - zfp.consumed) >> 20),
                                 (U32)(zfp.consumed >> 20),
                                 (U32)(zfp.produced >> 20),
                                 cShare );
-                } else {   /* g_displayLevel == 2 */
+                } else {   /* summarized notifications if == 2; */
                     DISPLAYLEVEL(2, "\rRead : %u ", (U32)(zfp.consumed >> 20));
                     if (fileSize != UTIL_FILESIZE_UNKNOWN)
                         DISPLAYLEVEL(2, "/ %u ", (U32)(fileSize >> 20));
                     DISPLAYLEVEL(2, "MB ==> %2.f%% ", cShare);
                     DELAY_NEXT_UPDATE();
                 }
-            }
-        }
+
+                /* adaptive mode : statistics measurement and speed correction */
+                if (g_adaptiveMode) {
+
+                    /* check output speed */
+                    if (zfp.currentJobID > 1) {  /* only possible if nbWorkers >= 1 */
+
+                        unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
+                        unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
+                        assert(zfp.produced >= previous_zfp_update.produced);
+                        assert(g_nbWorkers >= 1);
+
+                        /* test if compression is blocked
+                         * either because output is slow and all buffers are full
+                         * or because input is slow and no job can start while waiting for at least one buffer to be filled.
+                         * note : excludes the starting phase, since currentJobID > 1 */
+                        if ( (zfp.consumed == previous_zfp_update.consumed)   /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
+                          && (zfp.nbActiveWorkers == 0)                       /* confirmed : no compression ongoing */
+                          ) {
+                            DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
+                            speedChange = slower;
+                        }
+
+                        previous_zfp_update = zfp;
+
+                        if ( (newlyProduced > (newlyFlushed * 9 / 8))   /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
+                          && (flushWaiting == 0)                        /* flush speed was never slowed by lack of production, so it's operating at max capacity */
+                          ) {
+                            DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
+                            speedChange = slower;
+                        }
+                        flushWaiting = 0;
+                    }
+
+                    /* course correct only if there is at least one new job completed */
+                    if (zfp.currentJobID > lastJobID) {
+                        DISPLAYLEVEL(6, "compression level adaptation check \n")
+
+                        /* check input speed */
+                        if (zfp.currentJobID > g_nbWorkers+1) {   /* warm up period, to fill all workers */
+                            if (inputBlocked <= 0) {
+                                DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
+                                speedChange = slower;
+                            } else if (speedChange == noChange) {
+                                unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
+                                unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
+                                unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
+                                unsigned long long newlyFlushed  = zfp.flushed  - previous_zfp_correction.flushed;
+                                previous_zfp_correction = zfp;
+                                assert(inputPresented > 0);
+                                DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
+                                                inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
+                                                (U32)newlyIngested, (U32)newlyConsumed,
+                                                (U32)newlyFlushed, (U32)newlyProduced);
+                                if ( (inputBlocked > inputPresented / 8)     /* input is waiting often, because input buffers is full : compression or output too slow */
+                                  && (newlyFlushed * 33 / 32 > newlyProduced)  /* flush everything that is produced */
+                                  && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
+                                ) {
+                                    DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
+                                                    newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
+                                    speedChange = faster;
+                                }
+                            }
+                            inputBlocked = 0;
+                            inputPresented = 0;
+                        }
+
+                        if (speedChange == slower) {
+                            DISPLAYLEVEL(6, "slower speed , higher compression \n")
+                            compressionLevel ++;
+                            if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
+                            if (compressionLevel > g_maxAdaptLevel) compressionLevel = g_maxAdaptLevel;
+                            compressionLevel += (compressionLevel == 0);   /* skip 0 */
+                            ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)compressionLevel);
+                        }
+                        if (speedChange == faster) {
+                            DISPLAYLEVEL(6, "faster speed , lighter compression \n")
+                            compressionLevel --;
+                            if (compressionLevel < g_minAdaptLevel) compressionLevel = g_minAdaptLevel;
+                            compressionLevel -= (compressionLevel == 0);   /* skip 0 */
+                            ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)compressionLevel);
+                        }
+                        speedChange = noChange;
+
+                        lastJobID = zfp.currentJobID;
+                    }  /* if (zfp.currentJobID > lastJobID) */
+                }  /* if (g_adaptiveMode) */
+            }  /* if (READY_FOR_UPDATE()) */
+        }  /* while ((inBuff.pos != inBuff.size) */
     } while (directive != ZSTD_e_end);
 
+    if (ferror(srcFile)) {
+        EXM_THROW(26, "Read error : I/O error");
+    }
+    if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
+        EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
+                (unsigned long long)*readsize, (unsigned long long)fileSize);
+    }
+
     return compressedfilesize;
 }
 
@@ -872,14 +1081,80 @@ FIO_compressFilename_internal(cRess_t ress,
 }
 
 
+/*! FIO_compressFilename_dstFile() :
+ *  open dstFileName, or pass-through if ress.dstFile != NULL,
+ *  then start compression with FIO_compressFilename_internal().
+ *  When it opened the destination itself : closes it, removes it on failure, else transfers source file permissions.
+ *  note : ress.srcFile must be != NULL,
+ *  so reach this function through FIO_compressFilename_srcFile().
+ *  @return : 0 : compression completed correctly,
+ *            1 : dstFileName could not be opened or closed; otherwise the status of FIO_compressFilename_internal()
+ */
+static int FIO_compressFilename_dstFile(cRess_t ress,
+                                        const char* dstFileName,
+                                        const char* srcFileName,
+                                        int compressionLevel)
+{
+    int closeDstFile = 0;   /* set when dstFile is opened here, and therefore must be closed here */
+    int result;
+    stat_t statbuf;   /* only read when transfer_permissions is set */
+    int transfer_permissions = 0;
+
+    assert(ress.srcFile != NULL);
+
+    if (ress.dstFile == NULL) {
+        closeDstFile = 1;
+        DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
+        ress.dstFile = FIO_openDstFile(dstFileName);
+        if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
+        /* Must only be added after FIO_openDstFile() succeeds.
+         * Otherwise we may delete the destination file if it already exists,
+         * and the user presses Ctrl-C when asked if they wish to overwrite.
+         */
+        addHandler(dstFileName);
+
+        if ( strcmp (srcFileName, stdinmark)   /* source is not stdin */
+          && UTIL_getFileStat(srcFileName, &statbuf))   /* non-zero presumably means statbuf was filled - TODO confirm */
+            transfer_permissions = 1;
+    }
+
+    result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, compressionLevel);
+
+    if (closeDstFile) {
+        FILE* const dstFile = ress.dstFile;
+        ress.dstFile = NULL;   /* ress is a by-value copy : this reset is local to this function */
+
+        clearHandler();
+
+        if (fclose(dstFile)) { /* error closing dstFile */
+            DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
+            result=1;
+        }
+        if ( (result != 0)  /* operation failure */
+          && strcmp(dstFileName, nulmark)     /* special case : don't remove() /dev/null */
+          && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
+          ) {
+            FIO_remove(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
+        } else if ( strcmp(dstFileName, stdoutmark)
+                 && strcmp(dstFileName, nulmark)
+                 && transfer_permissions) {
+            UTIL_setFileStat(dstFileName, &statbuf);   /* return value is ignored here */
+        }
+    }
+
+    return result;
+}
+
+
 /*! FIO_compressFilename_srcFile() :
- *  note : ress.destFile already opened
  *  @return : 0 : compression completed correctly,
  *            1 : missing or pb opening srcFileName
  */
-static int FIO_compressFilename_srcFile(cRess_t ress,
-                            const char* dstFileName, const char* srcFileName,
-                            int compressionLevel)
+static int
+FIO_compressFilename_srcFile(cRess_t ress,
+                             const char* dstFileName,
+                             const char* srcFileName,
+                             int compressionLevel)
 {
     int result;
 
@@ -890,12 +1165,16 @@ static int FIO_compressFilename_srcFile(cRess_t ress,
     }
 
     ress.srcFile = FIO_openSrcFile(srcFileName);
-    if (!ress.srcFile) return 1;   /* srcFile could not be opened */
+    if (ress.srcFile == NULL) return 1;   /* srcFile could not be opened */
 
-    result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, compressionLevel);
+    result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
 
     fclose(ress.srcFile);
-    if (g_removeSrcFile /* --rm */ && !result && strcmp(srcFileName, stdinmark)) {
+    ress.srcFile = NULL;
+    if ( g_removeSrcFile   /* --rm */
+      && result == 0       /* success */
+      && strcmp(srcFileName, stdinmark)   /* exception : don't erase stdin */
+      ) {
         /* We must clear the handler, since after this point calling it would
          * delete both the source and destination files.
          */
@@ -907,59 +1186,16 @@ static int FIO_compressFilename_srcFile(cRess_t ress,
 }
 
 
-/*! FIO_compressFilename_dstFile() :
- *  @return : 0 : compression completed correctly,
- *            1 : pb
- */
-static int FIO_compressFilename_dstFile(cRess_t ress,
-                                        const char* dstFileName,
-                                        const char* srcFileName,
-                                        int compressionLevel)
-{
-    int result;
-    stat_t statbuf;
-    int stat_result = 0;
-
-    DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
-    ress.dstFile = FIO_openDstFile(dstFileName);
-    if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
-    /* Must ony be added after FIO_openDstFile() succeeds.
-     * Otherwise we may delete the destination file if at already exists, and
-     * the user presses Ctrl-C when asked if they wish to overwrite.
-     */
-    addHandler(dstFileName);
-
-    if (strcmp (srcFileName, stdinmark) && UTIL_getFileStat(srcFileName, &statbuf))
-        stat_result = 1;
-    result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, compressionLevel);
-    clearHandler();
-
-    if (fclose(ress.dstFile)) { /* error closing dstFile */
-        DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
-        result=1;
-    }
-    if ( (result != 0)  /* operation failure */
-      && strcmp(dstFileName, nulmark)      /* special case : don't remove() /dev/null */
-      && strcmp(dstFileName, stdoutmark) ) /* special case : don't remove() stdout */
-        FIO_remove(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
-    else if ( strcmp(dstFileName, stdoutmark)
-           && strcmp(dstFileName, nulmark)
-           && stat_result)
-        UTIL_setFileStat(dstFileName, &statbuf);
-
-    return result;
-}
-
-
 int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
-                         const char* dictFileName, int compressionLevel, ZSTD_compressionParameters* comprParams)
+                         const char* dictFileName, int compressionLevel,
+                         ZSTD_compressionParameters comprParams)
 {
     clock_t const start = clock();
     U64 const fileSize = UTIL_getFileSize(srcFileName);
     U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
 
     cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
-    int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
+    int const result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, compressionLevel);
 
     double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
     DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
@@ -969,57 +1205,77 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
 }
 
 
+/* FIO_determineCompressedName() :
+ * build the destination filename of a compressed file : srcFileName immediately followed by suffix.
+ * @return : pointer to a static buffer owned by this function; its content is overwritten by the next call.
+ * This function never returns NULL : allocation failure goes through EXM_THROW() (expected not to return).
+ */
+static const char*
+FIO_determineCompressedName(const char* srcFileName, const char* suffix)
+{
+    static size_t dfnbCapacity = 0;
+    static char* dstFileNameBuffer = NULL;   /* static allocation, reused across calls : this function cannot be multi-threaded */
+
+    size_t const sfnSize = strlen(srcFileName);
+    size_t const suffixSize = strlen(suffix);
+
+    if (dfnbCapacity <= sfnSize+suffixSize+1) {
+        /* buffer too small for name + suffix + '\0' : drop it and allocate a larger one */
+        free(dstFileNameBuffer);
+        dfnbCapacity = sfnSize + suffixSize + 30;
+        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
+        if (!dstFileNameBuffer) {
+            EXM_THROW(30, "zstd: %s", strerror(errno));
+    }   }
+    assert(dstFileNameBuffer != NULL);
+    memcpy(dstFileNameBuffer, srcFileName, sfnSize);
+    memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* +1 : also copies the terminating null of suffix */);
+
+    return dstFileNameBuffer;
+}
+
+
+/* FIO_compressMultipleFilenames() :
+ * compress nbFiles files
+ * into one destination (outFileName)
+ * or into one file each (outFileName == NULL, but suffix != NULL).
+ */
 int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFiles,
                                   const char* outFileName, const char* suffix,
                                   const char* dictFileName, int compressionLevel,
-                                  ZSTD_compressionParameters* comprParams)
+                                  ZSTD_compressionParameters comprParams)
 {
-    int missed_files = 0;
-    size_t dfnSize = FNSPACE;
-    char*  dstFileName = (char*)malloc(FNSPACE);
-    size_t const suffixSize = suffix ? strlen(suffix) : 0;
+    int error = 0;
     U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
     U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
     U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
     cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
 
     /* init */
-    if (dstFileName==NULL)
-        EXM_THROW(27, "FIO_compressMultipleFilenames : allocation error for dstFileName");
-    if (outFileName == NULL && suffix == NULL)
-        EXM_THROW(28, "FIO_compressMultipleFilenames : dst unknown");  /* should never happen */
+    assert(outFileName != NULL || suffix != NULL);
 
-    /* loop on each file */
-    if (outFileName != NULL) {
-        unsigned u;
+    if (outFileName != NULL) {   /* output into a single destination (stdout typically) */
         ress.dstFile = FIO_openDstFile(outFileName);
-        if (ress.dstFile==NULL) {  /* could not open outFileName */
-            missed_files = nbFiles;
+        if (ress.dstFile == NULL) {  /* could not open outFileName */
+            error = 1;
         } else {
+            unsigned u;
             for (u=0; u<nbFiles; u++)
-                missed_files += FIO_compressFilename_srcFile(ress, outFileName, inFileNamesTable[u], compressionLevel);
+                error |= FIO_compressFilename_srcFile(ress, outFileName, inFileNamesTable[u], compressionLevel);
             if (fclose(ress.dstFile))
-                EXM_THROW(29, "Write error : cannot properly close stdout");
+                EXM_THROW(29, "Write error : cannot properly close %s", outFileName);
+            ress.dstFile = NULL;
         }
     } else {
         unsigned u;
         for (u=0; u<nbFiles; u++) {
-            size_t const ifnSize = strlen(inFileNamesTable[u]);
-            if (dfnSize <= ifnSize+suffixSize+1) {  /* resize name buffer */
-                free(dstFileName);
-                dfnSize = ifnSize + 20;
-                dstFileName = (char*)malloc(dfnSize);
-                if (!dstFileName) {
-                    EXM_THROW(30, "zstd: %s", strerror(errno));
-            }   }
-            strcpy(dstFileName, inFileNamesTable[u]);
-            strcat(dstFileName, suffix);
-            missed_files += FIO_compressFilename_dstFile(ress, dstFileName, inFileNamesTable[u], compressionLevel);
+            const char* const srcFileName = inFileNamesTable[u];
+            const char* const dstFileName = FIO_determineCompressedName(srcFileName, suffix);  /* cannot fail */
+            error |= FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, compressionLevel);
     }   }
 
     FIO_freeCResources(ress);
-    free(dstFileName);
-    return missed_files;
+    return error;
 }
 
 #endif /* #ifndef ZSTD_NOCOMPRESS */
@@ -1208,12 +1464,12 @@ static void FIO_zstdErrorHelp(dRess_t* ress, size_t err, char const* srcFileName
     if (err == 0) {
         unsigned long long const windowSize = header.windowSize;
         U32 const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
-        U32 const windowMB = (U32)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
-        assert(windowSize < (U64)(1ULL << 52));
         assert(g_memLimit > 0);
         DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u\n",
                         srcFileName, windowSize, g_memLimit);
         if (windowLog <= ZSTD_WINDOWLOG_MAX) {
+            U32 const windowMB = (U32)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
+            assert(windowSize < (U64)(1ULL << 52));   /* ensure no overflow for windowMB */
             DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB\n",
                             srcFileName, windowLog, windowMB);
             return;
@@ -1227,7 +1483,7 @@ static void FIO_zstdErrorHelp(dRess_t* ress, size_t err, char const* srcFileName
  *  @return : size of decoded zstd frame, or an error code
 */
 #define FIO_ERROR_FRAME_DECODING   ((unsigned long long)(-2))
-unsigned long long FIO_decompressZstdFrame(dRess_t* ress,
+static unsigned long long FIO_decompressZstdFrame(dRess_t* ress,
                                        FILE* finput,
                                        const char* srcFileName,
                                        U64 alreadyDecoded)
@@ -1480,7 +1736,7 @@ static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
             if (LZ4F_isError(nextToLoad)) {
                 DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
                                 srcFileName, LZ4F_getErrorName(nextToLoad));
-                decodingError = 1; break;
+                decodingError = 1; nextToLoad = 0; break;
             }
             pos += remaining;
 
@@ -1488,7 +1744,7 @@ static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
             if (decodedBytes) {
                 if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) {
                     DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
-                    decodingError = 1; break;
+                    decodingError = 1; nextToLoad = 0; break;
                 }
                 filesize += decodedBytes;
                 DISPLAYUPDATE(2, "\rDecompressed : %u MB  ", (unsigned)(filesize>>20));
@@ -1597,11 +1853,71 @@ static int FIO_decompressFrames(dRess_t ress, FILE* srcFile,
     return 0;
 }
 
+/** FIO_decompressDstFile() :
+    open `dstFileName`,
+    or pass-through if ress.dstFile is already != NULL (destination opened and owned by the caller),
+    then start decompression process (FIO_decompressFrames()).
+    @return : 0 : OK
+              1 : operation aborted (destination could not be opened, decoding or closing error)
+*/
+static int FIO_decompressDstFile(dRess_t ress, FILE* srcFile,
+                                 const char* dstFileName, const char* srcFileName)
+{
+    int result;
+    stat_t statbuf;
+    int transfer_permissions = 0;
+    int releaseDstFile = 0;   /* set to 1 when the destination is opened here, and must therefore be closed here */
+
+    if (ress.dstFile == NULL) {
+        releaseDstFile = 1;
+
+        ress.dstFile = FIO_openDstFile(dstFileName);
+        if (ress.dstFile==0) return 1;
+
+        /* Must only be added after FIO_openDstFile() succeeds.
+         * Otherwise we may delete the destination file if it already exists,
+         * and the user presses Ctrl-C when asked if they wish to overwrite.
+         */
+        addHandler(dstFileName);
+
+        if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
+          && UTIL_getFileStat(srcFileName, &statbuf) )
+            transfer_permissions = 1;
+    }
+
+
+    result = FIO_decompressFrames(ress, srcFile, dstFileName, srcFileName);
+
+    if (releaseDstFile) {
+        FILE* const dstFile = ress.dstFile;
+        clearHandler();
+        ress.dstFile = NULL;
+        if (fclose(dstFile)) {
+            DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
+            result = 1;
+        }
+
+        if ( (result != 0)  /* operation failure */
+          && strcmp(dstFileName, nulmark)     /* special case : don't remove() /dev/null (#316) */
+          && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
+          ) {
+            FIO_remove(dstFileName);  /* remove decompression artefact; note: don't do anything special if remove() fails */
+        } else {  /* operation success, or failure on a special destination (stdout, /dev/null) */
+            if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */
+              && strcmp(dstFileName, nulmark)    /* special case : don't chmod /dev/null */
+              && transfer_permissions )          /* file permissions correctly extracted from src */
+                UTIL_setFileStat(dstFileName, &statbuf);  /* transfer file permissions from src into dst */
+        }
+    }
+
+    return result;
+}
+
 
 /** FIO_decompressSrcFile() :
-    Decompression `srcFileName` into `ress.dstFile`
+    Open `srcFileName`, transfer control to FIO_decompressDstFile()
     @return : 0 : OK
-              1 : operation not started
+              1 : error
 */
 static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const char* srcFileName)
 {
@@ -1615,16 +1931,17 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
 
     srcFile = FIO_openSrcFile(srcFileName);
     if (srcFile==NULL) return 1;
+    ress.srcBufferLoaded = 0;
 
-    result = FIO_decompressFrames(ress, srcFile, dstFileName, srcFileName);
+    result = FIO_decompressDstFile(ress, srcFile, dstFileName, srcFileName);
 
     /* Close file */
     if (fclose(srcFile)) {
         DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));  /* error should not happen */
         return 1;
     }
-    if ( g_removeSrcFile /* --rm */
-      && (result==0)     /* decompression successful */
+    if ( g_removeSrcFile  /* --rm */
+      && (result==0)      /* decompression successful */
       && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
         /* We must clear the handler, since after this point calling it would
          * delete both the source and destination files.
@@ -1639,73 +1956,94 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
 }
 
 
-/** FIO_decompressFile_extRess() :
-    decompress `srcFileName` into `dstFileName`
-    @return : 0 : OK
-              1 : operation aborted (src not available, dst already taken, etc.)
-*/
-static int FIO_decompressDstFile(dRess_t ress,
-                                 const char* dstFileName, const char* srcFileName)
-{
-    int result;
-    stat_t statbuf;
-    int stat_result = 0;
-
-    ress.dstFile = FIO_openDstFile(dstFileName);
-    if (ress.dstFile==0) return 1;
-    /* Must ony be added after FIO_openDstFile() succeeds.
-     * Otherwise we may delete the destination file if at already exists, and
-     * the user presses Ctrl-C when asked if they wish to overwrite.
-     */
-    addHandler(dstFileName);
-
-    if ( strcmp(srcFileName, stdinmark)
-      && UTIL_getFileStat(srcFileName, &statbuf) )
-        stat_result = 1;
-    result = FIO_decompressSrcFile(ress, dstFileName, srcFileName);
-    clearHandler();
-
-    if (fclose(ress.dstFile)) {
-        DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
-        result = 1;
-    }
-
-    if ( (result != 0)  /* operation failure */
-      && strcmp(dstFileName, nulmark)      /* special case : don't remove() /dev/null (#316) */
-      && strcmp(dstFileName, stdoutmark) ) /* special case : don't remove() stdout */
-        FIO_remove(dstFileName);  /* remove decompression artefact; note don't do anything special if remove() fails */
-    else {  /* operation success */
-        if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */
-          && strcmp(dstFileName, nulmark)    /* special case : don't chmod /dev/null */
-          && stat_result )                   /* file permissions correctly extracted from src */
-            UTIL_setFileStat(dstFileName, &statbuf);  /* transfer file permissions from src into dst */
-    }
-
-    signal(SIGINT, SIG_DFL);
-
-    return result;
-}
-
 
 int FIO_decompressFilename(const char* dstFileName, const char* srcFileName,
                            const char* dictFileName)
 {
     dRess_t const ress = FIO_createDResources(dictFileName);
 
-    int const decodingError = FIO_decompressDstFile(ress, dstFileName, srcFileName);
+    int const decodingError = FIO_decompressSrcFile(ress, dstFileName, srcFileName);
 
     FIO_freeDResources(ress);
     return decodingError;
 }
 
 
-#define MAXSUFFIXSIZE 8
-int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles,
-                                    const char* outFileName,
-                                    const char* dictFileName)
+/* FIO_determineDstName() :
+ * create a destination filename from a srcFileName, by removing its (recognized) suffix.
+ * @return a pointer to it (static buffer, overwritten by the next call).
+ * @return == NULL if srcFileName has no suffix, or a suffix which is not recognized */
+static const char*
+FIO_determineDstName(const char* srcFileName)
 {
-    int skippedFiles = 0;
-    int missingFiles = 0;
+    static size_t dfnbCapacity = 0;
+    static char* dstFileNameBuffer = NULL;   /* static allocation, reused across calls : this function cannot be multi-threaded */
+
+    size_t const sfnSize = strlen(srcFileName);
+    size_t suffixSize;
+    const char* const suffixPtr = strrchr(srcFileName, '.');
+    if (suffixPtr == NULL) {
+        DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
+                        srcFileName);
+        return NULL;
+    }
+    suffixSize = strlen(suffixPtr);
+
+    /* check suffix is authorized : reject names made only of a suffix, and suffixes not enabled in this build */
+    if (sfnSize <= suffixSize
+        || (   strcmp(suffixPtr, ZSTD_EXTENSION)
+        #ifdef ZSTD_GZDECOMPRESS
+            && strcmp(suffixPtr, GZ_EXTENSION)
+        #endif
+        #ifdef ZSTD_LZMADECOMPRESS
+            && strcmp(suffixPtr, XZ_EXTENSION)
+            && strcmp(suffixPtr, LZMA_EXTENSION)
+        #endif
+        #ifdef ZSTD_LZ4DECOMPRESS
+            && strcmp(suffixPtr, LZ4_EXTENSION)
+        #endif
+            ) ) {
+        const char* suffixlist = ZSTD_EXTENSION
+        #ifdef ZSTD_GZDECOMPRESS
+            "/" GZ_EXTENSION
+        #endif
+        #ifdef ZSTD_LZMADECOMPRESS
+            "/" XZ_EXTENSION "/" LZMA_EXTENSION
+        #endif
+        #ifdef ZSTD_LZ4DECOMPRESS
+            "/" LZ4_EXTENSION
+        #endif
+        ;
+        DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n",
+                     srcFileName, suffixlist);
+        return NULL;
+    }
+
+    /* allocate enough space to write dstFilename (src name minus suffix, plus '\0') into it */
+    if (dfnbCapacity+suffixSize <= sfnSize+1) {
+        free(dstFileNameBuffer);
+        dfnbCapacity = sfnSize + 20;
+        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
+        if (dstFileNameBuffer==NULL)
+            EXM_THROW(74, "not enough memory for dstFileName");
+    }
+
+    /* return dst name == src name with its suffix removed */
+    assert(dstFileNameBuffer != NULL);
+    memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
+    dstFileNameBuffer[sfnSize-suffixSize] = '\0';
+    return dstFileNameBuffer;
+
+    /* note : dstFileNameBuffer memory is never freed by this function */
+}
+
+
+int
+FIO_decompressMultipleFilenames(const char* srcNamesTable[], unsigned nbFiles,
+                                const char* outFileName,
+                                const char* dictFileName)
+{
+    int error = 0;
     dRess_t ress = FIO_createDResources(dictFileName);
 
     if (outFileName) {
@@ -1713,55 +2051,22 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
         ress.dstFile = FIO_openDstFile(outFileName);
         if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", outFileName);
         for (u=0; u<nbFiles; u++)
-            missingFiles += FIO_decompressSrcFile(ress, outFileName, srcNamesTable[u]);
+            error |= FIO_decompressSrcFile(ress, outFileName, srcNamesTable[u]);
         if (fclose(ress.dstFile))
             EXM_THROW(72, "Write error : cannot properly close output file");
     } else {
-        size_t suffixSize;
-        size_t dfnSize = FNSPACE;
         unsigned u;
-        char* dstFileName = (char*)malloc(FNSPACE);
-        if (dstFileName==NULL)
-            EXM_THROW(73, "not enough memory for dstFileName");
         for (u=0; u<nbFiles; u++) {   /* create dstFileName */
             const char* const srcFileName = srcNamesTable[u];
-            const char* const suffixPtr = strrchr(srcFileName, '.');
-            size_t const sfnSize = strlen(srcFileName);
-            if (!suffixPtr) {
-                DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
-                                srcFileName);
-                skippedFiles++;
-                continue;
-            }
-            suffixSize = strlen(suffixPtr);
-            if (dfnSize+suffixSize <= sfnSize+1) {
-                free(dstFileName);
-                dfnSize = sfnSize + 20;
-                dstFileName = (char*)malloc(dfnSize);
-                if (dstFileName==NULL)
-                    EXM_THROW(74, "not enough memory for dstFileName");
-            }
-            if (sfnSize <= suffixSize
-                || (strcmp(suffixPtr, GZ_EXTENSION)
-                    && strcmp(suffixPtr, XZ_EXTENSION)
-                    && strcmp(suffixPtr, ZSTD_EXTENSION)
-                    && strcmp(suffixPtr, LZMA_EXTENSION)
-                    && strcmp(suffixPtr, LZ4_EXTENSION)) ) {
-                DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s/%s expected) -- ignored \n",
-                             srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION, LZ4_EXTENSION);
-                skippedFiles++;
-                continue;
-            } else {
-                memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
-                dstFileName[sfnSize-suffixSize] = '\0';
-            }
-            missingFiles += FIO_decompressDstFile(ress, dstFileName, srcFileName);
+            const char* const dstFileName = FIO_determineDstName(srcFileName);
+            if (dstFileName == NULL) { error=1; continue; }
+
+            error |= FIO_decompressSrcFile(ress, dstFileName, srcFileName);
         }
-        free(dstFileName);
     }
 
     FIO_freeDResources(ress);
-    return missingFiles + skippedFiles;
+    return error;
 }
 
 
@@ -1781,22 +2086,19 @@ typedef struct {
     U32 nbFiles;
 } fileInfo_t;
 
-/** getFileInfo() :
- *  Reads information from file, stores in *info
- * @return : 0 if successful
- *           1 for frame analysis error
- *           2 for file not compressed with zstd
- *           3 for cases in which file could not be opened.
- */
-static int getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName){
-    int detectError = 0;
-    FILE* const srcFile = FIO_openSrcFile(inFileName);
-    if (srcFile == NULL) {
-        DISPLAY("Error: could not open source file %s\n", inFileName);
-        return 3;
-    }
-    info->compressedSize = UTIL_getFileSize(inFileName);
+typedef enum { info_success=0, info_frame_error=1, info_not_zstd=2, info_file_error=3 } InfoError;
+
+#define ERROR_IF(c,n,...) {             \
+    if (c) {                           \
+        DISPLAYLEVEL(1, __VA_ARGS__);  \
+        DISPLAYLEVEL(1, " \n");        \
+        return n;                      \
+    }                                  \
+}
 
+static InfoError
+FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
+{
     /* begin analyzing frame */
     for ( ; ; ) {
         BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
@@ -1806,130 +2108,111 @@ static int getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName){
               && (numBytesRead == 0)
               && (info->compressedSize > 0)
               && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
-                break;
-            }
-            else if (feof(srcFile)) {
-                DISPLAY("Error: reached end of file with incomplete frame\n");
-                detectError = 2;
-                break;
-            }
-            else {
-                DISPLAY("Error: did not reach end of file but ran out of frames\n");
-                detectError = 1;
-                break;
+                break;  /* correct end of file => success */
             }
+            ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
+            ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
         }
         {   U32 const magicNumber = MEM_readLE32(headerBuffer);
             /* Zstandard frame */
             if (magicNumber == ZSTD_MAGICNUMBER) {
                 ZSTD_frameHeader header;
                 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
-                if (frameContentSize == ZSTD_CONTENTSIZE_ERROR || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
+                if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
+                  || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
                     info->decompUnavailable = 1;
                 } else {
                     info->decompressedSize += frameContentSize;
                 }
-                if (ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0) {
-                    DISPLAY("Error: could not decode frame header\n");
-                    detectError = 1;
-                    break;
-                }
+                ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
+                        info_frame_error, "Error: could not decode frame header");
                 info->windowSize = header.windowSize;
                 /* move to the end of the frame header */
                 {   size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
-                    if (ZSTD_isError(headerSize)) {
-                        DISPLAY("Error: could not determine frame header size\n");
-                        detectError = 1;
-                        break;
-                    }
-                    {   int const ret = fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR);
-                        if (ret != 0) {
-                            DISPLAY("Error: could not move to end of frame header\n");
-                            detectError = 1;
-                            break;
-                }   }   }
-
-                /* skip the rest of the blocks in the frame */
+                    ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
+                    ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
+                            info_frame_error, "Error: could not move to end of frame header");
+                }
+
+                /* skip all blocks in the frame */
                 {   int lastBlock = 0;
                     do {
                         BYTE blockHeaderBuffer[3];
-                        size_t const readBytes = fread(blockHeaderBuffer, 1, 3, srcFile);
-                        if (readBytes != 3) {
-                            DISPLAY("There was a problem reading the block header\n");
-                            detectError = 1;
-                            break;
-                        }
+                        ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
+                                info_frame_error, "Error while reading block header");
                         {   U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
                             U32 const blockTypeID = (blockHeader >> 1) & 3;
                             U32 const isRLE = (blockTypeID == 1);
                             U32 const isWrongBlock = (blockTypeID == 3);
                             long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
-                            if (isWrongBlock) {
-                                DISPLAY("Error: unsupported block type \n");
-                                detectError = 1;
-                                break;
-                            }
+                            ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
                             lastBlock = blockHeader & 1;
-                            {   int const ret = fseek(srcFile, blockSize, SEEK_CUR);
-                                if (ret != 0) {
-                                    DISPLAY("Error: could not skip to end of block\n");
-                                    detectError = 1;
-                                    break;
-                        }   }   }
+                            ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
+                                    info_frame_error, "Error: could not skip to end of block");
+                        }
                     } while (lastBlock != 1);
-
-                    if (detectError) break;
                 }
 
                 /* check if checksum is used */
                 {   BYTE const frameHeaderDescriptor = headerBuffer[4];
                     int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
                     if (contentChecksumFlag) {
-                        int const ret = fseek(srcFile, 4, SEEK_CUR);
                         info->usesCheck = 1;
-                        if (ret != 0) {
-                            DISPLAY("Error: could not skip past checksum\n");
-                            detectError = 1;
-                            break;
-                }   }   }
+                        ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0,
+                                info_frame_error, "Error: could not skip past checksum");
+                }   }
                 info->numActualFrames++;
             }
             /* Skippable frame */
             else if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
                 U32 const frameSize = MEM_readLE32(headerBuffer + 4);
                 long const seek = (long)(8 + frameSize - numBytesRead);
-                int const ret = LONG_SEEK(srcFile, seek, SEEK_CUR);
-                if (ret != 0) {
-                    DISPLAY("Error: could not find end of skippable frame\n");
-                    detectError = 1;
-                    break;
-                }
+                ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
+                        info_frame_error, "Error: could not find end of skippable frame");
                 info->numSkippableFrames++;
             }
             /* unknown content */
             else {
-                detectError = 2;
-                break;
+                return info_not_zstd;
             }
-        }
-    }  /* end analyzing frame */
+        }  /* magic number analysis */
+    }  /* end analyzing frames */
+    return info_success;
+}
+
+
+static InfoError
+getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
+{
+    InfoError status;
+    FILE* const srcFile = FIO_openSrcFile(inFileName);
+    ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
+
+    info->compressedSize = UTIL_getFileSize(inFileName);
+    status = FIO_analyzeFrames(info, srcFile);
+
     fclose(srcFile);
     info->nbFiles = 1;
-    return detectError;
+    return status;
 }
 
-static int getFileInfo(fileInfo_t* info, const char* srcFileName)
+
+/** getFileInfo() :
+ *  Reads information from file, stores in *info
+ * @return : InfoError status
+ */
+static InfoError
+getFileInfo(fileInfo_t* info, const char* srcFileName)
 {
-    int const isAFile = UTIL_isRegularFile(srcFileName);
-    if (!isAFile) {
-        DISPLAY("Error : %s is not a file", srcFileName);
-        return 3;
-    }
+    ERROR_IF(!UTIL_isRegularFile(srcFileName),
+            info_file_error, "Error : %s is not a file", srcFileName);
     return getFileInfo_fileConfirmed(info, srcFileName);
 }
 
 
-static void displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel){
+static void
+displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
+{
     unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
     const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
     double const windowSizeUnit = (double)info->windowSize / unit;
@@ -1987,46 +2270,62 @@ static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
     return total;
 }
 
-static int FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel){
+static int   /* @return : 0 on success, 1 on error (a frame error still displays and accumulates partial info) */
+FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
+{
     fileInfo_t info;
     memset(&info, 0, sizeof(info));
-    {   int const error = getFileInfo(&info, inFileName);
-        if (error == 1) {
+    {   InfoError const error = getFileInfo(&info, inFileName);
+        if (error == info_frame_error) {
             /* display error, but provide output */
-            DISPLAY("An error occurred while getting file info \n");
+            DISPLAYLEVEL(1, "Error while parsing %s \n", inFileName);
         }
-        else if (error == 2) {
+        else if (error == info_not_zstd) {
             DISPLAYOUT("File %s not compressed by zstd \n", inFileName);
             if (displayLevel > 2) DISPLAYOUT("\n");
             return 1;
         }
-        else if (error == 3) {
+        else if (error == info_file_error) {
             /* error occurred while opening the file */
             if (displayLevel > 2) DISPLAYOUT("\n");
             return 1;
         }
         displayInfo(inFileName, &info, displayLevel);
         *total = FIO_addFInfo(*total, info);
+        assert(error == info_success || error == info_frame_error);   /* other statuses returned above; value is 0 or 1 */
         return error;
     }
 }
 
-int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel){
+int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
+{
+    /* ensure no specified input is stdin (needs fseek() capability) */
+    {   unsigned u;
+        for (u=0; u<numFiles;u++) {
+            ERROR_IF(!strcmp (filenameTable[u], stdinmark),
+                    1, "zstd: --list does not support reading from standard input");
+    }   }
+
     if (numFiles == 0) {
-        DISPLAYOUT("No files given\n");
-        return 0;
+        if (!IS_CONSOLE(stdin)) {
+            DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
+        }
+        DISPLAYLEVEL(1, "No files given \n");
+        return 1;
     }
+
     if (displayLevel <= 2) {
         DISPLAYOUT("Frames  Skips  Compressed  Uncompressed  Ratio  Check  Filename\n");
     }
     {   int error = 0;
-        unsigned u;
         fileInfo_t total;
         memset(&total, 0, sizeof(total));
         total.usesCheck = 1;
-        for (u=0; u<numFiles;u++) {
-            error |= FIO_listFile(&total, filenameTable[u], displayLevel);
-        }
+        /* --list each file, and check for any error */
+        {   unsigned u;
+            for (u=0; u<numFiles;u++) {
+                error |= FIO_listFile(&total, filenameTable[u], displayLevel);
+        }   }
         if (numFiles > 1 && displayLevel <= 2) {   /* display total */
             unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB);
             const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB";
diff --git a/programs/fileio.h b/programs/fileio.h
index 69c83f71dce3d..4c7049cb7167f 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -48,20 +48,23 @@ typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_
 ***************************************/
 void FIO_setCompressionType(FIO_compressionType_t compressionType);
 void FIO_overwriteMode(void);
-void FIO_setNotificationLevel(unsigned level);
-void FIO_setSparseWrite(unsigned sparse);  /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
-void FIO_setDictIDFlag(unsigned dictIDFlag);
-void FIO_setChecksumFlag(unsigned checksumFlag);
-void FIO_setRemoveSrcFile(unsigned flag);
-void FIO_setMemLimit(unsigned memLimit);
-void FIO_setNbWorkers(unsigned nbWorkers);
+void FIO_setAdaptiveMode(unsigned adapt);
+void FIO_setAdaptMin(int minCLevel);
+void FIO_setAdaptMax(int maxCLevel);
 void FIO_setBlockSize(unsigned blockSize);
-void FIO_setOverlapLog(unsigned overlapLog);
+void FIO_setChecksumFlag(unsigned checksumFlag);
+void FIO_setDictIDFlag(unsigned dictIDFlag);
+void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
 void FIO_setLdmFlag(unsigned ldmFlag);
+void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
 void FIO_setLdmHashLog(unsigned ldmHashLog);
 void FIO_setLdmMinMatch(unsigned ldmMinMatch);
-void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
-void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
+void FIO_setMemLimit(unsigned memLimit);
+void FIO_setNbWorkers(unsigned nbWorkers);
+void FIO_setNotificationLevel(unsigned level);
+void FIO_setOverlapLog(unsigned overlapLog);
+void FIO_setRemoveSrcFile(unsigned flag);
+void FIO_setSparseWrite(unsigned sparse);  /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
 
 
 /*-*************************************
@@ -70,7 +73,7 @@ void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
 /** FIO_compressFilename() :
     @return : 0 == ok;  1 == pb with src file. */
 int FIO_compressFilename (const char* outfilename, const char* infilename, const char* dictFileName,
-                          int compressionLevel, ZSTD_compressionParameters* comprParams);
+                          int compressionLevel, ZSTD_compressionParameters comprParams);
 
 /** FIO_decompressFilename() :
     @return : 0 == ok;  1 == pb with src file. */
@@ -78,6 +81,7 @@ int FIO_decompressFilename (const char* outfilename, const char* infilename, con
 
 int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel);
 
+
 /*-*************************************
 *  Multiple File functions
 ***************************************/
@@ -86,7 +90,7 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis
 int FIO_compressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles,
                                   const char* outFileName, const char* suffix,
                                   const char* dictFileName, int compressionLevel,
-                                  ZSTD_compressionParameters* comprParams);
+                                  ZSTD_compressionParameters comprParams);
 
 /** FIO_decompressMultipleFilenames() :
     @return : nb of missing or skipped files */
@@ -95,6 +99,15 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
                                     const char* dictFileName);
 
 
+/*-*************************************
+*  Advanced stuff (should actually be hosted elsewhere)
+***************************************/
+
+/* custom crash signal handler */
+void FIO_addAbortHandler(void);
+
+
+
 #if defined (__cplusplus)
 }
 #endif
diff --git a/programs/platform.h b/programs/platform.h
index c86d289f5414e..155ebcd1eb9c8 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -50,53 +50,70 @@ extern "C" {
 /* *********************************************************
 *  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
 ***********************************************************/
-#if !defined(__64BIT__) || defined(__MINGW32__)       /* No point defining Large file for 64 bit but MinGW-w64 requires it */
+#if !defined(__64BIT__) || defined(__MINGW32__)    /* No point defining Large file for 64 bit but MinGW-w64 requires it */
 #  if !defined(_FILE_OFFSET_BITS)
-#    define _FILE_OFFSET_BITS 64                      /* turn off_t into a 64-bit type for ftello, fseeko */
+#    define _FILE_OFFSET_BITS 64                   /* turn off_t into a 64-bit type for ftello, fseeko */
 #  endif
-#  if !defined(_LARGEFILE_SOURCE)                     /* obsolete macro, replaced with _FILE_OFFSET_BITS */
-#    define _LARGEFILE_SOURCE 1                       /* Large File Support extension (LFS) - fseeko, ftello */
+#  if !defined(_LARGEFILE_SOURCE)                  /* obsolete macro, replaced with _FILE_OFFSET_BITS */
+#    define _LARGEFILE_SOURCE 1                    /* Large File Support extension (LFS) - fseeko, ftello */
 #  endif
 #  if defined(_AIX) || defined(__hpux)
-#    define _LARGE_FILES                              /* Large file support on 32-bits AIX and HP-UX */
+#    define _LARGE_FILES                           /* Large file support on 32-bits AIX and HP-UX */
 #  endif
 #endif
 
 
 /* ************************************************************
 *  Detect POSIX version
-*  PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows
-*  PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX
-*  PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION
+*  PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows
+*  PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX
+*  PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION
+*  Value of PLATFORM_POSIX_VERSION can be forced on command line
 ***************************************************************/
-#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
-   || defined(__midipix__) || defined(__VMS))
+#ifndef PLATFORM_POSIX_VERSION   /* detection is skipped entirely when the value is forced on command line */
+
 #  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
      || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  /* BSD distros */
+     /* exception rule : force posix version to 200112L,
+      * note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
 #    define PLATFORM_POSIX_VERSION 200112L
-#  else
+
+/* try to determine posix version through official unistd.h's _POSIX_VERSION (http://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html).
+ * note : there is no simple way to know in advance if <unistd.h> is present or not on target system.
+ * POSIX mandates its presence and its content, but only for target systems which actually respect the spec.
+ * It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like,
+ * otherwise the missing header will make the preprocessing stage fail.
+ * The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h>
+ */
+#  elif !defined(_WIN32) \
+     && (defined(__unix__) || defined(__unix) \
+     || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__))
+
 #    if defined(__linux__) || defined(__linux)
 #      ifndef _POSIX_C_SOURCE
-#        define _POSIX_C_SOURCE 200112L  /* use feature test macro */
+#        define _POSIX_C_SOURCE 200112L  /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
 #      endif
 #    endif
 #    include <unistd.h>  /* declares _POSIX_VERSION */
 #    if defined(_POSIX_VERSION)  /* POSIX compliant */
 #      define PLATFORM_POSIX_VERSION _POSIX_VERSION
 #    else
-#      define PLATFORM_POSIX_VERSION 0
+#      define PLATFORM_POSIX_VERSION 1   /* unix-like, but <unistd.h> does not define _POSIX_VERSION */
 #    endif
+
+#  else  /* non-unix target platform (like Windows) */
+#    define PLATFORM_POSIX_VERSION 0
 #  endif
-#endif
-#if !defined(PLATFORM_POSIX_VERSION)
-#  define PLATFORM_POSIX_VERSION -1
-#endif
 
+#endif   /* PLATFORM_POSIX_VERSION */
 
 /*-*********************************************
 *  Detect if isatty() and fileno() are available
 ************************************************/
-#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__)
+#if (defined(__linux__) && (PLATFORM_POSIX_VERSION > 1)) \
+ || (PLATFORM_POSIX_VERSION >= 200112L) \
+ || defined(__DJGPP__) \
+ || defined(__MSYS__)
 #  include <unistd.h>   /* isatty */
 #  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
 #elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__)
@@ -145,6 +162,34 @@ static __inline int IS_CONSOLE(FILE* stdStream) {
 #endif
 
 
+#ifndef ZSTD_START_SYMBOLLIST_FRAME   /* platform-dependent default; skipped when the value is already defined (e.g. on command line) */
+#  ifdef __linux__   /* NOTE(review): value presumably selects the first stack frame listed by the crash handler - confirm at point of use */
+#    define ZSTD_START_SYMBOLLIST_FRAME 2
+#  elif defined __APPLE__
+#    define ZSTD_START_SYMBOLLIST_FRAME 4
+#  else   /* neither linux nor apple */
+#    define ZSTD_START_SYMBOLLIST_FRAME 0
+#  endif
+#endif   /* ZSTD_START_SYMBOLLIST_FRAME */
+
+
+#ifndef ZSTD_SETPRIORITY_SUPPORT   /* default below is skipped when the value is already defined (e.g. on command line) */
+   /* enabling it mandates presence of <sys/resource.h> and support for setpriority() : http://man7.org/linux/man-pages/man2/setpriority.2.html */
+#  define ZSTD_SETPRIORITY_SUPPORT (PLATFORM_POSIX_VERSION >= 200112L)   /* boolean expression, meant to be tested with #if */
+#endif   /* ZSTD_SETPRIORITY_SUPPORT */
+
+
+#ifndef ZSTD_NANOSLEEP_SUPPORT   /* default below is skipped when the value is already defined (e.g. on command line) */
+   /* enabling it mandates support of nanosleep() within <time.h> : http://man7.org/linux/man-pages/man2/nanosleep.2.html */
+#  if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) \
+   || (PLATFORM_POSIX_VERSION >= 200112L)
+#     define ZSTD_NANOSLEEP_SUPPORT 1
+#  else   /* older or unknown POSIX level : nanosleep() is not relied upon */
+#     define ZSTD_NANOSLEEP_SUPPORT 0
+#  endif
+#endif   /* ZSTD_NANOSLEEP_SUPPORT */
+
+
 #if defined (__cplusplus)
 }
 #endif
diff --git a/programs/util.h b/programs/util.h
index 3e69745793e4e..67aa7a56b967c 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -20,13 +20,13 @@ extern "C" {
 /*-****************************************
 *  Dependencies
 ******************************************/
-#include "platform.h"     /* PLATFORM_POSIX_VERSION */
+#include "platform.h"     /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */
 #include <stdlib.h>       /* malloc */
 #include <stddef.h>       /* size_t, ptrdiff_t */
 #include <stdio.h>        /* fprintf */
 #include <string.h>       /* strncmp */
 #include <sys/types.h>    /* stat, utime */
-#include <sys/stat.h>     /* stat */
+#include <sys/stat.h>     /* stat, chmod */
 #if defined(_MSC_VER)
 #  include <sys/utime.h>  /* utime */
 #  include <io.h>         /* _chmod */
@@ -40,7 +40,7 @@ extern "C" {
 
 
 /* ************************************************************
-* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
+* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
 ***************************************************************/
 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
 #   define UTIL_fseek _fseeki64
@@ -53,32 +53,34 @@ extern "C" {
 #endif
 
 
-/*-****************************************
-*  Sleep functions: Windows - Posix - others
-******************************************/
+/*-*************************************************
+*  Sleep & priority functions: Windows - Posix - others
+***************************************************/
 #if defined(_WIN32)
 #  include <windows.h>
 #  define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)
 #  define UTIL_sleep(s) Sleep(1000*s)
 #  define UTIL_sleepMilli(milli) Sleep(milli)
-#elif PLATFORM_POSIX_VERSION >= 0 /* Unix-like operating system */
-#  include <unistd.h>
-#  include <sys/resource.h> /* setpriority */
-#  if defined(PRIO_PROCESS)
-#    define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
-#  else
-#    define SET_REALTIME_PRIORITY /* disabled */
-#  endif
+
+#elif PLATFORM_POSIX_VERSION > 0 /* Unix-like operating system, POSIX-compliant or not (0 means non-unix, see platform.h) */
+#  include <unistd.h>   /* sleep */
 #  define UTIL_sleep(s) sleep(s)
-#  if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) || (PLATFORM_POSIX_VERSION >= 200112L)  /* nanosleep requires POSIX.1-2001 */
+#  if ZSTD_NANOSLEEP_SUPPORT   /* necessarily defined in platform.h */
 #      define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); }
 #  else
 #      define UTIL_sleepMilli(milli) /* disabled */
 #  endif
-#else
-#  define SET_REALTIME_PRIORITY      /* disabled */
+#  if ZSTD_SETPRIORITY_SUPPORT   /* necessarily defined in platform.h */
+#    include <sys/resource.h> /* setpriority */
+#    define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
+#  else
+#    define SET_REALTIME_PRIORITY /* disabled */
+#  endif
+
+#else  /* unknown non-unix operating system */
 #  define UTIL_sleep(s)          /* disabled */
 #  define UTIL_sleepMilli(milli) /* disabled */
+#  define SET_REALTIME_PRIORITY  /* disabled */
 #endif
 
 
@@ -119,6 +121,7 @@ static int g_utilDisplayLevel;
 #if defined(_WIN32)   /* Windows */
     #define UTIL_TIME_INITIALIZER { { 0, 0 } }
     typedef LARGE_INTEGER UTIL_time_t;
+
     UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
     UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
     {
@@ -148,6 +151,7 @@ static int g_utilDisplayLevel;
     #include <mach/mach_time.h>
     #define UTIL_TIME_INITIALIZER 0
     typedef U64 UTIL_time_t;
+
     UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
     UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
     {
@@ -170,11 +174,16 @@ static int g_utilDisplayLevel;
         return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
     }
 
-#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2))
+#elif (PLATFORM_POSIX_VERSION >= 200112L) \
+   && (defined(__UCLIBC__)                \
+      || (defined(__GLIBC__)              \
+          && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) \
+             || (__GLIBC__ > 2))))
 
     #define UTIL_TIME_INITIALIZER { 0, 0 }
     typedef struct timespec UTIL_freq_t;
     typedef struct timespec UTIL_time_t;
+
     UTIL_STATIC UTIL_time_t UTIL_getTime(void)
     {
         UTIL_time_t time;
@@ -182,6 +191,7 @@ static int g_utilDisplayLevel;
             UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n");   /* we could also exit() */
         return time;
     }
+
     UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
     {
         UTIL_time_t diff;
@@ -194,6 +204,7 @@ static int g_utilDisplayLevel;
         }
         return diff;
     }
+
     UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
     {
         UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
@@ -202,6 +213,7 @@ static int g_utilDisplayLevel;
         micro += diff.tv_nsec / 1000ULL;
         return micro;
     }
+
     UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
     {
         UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
@@ -210,6 +222,7 @@ static int g_utilDisplayLevel;
         nano += diff.tv_nsec;
         return nano;
     }
+
 #else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
     typedef clock_t UTIL_time_t;
     #define UTIL_TIME_INITIALIZER 0
@@ -319,15 +332,20 @@ UTIL_STATIC U32 UTIL_isDirectory(const char* infilename)
 
 UTIL_STATIC U32 UTIL_isLink(const char* infilename)
 {
-#if defined(_WIN32)
-    /* no symlinks on windows */
-    (void)infilename;
-#else
+/* lstat() is only called when its feature-test macro guards are satisfied (see https://linux.die.net/man/2/lstat); otherwise always returns 0 */
+#ifndef __STRICT_ANSI__
+#if defined(_BSD_SOURCE) \
+    || (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE >= 500)) \
+    || (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) \
+    || (defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) \
+    || (defined(__APPLE__) && defined(__MACH__))
     int r;
     stat_t statbuf;
     r = lstat(infilename, &statbuf);
     if (!r && S_ISLNK(statbuf.st_mode)) return 1;
 #endif
+#endif   /* !__STRICT_ANSI__ */
+    (void)infilename;   /* avoids unused-parameter warning when the lstat() branch is compiled out */
     return 0;
 }
 
@@ -513,7 +531,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
 
 UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
 {
-    (void)bufStart; (void)bufEnd; (void)pos;
+    (void)bufStart; (void)bufEnd; (void)pos; (void)followLinks;   /* fallback variant : only dirName is used, for the message below */
     UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
     return 0;
 }
@@ -526,7 +544,10 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
  * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
  * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
  */
-UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb, int followLinks)
+UTIL_STATIC const char**
+UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
+                    char** allocatedBuffer, unsigned* allocatedNamesNb,
+                    int followLinks)
 {
     size_t pos;
     unsigned i, nbFiles;
diff --git a/programs/zstd.1 b/programs/zstd.1
index 8e9e83745a0f8..674f89841ce8a 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "2018-01-27" "zstd 1.3.4" "User Commands"
+.TH "ZSTD" "1" "October 2018" "zstd 1.3.7" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -17,7 +17,7 @@
 \fBzstdcat\fR is equivalent to \fBzstd \-dcf\fR
 .
 .SH "DESCRIPTION"
-\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip (1)\fR and \fBxz (1)\fR\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, with fast modes at > 200 MB/s per code, and strong modes nearing lzma compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
+\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip (1)\fR and \fBxz (1)\fR\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, with fast modes at > 200 MB/s per core, and strong modes nearing lzma compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
 .
 .P
 \fBzstd\fR command line syntax is generally similar to gzip, but features the following differences :
@@ -100,6 +100,10 @@ Display information related to a zstd compressed file, such as size, ratio, and
 \fB#\fR compression level [1\-19] (default: 3)
 .
 .TP
+\fB\-\-fast[=#]\fR
+switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
+.
+.TP
 \fB\-\-ultra\fR
 unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
 .
@@ -112,14 +116,22 @@ Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \
 .
 .TP
 \fB\-T#\fR, \fB\-\-threads=#\fR
-Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==256\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
+Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
+.
+.TP
+\fB\-\-single\-thread\fR
+Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
+.
+.TP
+\fB\-\-adapt[=min=#,max=#]\fR
+\fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
 .
 .TP
 \fB\-D file\fR
 use \fBfile\fR as Dictionary to compress or decompress FILE(s)
 .
 .TP
-\fB\-\-nodictID\fR
+\fB\-\-no\-dictID\fR
 do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
 .
 .TP
@@ -152,7 +164,7 @@ operate recursively on dictionaries
 .
 .TP
 \fB\-\-format=FORMAT\fR
-compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\.
+compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
 .
 .TP
 \fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR
@@ -186,7 +198,7 @@ All arguments after \fB\-\-\fR are treated as files
 Use FILEs as training set to create a dictionary\. The training set should contain a lot of small files (> 100), and weight typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)\.
 .
 .IP
-Supports multithreading if \fBzstd\fR is compiled with threading support\. Additional parameters can be specified with \fB\-\-train\-cover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. Equivalent to \fB\-\-train\-cover=d=8,steps=4\fR\.
+Supports multithreading if \fBzstd\fR is compiled with threading support\. Additional parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
 .
 .TP
 \fB\-o file\fR
@@ -209,11 +221,11 @@ Split input files in blocks of size # (default: no split)
 A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\.
 .
 .TP
-\fB\-\-train\-cover[=k#,d=#,steps=#]\fR
-Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. Requires that \fId\fR <= \fIk\fR\.
+\fB\-\-train\-cover[=k=#,d=#,steps=#,split=#]\fR
+Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\.
 .
 .IP
-Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. Supports multithreading if \fBzstd\fR is compiled with threading support\.
+Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\.
 .
 .IP
 Examples:
@@ -230,6 +242,25 @@ Examples:
 .IP
 \fBzstd \-\-train\-cover=k=50 FILEs\fR
 .
+.IP
+\fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
+.
+.TP
+\fB\-\-train\-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#]\fR
+Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and a different default value of \fIsplit\fR\. If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.
+.
+.IP
+\fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR\. The subsegment is hashed to an index in the range [0,2^\fIf\fR \- 1]\. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency\. Using a higher \fIf\fR reduces collision but takes longer\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-fastcover FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-fastcover=d=8,f=15,accel=2 FILEs\fR
+.
 .TP
 \fB\-\-train\-legacy[=selectivity=#]\fR
 Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its possible maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
@@ -335,13 +366,19 @@ The minimum \fIslen\fR is 3 and the maximum is 7\.
 .
 .TP
 \fBtargetLen\fR=\fItlen\fR, \fBtlen\fR=\fItlen\fR
-Specify the minimum match length that causes a match finder to stop searching for better matches\.
+The impact of this field varies depending on the selected strategy\.
+.
+.IP
+For ZSTD_btopt and ZSTD_btultra, it specifies the minimum match length that causes match finder to stop searching for better matches\. A larger \fBtargetLen\fR usually improves compression ratio but decreases compression speed\.
+.
+.IP
+For ZSTD_fast, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed : a larger \fBtargetLen\fR increases compression speed but decreases compression ratio\.
 .
 .IP
-A larger minimum match length usually improves compression ratio but decreases compression speed\. This option is only used with strategies ZSTD_btopt and ZSTD_btultra\.
+For all other strategies, this field has no impact\.
 .
 .IP
-The minimum \fItlen\fR is 4 and the maximum is 999\.
+The minimum \fItlen\fR is 0 and the maximum is 999\.
 .
 .TP
 \fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR
@@ -374,7 +411,7 @@ This option is ignored unless long distance matching is enabled\.
 Larger/very small values usually decrease compression ratio\.
 .
 .IP
-The minumum \fIldmslen\fR is 4 and the maximum is 4096 (default: 64)\.
+The minimum \fIldmslen\fR is 4 and the maximum is 4096 (default: 64)\.
 .
 .TP
 \fBldmBucketSizeLog\fR=\fIldmblog\fR, \fBldmblog\fR=\fIldmblog\fR
@@ -402,14 +439,14 @@ Larger values will improve compression speed\. Deviating far from the default va
 .IP
 The default value is \fBwlog \- ldmhlog\fR\.
 .
-.SS "\-B#:"
-Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\.
-.
 .SS "Example"
-The following parameters sets advanced compression options to those of predefined level 19 for files bigger than 256 KB:
+The following parameters set advanced compression options to something similar to predefined level 19 for files bigger than 256 KB:
 .
 .P
-\fB\-\-zstd\fR=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6
+\fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6
+.
+.SS "\-B#:"
+Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\.
 .
 .SH "BUGS"
 Report bugs at: https://github\.com/facebook/zstd/issues
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 2e2dc54f86685..c0c04698ddc2c 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -19,7 +19,7 @@ DESCRIPTION
 with command line syntax similar to `gzip (1)` and `xz (1)`.
 It is based on the **LZ77** family, with further FSE & huff0 entropy stages.
 `zstd` offers highly configurable compression speed,
-with fast modes at > 200 MB/s per code,
+with fast modes at > 200 MB/s per core,
 and strong modes nearing lzma compression ratios.
 It also features a very fast decoder, with speeds > 500 MB/s per core.
 
@@ -102,6 +102,13 @@ the last one takes effect.
 
 * `-#`:
     `#` compression level \[1-19] (default: 3)
+* `--fast[=#]`:
+    switch to ultra-fast compression levels.
+    If `=#` is not present, it defaults to `1`.
+    The higher the value, the faster the compression speed,
+    at the cost of some compression ratio.
+    This setting overwrites compression level if one was set previously.
+    Similarly, if a compression level is set after `--fast`, it overrides it.
 * `--ultra`:
     unlocks high compression levels 20+ (maximum 22), using a lot more memory.
     Note that decompression will also require more memory when using these levels.
@@ -115,28 +122,31 @@ the last one takes effect.
 
     Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
     `--memory=windowSize` needs to be passed to the decompressor.
-* `--fast[=#]`:
-    switch to ultra-fast compression levels.
-    If `=#` is not present, it defaults to `1`.
-    The higher the value, the faster the compression speed,
-    at the cost of some compression ratio.
-    This setting overwrites compression level if one was set previously.
-    Similarly, if a compression level is set after `--fast`, it overrides it.
-
 * `-T#`, `--threads=#`:
     Compress using `#` working threads (default: 1).
     If `#` is 0, attempt to detect and use the number of physical CPU cores.
     In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200.
     This modifier does nothing if `zstd` is compiled without multithread support.
 * `--single-thread`:
-    Does not spawn a thread for compression, use caller thread instead.
-    This is the only available mode when multithread support is disabled.
-    In this mode, compression is serialized with I/O.
+    Does not spawn a thread for compression, use a single thread for both I/O and compression.
+    In this mode, compression is serialized with I/O, which is slightly slower.
     (This is different from `-T1`, which spawns 1 compression thread in parallel of I/O).
-    Single-thread mode also features lower memory usage.
+    This mode is the only one available when multithread support is disabled.
+    Single-thread mode features lower memory usage.
+    Final compressed result is slightly different from `-T1`.
+* `--adapt[=min=#,max=#]` :
+    `zstd` will dynamically adapt compression level to perceived I/O conditions.
+    Compression level adaptation can be observed live by using command `-v`.
+    Adaptation can be constrained between supplied `min` and `max` levels.
+    The feature works when combined with multi-threading and `--long` mode.
+    It does not work with `--single-thread`.
+    It sets window size to 8 MB by default (can be changed manually, see `wlog`).
+    Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
+    _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
+    when combined with multiple worker threads (>=2).
 * `-D file`:
     use `file` as Dictionary to compress or decompress FILE(s)
-* `--nodictID`:
+* `--no-dictID`:
     do not store dictionary ID within frame header (dictionary compression).
     The decoder will have to rely on implicit knowledge about which dictionary to use,
     it won't be able to check if it's correct.
@@ -164,7 +174,8 @@ the last one takes effect.
 * `--format=FORMAT`:
     compress and decompress in other formats. If compiled with
     support, zstd can compress to or decompress from other compression algorithm
-    formats. Possibly available options are `gzip`, `xz`, `lzma`, and `lz4`.
+    formats. Possibly available options are `zstd`, `gzip`, `xz`, `lzma`, and `lz4`.
+    If no such format is provided, `zstd` is the default.
 * `-h`/`-H`, `--help`:
     display help/long help and exit
 * `-V`, `--version`:
@@ -199,9 +210,10 @@ Compression of small files similar to the sample set will be greatly improved.
     (for example, 10 MB for a 100 KB dictionary).
 
     Supports multithreading if `zstd` is compiled with threading support.
-    Additional parameters can be specified with `--train-cover`.
+    Additional parameters can be specified with `--train-fastcover`.
     The legacy dictionary builder can be accessed with `--train-legacy`.
-    Equivalent to `--train-cover=d=8,steps=4`.
+    The cover dictionary builder can be accessed with `--train-cover`.
+    Equivalent to `--train-fastcover=d=8,steps=4`.
 * `-o file`:
     Dictionary saved into `file` (default name: dictionary).
 * `--maxdict=#`:
@@ -222,11 +234,12 @@ Compression of small files similar to the sample set will be greatly improved.
     This compares favorably to 4 bytes default.
     However, it's up to the dictionary manager to not assign twice the same ID to
     2 different dictionaries.
-* `--train-cover[=k#,d=#,steps=#]`:
+* `--train-cover[=k=#,d=#,steps=#,split=#]`:
     Select parameters for the default dictionary builder algorithm named cover.
     If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
     If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
     If _steps_ is not specified, then the default value of 40 is used.
+    If _split_ is not specified or split <= 0, then the default value of 100 is used.
     Requires that _d_ <= _k_.
 
     Selects segments of size _k_ with highest score to put in the dictionary.
@@ -236,6 +249,8 @@ Compression of small files similar to the sample set will be greatly improved.
     algorithm will run faster with d <= _8_.
     Good values for _k_ vary widely based on the input data, but a safe range is
     [2 * _d_, 2000].
+    If _split_ is 100, all input samples are used for both training and testing
+    to find optimal _d_ and _k_ to build dictionary.
     Supports multithreading if `zstd` is compiled with threading support.
 
     Examples:
@@ -248,6 +263,28 @@ Compression of small files similar to the sample set will be greatly improved.
 
     `zstd --train-cover=k=50 FILEs`
 
+    `zstd --train-cover=k=50,split=60 FILEs`
+
+* `--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#]`:
+    Same as cover but with extra parameters _f_ and _accel_ and a different default value of _split_.
+    If _split_ is not specified, then it tries _split_ = 75.
+    If _f_ is not specified, then it tries _f_ = 20.
+    Requires that 0 < _f_ < 32.
+    If _accel_ is not specified, then it tries _accel_ = 1.
+    Requires that 0 < _accel_ <= 10.
+    Requires that _d_ = 6 or _d_ = 8.
+
+    _f_ is log of size of array that keeps track of frequency of subsegments of size _d_.
+    The subsegment is hashed to an index in the range [0,2^_f_ - 1].
+    It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency.
+    Using a higher _f_ reduces collision but takes longer.
+
+    Examples:
+
+    `zstd --train-fastcover FILEs`
+
+    `zstd --train-fastcover=d=8,f=15,accel=2 FILEs`
+
 * `--train-legacy[=selectivity=#]`:
     Use legacy dictionary builder algorithm with the given dictionary
     _selectivity_ (default: 9).
@@ -354,14 +391,14 @@ The list of available _options_:
     A larger `targetLen` usually improves compression ratio
     but decreases compression speed.
 
-    For ZSTD\_fast, it specifies
-    the amount of data skipped between match sampling.
+    For ZSTD\_fast, it triggers ultra-fast mode when > 0.
+    The value represents the amount of data skipped between match sampling.
     Impact is reversed : a larger `targetLen` increases compression speed
     but decreases compression ratio.
 
     For all other strategies, this field has no impact.
 
-    The minimum _tlen_ is 1 and the maximum is 999.
+    The minimum _tlen_ is 0 and the maximum is 999.
 
 - `overlapLog`=_ovlog_,  `ovlog`=_ovlog_:
     Determine `overlapSize`, amount of data reloaded from previous job.
@@ -392,7 +429,7 @@ The list of available _options_:
 
     Larger/very small values usually decrease compression ratio.
 
-    The minumum _ldmslen_ is 4 and the maximum is 4096 (default: 64).
+    The minimum _ldmslen_ is 4 and the maximum is 4096 (default: 64).
 
 - `ldmBucketSizeLog`=_ldmblog_, `ldmblog`=_ldmblog_:
     Specify the size of each bucket for the hash table used for long distance
@@ -416,6 +453,12 @@ The list of available _options_:
 
     The default value is `wlog - ldmhlog`.
 
+### Example
+The following parameters set advanced compression options to something
+similar to predefined level 19 for files bigger than 256 KB:
+
+`--zstd`=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6
+
 ### -B#:
 Select the size of each compression job.
 This parameter is available only when multi-threading is enabled.
@@ -424,12 +467,6 @@ Default value is `4 * windowSize`, which means it varies depending on compressio
 Note that job size must respect a minimum value which is enforced transparently.
 This minimum is either 1 MB, or `overlapSize`, whichever is largest.
 
-### Example
-The following parameters sets advanced compression options to those of
-predefined level 19 for files bigger than 256 KB:
-
-`--zstd`=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6
-
 BUGS
 ----
 Report bugs at: https://github.com/facebook/zstd/issues
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index c35de7ccfbbd3..1545d1cac5790 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -32,13 +32,13 @@
 #include <errno.h>    /* errno */
 #include "fileio.h"   /* stdinmark, stdoutmark, ZSTD_EXTENSION */
 #ifndef ZSTD_NOBENCH
-#  include "bench.h"  /* BMK_benchFiles, BMK_SetNbSeconds */
+#  include "bench.h"  /* BMK_benchFiles */
 #endif
 #ifndef ZSTD_NODICT
 #  include "dibio.h"  /* ZDICT_cover_params_t, DiB_trainFromFiles() */
 #endif
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_maxCLevel */
-#include "zstd.h"     /* ZSTD_VERSION_STRING */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_minCLevel */
+#include "zstd.h"     /* ZSTD_VERSION_STRING, ZSTD_maxCLevel */
 
 
 /*-************************************
@@ -85,6 +85,10 @@ static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT;
 static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;
 
 
+#define DEFAULT_ACCEL 1
+
+typedef enum { cover, fastCover, legacy } dictType;
+
 /*-************************************
 *  Display Macros
 **************************************/
@@ -135,6 +139,7 @@ static int usage_advanced(const char* programName)
     DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
     DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
     DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
+    DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
 #ifdef ZSTD_MULTITHREAD
     DISPLAY( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
     DISPLAY( " -B#    : select size of each job (default: 0==automatic) \n");
@@ -145,6 +150,7 @@ static int usage_advanced(const char* programName)
 #ifdef UTIL_HAS_CREATEFILELIST
     DISPLAY( " -r     : operate recursively on directories \n");
 #endif
+    DISPLAY( "--format=zstd : compress files to the .zstd format (default) \n");
 #ifdef ZSTD_GZCOMPRESS
     DISPLAY( "--format=gzip : compress files to the .gz format \n");
 #endif
@@ -169,7 +175,8 @@ static int usage_advanced(const char* programName)
     DISPLAY( "\n");
     DISPLAY( "Dictionary builder : \n");
     DISPLAY( "--train ## : create a dictionary from a training set of files \n");
-    DISPLAY( "--train-cover[=k=#,d=#,steps=#] : use the cover algorithm with optional args\n");
+    DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args\n");
+    DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fast cover algorithm with optional args\n");
     DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
     DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
     DISPLAY( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
@@ -219,20 +226,34 @@ static int exeNameMatch(const char* exeName, const char* test)
         (exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.');
 }
 
+static void errorOut(const char* msg)   /* fatal error helper : displays `msg`, then terminates the program */
+{
+    DISPLAY("%s \n", msg); exit(1);   /* exit code 1 ; never returns to the caller */
+}
+
 /*! readU32FromChar() :
  * @return : unsigned integer value read from input in `char` format.
  *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
  *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
- *  Note : function result can overflow if digit string > MAX_UINT */
+ *  Note : function will exit() program if digit sequence overflows */
 static unsigned readU32FromChar(const char** stringPtr)
 {
+    const char errorMsg[] = "error: numeric value too large";
     unsigned result = 0;
-    while ((**stringPtr >='0') && (**stringPtr <='9'))
+    while ((**stringPtr >='0') && (**stringPtr <='9')) {
+        unsigned const max = (((unsigned)(-1)) / 10) - 1;
+        if (result > max) errorOut(errorMsg);
         result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+    }
     if ((**stringPtr=='K') || (**stringPtr=='M')) {
+        unsigned const maxK = ((unsigned)(-1)) >> 10;
+        if (result > maxK) errorOut(errorMsg);
         result <<= 10;
-        if (**stringPtr=='M') result <<= 10;
-        (*stringPtr)++ ;
+        if (**stringPtr=='M') {
+            if (result > maxK) errorOut(errorMsg);
+            result <<= 10;
+        }
+        (*stringPtr)++;  /* skip `K` or `M` */
         if (**stringPtr=='i') (*stringPtr)++;
         if (**stringPtr=='B') (*stringPtr)++;
     }
@@ -267,10 +288,42 @@ static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t
         if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "split=")) {
+          unsigned splitPercentage = readU32FromChar(&stringPtr);
+          params->splitPoint = (double)splitPercentage / 100.0;
+          if (stringPtr[0]==',') { stringPtr++; continue; } else break;
+        }
         return 0;
     }
     if (stringPtr[0] != 0) return 0;
-    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
+    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100));
+    return 1;
+}
+
+/**
+ * parseFastCoverParameters() :
+ * reads fastcover parameters from *stringPtr (e.g. "--train-fastcover=k=48,d=8,f=20,steps=32,accel=2") into *params.
+ * *params is zeroed first, so any field not named in the string is left at 0. `split` is given as a percentage.
+ * @return 1 if the whole string was parsed successfully, 0 in case of malformed parameters
+ */
+static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_params_t* params)
+{
+    memset(params, 0, sizeof(*params));
+    for (; ;) {
+        if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "f=")) { params->f = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "accel=")) { params->accel = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "split=")) {
+          unsigned splitPercentage = readU32FromChar(&stringPtr);
+          params->splitPoint = (double)splitPercentage / 100.0;   /* percentage -> fraction */
+          if (stringPtr[0]==',') { stringPtr++; continue; } else break;
+        }
+        return 0;   /* no known parameter name at this position */
+    }
+    if (stringPtr[0] != 0) return 0;   /* trailing characters after the last value */
+    DISPLAYLEVEL(4, "fastcover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel);
     return 1;
 }
 
@@ -295,11 +348,48 @@ static ZDICT_cover_params_t defaultCoverParams(void)
     memset(&params, 0, sizeof(params));
     params.d = 8;
     params.steps = 4;
+    params.splitPoint = 1.0;
+    return params;
+}
+
+static ZDICT_fastCover_params_t defaultFastCoverParams(void)   /* @return : fastcover parameters used to initialize `fastCoverParams` in main() */
+{
+    ZDICT_fastCover_params_t params;
+    memset(&params, 0, sizeof(params));   /* every field not set below (k included) stays 0 */
+    params.d = 8;
+    params.f = 20;
+    params.steps = 4;
+    params.splitPoint = 0.75; /* different from default splitPoint of cover */
+    params.accel = DEFAULT_ACCEL;
     return params;
 }
 #endif
 
 
+/** parseAdaptParameters() :
+ *  reads adapt parameters from *stringPtr (e.g. "--adapt=min=1,max=19") and stores them into *adaptMinPtr and *adaptMaxPtr.
+ *  Both adaptMinPtr and adaptMaxPtr must be already allocated and correctly initialized :
+ *  a bound not named in the string is left untouched. Values are read with readU32FromChar(), so only non-negative levels can be expressed.
+ *  @return 1 means that parsing was successful (whole string consumed, and min <= max),
+ *  @return 0 in case of malformed parameters or incoherent limits (min > max)
+ */
+static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, int* adaptMaxPtr)
+{
+    for ( ; ;) {
+        if (longCommandWArg(&stringPtr, "min=")) { *adaptMinPtr = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "max=")) { *adaptMaxPtr = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        DISPLAYLEVEL(4, "invalid compression parameter \n");   /* neither "min=" nor "max=" matched */
+        return 0;
+    }
+    if (stringPtr[0] != 0) return 0; /* check the end of string */
+    if (*adaptMinPtr > *adaptMaxPtr) {   /* bounds may come from the string or from the caller's initial values */
+        DISPLAYLEVEL(4, "incoherent adaptation limits \n");
+        return 0;
+    }
+    return 1;
+}
+
+
 /** parseCompressionParameters() :
  *  reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params
  *  @return 1 means that compression parameters were correct
@@ -364,6 +454,15 @@ typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom
 
 #define CLEAN_RETURN(i) { operationResult = (i); goto _end; }
 
+#ifdef ZSTD_NOCOMPRESS
+/* symbols from compression library are not defined and should not be invoked */
+# define MINCLEVEL  -50
+# define MAXCLEVEL   22
+#else
+# define MINCLEVEL  ZSTD_minCLevel()
+# define MAXCLEVEL  ZSTD_maxCLevel()
+#endif
+
 int main(int argCount, const char* argv[])
 {
     int argNb,
@@ -373,6 +472,9 @@ int main(int argCount, const char* argv[])
         ldmFlag = 0,
         main_pause = 0,
         nbWorkers = 0,
+        adapt = 0,
+        adaptMin = MINCLEVEL,
+        adaptMax = MAXCLEVEL,
         nextArgumentIsOutFileName = 0,
         nextArgumentIsMaxDict = 0,
         nextArgumentIsDictID = 0,
@@ -383,6 +485,7 @@ int main(int argCount, const char* argv[])
         setRealTimePrio = 0,
         singleThread = 0,
         ultra=0;
+    double compressibility = 0.5;
     unsigned bench_nbSeconds = 3;   /* would be better if this value was synchronized from bench */
     size_t blockSize = 0;
     zstd_operation_mode operation = zom_compress;
@@ -408,14 +511,16 @@ int main(int argCount, const char* argv[])
 #endif
 #ifndef ZSTD_NODICT
     ZDICT_cover_params_t coverParams = defaultCoverParams();
-    int cover = 1;
+    ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams();
+    dictType dict = fastCover;
+#endif
+#ifndef ZSTD_NOBENCH
+    BMK_advancedParams_t benchParams = BMK_initAdvancedParams();
 #endif
 
 
     /* init */
     (void)recursive; (void)cLevelLast;    /* not used when ZSTD_NOBENCH set */
-    (void)dictCLevel; (void)dictSelect; (void)dictID;  (void)maxDictSize; /* not used when ZSTD_NODICT set */
-    (void)ultra; (void)cLevel; (void)ldmFlag; /* not used when ZSTD_NOCOMPRESS set */
     (void)memLimit;   /* not used when ZSTD_NODECOMPRESS set */
     if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
     filenameTable[0] = stdinmark;
@@ -426,7 +531,7 @@ int main(int argCount, const char* argv[])
 #endif
 
     /* preset behaviors */
-    if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0;
+    if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0;
     if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;
     if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; }   /* supports multiple formats */
     if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(); outFileName=stdoutmark; g_displayLevel=1; }  /* behave like zcat, also supports multiple formats */
@@ -441,6 +546,9 @@ int main(int argCount, const char* argv[])
     if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(FIO_lz4Compression); }                                   /* behave like unlz4, also supports multiple formats */
     memset(&compressionParams, 0, sizeof(compressionParams));
 
+    /* init crash handler */
+    FIO_addAbortHandler();
+
     /* command switches */
     for (argNb=1; argNb<argCount; argNb++) {
         const char* argument = argv[argNb];
@@ -478,14 +586,17 @@ int main(int argCount, const char* argv[])
                     if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
                     if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
                     if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
-                    if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; }
+                    if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
                     if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; }  /* kept available for compatibility with old syntax ; will be removed one day */
                     if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; }  /* kept available for compatibility with old syntax ; will be removed one day */
                     if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
                     if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; }
                     if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
                     if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
+                    if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
+                    if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
                     if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
+                    if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(FIO_zstdCompression); continue; }
 #ifdef ZSTD_GZCOMPRESS
                     if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); continue; }
 #endif
@@ -501,18 +612,31 @@ int main(int argCount, const char* argv[])
 #ifndef ZSTD_NODICT
                     if (longCommandWArg(&argument, "--train-cover")) {
                       operation = zom_train;
-                      outFileName = g_defaultDictName;
-                      cover = 1;
+                      if (outFileName == NULL)
+                          outFileName = g_defaultDictName;
+                      dict = cover;
                       /* Allow optional arguments following an = */
                       if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
                       else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
                       else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
                       continue;
                     }
+                    if (longCommandWArg(&argument, "--train-fastcover")) {
+                      operation = zom_train;
+                      if (outFileName == NULL)
+                          outFileName = g_defaultDictName;
+                      dict = fastCover;
+                      /* Allow optional arguments following an = */
+                      if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
+                      else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
+                      else if (!parseFastCoverParameters(argument, &fastCoverParams)) { CLEAN_RETURN(badusage(programName)); }
+                      continue;
+                    }
                     if (longCommandWArg(&argument, "--train-legacy")) {
                       operation = zom_train;
-                      outFileName = g_defaultDictName;
-                      cover = 0;
+                      if (outFileName == NULL)
+                          outFileName = g_defaultDictName;
+                      dict = legacy;
                       /* Allow optional arguments following an = */
                       if (*argument == 0) { continue; }
                       else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
@@ -544,13 +668,20 @@ int main(int argCount, const char* argv[])
                             compressionParams.windowLog = ldmWindowLog;
                         continue;
                     }
+#ifndef ZSTD_NOCOMPRESS   /* linking ZSTD_minCLevel() requires compression support */
                     if (longCommandWArg(&argument, "--fast")) {
-                        /* Parse optional window log */
+                        /* Parse optional acceleration factor */
                         if (*argument == '=') {
+                            U32 const maxFast = (U32)-ZSTD_minCLevel();
                             U32 fastLevel;
                             ++argument;
                             fastLevel = readU32FromChar(&argument);
-                            if (fastLevel) cLevel = - (int)fastLevel;
+                            if (fastLevel > maxFast) fastLevel = maxFast;
+                            if (fastLevel) {
+                              dictCLevel = cLevel = -(int)fastLevel;
+                            } else {
+                              CLEAN_RETURN(badusage(programName));
+                            }
                         } else if (*argument != 0) {
                             /* Invalid character following --fast */
                             CLEAN_RETURN(badusage(programName));
@@ -559,6 +690,7 @@ int main(int argCount, const char* argv[])
                         }
                         continue;
                     }
+#endif
                     /* fall-through, will trigger bad_usage() later on */
                 }
 
@@ -589,7 +721,7 @@ int main(int argCount, const char* argv[])
                          /* Decoding */
                     case 'd':
 #ifndef ZSTD_NOBENCH
-                            BMK_setDecodeOnlyMode(1);
+                            benchParams.mode = BMK_decodeOnly;
                             if (operation==zom_bench) { argument++; break; }  /* benchmark decode (hidden option) */
 #endif
                             operation=zom_decompress; argument++; break;
@@ -682,11 +814,19 @@ int main(int argCount, const char* argv[])
                     case 'p': argument++;
 #ifndef ZSTD_NOBENCH
                         if ((*argument>='0') && (*argument<='9')) {
-                            BMK_setAdditionalParam(readU32FromChar(&argument));
+                            benchParams.additionalParam = (int)readU32FromChar(&argument);
                         } else
 #endif
                             main_pause=1;
                         break;
+
+                        /* Select compressibility of synthetic sample */
+                    case 'P':
+                    {   argument++;
+                        compressibility = (double)readU32FromChar(&argument) / 100;
+                    }
+                    break;
+
                         /* unknown command */
                     default : CLEAN_RETURN(badusage(programName));
                     }
@@ -743,8 +883,11 @@ int main(int argCount, const char* argv[])
         nbWorkers = UTIL_countPhysicalCores();
         DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
     }
+#else
+    (void)singleThread; (void)nbWorkers;
 #endif
 
+#ifdef UTIL_HAS_CREATEFILELIST
     g_utilDisplayLevel = g_displayLevel;
     if (!followLinks) {
         unsigned u;
@@ -757,7 +900,6 @@ int main(int argCount, const char* argv[])
         }
         filenameIdx = fileNamesNb;
     }
-#ifdef UTIL_HAS_CREATEFILELIST
     if (recursive) {  /* at this stage, filenameTable is a list of paths, which can contain both files and directories */
         extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb, followLinks);
         if (extendedFileList) {
@@ -768,6 +910,8 @@ int main(int argCount, const char* argv[])
             filenameIdx = fileNamesNb;
         }
     }
+#else
+    (void)followLinks;
 #endif
 
     if (operation == zom_list) {
@@ -783,24 +927,48 @@ int main(int argCount, const char* argv[])
     /* Check if benchmark is selected */
     if (operation==zom_bench) {
 #ifndef ZSTD_NOBENCH
-        BMK_setNotificationLevel(g_displayLevel);
-        BMK_setSeparateFiles(separateFiles);
-        BMK_setBlockSize(blockSize);
-        BMK_setNbWorkers(nbWorkers);
-        BMK_setRealTime(setRealTimePrio);
-        BMK_setNbSeconds(bench_nbSeconds);
-        BMK_setLdmFlag(ldmFlag);
-        BMK_setLdmMinMatch(g_ldmMinMatch);
-        BMK_setLdmHashLog(g_ldmHashLog);
+        benchParams.blockSize = blockSize;
+        benchParams.nbWorkers = nbWorkers;
+        benchParams.realTime = setRealTimePrio;
+        benchParams.nbSeconds = bench_nbSeconds;
+        benchParams.ldmFlag = ldmFlag;
+        benchParams.ldmMinMatch = g_ldmMinMatch;
+        benchParams.ldmHashLog = g_ldmHashLog;
         if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
-            BMK_setLdmBucketSizeLog(g_ldmBucketSizeLog);
+            benchParams.ldmBucketSizeLog = g_ldmBucketSizeLog;
         }
         if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
-            BMK_setLdmHashEveryLog(g_ldmHashEveryLog);
+            benchParams.ldmHashEveryLog = g_ldmHashEveryLog;
+        }
+
+        if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
+        if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
+        if (cLevelLast < cLevel) cLevelLast = cLevel;
+        if (cLevelLast > cLevel)
+            DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
+        if(filenameIdx) {
+            if(separateFiles) {
+                unsigned i;
+                for(i = 0; i < filenameIdx; i++) {
+                    int c;
+                    DISPLAYLEVEL(3, "Benchmarking %s \n", filenameTable[i]);
+                    for(c = cLevel; c <= cLevelLast; c++) {
+                        BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
+                    }
+                }
+            } else {
+                for(; cLevel <= cLevelLast; cLevel++) {
+                    BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
+                }
+            }
+        } else {
+            for(; cLevel <= cLevelLast; cLevel++) {
+                BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams);
+            }
         }
-        BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams);
+
 #else
-        (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles;
+        (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; (void)compressibility;
 #endif
         goto _end;
     }
@@ -812,18 +980,27 @@ int main(int argCount, const char* argv[])
         zParams.compressionLevel = dictCLevel;
         zParams.notificationLevel = g_displayLevel;
         zParams.dictID = dictID;
-        if (cover) {
+        if (dict == cover) {
             int const optimize = !coverParams.k || !coverParams.d;
             coverParams.nbThreads = nbWorkers;
             coverParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, optimize);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, NULL, optimize);
+        } else if (dict == fastCover) {
+            int const optimize = !fastCoverParams.k || !fastCoverParams.d;
+            fastCoverParams.nbThreads = nbWorkers;
+            fastCoverParams.zParams = zParams;
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, NULL, &fastCoverParams, optimize);
         } else {
             ZDICT_legacy_params_t dictParams;
             memset(&dictParams, 0, sizeof(dictParams));
             dictParams.selectivityLevel = dictSelect;
             dictParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, 0);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, NULL, 0);
         }
+#else
+        (void)dictCLevel; (void)dictSelect; (void)dictID;  (void)maxDictSize; /* not used when ZSTD_NODICT set */
+        DISPLAYLEVEL(1, "training mode not available \n");
+        operationResult = 1;
 #endif
         goto _end;
     }
@@ -866,24 +1043,25 @@ int main(int argCount, const char* argv[])
 #ifndef ZSTD_NOCOMPRESS
         FIO_setNbWorkers(nbWorkers);
         FIO_setBlockSize((U32)blockSize);
+        if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog);
         FIO_setLdmFlag(ldmFlag);
         FIO_setLdmHashLog(g_ldmHashLog);
         FIO_setLdmMinMatch(g_ldmMinMatch);
-        if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
-            FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog);
-        }
-        if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
-            FIO_setLdmHashEveryLog(g_ldmHashEveryLog);
-        }
+        if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog);
+        if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) FIO_setLdmHashEveryLog(g_ldmHashEveryLog);
+        FIO_setAdaptiveMode(adapt);
+        FIO_setAdaptMin(adaptMin);
+        FIO_setAdaptMax(adaptMax);
+        if (adaptMin > cLevel) cLevel = adaptMin;
+        if (adaptMax < cLevel) cLevel = adaptMax;
 
-        if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog);
         if ((filenameIdx==1) && outFileName)
-          operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams);
+          operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, compressionParams);
         else
-          operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, &compressionParams);
+          operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
 #else
-        (void)suffix;
-        DISPLAY("Compression not supported\n");
+        (void)suffix; (void)adapt; (void)ultra; (void)cLevel; (void)ldmFlag; /* not used when ZSTD_NOCOMPRESS set */
+        DISPLAY("Compression not supported \n");
 #endif
     } else {  /* decompression or test */
 #ifndef ZSTD_NODECOMPRESS
@@ -900,7 +1078,7 @@ int main(int argCount, const char* argv[])
         else
             operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, outFileName, dictFileName);
 #else
-        DISPLAY("Decompression not supported\n");
+        DISPLAY("Decompression not supported \n");
 #endif
     }
 
diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1
new file mode 100644
index 0000000000000..716d28fc8e7ed
--- /dev/null
+++ b/programs/zstdgrep.1
@@ -0,0 +1,23 @@
+.
+.TH "ZSTDGREP" "1" "October 2018" "zstd 1.3.7" "User Commands"
+.
+.SH "NAME"
+\fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
+.
+.SH "SYNOPSIS"
+\fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \.\.\.]
+.
+.SH "DESCRIPTION"
+\fBzstdgrep\fR runs \fBgrep (1)\fR on files, or on stdin if no files argument is given, after decompressing them with \fBzstdcat (1)\fR\.
+.
+.P
+The grep\-flags and pattern arguments are passed on to \fBgrep (1)\fR\. If an \fB\-e\fR flag is found in the \fBgrep\-flags\fR, \fBzstdgrep\fR will not look for a pattern argument\.
+.
+.SH "EXIT STATUS"
+In case of missing arguments or missing pattern, 1 will be returned, otherwise 0\.
+.
+.SH "SEE ALSO"
+\fBzstd (1)\fR
+.
+.SH "AUTHORS"
+Thomas Klausner \fIwiz@NetBSD\.org\fR
diff --git a/programs/zstdgrep.1.md b/programs/zstdgrep.1.md
new file mode 100644
index 0000000000000..363ad4f9978c6
--- /dev/null
+++ b/programs/zstdgrep.1.md
@@ -0,0 +1,26 @@
+zstdgrep(1) -- print lines matching a pattern in zstandard-compressed files
+============================================================================
+
+SYNOPSIS
+--------
+
+`zstdgrep` [*grep-flags*] [--] _pattern_ [_files_ ...]
+
+
+DESCRIPTION
+-----------
+`zstdgrep` runs `grep (1)` on files, or on stdin if no files argument is given, after decompressing them with `zstdcat (1)`.
+
+The grep-flags and pattern arguments are passed on to `grep (1)`.  If an `-e` flag is found in the `grep-flags`, `zstdgrep` will not look for a pattern argument.
+
+EXIT STATUS
+-----------
+In case of missing arguments or missing pattern, 1 will be returned, otherwise 0.
+
+SEE ALSO
+--------
+`zstd (1)`
+
+AUTHORS
+-------
+Thomas Klausner <wiz@NetBSD.org>
diff --git a/programs/zstdless.1 b/programs/zstdless.1
new file mode 100644
index 0000000000000..bf4965e7dfa6a
--- /dev/null
+++ b/programs/zstdless.1
@@ -0,0 +1,14 @@
+.
+.TH "ZSTDLESS" "1" "October 2018" "zstd 1.3.7" "User Commands"
+.
+.SH "NAME"
+\fBzstdless\fR \- view zstandard\-compressed files
+.
+.SH "SYNOPSIS"
+\fBzstdless\fR [\fIflags\fR] [\fIfile\fR \.\.\.]
+.
+.SH "DESCRIPTION"
+\fBzstdless\fR runs \fBless (1)\fR on files, or on stdin if no files argument is given, after decompressing them with \fBzstdcat (1)\fR\.
+.
+.SH "SEE ALSO"
+\fBzstd (1)\fR
diff --git a/programs/zstdless.1.md b/programs/zstdless.1.md
new file mode 100644
index 0000000000000..d91d48abcc71b
--- /dev/null
+++ b/programs/zstdless.1.md
@@ -0,0 +1,16 @@
+zstdless(1) -- view zstandard-compressed files
+============================================================================
+
+SYNOPSIS
+--------
+
+`zstdless` [*flags*] [_file_ ...]
+
+
+DESCRIPTION
+-----------
+`zstdless` runs `less (1)` on files, or on stdin if no files argument is given, after decompressing them with `zstdcat (1)`.
+
+SEE ALSO
+--------
+`zstd (1)`