diff options
| author | Conrad Meyer <cem@FreeBSD.org> | 2020-12-25 00:21:42 +0000 |
|---|---|---|
| committer | Conrad Meyer <cem@FreeBSD.org> | 2020-12-25 00:21:42 +0000 |
| commit | f6ae97673c28bdb9ae795bd235ab6f26f2536a2d (patch) | |
| tree | 5d30f2600c2dd431f58b30b77f27b80019768627 /programs | |
| parent | bc64b5ce191d48b503e4fad8c0cefb774a2fa969 (diff) | |
Diffstat (limited to 'programs')
| -rw-r--r-- | programs/Makefile | 282 | ||||
| -rw-r--r-- | programs/README.md | 70 | ||||
| -rw-r--r-- | programs/dibio.c | 2 | ||||
| -rw-r--r-- | programs/fileio.c | 462 | ||||
| -rw-r--r-- | programs/fileio.h | 29 | ||||
| -rw-r--r-- | programs/platform.h | 6 | ||||
| -rw-r--r-- | programs/timefn.h | 6 | ||||
| -rw-r--r-- | programs/util.c | 407 | ||||
| -rw-r--r-- | programs/util.h | 62 | ||||
| -rw-r--r-- | programs/zstd.1 | 77 | ||||
| -rw-r--r-- | programs/zstd.1.md | 150 | ||||
| -rw-r--r-- | programs/zstdcli.c | 195 | ||||
| -rw-r--r-- | programs/zstdgrep.1 | 2 | ||||
| -rw-r--r-- | programs/zstdless.1 | 2 |
14 files changed, 1268 insertions, 484 deletions
diff --git a/programs/Makefile b/programs/Makefile index 418ad4e6348e..8641d0ee48a0 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -15,7 +15,14 @@ # zstd-decompress : decompressor-only version of zstd # ########################################################################## -ZSTDDIR = ../lib +.PHONY: default +default: zstd-release + +# silent mode by default; verbose can be triggered by V=1 or VERBOSE=1 +$(V)$(VERBOSE).SILENT: + + +ZSTDDIR := ../lib # Version numbers LIBVER_SRC := $(ZSTDDIR)/zstd.h @@ -33,64 +40,94 @@ ZSTD_VERSION = $(LIBVER) HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) GREP_OPTIONS ?= ifeq ($HAVE_COLORNEVER, 1) -GREP_OPTIONS += --color=never + GREP_OPTIONS += --color=never endif GREP = grep $(GREP_OPTIONS) ifeq ($(shell $(CC) -v 2>&1 | $(GREP) -c "gcc version "), 1) -ALIGN_LOOP = -falign-loops=32 + ALIGN_LOOP = -falign-loops=32 else -ALIGN_LOOP = + ALIGN_LOOP = endif -CPPFLAGS+= -DXXH_NAMESPACE=ZSTD_ +DEBUGLEVEL ?= 0 +CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -DDEBUGLEVEL=$(DEBUGLEVEL) ifeq ($(OS),Windows_NT) # MinGW assumed -CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting + CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting endif -CFLAGS ?= -O3 +CFLAGS ?= -O3 DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-prototypes -Wundef -Wpointer-arith \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls -Wmissing-prototypes -Wc++-compat -CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) -FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) +ZSTDLIB_COMMON := $(ZSTDDIR)/common +ZSTDLIB_COMPRESS := $(ZSTDDIR)/compress +ZSTDLIB_DECOMPRESS := $(ZSTDDIR)/decompress +ZDICT_DIR := $(ZSTDDIR)/dictBuilder +ZSTDLEGACY_DIR := $(ZSTDDIR)/legacy -ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c -ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c -ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c -ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) -ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c -ZSTDDECOMP_O = $(ZSTDDIR)/decompress/zstd_decompress.o +vpath %.c $(ZSTDLIB_COMMON) $(ZSTDLIB_COMPRESS) $(ZSTDLIB_DECOMPRESS) $(ZDICT_DIR) $(ZSTDLEGACY_DIR) + +ZSTDLIB_COMMON_C := $(wildcard $(ZSTDLIB_COMMON)/*.c) +ZSTDLIB_COMPRESS_C := $(wildcard $(ZSTDLIB_COMPRESS)/*.c) +ZSTDLIB_DECOMPRESS_C := $(wildcard $(ZSTDLIB_DECOMPRESS)/*.c) +ZSTDLIB_CORE_SRC := $(ZSTDLIB_DECOMPRESS_C) $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) +ZDICT_SRC := $(wildcard $(ZDICT_DIR)/*.c) ZSTD_LEGACY_SUPPORT ?= 5 -ZSTDLEGACY_FILES := +ZSTDLEGACY_SRC := ifneq ($(ZSTD_LEGACY_SUPPORT), 0) ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0) - ZSTDLEGACY_FILES += $(shell ls $(ZSTDDIR)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') + ZSTDLEGACY_SRC += $(shell ls $(ZSTDLEGACY_DIR)/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') endif -else endif # Sort files in alphabetical order for reproducible builds -ZSTDLIB_FILES := $(sort $(wildcard $(ZSTD_FILES)) $(wildcard $(ZSTDLEGACY_FILES)) $(wildcard $(ZDICT_FILES))) +ZSTDLIB_FULL_SRC = $(sort $(ZSTDLIB_CORE_SRC) $(ZSTDLEGACY_SRC) $(ZDICT_SRC)) +ZSTDLIB_LOCAL_SRC := $(notdir $(ZSTDLIB_FULL_SRC)) +ZSTDLIB_LOCAL_OBJ := $(ZSTDLIB_LOCAL_SRC:.c=.o) + +ZSTD_CLI_SRC := $(wildcard *.c) +ZSTD_CLI_OBJ := $(ZSTD_CLI_SRC:.c=.o) + +ZSTD_ALL_SRC := $(ZSTDLIB_LOCAL_SRC) $(ZSTD_CLI_SRC) +ZSTD_ALL_OBJ := $(ZSTD_ALL_SRC:.c=.o) -ZSTD_CLI_FILES := $(wildcard *.c) -ZSTD_CLI_OBJ := $(patsubst %.c,%.o,$(ZSTD_CLI_FILES)) +UNAME := $(shell uname) +ifeq ($(UNAME), Darwin) + HASH ?= md5 +else ifeq ($(UNAME), FreeBSD) + HASH ?= gmd5sum +else ifeq ($(UNAME), OpenBSD) + HASH ?= md5 +endif +HASH ?= md5sum +HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0) + +ifndef BUILD_DIR +HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " ") +ifeq ($(HAVE_HASH),0) + $(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags) + BUILD_DIR := obj/generic_noconf +endif +endif # BUILD_DIR # Define *.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) -EXT =.exe -RES64_FILE = windres/zstd64.res -RES32_FILE = windres/zstd32.res + EXT =.exe + RES64_FILE = windres/zstd64.res + RES32_FILE = windres/zstd32.res ifneq (,$(filter x86_64%,$(shell $(CC) -dumpmachine))) RES_FILE = $(RES64_FILE) else RES_FILE = $(RES32_FILE) endif else -EXT = + EXT = endif VOID = /dev/null @@ -103,60 +140,64 @@ NO_THREAD_MSG := ==> no threads, building without multithreading support HAVE_PTHREAD := $(shell printf '$(NUM_SYMBOL)include <pthread.h>\nint main(void) { return 0; }' > have_pthread.c && $(CC) $(FLAGS) -o have_pthread$(EXT) have_pthread.c -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0; rm have_pthread.c) HAVE_THREAD := $(shell [ "$(HAVE_PTHREAD)" -eq "1" -o -n "$(filter Windows%,$(OS))" ] && echo 1 || echo 0) ifeq ($(HAVE_THREAD), 1) -THREAD_MSG := ==> building with threading support -THREAD_CPP := -DZSTD_MULTITHREAD -THREAD_LD := -pthread + THREAD_MSG := ==> building with threading support + THREAD_CPP := -DZSTD_MULTITHREAD + THREAD_LD := -pthread else -THREAD_MSG := $(NO_THREAD_MSG) + THREAD_MSG := $(NO_THREAD_MSG) endif # zlib detection NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support HAVE_ZLIB := $(shell printf '$(NUM_SYMBOL)include <zlib.h>\nint main(void) { return 0; }' > have_zlib.c && $(CC) $(FLAGS) -o have_zlib$(EXT) have_zlib.c -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0; rm have_zlib.c) ifeq ($(HAVE_ZLIB), 1) -ZLIB_MSG := ==> building zstd with .gz compression support -ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS -ZLIBLD = -lz + ZLIB_MSG := ==> building zstd with .gz compression support + ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS + ZLIBLD = -lz else -ZLIB_MSG := $(NO_ZLIB_MSG) + ZLIB_MSG := $(NO_ZLIB_MSG) endif # lzma detection NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support HAVE_LZMA := $(shell printf '$(NUM_SYMBOL)include <lzma.h>\nint main(void) { return 0; }' > have_lzma.c && $(CC) $(FLAGS) -o have_lzma$(EXT) have_lzma.c -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0; rm have_lzma.c) ifeq ($(HAVE_LZMA), 1) -LZMA_MSG := ==> building zstd with .xz/.lzma compression support -LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS -LZMALD = -llzma + LZMA_MSG := ==> building zstd with .xz/.lzma compression support + LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS + LZMALD = -llzma else -LZMA_MSG := $(NO_LZMA_MSG) + LZMA_MSG := $(NO_LZMA_MSG) endif # lz4 detection NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support HAVE_LZ4 := $(shell printf '$(NUM_SYMBOL)include <lz4frame.h>\n$(NUM_SYMBOL)include <lz4.h>\nint main(void) { return 0; }' > have_lz4.c && $(CC) $(FLAGS) -o have_lz4$(EXT) have_lz4.c -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0; rm have_lz4.c) ifeq ($(HAVE_LZ4), 1) -LZ4_MSG := ==> building zstd with .lz4 compression support -LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS -LZ4LD = -llz4 + LZ4_MSG := ==> building zstd with .lz4 compression support + LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS + LZ4LD = -llz4 else -LZ4_MSG := $(NO_LZ4_MSG) + LZ4_MSG := $(NO_LZ4_MSG) endif # explicit backtrace enable/disable for Linux & Darwin ifeq ($(BACKTRACE), 0) -DEBUGFLAGS += -DBACKTRACE_ENABLE=0 + DEBUGFLAGS += -DBACKTRACE_ENABLE=0 endif ifeq (,$(filter Windows%, $(OS))) ifeq ($(BACKTRACE), 1) -DEBUGFLAGS += -DBACKTRACE_ENABLE=1 -DEBUGFLAGS_LD += -rdynamic + DEBUGFLAGS += -DBACKTRACE_ENABLE=1 + DEBUGFLAGS_LD += -rdynamic endif endif +SET_CACHE_DIRECTORY = \ + $(MAKE) --no-print-directory $@ \ + BUILD_DIR=obj/$(HASH_DIR) \ + CPPFLAGS="$(CPPFLAGS)" \ + CFLAGS="$(CFLAGS)" \ + LDFLAGS="$(LDFLAGS)" -.PHONY: default -default: zstd-release .PHONY: all all: zstd @@ -164,21 +205,51 @@ all: zstd .PHONY: allVariants allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy zstd-dictBuilder -$(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP) - +.PHONY: zstd # must always be run zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP) zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD) zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) ifneq (,$(filter Windows%,$(OS))) zstd : $(RES_FILE) endif -zstd : $(ZSTDLIB_FILES) $(ZSTD_CLI_OBJ) + +ifndef BUILD_DIR +# generate BUILD_DIR from flags + +zstd: + $(SET_CACHE_DIRECTORY) + +else +# BUILD_DIR is defined + +ZSTD_OBJ := $(addprefix $(BUILD_DIR)/, $(ZSTD_ALL_OBJ)) +$(BUILD_DIR)/zstd : $(ZSTD_OBJ) @echo "$(THREAD_MSG)" @echo "$(ZLIB_MSG)" @echo "$(LZMA_MSG)" @echo "$(LZ4_MSG)" + @echo LINK $@ $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) +ifeq ($(HAVE_HASH),1) +SRCBIN_HASH = $(shell cat $(BUILD_DIR)/zstd 2> $(VOID) | $(HASH) | cut -f 1 -d " ") +DSTBIN_HASH = $(shell cat zstd 2> $(VOID) | $(HASH) | cut -f 1 -d " ") +BIN_ISDIFFERENT = $(if $(filter $(SRCBIN_HASH),$(DSTBIN_HASH)),0,1) +else +BIN_ISDIFFERENT = 1 +endif + +zstd : $(BUILD_DIR)/zstd + if [ $(BIN_ISDIFFERENT) -eq 1 ]; then \ + cp -f $< $@; \ + echo zstd build completed; \ + else \ + echo zstd already built; \ + fi + +endif # BUILD_DIR + + .PHONY: zstd-release zstd-release: DEBUGFLAGS := -DBACKTRACE_ENABLE=0 zstd-release: DEBUGFLAGS_LD := @@ -190,12 +261,12 @@ zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) ifneq (,$(filter Windows%,$(OS))) zstd32 : $(RES32_FILE) endif -zstd32 : $(ZSTDLIB_FILES) $(ZSTD_CLI_FILES) +zstd32 : $(ZSTDLIB_FULL_SRC) $(ZSTD_CLI_SRC) $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) ## zstd-nolegacy: same scope as zstd, with just support of legacy formats removed zstd-nolegacy : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD) -zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) $(ZSTD_CLI_OBJ) +zstd-nolegacy : $(ZSTDLIB_CORE_SRC) $(ZDICT_SRC) $(ZSTD_CLI_OBJ) $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) zstd-nomt : THREAD_CPP := @@ -222,12 +293,12 @@ zstd-noxz : zstd # It's unclear at this stage if this is a scenario that must be supported .PHONY: zstd-dll zstd-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd -zstd-dll : ZSTDLIB_FILES = +zstd-dll : ZSTDLIB_FULL_SRC = zstd-dll : $(ZSTD_CLI_OBJ) $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) -## zstd-pgo: zstd executable optimized with pgo. `gcc` only. +## zstd-pgo: zstd executable optimized with PGO. zstd-pgo : $(MAKE) clean $(MAKE) zstd MOREFLAGS=-fprofile-generate @@ -237,24 +308,24 @@ zstd-pgo : ./zstd -b $(PROFILE_WITH) ./zstd -b7i2 $(PROFILE_WITH) ./zstd -b5 $(PROFILE_WITH) - $(RM) zstd *.o $(ZSTDDECOMP_O) $(ZSTDDIR)/compress/*.o + $(RM) zstd *.o case $(CC) in *clang*) if ! [ -e default.profdata ]; then llvm-profdata merge -output=default.profdata default*.profraw; fi ;; esac $(MAKE) zstd MOREFLAGS=-fprofile-use ## zstd-small: minimal target, supporting only zstd compression and decompression. no bench. no legacy. no other format. zstd-small: CFLAGS = -Os -s -zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c util.c timefn.c fileio.c +zstd-frugal zstd-small: $(ZSTDLIB_CORE_SRC) zstdcli.c util.c timefn.c fileio.c $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT) -zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c timefn.c fileio.c +zstd-decompress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_DECOMPRESS_C) zstdcli.c util.c timefn.c fileio.c $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT) -zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c timefn.c fileio.c +zstd-compress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) zstdcli.c util.c timefn.c fileio.c $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT) ## zstd-dictBuilder: executable supporting dictionary creation and compression (only) zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS -zstd-dictBuilder: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) $(ZDICT_FILES) zstdcli.c util.c timefn.c fileio.c dibio.c +zstd-dictBuilder: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) $(ZDICT_SRC) zstdcli.c util.c timefn.c fileio.c dibio.c $(CC) $(FLAGS) $^ -o $@$(EXT) zstdmt: zstd @@ -274,12 +345,11 @@ endif .PHONY: clean clean: - $(MAKE) -C $(ZSTDDIR) clean - @$(RM) $(ZSTDDIR)/decompress/*.o $(ZSTDDIR)/decompress/zstd_decompress.gcda - @$(RM) core *.o tmp* result* *.gcda dictionary *.zst \ + $(RM) core *.o tmp* result* *.gcda dictionary *.zst \ zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \ zstd-small$(EXT) zstd-frugal$(EXT) zstd-nolegacy$(EXT) zstd4$(EXT) \ zstd-dictBuilder$(EXT) *.gcda default*.profraw default.profdata have_zlib$(EXT) + $(RM) -r obj/* @echo Cleaning completed MD2ROFF = ronn @@ -309,15 +379,34 @@ preview-man: clean-man man man ./zstdgrep.1 man ./zstdless.1 + +# Generate .h dependencies automatically + +DEPFLAGS = -MT $@ -MMD -MP -MF + +$(BUILD_DIR)/%.o : %.c $(BUILD_DIR)/%.d | $(BUILD_DIR) + @echo CC $@ + $(COMPILE.c) $(DEPFLAGS) $(BUILD_DIR)/$*.d $(OUTPUT_OPTION) $< + +MKDIR ?= mkdir +$(BUILD_DIR): ; $(MKDIR) -p $@ + +DEPFILES := $(ZSTD_OBJ:.o=.d) +$(DEPFILES): + +include $(wildcard $(DEPFILES)) + + + #----------------------------------------------------------------------------- # make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) +ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) EGREP_OPTIONS ?= ifeq ($HAVE_COLORNEVER, 1) -EGREP_OPTIONS += --color=never + EGREP_OPTIONS += --color=never endif EGREP = egrep $(EGREP_OPTIONS) AWK = awk @@ -328,7 +417,7 @@ AWK = awk ## list: Print all targets and their descriptions (if provided) .PHONY: list list: - @TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \ + TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \ | $(AWK) -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \ | $(EGREP) -v -e '^[^[:alnum:]]' | sort); \ { \ @@ -355,17 +444,17 @@ datarootdir ?= $(PREFIX)/share mandir ?= $(datarootdir)/man man1dir ?= $(mandir)/man1 -ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS)) -MANDIR ?= $(PREFIX)/man -MAN1DIR ?= $(MANDIR)/man1 +ifneq (,$(filter $(UNAME),OpenBSD FreeBSD NetBSD DragonFly SunOS)) + MANDIR ?= $(PREFIX)/man + MAN1DIR ?= $(MANDIR)/man1 else -MAN1DIR ?= $(man1dir) + MAN1DIR ?= $(man1dir) endif -ifneq (,$(filter $(shell uname),SunOS)) -INSTALL ?= ginstall +ifneq (,$(filter $(UNAME),SunOS)) + INSTALL ?= ginstall else -INSTALL ?= install + INSTALL ?= install endif INSTALL_PROGRAM ?= $(INSTALL) @@ -374,36 +463,39 @@ INSTALL_DATA ?= $(INSTALL) -m 644 INSTALL_MAN ?= $(INSTALL_DATA) .PHONY: install -install: zstd +install: + # generate zstd only if not already present + [ -e zstd ] || $(MAKE) zstd-release + [ -e $(DESTDIR)$(BINDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ + [ -e $(DESTDIR)$(MAN1DIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR)/ @echo Installing binaries - @$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MAN1DIR)/ - @$(INSTALL_PROGRAM) zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT) - @ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat$(EXT) - @ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/unzstd$(EXT) - @ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdmt$(EXT) - @$(INSTALL_SCRIPT) zstdless $(DESTDIR)$(BINDIR)/zstdless - @$(INSTALL_SCRIPT) zstdgrep $(DESTDIR)$(BINDIR)/zstdgrep + $(INSTALL_PROGRAM) zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT) + ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat$(EXT) + ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/unzstd$(EXT) + ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdmt$(EXT) + $(INSTALL_SCRIPT) zstdless $(DESTDIR)$(BINDIR)/zstdless + $(INSTALL_SCRIPT) zstdgrep $(DESTDIR)$(BINDIR)/zstdgrep @echo Installing man pages - @$(INSTALL_MAN) zstd.1 $(DESTDIR)$(MAN1DIR)/zstd.1 - @ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/zstdcat.1 - @ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/unzstd.1 - @$(INSTALL_MAN) zstdgrep.1 $(DESTDIR)$(MAN1DIR)/zstdgrep.1 - @$(INSTALL_MAN) zstdless.1 $(DESTDIR)$(MAN1DIR)/zstdless.1 + $(INSTALL_MAN) zstd.1 $(DESTDIR)$(MAN1DIR)/zstd.1 + ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/zstdcat.1 + ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/unzstd.1 + $(INSTALL_MAN) zstdgrep.1 $(DESTDIR)$(MAN1DIR)/zstdgrep.1 + $(INSTALL_MAN) zstdless.1 $(DESTDIR)$(MAN1DIR)/zstdless.1 @echo zstd installation completed .PHONY: uninstall uninstall: - @$(RM) $(DESTDIR)$(BINDIR)/zstdgrep - @$(RM) $(DESTDIR)$(BINDIR)/zstdless - @$(RM) $(DESTDIR)$(BINDIR)/zstdcat - @$(RM) $(DESTDIR)$(BINDIR)/unzstd - @$(RM) $(DESTDIR)$(BINDIR)/zstdmt - @$(RM) $(DESTDIR)$(BINDIR)/zstd - @$(RM) $(DESTDIR)$(MAN1DIR)/zstdless.1 - @$(RM) $(DESTDIR)$(MAN1DIR)/zstdgrep.1 - @$(RM) $(DESTDIR)$(MAN1DIR)/zstdcat.1 - @$(RM) $(DESTDIR)$(MAN1DIR)/unzstd.1 - @$(RM) $(DESTDIR)$(MAN1DIR)/zstd.1 + $(RM) $(DESTDIR)$(BINDIR)/zstdgrep + $(RM) $(DESTDIR)$(BINDIR)/zstdless + $(RM) $(DESTDIR)$(BINDIR)/zstdcat + $(RM) $(DESTDIR)$(BINDIR)/unzstd + $(RM) $(DESTDIR)$(BINDIR)/zstdmt + $(RM) $(DESTDIR)$(BINDIR)/zstd + $(RM) $(DESTDIR)$(MAN1DIR)/zstdless.1 + $(RM) $(DESTDIR)$(MAN1DIR)/zstdgrep.1 + $(RM) $(DESTDIR)$(MAN1DIR)/zstdcat.1 + $(RM) $(DESTDIR)$(MAN1DIR)/unzstd.1 + $(RM) $(DESTDIR)$(MAN1DIR)/zstd.1 @echo zstd programs successfully uninstalled endif diff --git a/programs/README.md b/programs/README.md index 53706de727b7..cf7f5ba46d12 100644 --- a/programs/README.md +++ b/programs/README.md @@ -3,7 +3,7 @@ Command Line Interface for Zstandard library Command Line Interface (CLI) can be created using the `make` command without any additional parameters. There are however other Makefile targets that create different variations of CLI: -- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and support for decompression of legacy zstd formats +- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and supports decompression of legacy zstd formats - `zstd_nolegacy` : Same as `zstd` but without support for legacy zstd formats - `zstd-small` : CLI optimized for minimal size; no dictionary builder, no benchmark, and no support for legacy zstd formats - `zstd-compress` : version of CLI which can only compress into zstd format @@ -147,71 +147,91 @@ FILE : a filename Arguments : -# : # compression level (1-19, default: 3) -d : decompression - -D file: use `file` as Dictionary - -o file: result stored into `file` (only if 1 input file) - -f : overwrite output without prompting and (de)compress links + -D DICT: use DICT as Dictionary for compression or decompression + -o file: result stored into `file` (only 1 output file) + -f : overwrite output without prompting, also (de)compress links --rm : remove source file(s) after successful de/compression -k : preserve source file(s) (default) -h/-H : display help/long help and exit Advanced arguments : -V : display Version number and exit + -c : force write to standard output, even if it is the console -v : verbose mode; specify multiple times to increase verbosity -q : suppress warnings; specify twice to suppress errors too - -c : force write to standard output, even if it is the console - -l : print information about zstd compressed files ---exclude-compressed: only compress files that are not previously compressed +--no-progress : do not display the progress counter + -r : operate recursively on directories +--filelist FILE : read list of files to operate upon from FILE +--output-dir-flat DIR : processed files are stored into DIR +--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure +--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled). If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate). +-- : All arguments after "--" are treated as files + +Advanced compression arguments : --ultra : enable levels beyond 19, up to 22 (requires more memory) --long[=#]: enable long distance matching with given window log (default: 27) --fast[=#]: switch to very fast compression levels (default: 1) --adapt : dynamically adapt compression level to I/O conditions ---stream-size=# : optimize compression parameters for streaming input of given number of bytes ---size-hint=# optimize compression parameters for streaming input of approximately this size ---target-compressed-block-size=# : make compressed block near targeted size -T# : spawns # compression threads (default: 1, 0==# cores) -B# : select size of each job (default: 0==automatic) +--single-thread : use a single thread for both I/O and compression (result slightly different than -T1) --rsyncable : compress using a rsync-friendly method (-B sets block size) ---no-dictID : don't write dictID into header (dictionary compression) ---[no-]check : integrity check (default: enabled) +--exclude-compressed: only compress files that are not already compressed +--stream-size=# : specify size of streaming input from `stdin` +--size-hint=# optimize compression parameters for streaming input of approximately this size +--target-compressed-block-size=# : generate compressed block of approximately targeted size +--no-dictID : don't write dictID into header (dictionary compression only) --[no-]compress-literals : force (un)compressed literals - -r : operate recursively on directories ---output-dir-flat[=directory]: all resulting files stored into `directory`. --format=zstd : compress files to the .zst format (default) --format=gzip : compress files to the .gz format +--format=xz : compress files to the .xz format +--format=lzma : compress files to the .lzma format +--format=lz4 : compress files to the .lz4 format + +Advanced decompression arguments : + -l : print information about zstd compressed files --test : test compressed file integrity ---[no-]sparse : sparse mode (default: disabled) -M# : Set a memory usage limit for decompression ---no-progress : do not display the progress bar --- : All arguments after "--" are treated as files +--[no-]sparse : sparse mode (default: disabled) Dictionary builder : --train ## : create a dictionary from a training set of files --train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args --train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args --train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9) - -o file : `file` is dictionary name (default: dictionary) + -o DICT : DICT is dictionary name (default: dictionary) --maxdict=# : limit dictionary to specified size (default: 112640) --dictID=# : force dictionary ID to specified value (default: random) Benchmark arguments : -b# : benchmark file(s), using # compression level (default: 3) - -e# : test all compression levels from -bX to # (default: 1) + -e# : test all compression levels successively from -b# to -e# (default: 1) -i# : minimum evaluation time in seconds (default: 3s) -B# : cut file into independent blocks of size # (default: no block) + -S : output one benchmark result per input file (default: consolidated result) --priority=rt : set process priority to real-time ``` ### Passing parameters through Environment Variables +There is no "generic" way to pass "any kind of parameter" to `zstd` in a pass-through manner. +Using environment variables for this purpose has security implications. +Therefore, this avenue is intentionally restricted and only supports `ZSTD_CLEVEL` and `ZSTD_NBTHREADS`. + `ZSTD_CLEVEL` can be used to modify the default compression level of `zstd` (usually set to `3`) to another value between 1 and 19 (the "normal" range). -This can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments. + +`ZSTD_NBTHREADS` can be used to specify a number of threads +that `zstd` will use for compression, which by default is `1`. +This functionality only exists when `zstd` is compiled with multithread support. +`0` means "use as many threads as detected cpu cores on local system". +The max # of threads is capped at: `ZSTDMT_NBWORKERS_MAX==200`. + +This functionality can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments. One such scenario is `tar --zstd`. -As `ZSTD_CLEVEL` only replaces the default compression level, -it can then be overridden by corresponding command line arguments. +As `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` only replace the default compression level +and number of threads respectively, they can both be overridden by corresponding command line arguments: +`-#` for compression level and `-T#` for number of threads. -There is no "generic" way to pass "any kind of parameter" to `zstd` in a pass-through manner. -Using environment variables for this purpose has security implications. -Therefore, this avenue is intentionally restricted and only supports `ZSTD_CLEVEL`. ### Long distance matching mode The long distance matching mode, enabled with `--long`, is designed to improve diff --git a/programs/dibio.c b/programs/dibio.c index 463095a8e813..cb3829e3e59b 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -301,7 +301,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n"); EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */ } - if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) { + if (fs.totalSizeToLoad < (unsigned long long)maxDictSize * 8) { DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n"); DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n"); } diff --git a/programs/fileio.c b/programs/fileio.c index d72879d64eae..65f2d531a81d 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -321,6 +321,25 @@ struct FIO_prefs_s { int contentSize; }; +/*-************************************* +* Parameters: FIO_ctx_t +***************************************/ + +/* typedef'd to FIO_ctx_t within fileio.h */ +struct FIO_ctx_s { + + /* file i/o info */ + int nbFilesTotal; + int hasStdinInput; + int hasStdoutOutput; + + /* file i/o state */ + int currFileIdx; + int nbFilesProcessed; + size_t totalBytesInput; + size_t totalBytesOutput; +}; + /*-************************************* * Parameters: Initialization @@ -363,11 +382,31 @@ FIO_prefs_t* FIO_createPreferences(void) return ret; } +FIO_ctx_t* FIO_createContext(void) +{ + FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t)); + if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); + + ret->currFileIdx = 0; + ret->hasStdinInput = 0; + ret->hasStdoutOutput = 0; + ret->nbFilesTotal = 1; + ret->nbFilesProcessed = 0; + ret->totalBytesInput = 0; + ret->totalBytesOutput = 0; + return ret; +} + void FIO_freePreferences(FIO_prefs_t* const prefs) { free(prefs); } +void FIO_freeContext(FIO_ctx_t* const fCtx) +{ + free(fCtx); +} + /*-************************************* * Parameters: Display Options @@ -382,6 +421,8 @@ void FIO_setNoProgress(unsigned noProgress) { g_display_prefs.noProgress = noPro * Parameters: Setters ***************************************/ +/* FIO_prefs_t functions */ + void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; } void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; } @@ -495,21 +536,49 @@ void FIO_setContentSize(FIO_prefs_t* const prefs, int value) prefs->contentSize = value != 0; } +/* FIO_ctx_t functions */ + +void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) { + fCtx->hasStdoutOutput = value; +} + +void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value) +{ + fCtx->nbFilesTotal = value; +} + +void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) { + size_t i = 0; + for ( ; i < filenames->tableSize; ++i) { + if (!strcmp(stdinmark, filenames->fileNames[i])) { + fCtx->hasStdinInput = 1; + return; + } + } +} + /*-************************************* * Functions ***************************************/ -/** FIO_remove() : +/** FIO_removeFile() : * @result : Unlink `fileName`, even if it's read-only */ -static int FIO_remove(const char* path) +static int FIO_removeFile(const char* path) { - if (!UTIL_isRegularFile(path)) { - DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s \n", path); + stat_t statbuf; + if (!UTIL_stat(path, &statbuf)) { + DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path); + return 0; + } + if (!UTIL_isRegularFileStat(&statbuf)) { + DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path); return 0; } #if defined(_WIN32) || defined(WIN32) /* windows doesn't allow remove read-only files, * so try to make it writable first */ - UTIL_chmod(path, _S_IWRITE); + if (!(statbuf.st_mode & _S_IWRITE)) { + UTIL_chmod(path, &statbuf, _S_IWRITE); + } #endif return remove(path); } @@ -519,6 +588,7 @@ static int FIO_remove(const char* path) * @result : FILE* to `srcFileName`, or NULL if it fails */ static FILE* FIO_openSrcFile(const char* srcFileName) { + stat_t statbuf; assert(srcFileName != NULL); if (!strcmp (srcFileName, stdinmark)) { DISPLAYLEVEL(4,"Using stdin for input \n"); @@ -526,14 +596,14 @@ static FILE* FIO_openSrcFile(const char* srcFileName) return stdin; } - if (!UTIL_fileExist(srcFileName)) { + if (!UTIL_stat(srcFileName, &statbuf)) { DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n", srcFileName, strerror(errno)); return NULL; } - if (!UTIL_isRegularFile(srcFileName) - && !UTIL_isFIFO(srcFileName) + if (!UTIL_isRegularFileStat(&statbuf) + && !UTIL_isFIFOStat(&statbuf) ) { DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", srcFileName); @@ -551,8 +621,8 @@ static FILE* FIO_openSrcFile(const char* srcFileName) * condition : `dstFileName` must be non-NULL. * @result : FILE* to `dstFileName`, or NULL if it fails */ static FILE* -FIO_openDstFile(FIO_prefs_t* const prefs, - const char* srcFileName, const char* dstFileName) +FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, + const char* srcFileName, const char* dstFileName) { if (prefs->testMode) return NULL; /* do not open file in test mode */ @@ -597,18 +667,12 @@ FIO_openDstFile(FIO_prefs_t* const prefs, dstFileName); return NULL; } - DISPLAY("zstd: %s already exists; overwrite (y/N) ? ", - dstFileName); - { int ch = getchar(); - if ((ch!='Y') && (ch!='y')) { - DISPLAY(" not overwritten \n"); - return NULL; - } - /* flush rest of input line */ - while ((ch!=EOF) && (ch!='\n')) ch = getchar(); - } } + DISPLAY("zstd: %s already exists; ", dstFileName); + if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput)) + return NULL; + } /* need to unlink */ - FIO_remove(dstFileName); + FIO_removeFile(dstFileName); } } { FILE* const f = fopen( dstFileName, "wb" ); @@ -618,13 +682,12 @@ FIO_openDstFile(FIO_prefs_t* const prefs, && strcmp (srcFileName, stdinmark) && strcmp(dstFileName, nulmark) ) { /* reduce rights on newly created dst file while compression is ongoing */ - UTIL_chmod(dstFileName, 00600); + UTIL_chmod(dstFileName, NULL, 00600); } return f; } } - /*! FIO_createDictBuffer() : * creates a buffer, pointed by `*bufferPtr`, * loads `filename` content into it, up to DICTSIZE_MAX bytes. @@ -669,15 +732,9 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_p * Checks for and warns if there are any files that would have the same output path */ int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { - const char **filenameTableSorted, *c, *prevElem, *filename; + const char **filenameTableSorted, *prevElem, *filename; unsigned u; - #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ - c = "\\"; - #else - c = "/"; - #endif - filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles); if (!filenameTableSorted) { DISPLAY("Unable to malloc new str array, not checking for name collisions\n"); @@ -685,7 +742,7 @@ int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { } for (u = 0; u < nbFiles; ++u) { - filename = strrchr(filenameTable[u], c[0]); + filename = strrchr(filenameTable[u], PATH_SEP); if (filename == NULL) { filenameTableSorted[u] = filenameTable[u]; } else { @@ -771,12 +828,57 @@ static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs, unsigned long long const maxSrcFileSize) { unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize)); + unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX); + if (maxSize == UTIL_FILESIZE_UNKNOWN) + EXM_THROW(42, "Using --patch-from with stdin requires --stream-size"); assert(maxSize != UTIL_FILESIZE_UNKNOWN); - if (maxSize > UINT_MAX) - EXM_THROW(42, "Can't handle files larger than %u GB\n", UINT_MAX/(1 GB) + 1); + if (maxSize > maxWindowSize) + EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB)); FIO_setMemLimit(prefs, (unsigned)maxSize); } +/* FIO_removeMultiFilesWarning() : + * Returns 1 if the console should abort, 0 if console should proceed. + * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts. + * + * If -f is specified, or there is just 1 file, zstd will always proceed as usual. + * If --rm is specified, there will be a prompt asking for user confirmation. + * If -f is specified with --rm, zstd will proceed as usual + * If -q is specified with --rm, zstd will abort pre-emptively + * If neither flag is specified, zstd will prompt the user for confirmation to proceed. + * If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q). + * However, if the output is stdout, we will always abort rather than displaying the warning prompt. + */ +static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff) +{ + int error = 0; + if (fCtx->nbFilesTotal > 1 && !prefs->overwrite) { + if (g_display_prefs.displayLevel <= displayLevelCutoff) { + if (prefs->removeSrcFile) { + DISPLAYLEVEL(1, "zstd: Aborting... not deleting files and processing into dst: %s", outFileName); + error = 1; + } + } else { + if (!strcmp(outFileName, stdoutmark)) { + DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. "); + } else { + DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s ", outFileName); + } + DISPLAYLEVEL(2, "\nThe concatenated output CANNOT regenerate the original directory tree. ") + if (prefs->removeSrcFile) { + if (fCtx->hasStdoutOutput) { + DISPLAYLEVEL(1, "\nAborting. Use -f if you really want to delete the files and output to stdout"); + error = 1; + } else { + error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput); + } + } + } + DISPLAY("\n"); + } + return error; +} + #ifndef ZSTD_NOCOMPRESS /* ********************************************************************** @@ -807,9 +909,9 @@ static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs, if (fileWindowLog > ZSTD_WINDOWLOG_MAX) DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n"); comprParams->windowLog = MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog); - if (fileWindowLog > ZSTD_cycleLog(cParams.hashLog, cParams.strategy)) { + if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) { if (!prefs->ldmFlag) - DISPLAYLEVEL(1, "long mode automaticaly triggered\n"); + DISPLAYLEVEL(1, "long mode automatically triggered\n"); FIO_setLdmFlag(prefs, 1); } if (cParams.strategy >= ZSTD_btopt) { @@ -838,8 +940,10 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, /* need to update memLimit before calling createDictBuffer * because of memLimit check inside it */ - if (prefs->patchFromMode) - FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), maxSrcFileSize, cLevel); + if (prefs->patchFromMode) { + unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize; + FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel); + } ress.dstBuffer = malloc(ress.dstBufferSize); ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */ if (!ress.srcBuffer || !ress.dstBuffer) @@ -881,6 +985,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) ); /* multi-threading */ #ifdef ZSTD_MULTITHREAD DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers); @@ -902,12 +1007,12 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, return ress; } -static void FIO_freeCResources(cRess_t ress) +static void FIO_freeCResources(const cRess_t* const ress) { - free(ress.srcBuffer); - free(ress.dstBuffer); - free(ress.dictBuffer); - ZSTD_freeCStream(ress.cctx); /* never fails */ + free(ress->srcBuffer); + free(ress->dstBuffer); + free(ress->dictBuffer); + ZSTD_freeCStream(ress->cctx); /* never fails */ } @@ -1172,7 +1277,8 @@ FIO_compressLz4Frame(cRess_t* ress, static unsigned long long -FIO_compressZstdFrame(FIO_prefs_t* const prefs, +FIO_compressZstdFrame(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, const cRess_t* ressPtr, const char* srcFileName, U64 fileSize, int compressionLevel, U64* readsize) @@ -1254,11 +1360,24 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs, (unsigned)(zfp.consumed >> 20), (unsigned)(zfp.produced >> 20), cShare ); - } else { /* summarized notifications if == 2; */ - DISPLAYLEVEL(2, "\rRead : %u ", (unsigned)(zfp.consumed >> 20)); + } else { /* summarized notifications if == 2 */ + DISPLAYLEVEL(2, "\r%79s\r", ""); /* Clear out the current displayed line */ + if (fCtx->nbFilesTotal > 1) { + size_t srcFileNameSize = strlen(srcFileName); + /* Ensure that the string we print is roughly the same size each time */ + if (srcFileNameSize > 18) { + const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; + DISPLAYLEVEL(2, "Compress: %u/%u files. Current: ...%s ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName); + } else { + DISPLAYLEVEL(2, "Compress: %u/%u files. Current: %*s ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName); + } + } + DISPLAYLEVEL(2, "Read : %2u ", (unsigned)(zfp.consumed >> 20)); if (fileSize != UTIL_FILESIZE_UNKNOWN) - DISPLAYLEVEL(2, "/ %u ", (unsigned)(fileSize >> 20)); - DISPLAYLEVEL(2, "MB ==> %2.f%% ", cShare); + DISPLAYLEVEL(2, "/ %2u ", (unsigned)(fileSize >> 20)); + DISPLAYLEVEL(2, "MB ==> %2.f%%", cShare); DELAY_NEXT_UPDATE(); } @@ -1369,7 +1488,8 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs, * 1 : missing or pb opening srcFileName */ static int -FIO_compressFilename_internal(FIO_prefs_t* const prefs, +FIO_compressFilename_internal(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, cRess_t ress, const char* dstFileName, const char* srcFileName, int compressionLevel) @@ -1385,7 +1505,7 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs, switch (prefs->compressionType) { default: case FIO_zstdCompression: - compressedfilesize = FIO_compressZstdFrame(prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize); + compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize); break; case FIO_gzipCompression: @@ -1421,18 +1541,24 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs, } /* Status */ + fCtx->totalBytesInput += (size_t)readsize; + fCtx->totalBytesOutput += (size_t)compressedfilesize; DISPLAYLEVEL(2, "\r%79s\r", ""); - if (readsize == 0) { - DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", - srcFileName, - (unsigned long long)readsize, (unsigned long long) compressedfilesize, - dstFileName); - } else { - DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", - srcFileName, - (double)compressedfilesize / readsize * 100, - (unsigned long long)readsize, (unsigned long long) compressedfilesize, - dstFileName); + if (g_display_prefs.displayLevel >= 2 && + !fCtx->hasStdoutOutput && + (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) { + if (readsize == 0) { + DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", + srcFileName, + (unsigned long long)readsize, (unsigned long long) compressedfilesize, + dstFileName); + } else { + DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", + srcFileName, + (double)compressedfilesize / readsize * 100, + (unsigned long long)readsize, (unsigned long long) compressedfilesize, + dstFileName); + } } /* Elapsed Time and CPU Load */ @@ -1457,7 +1583,8 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs, * @return : 0 : compression completed correctly, * 1 : pb */ -static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs, +static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, cRess_t ress, const char* dstFileName, const char* srcFileName, @@ -1471,7 +1598,7 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs, if (ress.dstFile == NULL) { closeDstFile = 1; DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName); - ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName); + ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName); if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ /* Must only be added after FIO_openDstFile() succeeds. * Otherwise we may delete the destination file if it already exists, @@ -1480,11 +1607,12 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs, addHandler(dstFileName); if ( strcmp (srcFileName, stdinmark) - && UTIL_getFileStat(srcFileName, &statbuf)) + && UTIL_stat(srcFileName, &statbuf) + && UTIL_isRegularFileStat(&statbuf) ) transfer_permissions = 1; } - result = FIO_compressFilename_internal(prefs, ress, dstFileName, srcFileName, compressionLevel); + result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); if (closeDstFile) { FILE* const dstFile = ress.dstFile; @@ -1498,14 +1626,11 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs, result=1; } if ( (result != 0) /* operation failure */ - && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null */ && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ ) { - FIO_remove(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */ - } else if ( strcmp(dstFileName, stdoutmark) - && strcmp(dstFileName, nulmark) - && transfer_permissions) { - DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: transfering permissions into dst: %s \n", dstFileName); + FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */ + } else if (transfer_permissions) { + DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: transferring permissions into dst: %s \n", dstFileName); UTIL_setFileStat(dstFileName, &statbuf); } else { DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: do not transfer permissions into dst: %s \n", dstFileName); @@ -1536,7 +1661,8 @@ static const char *compressedFileExtensions[] = { * 1 : missing or pb opening srcFileName */ static int -FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, +FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, cRess_t ress, const char* dstFileName, const char* srcFileName, @@ -1569,7 +1695,7 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, ress.srcFile = FIO_openSrcFile(srcFileName); if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ - result = FIO_compressFilename_dstFile(prefs, ress, dstFileName, srcFileName, compressionLevel); + result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); fclose(ress.srcFile); ress.srcFile = NULL; @@ -1581,21 +1707,22 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, * delete both the source and destination files. */ clearHandler(); - if (FIO_remove(srcFileName)) + if (FIO_removeFile(srcFileName)) EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); } return result; } -int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName, +int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName, const char* srcFileName, const char* dictFileName, int compressionLevel, ZSTD_compressionParameters comprParams) { cRess_t const ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams); - int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); + int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); +#define DISPLAY_LEVEL_DEFAULT 2 - FIO_freeCResources(ress); + FIO_freeCResources(&ress); return result; } @@ -1656,45 +1783,79 @@ static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsig * or into one file each (outFileName == NULL, but suffix != NULL), * or into a destination folder (specified with -O) */ -int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, - const char** inFileNamesTable, unsigned nbFiles, +int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** inFileNamesTable, + const char* outMirroredRootDirName, const char* outDirName, const char* outFileName, const char* suffix, const char* dictFileName, int compressionLevel, ZSTD_compressionParameters comprParams) { + int status; int error = 0; cRess_t ress = FIO_createCResources(prefs, dictFileName, - FIO_getLargestFileSize(inFileNamesTable, nbFiles), + FIO_getLargestFileSize(inFileNamesTable, fCtx->nbFilesTotal), compressionLevel, comprParams); /* init */ assert(outFileName != NULL || suffix != NULL); if (outFileName != NULL) { /* output into a single destination (stdout typically) */ - ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName); + if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { + FIO_freeCResources(&ress); + return 1; + } + ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName); if (ress.dstFile == NULL) { /* could not open outFileName */ error = 1; } else { - unsigned u; - for (u=0; u<nbFiles; u++) - error |= FIO_compressFilename_srcFile(prefs, ress, outFileName, inFileNamesTable[u], compressionLevel); + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { + status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } if (fclose(ress.dstFile)) EXM_THROW(29, "Write error (%s) : cannot properly close %s", strerror(errno), outFileName); ress.dstFile = NULL; } } else { - unsigned u; - for (u=0; u<nbFiles; u++) { - const char* const srcFileName = inFileNamesTable[u]; - const char* const dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */ - error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); + if (outMirroredRootDirName) + UTIL_mirrorSourceFilesDirectories(inFileNamesTable, fCtx->nbFilesTotal, outMirroredRootDirName); + + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { + const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx]; + const char* dstFileName = NULL; + if (outMirroredRootDirName) { + char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); + if (validMirroredDirName) { + dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix); + free(validMirroredDirName); + } else { + DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName); + error=1; + continue; + } + } else { + dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */ + } + status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + if (!status) fCtx->nbFilesProcessed++; + error |= status; } + if (outDirName) - FIO_checkFilenameCollisions(inFileNamesTable ,nbFiles); + FIO_checkFilenameCollisions(inFileNamesTable , fCtx->nbFilesTotal); + } + + if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) { + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2, "%d files compressed : %.2f%% (%6zu => %6zu bytes)\n", fCtx->nbFilesProcessed, + (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, + fCtx->totalBytesInput, fCtx->totalBytesOutput); } - FIO_freeCResources(ress); + FIO_freeCResources(&ress); return error; } @@ -1730,6 +1891,8 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi if (ress.dctx==NULL) EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); + CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); + ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); @@ -1923,7 +2086,7 @@ FIO_zstdErrorHelp(const FIO_prefs_t* const prefs, */ #define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2)) static unsigned long long -FIO_decompressZstdFrame(dRess_t* ress, FILE* finput, +FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, const FIO_prefs_t* const prefs, const char* srcFileName, U64 alreadyDecoded) /* for multi-frames streams */ @@ -1961,8 +2124,22 @@ FIO_decompressZstdFrame(dRess_t* ress, FILE* finput, /* Write block */ storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips); frameSize += outBuff.pos; - DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ", - srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); + if (!fCtx->hasStdoutOutput) { + if (fCtx->nbFilesTotal > 1) { + size_t srcFileNameSize = strlen(srcFileName); + if (srcFileNameSize > 18) { + const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; + DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: ...%s : %u MB... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); + } else { + DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: %s : %u MB... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); + } + } else { + DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ", + srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); + } + } if (inBuff.pos > 0) { memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos); @@ -2220,7 +2397,8 @@ FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, * @return : 0 : OK * 1 : error */ -static int FIO_decompressFrames(dRess_t ress, FILE* srcFile, +static int FIO_decompressFrames(FIO_ctx_t* const fCtx, + dRess_t ress, FILE* srcFile, const FIO_prefs_t* const prefs, const char* dstFileName, const char* srcFileName) { @@ -2249,7 +2427,7 @@ static int FIO_decompressFrames(dRess_t ress, FILE* srcFile, return 1; } if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) { - unsigned long long const frameSize = FIO_decompressZstdFrame(&ress, srcFile, prefs, srcFileName, filesize); + unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ @@ -2291,8 +2469,14 @@ static int FIO_decompressFrames(dRess_t ress, FILE* srcFile, } } /* for each frame */ /* Final Status */ + fCtx->totalBytesOutput += (size_t)filesize; DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); + /* No status message in pipe mode (stdin - stdout) or multi-files mode */ + if (g_display_prefs.displayLevel >= 2) { + if (fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3) { + DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); + } + } return 0; } @@ -2304,7 +2488,8 @@ static int FIO_decompressFrames(dRess_t ress, FILE* srcFile, @return : 0 : OK 1 : operation aborted */ -static int FIO_decompressDstFile(FIO_prefs_t* const prefs, +static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, dRess_t ress, FILE* srcFile, const char* dstFileName, const char* srcFileName) { @@ -2316,7 +2501,7 @@ static int FIO_decompressDstFile(FIO_prefs_t* const prefs, if ((ress.dstFile == NULL) && (prefs->testMode==0)) { releaseDstFile = 1; - ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName); + ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName); if (ress.dstFile==NULL) return 1; /* Must only be added after FIO_openDstFile() succeeds. @@ -2326,11 +2511,12 @@ static int FIO_decompressDstFile(FIO_prefs_t* const prefs, addHandler(dstFileName); if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */ - && UTIL_getFileStat(srcFileName, &statbuf) ) + && UTIL_stat(srcFileName, &statbuf) + && UTIL_isRegularFileStat(&statbuf) ) transfer_permissions = 1; } - result = FIO_decompressFrames(ress, srcFile, prefs, dstFileName, srcFileName); + result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName); if (releaseDstFile) { FILE* const dstFile = ress.dstFile; @@ -2342,15 +2528,11 @@ static int FIO_decompressDstFile(FIO_prefs_t* const prefs, } if ( (result != 0) /* operation failure */ - && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null (#316) */ && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ ) { - FIO_remove(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */ - } else { /* operation success */ - if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */ - && strcmp(dstFileName, nulmark) /* special case : don't chmod /dev/null */ - && transfer_permissions ) /* file permissions correctly extracted from src */ - UTIL_setFileStat(dstFileName, &statbuf); /* transfer file permissions from src into dst */ + FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */ + } else if ( transfer_permissions /* file permissions correctly extracted from src */ ) { + UTIL_setFileStat(dstFileName, &statbuf); /* transfer file permissions from src into dst */ } } @@ -2363,7 +2545,7 @@ static int FIO_decompressDstFile(FIO_prefs_t* const prefs, @return : 0 : OK 1 : error */ -static int FIO_decompressSrcFile(FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName) +static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName) { FILE* srcFile; int result; @@ -2377,7 +2559,7 @@ static int FIO_decompressSrcFile(FIO_prefs_t* const prefs, dRess_t ress, const c if (srcFile==NULL) return 1; ress.srcBufferLoaded = 0; - result = FIO_decompressDstFile(prefs, ress, srcFile, dstFileName, srcFileName); + result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName); /* Close file */ if (fclose(srcFile)) { @@ -2391,7 +2573,7 @@ static int FIO_decompressSrcFile(FIO_prefs_t* const prefs, dRess_t ress, const c * delete both the source and destination files. */ clearHandler(); - if (FIO_remove(srcFileName)) { + if (FIO_removeFile(srcFileName)) { /* failed to remove src file */ DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); return 1; @@ -2401,13 +2583,13 @@ static int FIO_decompressSrcFile(FIO_prefs_t* const prefs, dRess_t ress, const c -int FIO_decompressFilename(FIO_prefs_t* const prefs, +int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName, const char* srcFileName, const char* dictFileName) { dRess_t const ress = FIO_createDResources(prefs, dictFileName); - int const decodingError = FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName); + int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); FIO_freeDResources(ress); return decodingError; @@ -2416,6 +2598,9 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs, static const char *suffixList[] = { ZSTD_EXTENSION, TZSTD_EXTENSION, +#ifndef ZSTD_NODECOMPRESS + ZSTD_ALT_EXTENSION, +#endif #ifdef ZSTD_GZDECOMPRESS GZ_EXTENSION, TGZ_EXTENSION, @@ -2531,39 +2716,64 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName) /* note : dstFileNameBuffer memory is not going to be free */ } - int -FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, - const char** srcNamesTable, unsigned nbFiles, +FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** srcNamesTable, + const char* outMirroredRootDirName, const char* outDirName, const char* outFileName, const char* dictFileName) { + int status; int error = 0; dRess_t ress = FIO_createDResources(prefs, dictFileName); if (outFileName) { - unsigned u; + if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { + FIO_freeDResources(ress); + return 1; + } if (!prefs->testMode) { - ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName); + ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName); if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); } - for (u=0; u<nbFiles; u++) - error |= FIO_decompressSrcFile(prefs, ress, outFileName, srcNamesTable[u]); + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { + status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } if ((!prefs->testMode) && (fclose(ress.dstFile))) EXM_THROW(72, "Write error : %s : cannot properly close output file", strerror(errno)); } else { - unsigned u; - for (u=0; u<nbFiles; u++) { /* create dstFileName */ - const char* const srcFileName = srcNamesTable[u]; - const char* const dstFileName = FIO_determineDstName(srcFileName, outDirName); - if (dstFileName == NULL) { error=1; continue; } + if (outMirroredRootDirName) + UTIL_mirrorSourceFilesDirectories(srcNamesTable, fCtx->nbFilesTotal, outMirroredRootDirName); - error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName); + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */ + const char* const srcFileName = srcNamesTable[fCtx->currFileIdx]; + const char* dstFileName = NULL; + if (outMirroredRootDirName) { + char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); + if (validMirroredDirName) { + dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName); + free(validMirroredDirName); + } else { + DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName); + } + } else { + dstFileName = FIO_determineDstName(srcFileName, outDirName); + } + if (dstFileName == NULL) { error=1; continue; } + status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); + if (!status) fCtx->nbFilesProcessed++; + error |= status; } if (outDirName) - FIO_checkFilenameCollisions(srcNamesTable ,nbFiles); + FIO_checkFilenameCollisions(srcNamesTable , fCtx->nbFilesTotal); } + + if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0) + DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); FIO_freeDResources(ress); return error; diff --git a/programs/fileio.h b/programs/fileio.h index 2fbf01f82824..05e6d06815f7 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -44,6 +44,7 @@ extern "C" { #define ZSTD_EXTENSION ".zst" #define TZSTD_EXTENSION ".tzst" +#define ZSTD_ALT_EXTENSION ".zstd" /* allow decompression of .zstd files */ #define LZ4_EXTENSION ".lz4" #define TLZ4_EXTENSION ".tlz4" @@ -59,11 +60,18 @@ typedef struct FIO_prefs_s FIO_prefs_t; FIO_prefs_t* FIO_createPreferences(void); void FIO_freePreferences(FIO_prefs_t* const prefs); +/* Mutable struct containing relevant context and state regarding (de)compression with respect to file I/O */ +typedef struct FIO_ctx_s FIO_ctx_t; + +FIO_ctx_t* FIO_createContext(void); +void FIO_freeContext(FIO_ctx_t* const fCtx); + typedef struct FIO_display_prefs_s FIO_display_prefs_t; /*-************************************* * Parameters ***************************************/ +/* FIO_prefs_t functions */ void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType); void FIO_overwriteMode(FIO_prefs_t* const prefs); void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt); @@ -97,19 +105,24 @@ void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompresse void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value); void FIO_setContentSize(FIO_prefs_t* const prefs, int value); +/* FIO_ctx_t functions */ +void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value); +void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value); +void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames); + /*-************************************* * Single File functions ***************************************/ /** FIO_compressFilename() : * @return : 0 == ok; 1 == pb with src file. */ -int FIO_compressFilename (FIO_prefs_t* const prefs, +int FIO_compressFilename (FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* outfilename, const char* infilename, const char* dictFileName, int compressionLevel, ZSTD_compressionParameters comprParams); /** FIO_decompressFilename() : * @return : 0 == ok; 1 == pb with src file. */ -int FIO_decompressFilename (FIO_prefs_t* const prefs, +int FIO_decompressFilename (FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* outfilename, const char* infilename, const char* dictFileName); int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel); @@ -120,8 +133,10 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis ***************************************/ /** FIO_compressMultipleFilenames() : * @return : nb of missing files */ -int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, - const char** inFileNamesTable, unsigned nbFiles, +int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** inFileNamesTable, + const char* outMirroredDirName, const char* outDirName, const char* outFileName, const char* suffix, const char* dictFileName, int compressionLevel, @@ -129,8 +144,10 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, /** FIO_decompressMultipleFilenames() : * @return : nb of missing or skipped files */ -int FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, - const char** srcNamesTable, unsigned nbFiles, +int FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** srcNamesTable, + const char* outMirroredDirName, const char* outDirName, const char* outFileName, const char* dictFileName); diff --git a/programs/platform.h b/programs/platform.h index 2b4b9f2d8677..68be70bb333b 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -102,6 +102,12 @@ extern "C" { # define PLATFORM_POSIX_VERSION 1 # endif +# ifdef __UCLIBC__ +# ifndef __USE_MISC +# define __USE_MISC /* enable st_mtim on uclibc */ +# endif +# endif + # else /* non-unix target platform (like Windows) */ # define PLATFORM_POSIX_VERSION 0 # endif diff --git a/programs/timefn.h b/programs/timefn.h index eb3c130934eb..5d2818e8a1b7 100644 --- a/programs/timefn.h +++ b/programs/timefn.h @@ -28,7 +28,11 @@ extern "C" { ******************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include <stdint.h> +# if defined(_AIX) +# include <inttypes.h> +# else +# include <stdint.h> /* intptr_t */ +# endif typedef uint64_t PTime; /* Precise Time */ #else typedef unsigned long long PTime; /* does not support compilers without long long support */ diff --git a/programs/util.c b/programs/util.c index ab1abd3b1862..5386d005c26c 100644 --- a/programs/util.c +++ b/programs/util.c @@ -28,7 +28,7 @@ extern "C" { # include <io.h> /* _chmod */ #else # include <unistd.h> /* chown, stat */ -# if PLATFORM_POSIX_VERSION < 200809L +# if PLATFORM_POSIX_VERSION < 200809L || !defined(st_mtime) # include <utime.h> /* utime */ # else # include <fcntl.h> /* AT_FDCWD */ @@ -45,7 +45,6 @@ extern "C" { # include <string.h> /* strerror, memcpy */ #endif /* #ifdef _WIN32 */ - /*-**************************************** * Internal Macros ******************************************/ @@ -88,6 +87,28 @@ UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size) ******************************************/ int g_utilDisplayLevel; +int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, + const char* acceptableLetters, int hasStdinInput) { + int ch, result; + + if (hasStdinInput) { + UTIL_DISPLAY("stdin is an input - not proceeding.\n"); + return 1; + } + + UTIL_DISPLAY("%s", prompt); + ch = getchar(); + result = 0; + if (strchr(acceptableLetters, ch) == NULL) { + UTIL_DISPLAY("%s", abortMsg); + result = 1; + } + /* flush the rest */ + while ((ch!=EOF) && (ch!='\n')) + ch = getchar(); + return result; +} + /*-************************************* * Constants @@ -100,48 +121,50 @@ int g_utilDisplayLevel; * Functions ***************************************/ -int UTIL_fileExist(const char* filename) +int UTIL_stat(const char* filename, stat_t* statbuf) { - stat_t statbuf; #if defined(_MSC_VER) - int const stat_error = _stat64(filename, &statbuf); + return !_stat64(filename, statbuf); +#elif defined(__MINGW32__) && defined (__MSVCRT__) + return !_stati64(filename, statbuf); #else - int const stat_error = stat(filename, &statbuf); + return !stat(filename, statbuf); #endif - return !stat_error; } int UTIL_isRegularFile(const char* infilename) { stat_t statbuf; - return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */ + return UTIL_stat(infilename, &statbuf) && UTIL_isRegularFileStat(&statbuf); } -int UTIL_getFileStat(const char* infilename, stat_t *statbuf) +int UTIL_isRegularFileStat(const stat_t* statbuf) { - int r; #if defined(_MSC_VER) - r = _stat64(infilename, statbuf); - if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... */ + return (statbuf->st_mode & S_IFREG) != 0; #else - r = stat(infilename, statbuf); - if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */ + return S_ISREG(statbuf->st_mode) != 0; #endif - return 1; } /* like chmod, but avoid changing permission of /dev/null */ -int UTIL_chmod(char const* filename, mode_t permissions) +int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions) { - if (!strcmp(filename, "/dev/null")) return 0; /* pretend success, but don't change anything */ + stat_t localStatBuf; + if (statbuf == NULL) { + if (!UTIL_stat(filename, &localStatBuf)) return 0; + statbuf = &localStatBuf; + } + if (!UTIL_isRegularFileStat(statbuf)) return 0; /* pretend success, but don't change anything */ return chmod(filename, permissions); } -int UTIL_setFileStat(const char *filename, stat_t *statbuf) +int UTIL_setFileStat(const char *filename, const stat_t *statbuf) { int res = 0; - if (!UTIL_isRegularFile(filename)) + stat_t curStatBuf; + if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) return -1; /* set access and modification times */ @@ -169,7 +192,7 @@ int UTIL_setFileStat(const char *filename, stat_t *statbuf) res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ #endif - res += UTIL_chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */ + res += UTIL_chmod(filename, &curStatBuf, statbuf->st_mode & 07777); /* Copy file permissions */ errno = 0; return -res; /* number of errors is returned */ @@ -178,14 +201,16 @@ int UTIL_setFileStat(const char *filename, stat_t *statbuf) int UTIL_isDirectory(const char* infilename) { stat_t statbuf; + return UTIL_stat(infilename, &statbuf) && UTIL_isDirectoryStat(&statbuf); +} + +int UTIL_isDirectoryStat(const stat_t* statbuf) +{ #if defined(_MSC_VER) - int const r = _stat64(infilename, &statbuf); - if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; + return (statbuf->st_mode & _S_IFDIR) != 0; #else - int const r = stat(infilename, &statbuf); - if (!r && S_ISDIR(statbuf.st_mode)) return 1; + return S_ISDIR(statbuf->st_mode) != 0; #endif - return 0; } int UTIL_compareStr(const void *p1, const void *p2) { @@ -204,8 +229,8 @@ int UTIL_isSameFile(const char* fName1, const char* fName2) #else { stat_t file1Stat; stat_t file2Stat; - return UTIL_getFileStat(fName1, &file1Stat) - && UTIL_getFileStat(fName2, &file2Stat) + return UTIL_stat(fName1, &file1Stat) + && UTIL_stat(fName2, &file2Stat) && (file1Stat.st_dev == file2Stat.st_dev) && (file1Stat.st_ino == file2Stat.st_ino); } @@ -218,13 +243,23 @@ int UTIL_isFIFO(const char* infilename) /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ #if PLATFORM_POSIX_VERSION >= 200112L stat_t statbuf; - int const r = UTIL_getFileStat(infilename, &statbuf); - if (!r && S_ISFIFO(statbuf.st_mode)) return 1; + if (UTIL_stat(infilename, &statbuf) && UTIL_isFIFOStat(&statbuf)) return 1; #endif (void)infilename; return 0; } +/* UTIL_isFIFO : distinguish named pipes */ +int UTIL_isFIFOStat(const stat_t* statbuf) +{ +/* macro guards, as defined in : https://linux.die.net/man/2/lstat */ +#if PLATFORM_POSIX_VERSION >= 200112L + if (S_ISFIFO(statbuf->st_mode)) return 1; +#endif + (void)statbuf; + return 0; +} + int UTIL_isLink(const char* infilename) { /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ @@ -239,23 +274,22 @@ int UTIL_isLink(const char* infilename) U64 UTIL_getFileSize(const char* infilename) { - if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN; - { int r; + stat_t statbuf; + if (!UTIL_stat(infilename, &statbuf)) return UTIL_FILESIZE_UNKNOWN; + return UTIL_getFileSizeStat(&statbuf); +} + +U64 UTIL_getFileSizeStat(const stat_t* statbuf) +{ + if (!UTIL_isRegularFileStat(statbuf)) return UTIL_FILESIZE_UNKNOWN; #if defined(_MSC_VER) - struct __stat64 statbuf; - r = _stat64(infilename, &statbuf); - if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; + if (!(statbuf->st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; #elif defined(__MINGW32__) && defined (__MSVCRT__) - struct _stati64 statbuf; - r = _stati64(infilename, &statbuf); - if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; + if (!(statbuf->st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; #else - struct stat statbuf; - r = stat(infilename, &statbuf); - if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN; + if (!S_ISREG(statbuf->st_mode)) return UTIL_FILESIZE_UNKNOWN; #endif - return (U64)statbuf.st_size; - } + return (U64)statbuf->st_size; } @@ -332,11 +366,12 @@ UTIL_createFileNamesTable_fromFileName(const char* inputFileName) char* buf; size_t bufSize; size_t pos = 0; + stat_t statbuf; - if (!UTIL_fileExist(inputFileName) || !UTIL_isRegularFile(inputFileName)) + if (!UTIL_stat(inputFileName, &statbuf) || !UTIL_isRegularFileStat(&statbuf)) return NULL; - { U64 const inputFileSize = UTIL_getFileSize(inputFileName); + { U64 const inputFileSize = UTIL_getFileSizeStat(&statbuf); if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE) return NULL; bufSize = (size_t)(inputFileSize + 1); /* (+1) to add '\0' at the end of last filename */ @@ -633,6 +668,290 @@ const char* UTIL_getFileExtension(const char* infilename) return extension; } +static int pathnameHas2Dots(const char *pathname) +{ + return NULL != strstr(pathname, ".."); +} + +static int isFileNameValidForMirroredOutput(const char *filename) +{ + return !pathnameHas2Dots(filename); +} + + +#define DIR_DEFAULT_MODE 0755 +static mode_t getDirMode(const char *dirName) +{ + stat_t st; + if (!UTIL_stat(dirName, &st)) { + UTIL_DISPLAY("zstd: failed to get DIR stats %s: %s\n", dirName, strerror(errno)); + return DIR_DEFAULT_MODE; + } + if (!UTIL_isDirectoryStat(&st)) { + UTIL_DISPLAY("zstd: expected directory: %s\n", dirName); + return DIR_DEFAULT_MODE; + } + return st.st_mode; +} + +static int makeDir(const char *dir, mode_t mode) +{ +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) + int ret = _mkdir(dir); + (void) mode; +#else + int ret = mkdir(dir, mode); +#endif + if (ret != 0) { + if (errno == EEXIST) + return 0; + UTIL_DISPLAY("zstd: failed to create DIR %s: %s\n", dir, strerror(errno)); + } + return ret; +} + +/* this function requires a mutable input string */ +static void convertPathnameToDirName(char *pathname) +{ + size_t len = 0; + char* pos = NULL; + /* get dir name from pathname similar to 'dirname()' */ + assert(pathname != NULL); + + /* remove trailing '/' chars */ + len = strlen(pathname); + assert(len > 0); + while (pathname[len] == PATH_SEP) { + pathname[len] = '\0'; + len--; + } + if (len == 0) return; + + /* if input is a single file, return '.' instead. i.e. + * "xyz/abc/file.txt" => "xyz/abc" + "./file.txt" => "." + "file.txt" => "." + */ + pos = strrchr(pathname, PATH_SEP); + if (pos == NULL) { + pathname[0] = '.'; + pathname[1] = '\0'; + } else { + *pos = '\0'; + } +} + +/* pathname must be valid */ +static const char* trimLeadingRootChar(const char *pathname) +{ + assert(pathname != NULL); + if (pathname[0] == PATH_SEP) + return pathname + 1; + return pathname; +} + +/* pathname must be valid */ +static const char* trimLeadingCurrentDirConst(const char *pathname) +{ + assert(pathname != NULL); + if ((pathname[0] == '.') && (pathname[1] == PATH_SEP)) + return pathname + 2; + return pathname; +} + +static char* +trimLeadingCurrentDir(char *pathname) +{ + /* 'union charunion' can do const-cast without compiler warning */ + union charunion { + char *chr; + const char* cchr; + } ptr; + ptr.cchr = trimLeadingCurrentDirConst(pathname); + return ptr.chr; +} + +/* remove leading './' or '/' chars here */ +static const char * trimPath(const char *pathname) +{ + return trimLeadingRootChar( + trimLeadingCurrentDirConst(pathname)); +} + +static char* mallocAndJoin2Dir(const char *dir1, const char *dir2) +{ + const size_t dir1Size = strlen(dir1); + const size_t dir2Size = strlen(dir2); + char *outDirBuffer, *buffer, trailingChar; + + assert(dir1 != NULL && dir2 != NULL); + outDirBuffer = (char *) malloc(dir1Size + dir2Size + 2); + CONTROL(outDirBuffer != NULL); + + memcpy(outDirBuffer, dir1, dir1Size); + outDirBuffer[dir1Size] = '\0'; + + if (dir2[0] == '.') + return outDirBuffer; + + buffer = outDirBuffer + dir1Size; + trailingChar = *(buffer - 1); + if (trailingChar != PATH_SEP) { + *buffer = PATH_SEP; + buffer++; + } + memcpy(buffer, dir2, dir2Size); + buffer[dir2Size] = '\0'; + + return outDirBuffer; +} + +/* this function will return NULL if input srcFileName is not valid name for mirrored output path */ +char* UTIL_createMirroredDestDirName(const char* srcFileName, const char* outDirRootName) +{ + char* pathname = NULL; + if (!isFileNameValidForMirroredOutput(srcFileName)) + return NULL; + + pathname = mallocAndJoin2Dir(outDirRootName, trimPath(srcFileName)); + + convertPathnameToDirName(pathname); + return pathname; +} + +static int +mirrorSrcDir(char* srcDirName, const char* outDirName) +{ + mode_t srcMode; + int status = 0; + char* newDir = mallocAndJoin2Dir(outDirName, trimPath(srcDirName)); + if (!newDir) + return -ENOMEM; + + srcMode = getDirMode(srcDirName); + status = makeDir(newDir, srcMode); + free(newDir); + return status; +} + +static int +mirrorSrcDirRecursive(char* srcDirName, const char* outDirName) +{ + int status = 0; + char* pp = trimLeadingCurrentDir(srcDirName); + char* sp = NULL; + + while ((sp = strchr(pp, PATH_SEP)) != NULL) { + if (sp != pp) { + *sp = '\0'; + status = mirrorSrcDir(srcDirName, outDirName); + if (status != 0) + return status; + *sp = PATH_SEP; + } + pp = sp + 1; + } + status = mirrorSrcDir(srcDirName, outDirName); + return status; +} + +static void +makeMirroredDestDirsWithSameSrcDirMode(char** srcDirNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0; + for (i = 0; i < nbFile; i++) + mirrorSrcDirRecursive(srcDirNames[i], outDirName); +} + +static int +firstIsParentOrSameDirOfSecond(const char* firstDir, const char* secondDir) +{ + size_t firstDirLen = strlen(firstDir), + secondDirLen = strlen(secondDir); + return firstDirLen <= secondDirLen && + (secondDir[firstDirLen] == PATH_SEP || secondDir[firstDirLen] == '\0') && + 0 == strncmp(firstDir, secondDir, firstDirLen); +} + +static int compareDir(const void* pathname1, const void* pathname2) { + /* sort it after remove the leading '/' or './'*/ + const char* s1 = trimPath(*(char * const *) pathname1); + const char* s2 = trimPath(*(char * const *) pathname2); + return strcmp(s1, s2); +} + +static void +makeUniqueMirroredDestDirs(char** srcDirNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0, uniqueDirNr = 0; + char** uniqueDirNames = NULL; + + if (nbFile == 0) + return; + + uniqueDirNames = (char** ) malloc(nbFile * sizeof (char *)); + CONTROL(uniqueDirNames != NULL); + + /* if dirs is "a/b/c" and "a/b/c/d", we only need call: + * we just need "a/b/c/d" */ + qsort((void *)srcDirNames, nbFile, sizeof(char*), compareDir); + + uniqueDirNr = 1; + uniqueDirNames[uniqueDirNr - 1] = srcDirNames[0]; + for (i = 1; i < nbFile; i++) { + char* prevDirName = srcDirNames[i - 1]; + char* currDirName = srcDirNames[i]; + + /* note: we alwasy compare trimmed path, i.e.: + * src dir of "./foo" and "/foo" will be both saved into: + * "outDirName/foo/" */ + if (!firstIsParentOrSameDirOfSecond(trimPath(prevDirName), + trimPath(currDirName))) + uniqueDirNr++; + + /* we need maintain original src dir name instead of trimmed + * dir, so we can retrive the original src dir's mode_t */ + uniqueDirNames[uniqueDirNr - 1] = currDirName; + } + + makeMirroredDestDirsWithSameSrcDirMode(uniqueDirNames, uniqueDirNr, outDirName); + + free(uniqueDirNames); +} + +static void +makeMirroredDestDirs(char** srcFileNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0; + for (i = 0; i < nbFile; ++i) + convertPathnameToDirName(srcFileNames[i]); + makeUniqueMirroredDestDirs(srcFileNames, nbFile, outDirName); +} + +void UTIL_mirrorSourceFilesDirectories(const char** inFileNames, unsigned int nbFile, const char* outDirName) +{ + unsigned int i = 0, validFilenamesNr = 0; + char** srcFileNames = (char **) malloc(nbFile * sizeof (char *)); + CONTROL(srcFileNames != NULL); + + /* check input filenames is valid */ + for (i = 0; i < nbFile; ++i) { + if (isFileNameValidForMirroredOutput(inFileNames[i])) { + char* fname = STRDUP(inFileNames[i]); + CONTROL(fname != NULL); + srcFileNames[validFilenamesNr++] = fname; + } + } + + if (validFilenamesNr > 0) { + makeDir(outDirName, DIR_DEFAULT_MODE); + makeMirroredDestDirs(srcFileNames, validFilenamesNr, outDirName); + } + + for (i = 0; i < validFilenamesNr; i++) + free(srcFileNames[i]); + free(srcFileNames); +} FileNamesTable* UTIL_createExpandedFNT(const char** inputNames, size_t nbIfns, int followLinks) diff --git a/programs/util.h b/programs/util.h index 8e187e4f2999..25fa3f53aab4 100644 --- a/programs/util.h +++ b/programs/util.h @@ -93,6 +93,13 @@ extern "C" { ******************************************/ extern int g_utilDisplayLevel; +/** + * Displays a message prompt and returns success (0) if first character from stdin + * matches any from acceptableLetters. Otherwise, returns failure (1) and displays abortMsg. + * If any of the inputs are stdin itself, then automatically return failure (1). + */ +int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, const char* acceptableLetters, int hasStdinInput); + /*-**************************************** * File functions @@ -100,12 +107,57 @@ extern int g_utilDisplayLevel; #if defined(_MSC_VER) typedef struct __stat64 stat_t; typedef int mode_t; +#elif defined(__MINGW32__) && defined (__MSVCRT__) + typedef struct _stati64 stat_t; #else typedef struct stat stat_t; #endif +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ +#define PATH_SEP '\\' +#define STRDUP(s) _strdup(s) +#else +#define PATH_SEP '/' +#include <libgen.h> +#define STRDUP(s) strdup(s) +#endif + +/** + * Calls platform's equivalent of stat() on filename and writes info to statbuf. + * Returns success (1) or failure (0). + */ +int UTIL_stat(const char* filename, stat_t* statbuf); + +/** + * Instead of getting a file's stats, this updates them with the info in the + * provided stat_t. Currently sets owner, group, atime, and mtime. Will only + * update this info for regular files. + */ +int UTIL_setFileStat(const char* filename, const stat_t* statbuf); + +/* + * These helpers operate on a pre-populated stat_t, i.e., the result of + * calling one of the above functions. + */ + +int UTIL_isRegularFileStat(const stat_t* statbuf); +int UTIL_isDirectoryStat(const stat_t* statbuf); +int UTIL_isFIFOStat(const stat_t* statbuf); +U64 UTIL_getFileSizeStat(const stat_t* statbuf); + +/** + * Like chmod(), but only modifies regular files. Provided statbuf may be NULL, + * in which case this function will stat() the file internally, in order to + * check whether it should be modified. + */ +int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions); + +/* + * In the absence of a pre-existing stat result on the file in question, these + * functions will do a stat() call internally and then use that result to + * compute the needed information. + */ -int UTIL_fileExist(const char* filename); int UTIL_isRegularFile(const char* infilename); int UTIL_isDirectory(const char* infilename); int UTIL_isSameFile(const char* file1, const char* file2); @@ -116,11 +168,12 @@ int UTIL_isFIFO(const char* infilename); #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) U64 UTIL_getFileSize(const char* infilename); U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles); -int UTIL_getFileStat(const char* infilename, stat_t* statbuf); -int UTIL_setFileStat(const char* filename, stat_t* statbuf); -int UTIL_chmod(char const* filename, mode_t permissions); /*< like chmod, but avoid changing permission of /dev/null */ + int UTIL_compareStr(const void *p1, const void *p2); const char* UTIL_getFileExtension(const char* infilename); +void UTIL_mirrorSourceFilesDirectories(const char** fileNamesTable, unsigned int nbFiles, const char *outDirName); +char* UTIL_createMirroredDestDirName(const char* srcFileName, const char* outDirRootName); + /*-**************************************** @@ -207,6 +260,7 @@ void UTIL_refFilename(FileNamesTable* fnt, const char* filename); # define UTIL_HAS_CREATEFILELIST #elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ # define UTIL_HAS_CREATEFILELIST +# define UTIL_HAS_MIRRORFILELIST #else /* do not define UTIL_HAS_CREATEFILELIST */ #endif diff --git a/programs/zstd.1 b/programs/zstd.1 index 9ba0b4fa4162..a8fc277672b6 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -1,5 +1,5 @@ . -.TH "ZSTD" "1" "May 2020" "zstd 1.4.5" "User Commands" +.TH "ZSTD" "1" "December 2020" "zstd 1.4.8" "User Commands" . .SH "NAME" \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files @@ -99,10 +99,19 @@ Display information related to a zstd compressed file, such as size, ratio, and \fB\-#\fR: \fB#\fR compression level [1\-19] (default: 3) . .IP "\(bu" 4 +\fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\. +. +.IP "\(bu" 4 \fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\. . .IP "\(bu" 4 -\fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\. +\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBWORKERS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\. +. +.IP "\(bu" 4 +\fB\-\-single\-thread\fR: Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\. +. +.IP "\(bu" 4 +\fB\-\-adapt[=min=#,max=#]\fR : \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\. . .IP "\(bu" 4 \fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR, if not \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\. @@ -111,43 +120,40 @@ Display information related to a zstd compressed file, such as size, ratio, and Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\. . .IP "\(bu" 4 -\fB\-\-patch\-from=FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that windowSize > srcSize\. -. -.IP -Note: cannot use both this and \-D together Note: \fB\-\-long\fR mode will be automatically activated if chainLog < fileLog (fileLog being the windowLog requried to cover the whole file)\. You can also manually force it\. Node: for all levels, you can use \-\-patch\-from in \-\-single\-thread mode to improve compression ratio at the cost of speed Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (i\.e 4096), and by setting a large \fB\-\-zstd=chainLog=\fR +\fB\-D DICT\fR: use \fBDICT\fR as Dictionary to compress or decompress FILE(s) . .IP "\(bu" 4 -\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, Zstandard uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (ie\. you can increase or decrease it)\. +\fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that windowSize > srcSize\. . .IP -This is also used during compression when using with \-\-patch\-from=\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MB)\. +Note: cannot use both this and \-D together Note: \fB\-\-long\fR mode will be automatically activated if chainLog < fileLog (fileLog being the windowLog required to cover the whole file)\. You can also manually force it\. Node: for all levels, you can use \-\-patch\-from in \-\-single\-thread mode to improve compression ratio at the cost of speed Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (i\.e 4096), and by setting a large \fB\-\-zstd=chainLog=\fR . .IP "\(bu" 4 -\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\. +\fB\-\-rsyncable\fR : \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your milage may vary\. . .IP "\(bu" 4 -\fB\-\-single\-thread\fR: Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\. +\fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled) . .IP "\(bu" 4 -\fB\-\-adapt[=min=#,max=#]\fR : \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\. +\fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \-\-content\-size (meaning that the original size will be placed in the header)\. . .IP "\(bu" 4 -\fB\-\-stream\-size=#\fR : Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\. +\fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\. . .IP "\(bu" 4 -\fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\. +\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, Zstandard uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (ie\. you can increase or decrease it)\. . -.IP "\(bu" 4 -\fB\-\-rsyncable\fR : \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your milage may vary\. +.IP +This is also used during compression when using with \-\-patch\-from=\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MB)\. . .IP "\(bu" 4 -\fB\-D file\fR: use \fBfile\fR as Dictionary to compress or decompress FILE(s) +\fB\-\-stream\-size=#\fR : Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\. . .IP "\(bu" 4 -\fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\. +\fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\. . .IP "\(bu" 4 -\fB\-o file\fR: save result into \fBfile\fR (only possible with a single \fIINPUT\-FILE\fR) +\fB\-o FILE\fR: save result into \fBFILE\fR . .IP "\(bu" 4 \fB\-f\fR, \fB\-\-force\fR: overwrite output without prompting, and (de)compress symbolic links @@ -159,10 +165,7 @@ This is also used during compression when using with \-\-patch\-from=\. In this \fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\. . .IP "\(bu" 4 -\fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \-\-content\-size (meaning that the original size will be placed in the header)\. -. -.IP "\(bu" 4 -\fB\-\-rm\fR: remove source file(s) after successful compression or decompression +\fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. If used in combination with \-o, will trigger a confirmation prompt (which can be silenced with \-f), as this is a destructive operation\. . .IP "\(bu" 4 \fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\. @@ -171,25 +174,28 @@ This is also used during compression when using with \-\-patch\-from=\. In this \fB\-r\fR: operate recursively on directories . .IP "\(bu" 4 -\fB\-\-filelist=FILE\fR read a list of files to process as content from \fBFILE\fR\. Format is compatible with \fBls\fR output, with one file per line\. +\fB\-\-filelist FILE\fR read a list of files to process as content from \fBFILE\fR\. Format is compatible with \fBls\fR output, with one file per line\. . .IP "\(bu" 4 -\fB\-\-output\-dir\-flat[=dir]\fR: resulting files are stored into target \fBdir\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBdir\fR, while in combination with \fB\-f\fR, the last file will be present instead\. +\fB\-\-output\-dir\-flat DIR\fR: resulting files are stored into target \fBDIR\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBDIR\fR, while in combination with \fB\-f\fR, the last file will be present instead\. . .IP "\(bu" 4 -\fB\-\-format=FORMAT\fR: compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\. +\fB\-\-output\-dir\-mirror DIR\fR: similar to \fB\-\-output\-dir\-flat\fR, the output files are stored underneath target \fBDIR\fR directory, but this option will replicate input directory hierarchy into output \fBDIR\fR\. +. +.IP +If input directory contains "\.\.", the files in this directory will be ignored\. If input directory is an absolute directory (i\.e\. "/var/tmp/abc"), it will be stored into the "output\-dir/var/tmp/abc"\. If there are multiple input files or directories, name collision resolution will follow the same rules as \fB\-\-output\-dir\-flat\fR\. . .IP "\(bu" 4 -\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit +\fB\-\-format=FORMAT\fR: compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\. . .IP "\(bu" 4 -\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. +\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit . .IP "\(bu" 4 -\fB\-v\fR, \fB\-\-verbose\fR: verbose mode +\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\. . .IP "\(bu" 4 -\fB\-\-show\-default\-cparams\fR: Shows the default compresssion parameters that will be used for a particular src file\. If the provided src file is not a regular file (eg\. named pipe), the cli will just output the default paramters\. That is, the parameters that are used when the src size is unknown\. +\fB\-v\fR, \fB\-\-verbose\fR: verbose mode, display more information . .IP "\(bu" 4 \fB\-q\fR, \fB\-\-quiet\fR: suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\. @@ -198,7 +204,7 @@ This is also used during compression when using with \-\-patch\-from=\. In this \fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\. . .IP "\(bu" 4 -\fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled) +\fB\-\-show\-default\-cparams\fR: Shows the default compression parameters that will be used for a particular src file\. If the provided src file is not a regular file (eg\. named pipe), the cli will just output the default parameters\. That is, the parameters that are used when the src size is unknown\. . .IP "\(bu" 4 \fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files @@ -206,7 +212,16 @@ This is also used during compression when using with \-\-patch\-from=\. In this .IP "" 0 . .SS "Restricted usage of Environment Variables" -Using environment variables to set parameters has security implications\. Therefore, this avenue is intentionally restricted\. Only \fBZSTD_CLEVEL\fR is supported currently, for setting compression level\. \fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\. It can be overridden by corresponding command line arguments\. +Using environment variables to set parameters has security implications\. Therefore, this avenue is intentionally restricted\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the compression level and number of threads to use during compression, respectively\. +. +.P +\fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\. +. +.P +\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \'ZSTD_NBTHREADS\fBhas a default value of (\fR1\fB), and is capped at ZSTDMT_NBWORKERS_MAX==200\.\fRzstd` must be compiled with multithread support for this to have any effect\. +. +.P +They can both be overridden by corresponding command line arguments: \fB\-#\fR for compression level and \fB\-T#\fR for number of compression threads\. . .SH "DICTIONARY BUILDER" \fBzstd\fR offers \fIdictionary\fR compression, which greatly improves efficiency on small files and messages\. It\'s possible to train \fBzstd\fR with a set of samples, the result of which is saved into a file called a \fBdictionary\fR\. Then during compression and decompression, reference the same dictionary, using command \fB\-D dictionaryFileName\fR\. Compression of small files similar to the sample set will be greatly improved\. diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 550e2e53fe5f..73670daf6dc2 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -102,6 +102,9 @@ the last one takes effect. * `-#`: `#` compression level \[1-19] (default: 3) +* `--ultra`: + unlocks high compression levels 20+ (maximum 22), using a lot more memory. + Note that decompression will also require more memory when using these levels. * `--fast[=#]`: switch to ultra-fast compression levels. If `=#` is not present, it defaults to `1`. @@ -109,9 +112,28 @@ the last one takes effect. at the cost of some compression ratio. This setting overwrites compression level if one was set previously. Similarly, if a compression level is set after `--fast`, it overrides it. -* `--ultra`: - unlocks high compression levels 20+ (maximum 22), using a lot more memory. - Note that decompression will also require more memory when using these levels. +* `-T#`, `--threads=#`: + Compress using `#` working threads (default: 1). + If `#` is 0, attempt to detect and use the number of physical CPU cores. + In all cases, the nb of threads is capped to ZSTDMT_NBWORKERS_MAX==200. + This modifier does nothing if `zstd` is compiled without multithread support. +* `--single-thread`: + Does not spawn a thread for compression, use a single thread for both I/O and compression. + In this mode, compression is serialized with I/O, which is slightly slower. + (This is different from `-T1`, which spawns 1 compression thread in parallel of I/O). + This mode is the only one available when multithread support is disabled. + Single-thread mode features lower memory usage. + Final compressed result is slightly different from `-T1`. +* `--adapt[=min=#,max=#]` : + `zstd` will dynamically adapt compression level to perceived I/O conditions. + Compression level adaptation can be observed live by using command `-v`. + Adaptation can be constrained between supplied `min` and `max` levels. + The feature works when combined with multi-threading and `--long` mode. + It does not work with `--single-thread`. + It sets window size to 8 MB by default (can be changed manually, see `wlog`). + Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible. + _note_ : at the time of this writing, `--adapt` can remain stuck at low speed + when combined with multiple worker threads (>=2). * `--long[=#]`: enables long distance matching with `#` `windowLog`, if not `#` is not present it defaults to `27`. @@ -122,20 +144,40 @@ the last one takes effect. Note: If `windowLog` is set to larger than 27, `--long=windowLog` or `--memory=windowSize` needs to be passed to the decompressor. -* `--patch-from=FILE`: +* `-D DICT`: + use `DICT` as Dictionary to compress or decompress FILE(s) +* `--patch-from FILE`: Specify the file to be used as a reference point for zstd's diff engine. This is effectively dictionary compression with some convenient parameter selection, namely that windowSize > srcSize. Note: cannot use both this and -D together Note: `--long` mode will be automatically activated if chainLog < fileLog - (fileLog being the windowLog requried to cover the whole file). You + (fileLog being the windowLog required to cover the whole file). You can also manually force it. Node: for all levels, you can use --patch-from in --single-thread mode to improve compression ratio at the cost of speed - Note: for level 19, you can get increased compression ratio at the cost - of speed by specifying `--zstd=targetLength=` to be something large + Note: for level 19, you can get increased compression ratio at the cost + of speed by specifying `--zstd=targetLength=` to be something large (i.e 4096), and by setting a large `--zstd=chainLog=` +* `--rsyncable` : + `zstd` will periodically synchronize the compression state to make the + compressed file more rsync-friendly. There is a negligible impact to + compression ratio, and the faster compression levels will see a small + compression speed hit. + This feature does not work with `--single-thread`. You probably don't want + to use it with long range mode, since it will decrease the effectiveness of + the synchronization points, but your milage may vary. +* `-C`, `--[no-]check`: + add integrity check computed from uncompressed data (default: enabled) +* `--[no-]content-size`: + enable / disable whether or not the original size of the file is placed in + the header of the compressed file. The default option is + --content-size (meaning that the original size will be placed in the header). +* `--no-dictID`: + do not store dictionary ID within frame header (dictionary compression). + The decoder will have to rely on implicit knowledge about which dictionary to use, + it won't be able to check if it's correct. * `-M#`, `--memory=#`: Set a memory usage limit. By default, Zstandard uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can @@ -144,28 +186,6 @@ the last one takes effect. This is also used during compression when using with --patch-from=. In this case, this parameter overrides that maximum size allowed for a dictionary. (128 MB). -* `-T#`, `--threads=#`: - Compress using `#` working threads (default: 1). - If `#` is 0, attempt to detect and use the number of physical CPU cores. - In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200. - This modifier does nothing if `zstd` is compiled without multithread support. -* `--single-thread`: - Does not spawn a thread for compression, use a single thread for both I/O and compression. - In this mode, compression is serialized with I/O, which is slightly slower. - (This is different from `-T1`, which spawns 1 compression thread in parallel of I/O). - This mode is the only one available when multithread support is disabled. - Single-thread mode features lower memory usage. - Final compressed result is slightly different from `-T1`. -* `--adapt[=min=#,max=#]` : - `zstd` will dynamically adapt compression level to perceived I/O conditions. - Compression level adaptation can be observed live by using command `-v`. - Adaptation can be constrained between supplied `min` and `max` levels. - The feature works when combined with multi-threading and `--long` mode. - It does not work with `--single-thread`. - It sets window size to 8 MB by default (can be changed manually, see `wlog`). - Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible. - _note_ : at the time of this writing, `--adapt` can remain stuck at low speed - when combined with multiple worker threads (>=2). * `--stream-size=#` : Sets the pledged source size of input coming from a stream. This value must be exact, as it will be included in the produced frame header. Incorrect stream sizes will cause an error. @@ -178,22 +198,8 @@ the last one takes effect. expected. This feature allows for controlling the guess when needed. Exact guesses result in better compression ratios. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation. -* `--rsyncable` : - `zstd` will periodically synchronize the compression state to make the - compressed file more rsync-friendly. There is a negligible impact to - compression ratio, and the faster compression levels will see a small - compression speed hit. - This feature does not work with `--single-thread`. You probably don't want - to use it with long range mode, since it will decrease the effectiveness of - the synchronization points, but your milage may vary. -* `-D file`: - use `file` as Dictionary to compress or decompress FILE(s) -* `--no-dictID`: - do not store dictionary ID within frame header (dictionary compression). - The decoder will have to rely on implicit knowledge about which dictionary to use, - it won't be able to check if it's correct. -* `-o file`: - save result into `file` (only possible with a single _INPUT-FILE_) +* `-o FILE`: + save result into `FILE` * `-f`, `--force`: overwrite output without prompting, and (de)compress symbolic links * `-c`, `--stdout`: @@ -206,27 +212,34 @@ the last one takes effect. default: enabled when output is into a file, and disabled when output is stdout. This setting overrides default and can force sparse mode over stdout. -* `--[no-]content-size`: - enable / disable whether or not the original size of the file is placed in - the header of the compressed file. The default option is - --content-size (meaning that the original size will be placed in the header). * `--rm`: - remove source file(s) after successful compression or decompression + remove source file(s) after successful compression or decompression. If used in combination with + -o, will trigger a confirmation prompt (which can be silenced with -f), as this is a destructive operation. * `-k`, `--keep`: keep source file(s) after successful compression or decompression. This is the default behavior. * `-r`: operate recursively on directories -* `--filelist=FILE` +* `--filelist FILE` read a list of files to process as content from `FILE`. Format is compatible with `ls` output, with one file per line. -* `--output-dir-flat[=dir]`: - resulting files are stored into target `dir` directory, +* `--output-dir-flat DIR`: + resulting files are stored into target `DIR` directory, instead of same directory as origin file. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name. - Collision resolution ensures first file with a given name will be present in `dir`, + Collision resolution ensures first file with a given name will be present in `DIR`, while in combination with `-f`, the last file will be present instead. +* `--output-dir-mirror DIR`: + similar to `--output-dir-flat`, + the output files are stored underneath target `DIR` directory, + but this option will replicate input directory hierarchy into output `DIR`. + + If input directory contains "..", the files in this directory will be ignored. + If input directory is an absolute directory (i.e. "/var/tmp/abc"), + it will be stored into the "output-dir/var/tmp/abc". + If there are multiple input files or directories, + name collision resolution will follow the same rules as `--output-dir-flat`. * `--format=FORMAT`: compress and decompress in other formats. If compiled with support, zstd can compress to or decompress from other compression algorithm @@ -238,21 +251,19 @@ the last one takes effect. display version number and exit. Advanced : `-vV` also displays supported formats. `-vvV` also displays POSIX support. + `-q` will only display the version number, suitable for machine reading. * `-v`, `--verbose`: - verbose mode -* `--show-default-cparams`: - Shows the default compresssion parameters that will be used for a - particular src file. If the provided src file is not a regular file - (eg. named pipe), the cli will just output the default paramters. - That is, the parameters that are used when the src size is - unknown. + verbose mode, display more information * `-q`, `--quiet`: suppress warnings, interactivity, and notifications. specify twice to suppress errors too. * `--no-progress`: do not display the progress bar, but keep all other messages. -* `-C`, `--[no-]check`: - add integrity check computed from uncompressed data (default: enabled) +* `--show-default-cparams`: + Shows the default compression parameters that will be used for a + particular src file. If the provided src file is not a regular file + (eg. named pipe), the cli will just output the default parameters. + That is, the parameters that are used when the src size is unknown. * `--`: All arguments after `--` are treated as files @@ -260,11 +271,20 @@ the last one takes effect. Using environment variables to set parameters has security implications. Therefore, this avenue is intentionally restricted. -Only `ZSTD_CLEVEL` is supported currently, for setting compression level. +Only `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` are currently supported. +They set the compression level and number of threads to use during compression, respectively. + `ZSTD_CLEVEL` can be used to set the level between 1 and 19 (the "normal" range). If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a warning message. `ZSTD_CLEVEL` just replaces the default compression level (`3`). -It can be overridden by corresponding command line arguments. + +`ZSTD_NBTHREADS` can be used to set the number of threads `zstd` will attempt to use during compression. +If the value of `ZSTD_NBTHREADS` is not a valid unsigned integer, it will be ignored with a warning message. +'ZSTD_NBTHREADS` has a default value of (`1`), and is capped at ZSTDMT_NBWORKERS_MAX==200. `zstd` must be +compiled with multithread support for this to have any effect. + +They can both be overridden by corresponding command line arguments: +`-#` for compression level and `-T#` for number of compression threads. DICTIONARY BUILDER diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 70e2b70666bf..9b6f91533462 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -20,7 +20,9 @@ # define ZSTDCLI_CLEVEL_MAX 19 /* without using --ultra */ #endif - +#ifndef ZSTDCLI_NBTHREADS_DEFAULT +# define ZSTDCLI_NBTHREADS_DEFAULT 1 +#endif /*-************************************ * Dependencies @@ -146,9 +148,26 @@ static void usage_advanced(const char* programName) #ifdef UTIL_HAS_CREATEFILELIST DISPLAYOUT( " -r : operate recursively on directories \n"); - DISPLAYOUT( "--filelist=FILE : read list of files to operate upon from FILE \n"); - DISPLAYOUT( "--output-dir-flat=DIR : all resulting files are stored into DIR \n"); + DISPLAYOUT( "--filelist FILE : read list of files to operate upon from FILE \n"); + DISPLAYOUT( "--output-dir-flat DIR : processed files are stored into DIR \n"); +#endif + +#ifdef UTIL_HAS_MIRRORFILELIST + DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n"); +#endif + + +#ifndef ZSTD_NOCOMPRESS + DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)"); +#ifndef ZSTD_NODECOMPRESS + DISPLAYOUT( ". If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate)."); #endif +#else +#ifdef ZSTD_NOCOMPRESS + DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate)."); +#endif +#endif /* ZSTD_NOCOMPRESS */ + DISPLAYOUT( "\n"); DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n"); @@ -170,7 +189,6 @@ static void usage_advanced(const char* programName) DISPLAYOUT( "--size-hint=# optimize compression parameters for streaming input of approximately this size \n"); DISPLAYOUT( "--target-compressed-block-size=# : generate compressed block of approximately targeted size \n"); DISPLAYOUT( "--no-dictID : don't write dictID into header (dictionary compression only) \n"); - DISPLAYOUT( "--[no-]check : add XXH64 integrity checksum to frame (default: enabled) \n"); DISPLAYOUT( "--[no-]compress-literals : force (un)compressed literals \n"); DISPLAYOUT( "--format=zstd : compress files to the .zst format (default) \n"); @@ -544,6 +562,11 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi static void printVersion(void) { + if (g_displayLevel < DISPLAY_LEVEL_DEFAULT) { + DISPLAYOUT("%s\n", ZSTD_VERSION_STRING); + return; + } + DISPLAYOUT(WELCOME_MESSAGE); if (g_displayLevel >= 3) { /* format support */ @@ -577,6 +600,7 @@ static void printVersion(void) /* Environment variables for parameter setting */ #define ENV_CLEVEL "ZSTD_CLEVEL" +#define ENV_NBTHREADS "ZSTD_NBTHREADS" /* takes lower precedence than directly specifying -T# in the CLI */ /* pick up environment variable */ static int init_cLevel(void) { @@ -606,8 +630,51 @@ static int init_cLevel(void) { return ZSTDCLI_CLEVEL_DEFAULT; } -#define ZSTD_NB_STRATEGIES 9 +#ifdef ZSTD_MULTITHREAD +static unsigned init_nbThreads(void) { + const char* const env = getenv(ENV_NBTHREADS); + if (env != NULL) { + const char* ptr = env; + if ((*ptr>='0') && (*ptr<='9')) { + unsigned nbThreads; + if (readU32FromCharChecked(&ptr, &nbThreads)) { + DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env); + return ZSTDCLI_NBTHREADS_DEFAULT; + } else if (*ptr == 0) { + return nbThreads; + } + } + DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env); + } + + return ZSTDCLI_NBTHREADS_DEFAULT; +} +#endif + +#define NEXT_FIELD(ptr) { \ + if (*argument == '=') { \ + ptr = ++argument; \ + argument += strlen(ptr); \ + } else { \ + argNb++; \ + if (argNb >= argCount) { \ + DISPLAY("error: missing command argument \n"); \ + CLEAN_RETURN(1); \ + } \ + ptr = argv[argNb]; \ + assert(ptr != NULL); \ + if (ptr[0]=='-') { \ + DISPLAY("error: command cannot be separated from its argument by another command \n"); \ + CLEAN_RETURN(1); \ +} } } +#define NEXT_UINT32(val32) { \ + const char* __nb; \ + NEXT_FIELD(__nb); \ + val32 = readU32FromChar(&__nb); \ +} + +#define ZSTD_NB_STRATEGIES 9 static const char* ZSTD_strategyMap[ZSTD_NB_STRATEGIES + 1] = { "", "ZSTD_fast", "ZSTD_dfast", "ZSTD_greedy", "ZSTD_lazy", "ZSTD_lazy2", "ZSTD_btlazy2", "ZSTD_btopt", "ZSTD_btultra", "ZSTD_btultra2"}; @@ -630,7 +697,7 @@ int main(int const argCount, const char* argv[]) int argNb, followLinks = 0, forceStdout = 0, - lastCommand = 0, + hasStdout = 0, ldmFlag = 0, main_pause = 0, nbWorkers = 0, @@ -638,12 +705,7 @@ int main(int const argCount, const char* argv[]) adaptMin = MINCLEVEL, adaptMax = MAXCLEVEL, rsyncable = 0, - nextArgumentIsOutFileName = 0, - nextArgumentIsOutDirName = 0, - nextArgumentIsMaxDict = 0, - nextArgumentIsDictID = 0, nextArgumentsAreFiles = 0, - nextEntryIsDictionary = 0, operationResult = 0, separateFiles = 0, setRealTimePrio = 0, @@ -656,6 +718,7 @@ int main(int const argCount, const char* argv[]) size_t blockSize = 0; FIO_prefs_t* const prefs = FIO_createPreferences(); + FIO_ctx_t* const fCtx = FIO_createContext(); zstd_operation_mode operation = zom_compress; ZSTD_compressionParameters compressionParams; int cLevel = init_cLevel(); @@ -667,6 +730,7 @@ int main(int const argCount, const char* argv[]) const char* programName = argv[0]; const char* outFileName = NULL; const char* outDirName = NULL; + const char* outMirroredDirName = NULL; const char* dictFileName = NULL; const char* patchFromDictFileName = NULL; const char* suffix = ZSTD_EXTENSION; @@ -695,7 +759,7 @@ int main(int const argCount, const char* argv[]) if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); } programName = lastNameFromPath(programName); #ifdef ZSTD_MULTITHREAD - nbWorkers = 1; + nbWorkers = init_nbThreads(); #endif /* preset behaviors */ @@ -756,13 +820,10 @@ int main(int const argCount, const char* argv[]) if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; } if (!strcmp(argument, "--test")) { operation=zom_test; continue; } if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; } - if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */ - if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */ if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; } if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; } if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; } if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; } - if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; } if (!strcmp(argument, "--show-default-cparams")) { showDefaultCParams = 1; continue; } if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; } if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; } @@ -785,6 +846,7 @@ int main(int const argCount, const char* argv[]) if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; } if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; } if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; } + /* long commands with arguments */ #ifndef ZSTD_NODICT if (longCommandWArg(&argument, "--train-cover")) { @@ -821,19 +883,22 @@ int main(int const argCount, const char* argv[]) continue; } #endif - if (longCommandWArg(&argument, "--threads=")) { nbWorkers = (int)readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; } + if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; } + if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; } + if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; } if (longCommandWArg(&argument, "--block-size=")) { blockSize = readSizeTFromChar(&argument); continue; } - if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; } + if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badusage(programName); CLEAN_RETURN(1); } continue; } if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readSizeTFromChar(&argument); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; } if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; } - if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; } - if (longCommandWArg(&argument, "--patch-from=")) { patchFromDictFileName = argument; continue; } + if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; } +#ifdef UTIL_HAS_MIRRORFILELIST + if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; } +#endif + if (longCommandWArg(&argument, "--patch-from")) { NEXT_FIELD(patchFromDictFileName); continue; } if (longCommandWArg(&argument, "--long")) { unsigned ldmWindowLog = 0; ldmFlag = 1; @@ -877,8 +942,10 @@ int main(int const argCount, const char* argv[]) } #endif - if (longCommandWArg(&argument, "--filelist=")) { - UTIL_refFilename(file_of_names, argument); + if (longCommandWArg(&argument, "--filelist")) { + const char* listName; + NEXT_FIELD(listName); + UTIL_refFilename(file_of_names, listName); continue; } @@ -887,10 +954,7 @@ int main(int const argCount, const char* argv[]) argument++; while (argument[0]!=0) { - if (lastCommand) { - DISPLAY("error : command must be followed by argument \n"); - CLEAN_RETURN(1); - } + #ifndef ZSTD_NOCOMPRESS /* compression Level */ if ((*argument>='0') && (*argument<='9')) { @@ -921,7 +985,7 @@ int main(int const argCount, const char* argv[]) case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break; /* Use file content as dictionary */ - case 'D': nextEntryIsDictionary = 1; lastCommand = 1; argument++; break; + case 'D': argument++; NEXT_FIELD(dictFileName); break; /* Overwrite */ case 'f': FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; argument++; break; @@ -942,7 +1006,7 @@ int main(int const argCount, const char* argv[]) case 't': operation=zom_test; argument++; break; /* destination file name */ - case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break; + case 'o': argument++; NEXT_FIELD(outFileName); break; /* limit memory */ case 'M': @@ -1013,10 +1077,9 @@ int main(int const argCount, const char* argv[]) /* Select compressibility of synthetic sample */ case 'P': - { argument++; + argument++; compressibility = (double)readU32FromChar(&argument) / 100; - } - break; + break; /* unknown command */ default : badusage(programName); CLEAN_RETURN(1); @@ -1025,51 +1088,10 @@ int main(int const argCount, const char* argv[]) continue; } /* if (argument[0]=='-') */ - if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */ - nextArgumentIsMaxDict = 0; - lastCommand = 0; - maxDictSize = readU32FromChar(&argument); - continue; - } - - if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */ - nextArgumentIsDictID = 0; - lastCommand = 0; - dictID = readU32FromChar(&argument); - continue; - } - - if (nextEntryIsDictionary) { - nextEntryIsDictionary = 0; - lastCommand = 0; - dictFileName = argument; - continue; - } - - if (nextArgumentIsOutFileName) { - nextArgumentIsOutFileName = 0; - lastCommand = 0; - outFileName = argument; - if (!strcmp(outFileName, "-")) outFileName = stdoutmark; - continue; - } - - if (nextArgumentIsOutDirName) { - nextArgumentIsOutDirName = 0; - lastCommand = 0; - outDirName = argument; - continue; - } - /* none of the above : add filename to list */ UTIL_refFilename(filenames, argument); } - if (lastCommand) { /* forgotten argument */ - DISPLAY("error : command must be followed by argument \n"); - CLEAN_RETURN(1); - } - /* Welcome message (if verbose) */ DISPLAYLEVEL(3, WELCOME_MESSAGE); @@ -1222,7 +1244,7 @@ int main(int const argCount, const char* argv[]) /* Check if input/output defined as console; trigger an error in this case */ if (!strcmp(filenames->fileNames[0], stdinmark) && IS_CONSOLE(stdin) ) { - badusage(programName); + DISPLAYLEVEL(1, "stdin is a console, aborting\n"); CLEAN_RETURN(1); } if ( outFileName && !strcmp(outFileName, stdoutmark) @@ -1230,7 +1252,7 @@ int main(int const argCount, const char* argv[]) && !strcmp(filenames->fileNames[0], stdinmark) && !forceStdout && operation!=zom_decompress ) { - badusage(programName); + DISPLAYLEVEL(1, "stdout is a console, aborting\n"); CLEAN_RETURN(1); } @@ -1259,12 +1281,16 @@ int main(int const argCount, const char* argv[]) DISPLAY("error : can't use --patch-from=# on multiple files \n"); CLEAN_RETURN(1); } + + /* No status message in pipe mode (stdin - stdout) */ + hasStdout = outFileName && !strcmp(outFileName,stdoutmark); - /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1; - if ((filenames->tableSize > 1) & (g_displayLevel==2)) g_displayLevel=1; + if (hasStdout && (g_displayLevel==2)) g_displayLevel=1; /* IO Stream/File */ + FIO_setHasStdoutOutput(fCtx, hasStdout); + FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize); + FIO_determineHasStdinInput(fCtx, filenames); FIO_setNotificationLevel(g_displayLevel); FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL); if (memLimit == 0) { @@ -1323,9 +1349,9 @@ int main(int const argCount, const char* argv[]) } if ((filenames->tableSize==1) && outFileName) - operationResult = FIO_compressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams); + operationResult = FIO_compressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams); else - operationResult = FIO_compressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); + operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); #else (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); @@ -1333,9 +1359,9 @@ int main(int const argCount, const char* argv[]) } else { /* decompression or test */ #ifndef ZSTD_NODECOMPRESS if (filenames->tableSize == 1 && outFileName) { - operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName); + operationResult = FIO_decompressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName); } else { - operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outDirName, outFileName, dictFileName); + operationResult = FIO_decompressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, dictFileName); } #else DISPLAY("Decompression not supported \n"); @@ -1344,6 +1370,7 @@ int main(int const argCount, const char* argv[]) _end: FIO_freePreferences(prefs); + FIO_freeContext(fCtx); if (main_pause) waitEnter(); UTIL_freeFileNamesTable(filenames); UTIL_freeFileNamesTable(file_of_names); diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1 index fe1d29bbb062..4d143b598735 100644 --- a/programs/zstdgrep.1 +++ b/programs/zstdgrep.1 @@ -1,5 +1,5 @@ . -.TH "ZSTDGREP" "1" "May 2020" "zstd 1.4.5" "User Commands" +.TH "ZSTDGREP" "1" "December 2020" "zstd 1.4.8" "User Commands" . .SH "NAME" \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files diff --git a/programs/zstdless.1 b/programs/zstdless.1 index d54c6400cee2..43f235453ee6 100644 --- a/programs/zstdless.1 +++ b/programs/zstdless.1 @@ -1,5 +1,5 @@ . -.TH "ZSTDLESS" "1" "May 2020" "zstd 1.4.5" "User Commands" +.TH "ZSTDLESS" "1" "December 2020" "zstd 1.4.8" "User Commands" . .SH "NAME" \fBzstdless\fR \- view zstandard\-compressed files |
