summaryrefslogtreecommitdiff
path: root/usr.bin/mkuzip
diff options
context:
space:
mode:
authorConrad Meyer <cem@FreeBSD.org>2019-08-13 23:32:56 +0000
committerConrad Meyer <cem@FreeBSD.org>2019-08-13 23:32:56 +0000
commiteefd8f96fb3b3e378cbf948a14dcfde112f12dfe (patch)
tree43be3a9c423158f83bcb63f72fc0e628f2bb25cf /usr.bin/mkuzip
parent443b0ad7864d6c6b16eada04255311758c29f108 (diff)
downloadsrc-test-eefd8f96fb3b3e378cbf948a14dcfde112f12dfe.tar.gz
src-test-eefd8f96fb3b3e378cbf948a14dcfde112f12dfe.zip
geom_uzip(4), mkuzip(8): Add Zstd image mode
The Zstd format bumps the CLOOP major number to 4 to avoid incompatibility with older systems. Support in geom_uzip(4) is conditional on the ZSTDIO kernel option, which is enabled in amd64 GENERIC, but not all in-tree configurations. mkuzip(8) was modified slightly to always initialize the nblocks + 1'th offset in the CLOOP file format. Previously, it was only initialized in the case where the final compressed block happened to be unaligned w.r.t. DEV_BSIZE. The "Fake" last+1 block change in r298619 means that the final compressed block's 'blen' was never correct unless the compressed uzip image happened to be BSIZE-aligned. This happened in about 1 out of every 512 cases. The zlib and lzma decompressors are probably tolerant of extra trash following the frame they were told to decode, but Zstd complains that the input size is incorrect. Correspondingly, geom_uzip(4) was modified slightly to avoid trashing the nblocks + 1'th offset when it is known to be initialized to a good value. This corrects the calculated final real cluster compressed length to match that printed by mkuzip(8). mkuzip(8) was refactored somewhat to reduce code duplication and increase ease of adding other compression formats. * Input block size validation was pulled out of individual compression init routines into main(). * Init routines now validate a user-provided compression level or select an algorithm-specific default, if none was provided. * A new interface for calculating the maximal compressed size of an incompressible input block was added for each driver. The generic code uses it to validate against MAXPHYS as well as to allocate compression result buffers in the generic code. * Algorithm selection is now driven by a table lookup, to increase ease of adding other formats in the future. mkuzip(8) gained the ability to explicitly specify a compression level with '-C'. The prior defaults -- 9 for zlib and 6 for lzma -- are maintained. The new zstd default is 9, to match zlib. 
Rather than select lzma or zlib with '-L' or its absence, respectively, a new argument '-A <algorithm>' is provided to select 'zlib', 'lzma', or 'zstd'. '-L' is considered deprecated, but will probably never be removed. All of the new features were documented in mkuzip.8; the page was also cleaned up slightly. Relnotes: yes
Notes
Notes: svn path=/head/; revision=351005
Diffstat (limited to 'usr.bin/mkuzip')
-rw-r--r--usr.bin/mkuzip/Makefile6
-rw-r--r--usr.bin/mkuzip/mkuz_cfg.h2
-rw-r--r--usr.bin/mkuzip/mkuz_cloop.h2
-rw-r--r--usr.bin/mkuzip/mkuz_conveyor.c7
-rw-r--r--usr.bin/mkuzip/mkuz_format.h7
-rw-r--r--usr.bin/mkuzip/mkuz_lzma.c57
-rw-r--r--usr.bin/mkuzip/mkuz_lzma.h5
-rw-r--r--usr.bin/mkuzip/mkuz_zlib.c51
-rw-r--r--usr.bin/mkuzip/mkuz_zlib.h5
-rw-r--r--usr.bin/mkuzip/mkuz_zstd.c95
-rw-r--r--usr.bin/mkuzip/mkuz_zstd.h38
-rw-r--r--usr.bin/mkuzip/mkuzip.8166
-rw-r--r--usr.bin/mkuzip/mkuzip.c94
-rw-r--r--usr.bin/mkuzip/mkuzip.h3
14 files changed, 407 insertions, 131 deletions
diff --git a/usr.bin/mkuzip/Makefile b/usr.bin/mkuzip/Makefile
index ed31b78f2d31e..c60fd7f5ca546 100644
--- a/usr.bin/mkuzip/Makefile
+++ b/usr.bin/mkuzip/Makefile
@@ -3,10 +3,12 @@
PROG= mkuzip
MAN= mkuzip.8
SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \
- mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c
+ mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c mkuz_zstd.c
+
+CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib
#CFLAGS+= -DMKUZ_DEBUG
-LIBADD= z md lzma pthread
+LIBADD= lzma md pthread z zstd
.include <bsd.prog.mk>
diff --git a/usr.bin/mkuzip/mkuz_cfg.h b/usr.bin/mkuzip/mkuz_cfg.h
index fc88ef29198fc..a27d98a6ff60a 100644
--- a/usr.bin/mkuzip/mkuz_cfg.h
+++ b/usr.bin/mkuzip/mkuz_cfg.h
@@ -39,4 +39,6 @@ struct mkuz_cfg {
const char *iname;
off_t isize;
const struct mkuz_format *handler;
+ size_t cbound_blksz;
+ int comp_level;
};
diff --git a/usr.bin/mkuzip/mkuz_cloop.h b/usr.bin/mkuzip/mkuz_cloop.h
index 4ed7c5026391f..fabf80a53b122 100644
--- a/usr.bin/mkuzip/mkuz_cloop.h
+++ b/usr.bin/mkuzip/mkuz_cloop.h
@@ -39,9 +39,11 @@
#define CLOOP_MAJVER_2 '2'
#define CLOOP_MAJVER_3 '3'
+#define CLOOP_MAJVER_4 '4'
#define CLOOP_COMP_LIBZ 'V'
#define CLOOP_COMP_LZMA 'L'
+#define CLOOP_COMP_ZSTD 'Z'
struct cloop_header {
char magic[CLOOP_MAGIC_LEN]; /* cloop magic */
diff --git a/usr.bin/mkuzip/mkuz_conveyor.c b/usr.bin/mkuzip/mkuz_conveyor.c
index 856d445cce50a..2cfae1c0775c3 100644
--- a/usr.bin/mkuzip/mkuz_conveyor.c
+++ b/usr.bin/mkuzip/mkuz_conveyor.c
@@ -42,8 +42,8 @@ __FBSDID("$FreeBSD$");
#include "mkuz_conveyor.h"
#include "mkuz_cfg.h"
#include "mkuzip.h"
-#include "mkuz_format.h"
#include "mkuz_blk.h"
+#include "mkuz_format.h"
#include "mkuz_fqueue.h"
#include "mkuz_blk_chain.h"
@@ -67,7 +67,7 @@ cworker(void *p)
cfp = cwp->cfp;
cvp = cwp->cvp;
free(cwp);
- c_ctx = cfp->handler->f_init(cfp->blksz);
+ c_ctx = cfp->handler->f_init(&cfp->comp_level);
for (;;) {
iblk = mkuz_fqueue_deq(cvp->wrk_queue);
if (iblk == MKUZ_BLK_EOF) {
@@ -80,7 +80,8 @@ cworker(void *p)
/* All zeroes block */
oblk = mkuz_blk_ctor(0);
} else {
- oblk = cfp->handler->f_compress(c_ctx, iblk);
+ oblk = mkuz_blk_ctor(cfp->cbound_blksz);
+ cfp->handler->f_compress(c_ctx, iblk, oblk);
if (cfp->en_dedup != 0) {
compute_digest(oblk);
}
diff --git a/usr.bin/mkuzip/mkuz_format.h b/usr.bin/mkuzip/mkuz_format.h
index 817c0121ed855..ddee771eab87e 100644
--- a/usr.bin/mkuzip/mkuz_format.h
+++ b/usr.bin/mkuzip/mkuz_format.h
@@ -26,12 +26,15 @@
* $FreeBSD$
*/
-DEFINE_RAW_METHOD(f_init, void *, uint32_t);
-DEFINE_RAW_METHOD(f_compress, struct mkuz_blk *, void *, const struct mkuz_blk *);
+DEFINE_RAW_METHOD(f_compress_bound, size_t, size_t);
+DEFINE_RAW_METHOD(f_init, void *, int *);
+DEFINE_RAW_METHOD(f_compress, void, void *, const struct mkuz_blk *, struct mkuz_blk *);
struct mkuz_format {
+ const char *option;
const char *magic;
const char *default_sufx;
+ f_compress_bound_t f_compress_bound;
f_init_t f_init;
f_compress_t f_compress;
};
diff --git a/usr.bin/mkuzip/mkuz_lzma.c b/usr.bin/mkuzip/mkuz_lzma.c
index 8810d2ef0c56a..bab2820f7c38a 100644
--- a/usr.bin/mkuzip/mkuz_lzma.c
+++ b/usr.bin/mkuzip/mkuz_lzma.c
@@ -35,61 +35,55 @@ __FBSDID("$FreeBSD$");
#include <lzma.h>
#include "mkuzip.h"
-#include "mkuz_lzma.h"
#include "mkuz_blk.h"
-
-#define USED_BLOCKSIZE DEV_BSIZE
+#include "mkuz_lzma.h"
struct mkuz_lzma {
lzma_filter filters[2];
lzma_options_lzma opt_lzma;
lzma_stream strm;
- uint32_t blksz;
};
-static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT;
+size_t
+mkuz_lzma_cbound(size_t blksz)
+{
+ return (lzma_stream_buffer_bound(blksz));
+}
void *
-mkuz_lzma_init(uint32_t blksz)
+mkuz_lzma_init(int *comp_level)
{
struct mkuz_lzma *ulp;
- if (blksz % USED_BLOCKSIZE != 0) {
- errx(1, "cluster size should be multiple of %d",
- USED_BLOCKSIZE);
- /* Not reached */
- }
- if (blksz > MAXPHYS) {
- errx(1, "cluster size is too large");
+ if (*comp_level == USE_DEFAULT_LEVEL)
+ *comp_level = LZMA_PRESET_DEFAULT;
+ if (*comp_level < 0 || *comp_level > 9)
+ errx(1, "provided compression level %d is invalid",
+ *comp_level);
/* Not reached */
- }
+
ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma));
/* Init lzma encoder */
- ulp->strm = lzma_stream_init;
- if (lzma_lzma_preset(&ulp->opt_lzma, LZMA_PRESET_DEFAULT))
+ ulp->strm = (lzma_stream)LZMA_STREAM_INIT;
+ if (lzma_lzma_preset(&ulp->opt_lzma, *comp_level))
errx(1, "Error loading LZMA preset");
ulp->filters[0].id = LZMA_FILTER_LZMA2;
ulp->filters[0].options = &ulp->opt_lzma;
ulp->filters[1].id = LZMA_VLI_UNKNOWN;
- ulp->blksz = blksz;
-
return (void *)ulp;
}
-struct mkuz_blk *
-mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
+void
+mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{
lzma_ret ret;
- struct mkuz_blk *rval;
struct mkuz_lzma *ulp;
ulp = (struct mkuz_lzma *)p;
- rval = mkuz_blk_ctor(ulp->blksz * 2);
-
ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32);
if (ret != LZMA_OK) {
if (ret == LZMA_MEMLIMIT_ERROR)
@@ -99,23 +93,20 @@ mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
}
ulp->strm.next_in = iblk->data;
- ulp->strm.avail_in = ulp->blksz;
- ulp->strm.next_out = rval->data;
- ulp->strm.avail_out = rval->alen;
+ ulp->strm.avail_in = iblk->info.len;
+ ulp->strm.next_out = oblk->data;
+ ulp->strm.avail_out = oblk->alen;
ret = lzma_code(&ulp->strm, LZMA_FINISH);
- if (ret != LZMA_STREAM_END) {
- /* Error */
+ if (ret != LZMA_STREAM_END)
errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, "
- "out=%zd)", ret, (ulp->blksz - ulp->strm.avail_in),
- (ulp->blksz * 2 - ulp->strm.avail_out));
- }
+ "out=%zd)", ret, (iblk->info.len - ulp->strm.avail_in),
+ (oblk->alen - ulp->strm.avail_out));
#if 0
lzma_end(&ulp->strm);
#endif
- rval->info.len = rval->alen - ulp->strm.avail_out;
- return (rval);
+ oblk->info.len = oblk->alen - ulp->strm.avail_out;
}
diff --git a/usr.bin/mkuzip/mkuz_lzma.h b/usr.bin/mkuzip/mkuz_lzma.h
index bba45425343cb..920acc67164d5 100644
--- a/usr.bin/mkuzip/mkuz_lzma.h
+++ b/usr.bin/mkuzip/mkuz_lzma.h
@@ -38,5 +38,6 @@
"exit $?\n"
#define DEFAULT_SUFX_LZMA ".ulzma"
-void *mkuz_lzma_init(uint32_t);
-struct mkuz_blk *mkuz_lzma_compress(void *, const struct mkuz_blk *);
+size_t mkuz_lzma_cbound(size_t);
+void *mkuz_lzma_init(int *);
+void mkuz_lzma_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuz_zlib.c b/usr.bin/mkuzip/mkuz_zlib.c
index 4b191f945cdac..fa2519ffb60dd 100644
--- a/usr.bin/mkuzip/mkuz_zlib.c
+++ b/usr.bin/mkuzip/mkuz_zlib.c
@@ -34,54 +34,51 @@ __FBSDID("$FreeBSD$");
#include <zlib.h>
#include "mkuzip.h"
-#include "mkuz_zlib.h"
#include "mkuz_blk.h"
+#include "mkuz_zlib.h"
struct mkuz_zlib {
- uLongf oblen;
- uint32_t blksz;
+ int comp_level;
};
+size_t
+mkuz_zlib_cbound(size_t blksz)
+{
+ return (compressBound(blksz));
+}
+
void *
-mkuz_zlib_init(uint32_t blksz)
+mkuz_zlib_init(int *comp_level)
{
struct mkuz_zlib *zp;
- if (blksz % DEV_BSIZE != 0) {
- errx(1, "cluster size should be multiple of %d",
- DEV_BSIZE);
+ if (*comp_level == USE_DEFAULT_LEVEL)
+ *comp_level = Z_BEST_COMPRESSION;
+ if (*comp_level < Z_BEST_SPEED || *comp_level > Z_BEST_COMPRESSION)
+ errx(1, "provided compression level %d is invalid",
+ *comp_level);
/* Not reached */
- }
- if (compressBound(blksz) > MAXPHYS) {
- errx(1, "cluster size is too large");
- /* Not reached */
- }
+
zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib));
- zp->oblen = compressBound(blksz);
- zp->blksz = blksz;
+ zp->comp_level = *comp_level;
- return (void *)zp;
+ return (zp);
}
-struct mkuz_blk *
-mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk)
+void
+mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{
uLongf destlen_z;
- struct mkuz_blk *rval;
struct mkuz_zlib *zp;
zp = (struct mkuz_zlib *)p;
- rval = mkuz_blk_ctor(zp->oblen);
-
- destlen_z = rval->alen;
- if (compress2(rval->data, &destlen_z, iblk->data, zp->blksz,
- Z_BEST_COMPRESSION) != Z_OK) {
- errx(1, "can't compress data: compress2() "
- "failed");
+ destlen_z = oblk->alen;
+ if (compress2(oblk->data, &destlen_z, iblk->data, iblk->info.len,
+ zp->comp_level) != Z_OK) {
+ errx(1, "can't compress data: compress2() failed");
/* Not reached */
}
- rval->info.len = (uint32_t)destlen_z;
- return (rval);
+ oblk->info.len = (uint32_t)destlen_z;
}
diff --git a/usr.bin/mkuzip/mkuz_zlib.h b/usr.bin/mkuzip/mkuz_zlib.h
index 55e57a610b4bb..ad653b935f22d 100644
--- a/usr.bin/mkuzip/mkuz_zlib.h
+++ b/usr.bin/mkuzip/mkuz_zlib.h
@@ -32,5 +32,6 @@
"(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
"mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
-void *mkuz_zlib_init(uint32_t);
-struct mkuz_blk *mkuz_zlib_compress(void *, const struct mkuz_blk *);
+size_t mkuz_zlib_cbound(size_t);
+void *mkuz_zlib_init(int *);
+void mkuz_zlib_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuz_zstd.c b/usr.bin/mkuzip/mkuz_zstd.c
new file mode 100644
index 0000000000000..d59cb47917d29
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_zstd.c
@@ -0,0 +1,95 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <err.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <zstd.h>
+
+#include "mkuzip.h"
+#include "mkuz_blk.h"
+#include "mkuz_zstd.h"
+
+size_t
+mkuz_zstd_cbound(size_t blksz)
+{
+ return (ZSTD_compressBound(blksz));
+}
+
+void *
+mkuz_zstd_init(int *comp_level)
+{
+ ZSTD_CCtx *cctx;
+ size_t rc;
+
+ /* Default chosen for near-parity with mkuzip zlib default. */
+ if (*comp_level == USE_DEFAULT_LEVEL)
+ *comp_level = 9;
+ if (*comp_level < ZSTD_minCLevel() || *comp_level == 0 ||
+ *comp_level > ZSTD_maxCLevel())
+ errx(1, "provided compression level %d is invalid",
+ *comp_level);
+
+ cctx = ZSTD_createCCtx();
+ if (cctx == NULL)
+ errx(1, "could not allocate Zstd context");
+
+ rc = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel,
+ *comp_level);
+ if (ZSTD_isError(rc))
+ errx(1, "Could not set zstd compression level %d: %s",
+ *comp_level, ZSTD_getErrorName(rc));
+
+ rc = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
+ if (ZSTD_isError(rc))
+ errx(1, "Could not enable zstd checksum: %s",
+ ZSTD_getErrorName(rc));
+
+ return (cctx);
+}
+
+void
+mkuz_zstd_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
+{
+ ZSTD_CCtx *cctx;
+ size_t rc;
+
+ cctx = p;
+
+ rc = ZSTD_compress2(cctx, oblk->data, oblk->alen, iblk->data,
+ iblk->info.len);
+ if (ZSTD_isError(rc))
+ errx(1, "could not compress data: ZSTD_compress2: %s",
+ ZSTD_getErrorName(rc));
+
+ oblk->info.len = rc;
+}
diff --git a/usr.bin/mkuzip/mkuz_zstd.h b/usr.bin/mkuzip/mkuz_zstd.h
new file mode 100644
index 0000000000000..874e2d82812c8
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_zstd.h
@@ -0,0 +1,38 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define DEFAULT_SUFX_ZSTD ".uzst"
+
+#define CLOOP_MAGIC_ZSTD "#!/bin/sh\n#Z4.0 Format\n" \
+ "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
+ "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
+
+size_t mkuz_zstd_cbound(size_t);
+void *mkuz_zstd_init(int *);
+void mkuz_zstd_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuzip.8 b/usr.bin/mkuzip/mkuzip.8
index 8a54aee7ba577..9bf4a0c3f893a 100644
--- a/usr.bin/mkuzip/mkuzip.8
+++ b/usr.bin/mkuzip/mkuzip.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd February 19, 2019
+.Dd August 9, 2019
.Dt MKUZIP 8
.Os
.Sh NAME
@@ -35,7 +35,9 @@
class
.Sh SYNOPSIS
.Nm
-.Op Fl dLSsvZ
+.Op Fl dSsvZ
+.Op Fl A Ar compression_algorithm
+.Op Fl C Ar compression_level
.Op Fl j Ar compression_jobs
.Op Fl o Ar outfile
.Op Fl s Ar cluster_size
@@ -57,17 +59,82 @@ works in two phases:
.It
An
.Ar infile
-image is split into clusters; each cluster is compressed using
-.Xr zlib 3
-or
-.Xr lzma 3 .
+image is split into clusters; each cluster is compressed.
.It
-The resulting set of compressed clusters along with headers that allow
-locating each individual cluster is written to the output file.
+The resulting set of compressed clusters is written to the output file.
+In addition, a
+.Dq table of contents
+header is written which allows for efficient seeking.
.El
.Pp
The options are:
.Bl -tag -width indent
+.It Fl A Op Ar lzma | Ar zlib | Ar zstd
+Select a specific compression algorithm.
+If this option is not provided, the default is
+.Ar zlib .
+.Pp
+The
+.Ar lzma
+algorithm provides noticeably better compression levels than zlib on the same
+data set.
+It has vastly slower compression speed and moderately slower decompression
+speed.
+.Pp
+The
+.Ar zstd
+algorithm provides better compression levels than zlib on the same data set.
+It also has faster compression and decompression speed than zlib.
+In the very high compression
+.Dq level
+settings, it does not offer quite as high a compression ratio as
+.Ar lzma .
+However, its decompression speed does not suffer at high compression
+.Dq levels .
+.It Fl C Ar compression_level
+Select the integer compression level used to parameterize the chosen
+compression algorithm.
+.Pp
+For any given algorithm, a lesser number selects a faster compression mode.
+A greater number selects a slower compression mode.
+Typically, for the same algorithm, a greater
+.Ar compression_level
+provides better final compression ratio.
+.Pp
+For
+.Ar lzma ,
+the range of valid compression levels is
+.Va 0-9 .
+The
+.Nm
+default for lzma is
+.Va 6 .
+.Pp
+For
+.Ar zlib ,
+the range of valid compression levels is
+.Va 1-9 .
+The
+.Nm
+default for zlib is
+.Va 9 .
+.Pp
+For
+.Ar zstd ,
+the range of valid compression levels is currently
+.Va 1-19 .
+The
+.Nm
+default for zstd is
+.Va 9 .
+.It Fl d
+Enable de-duplication.
+When the option is enabled
+.Nm
+detects identical blocks in the input and replaces each subsequent occurrence
+of such a block with a pointer to the very first one in the output.
+Setting this option results in a moderate decrease of compressed image size,
+typically around 3-5% of the final size of the compressed image.
.It Fl j Ar compression_jobs
Specify the number of compression jobs that
.Nm
@@ -77,24 +144,9 @@ to the value of
.Va hw.ncpu
.Xr sysctl 8
variable.
-.It Fl d
-Enable de-duplication.
-When the option is enabled the
-.Nm
-detects identical blocks in the input and replaces each subsequent occurence
-of such block with pointer to the very first one in the output.
-Setting this option results is moderate decrease of compressed image size,
-typically around 3-5% of a final size of the compressed image.
-.It Fl L
-Use
-.Xr lzma 3
-compression algorithm instead of the default
-.Xr zlib 3 .
-The
-.Xr lzma 3
-provides noticeable better compression levels on the same data set
-at the expense of much slower compression speed (10-20x) and somewhat slower
-decompression (2-3x).
+.It Op Fl L
+Legacy flag that indicates the same thing as
+.Dq Fl A Ar lzma .
.It Fl o Ar outfile
Name of the output file
.Ar outfile .
@@ -119,33 +171,44 @@ should be a multiple of 512 bytes.
.It Fl v
Display verbose messages.
.It Fl Z
-Disable zero-blocks detection and elimination.
-When this option is set, the
+Disable zero-block detection and elimination.
+When this option is set,
.Nm
-would compress empty blocks (i.e. clusters that consist of only zero bytes)
-just as it would any other block.
-When the option is not set, the
+compresses blocks of zero bytes just as it would any other block.
+When the option is not set,
.Nm
-detects such blocks and skips them from the output.
+detects and compresses zero blocks in a space-efficient way.
Setting
.Fl Z
-results is slight increase of compressed image size, typically less than 0.1%
-of a final size of the compressed image.
+increases compressed image sizes slightly, typically less than 0.1%.
.El
-.Sh NOTES
-The compression ratio largely depends on the cluster size used.
-.\" The following two sentences are unclear: how can gzip(1) be
-.\" used in a comparable fashion, and wouldn't a gzip-compressed
-.\" image suffer from larger cluster sizes as well?
-For large cluster sizes (16K and higher), typical compression ratios
+.Sh IMPLEMENTATION NOTES
+The compression ratio largely depends on the compression algorithm, level, and
+cluster size used.
+For large cluster sizes (16kB and higher), typical overall image compression
+ratios with
+.Xr zlib 3
are only 1-2% less than those achieved with
-.Xr gzip 1 .
-However, it should be kept in mind that larger cluster
-sizes lead to higher overhead in the
+.Xr gzip 1
+over the entire image.
+However, it should be kept in mind that larger cluster sizes lead to higher
+overhead in the
.Xr geom_uzip 4
class, as the class has to decompress the whole cluster even if
only a few bytes from that cluster have to be read.
.Pp
+Additionally, the threshold at 16-32 kB where a larger cluster size does not
+benefit overall compression ratio is an artifact of the
+.Xr zlib 3
+algorithm in particular.
+.Ar Lzma
+and
+.Ar Zstd
+will continue to provide better compression ratios as cluster sizes are increased, at high enough compression levels.
+The same tradeoff continues to apply: reads in
+.Xr geom_uzip 4
+become more expensive the greater the cluster size.
+.Pp
The
.Nm
utility
@@ -169,12 +232,27 @@ specific feature and while it does not require any changes to on-disk
compressed image format, however it did require some matching changes to the
.Xr geom_uzip 4
to handle resulting images correctly.
+.Pp
+To make use of
+.Ar zstd
+.Nm
+images, the kernel must be configured with
+.Cd ZSTDIO .
+It is enabled by default in many
+.Cd GENERIC
+kernels provided as binary distributions by
+.Fx .
+The status on any particular system can be verified by checking
+.Xr sysctl 8
+.Dv kern.features.geom_uzip_zstd
+for
+.Dq 1 .
.Sh EXIT STATUS
.Ex -std
.Sh SEE ALSO
.Xr gzip 1 ,
.Xr xz 1 ,
-.Xr lzma 3 ,
+.Xr zstd 1 ,
.Xr zlib 3 ,
.Xr geom 4 ,
.Xr geom_uzip 4 ,
diff --git a/usr.bin/mkuzip/mkuzip.c b/usr.bin/mkuzip/mkuzip.c
index be0a9b23acb42..a2763e06440c5 100644
--- a/usr.bin/mkuzip/mkuzip.c
+++ b/usr.bin/mkuzip/mkuzip.c
@@ -51,8 +51,9 @@ __FBSDID("$FreeBSD$");
#include "mkuzip.h"
#include "mkuz_cloop.h"
#include "mkuz_blockcache.h"
-#include "mkuz_zlib.h"
#include "mkuz_lzma.h"
+#include "mkuz_zlib.h"
+#include "mkuz_zstd.h"
#include "mkuz_blk.h"
#include "mkuz_cfg.h"
#include "mkuz_conveyor.h"
@@ -63,18 +64,38 @@ __FBSDID("$FreeBSD$");
#define DEFAULT_CLSTSIZE 16384
-static struct mkuz_format uzip_fmt = {
- .magic = CLOOP_MAGIC_ZLIB,
- .default_sufx = DEFAULT_SUFX_ZLIB,
- .f_init = &mkuz_zlib_init,
- .f_compress = &mkuz_zlib_compress
+enum UZ_ALGORITHM {
+ UZ_ZLIB = 0,
+ UZ_LZMA,
+ UZ_ZSTD,
+ UZ_INVALID
};
-static struct mkuz_format ulzma_fmt = {
- .magic = CLOOP_MAGIC_LZMA,
- .default_sufx = DEFAULT_SUFX_LZMA,
- .f_init = &mkuz_lzma_init,
- .f_compress = &mkuz_lzma_compress
+static const struct mkuz_format uzip_fmts[] = {
+ [UZ_ZLIB] = {
+ .option = "zlib",
+ .magic = CLOOP_MAGIC_ZLIB,
+ .default_sufx = DEFAULT_SUFX_ZLIB,
+ .f_compress_bound = mkuz_zlib_cbound,
+ .f_init = mkuz_zlib_init,
+ .f_compress = mkuz_zlib_compress,
+ },
+ [UZ_LZMA] = {
+ .option = "lzma",
+ .magic = CLOOP_MAGIC_LZMA,
+ .default_sufx = DEFAULT_SUFX_LZMA,
+ .f_compress_bound = mkuz_lzma_cbound,
+ .f_init = mkuz_lzma_init,
+ .f_compress = mkuz_lzma_compress,
+ },
+ [UZ_ZSTD] = {
+ .option = "zstd",
+ .magic = CLOOP_MAGIC_ZSTD,
+ .default_sufx = DEFAULT_SUFX_ZSTD,
+ .f_compress_bound = mkuz_zstd_cbound,
+ .f_init = mkuz_zstd_init,
+ .f_compress = mkuz_zstd_compress,
+ },
};
static struct mkuz_blk *readblock(int, u_int32_t);
@@ -111,6 +132,8 @@ int main(int argc, char **argv)
struct mkuz_blk_info *chit;
size_t ncpusz, ncpu, magiclen;
double st, et;
+ enum UZ_ALGORITHM comp_alg;
+ int comp_level;
st = getdtime();
@@ -129,12 +152,27 @@ int main(int argc, char **argv)
cfs.en_dedup = 0;
summary.en = 0;
summary.f = stderr;
- cfs.handler = &uzip_fmt;
+ comp_alg = UZ_ZLIB;
+ comp_level = USE_DEFAULT_LEVEL;
cfs.nworkers = ncpu;
struct mkuz_blk *iblk, *oblk;
- while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
+ while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
switch(opt) {
+ case 'A':
+ for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
+ if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
+ break;
+ }
+ if (tmp == UZ_INVALID)
+ errx(1, "invalid algorithm specified: %s",
+ optarg);
+ /* Not reached */
+ comp_alg = tmp;
+ break;
+ case 'C':
+ comp_level = atoi(optarg);
+ break;
case 'o':
oname = optarg;
break;
@@ -162,7 +200,7 @@ int main(int argc, char **argv)
break;
case 'L':
- cfs.handler = &ulzma_fmt;
+ comp_alg = UZ_LZMA;
break;
case 'S':
@@ -193,16 +231,32 @@ int main(int argc, char **argv)
/* Not reached */
}
+ cfs.handler = &uzip_fmts[comp_alg];
+
magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
assert(magiclen < sizeof(hdr.magic));
if (cfs.en_dedup != 0) {
- hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
+ /*
+ * Dedupe requires a version 3 format. Don't downgrade newer
+ * formats.
+ */
+ if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
+ hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
hdr.magic[CLOOP_OFS_COMPR] =
tolower(hdr.magic[CLOOP_OFS_COMPR]);
}
- c_ctx = cfs.handler->f_init(cfs.blksz);
+ if (cfs.blksz % DEV_BSIZE != 0)
+ errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
+
+ cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
+ if (cfs.cbound_blksz > MAXPHYS)
+ errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
+ cfs.cbound_blksz, (size_t)MAXPHYS);
+
+ c_ctx = cfs.handler->f_init(&comp_level);
+ cfs.comp_level = comp_level;
cfs.iname = argv[0];
if (oname == NULL) {
@@ -239,6 +293,14 @@ int main(int argc, char **argv)
}
toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
+ /*
+ * Initialize last+1 entry with non-heap trash. If final padding is
+ * added later, it may or may not be overwritten with an offset
+ * representing the length of the final compressed block. If not,
+ * initialize to a defined value.
+ */
+ toc[hdr.nblocks] = 0;
+
cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
if (cfs.fdw < 0) {
diff --git a/usr.bin/mkuzip/mkuzip.h b/usr.bin/mkuzip/mkuzip.h
index f41507c86964b..b4bec58525eed 100644
--- a/usr.bin/mkuzip/mkuzip.h
+++ b/usr.bin/mkuzip/mkuzip.h
@@ -28,6 +28,9 @@
#define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args)
+/* Use an algorithm-specific default level if no explicit level is selected. */
+#define USE_DEFAULT_LEVEL INT_MIN
+
void *mkuz_safe_malloc(size_t);
void *mkuz_safe_zmalloc(size_t);
int mkuz_memvcmp(const void *, unsigned char, size_t);