summaryrefslogtreecommitdiff
path: root/usr.bin/mkuzip
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/mkuzip')
-rw-r--r--usr.bin/mkuzip/Makefile6
-rw-r--r--usr.bin/mkuzip/mkuz_cfg.h2
-rw-r--r--usr.bin/mkuzip/mkuz_cloop.h2
-rw-r--r--usr.bin/mkuzip/mkuz_conveyor.c7
-rw-r--r--usr.bin/mkuzip/mkuz_format.h7
-rw-r--r--usr.bin/mkuzip/mkuz_lzma.c57
-rw-r--r--usr.bin/mkuzip/mkuz_lzma.h5
-rw-r--r--usr.bin/mkuzip/mkuz_zlib.c51
-rw-r--r--usr.bin/mkuzip/mkuz_zlib.h5
-rw-r--r--usr.bin/mkuzip/mkuz_zstd.c95
-rw-r--r--usr.bin/mkuzip/mkuz_zstd.h38
-rw-r--r--usr.bin/mkuzip/mkuzip.8166
-rw-r--r--usr.bin/mkuzip/mkuzip.c94
-rw-r--r--usr.bin/mkuzip/mkuzip.h3
14 files changed, 407 insertions, 131 deletions
diff --git a/usr.bin/mkuzip/Makefile b/usr.bin/mkuzip/Makefile
index ed31b78f2d31..c60fd7f5ca54 100644
--- a/usr.bin/mkuzip/Makefile
+++ b/usr.bin/mkuzip/Makefile
@@ -3,10 +3,12 @@
PROG= mkuzip
MAN= mkuzip.8
SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \
- mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c
+ mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c mkuz_zstd.c
+
+CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib
#CFLAGS+= -DMKUZ_DEBUG
-LIBADD= z md lzma pthread
+LIBADD= lzma md pthread z zstd
.include <bsd.prog.mk>
diff --git a/usr.bin/mkuzip/mkuz_cfg.h b/usr.bin/mkuzip/mkuz_cfg.h
index fc88ef29198f..a27d98a6ff60 100644
--- a/usr.bin/mkuzip/mkuz_cfg.h
+++ b/usr.bin/mkuzip/mkuz_cfg.h
@@ -39,4 +39,6 @@ struct mkuz_cfg {
const char *iname;
off_t isize;
const struct mkuz_format *handler;
+ size_t cbound_blksz;
+ int comp_level;
};
diff --git a/usr.bin/mkuzip/mkuz_cloop.h b/usr.bin/mkuzip/mkuz_cloop.h
index 4ed7c5026391..fabf80a53b12 100644
--- a/usr.bin/mkuzip/mkuz_cloop.h
+++ b/usr.bin/mkuzip/mkuz_cloop.h
@@ -39,9 +39,11 @@
#define CLOOP_MAJVER_2 '2'
#define CLOOP_MAJVER_3 '3'
+#define CLOOP_MAJVER_4 '4'
#define CLOOP_COMP_LIBZ 'V'
#define CLOOP_COMP_LZMA 'L'
+#define CLOOP_COMP_ZSTD 'Z'
struct cloop_header {
char magic[CLOOP_MAGIC_LEN]; /* cloop magic */
diff --git a/usr.bin/mkuzip/mkuz_conveyor.c b/usr.bin/mkuzip/mkuz_conveyor.c
index 856d445cce50..2cfae1c0775c 100644
--- a/usr.bin/mkuzip/mkuz_conveyor.c
+++ b/usr.bin/mkuzip/mkuz_conveyor.c
@@ -42,8 +42,8 @@ __FBSDID("$FreeBSD$");
#include "mkuz_conveyor.h"
#include "mkuz_cfg.h"
#include "mkuzip.h"
-#include "mkuz_format.h"
#include "mkuz_blk.h"
+#include "mkuz_format.h"
#include "mkuz_fqueue.h"
#include "mkuz_blk_chain.h"
@@ -67,7 +67,7 @@ cworker(void *p)
cfp = cwp->cfp;
cvp = cwp->cvp;
free(cwp);
- c_ctx = cfp->handler->f_init(cfp->blksz);
+ c_ctx = cfp->handler->f_init(&cfp->comp_level);
for (;;) {
iblk = mkuz_fqueue_deq(cvp->wrk_queue);
if (iblk == MKUZ_BLK_EOF) {
@@ -80,7 +80,8 @@ cworker(void *p)
/* All zeroes block */
oblk = mkuz_blk_ctor(0);
} else {
- oblk = cfp->handler->f_compress(c_ctx, iblk);
+ oblk = mkuz_blk_ctor(cfp->cbound_blksz);
+ cfp->handler->f_compress(c_ctx, iblk, oblk);
if (cfp->en_dedup != 0) {
compute_digest(oblk);
}
diff --git a/usr.bin/mkuzip/mkuz_format.h b/usr.bin/mkuzip/mkuz_format.h
index 817c0121ed85..ddee771eab87 100644
--- a/usr.bin/mkuzip/mkuz_format.h
+++ b/usr.bin/mkuzip/mkuz_format.h
@@ -26,12 +26,15 @@
* $FreeBSD$
*/
-DEFINE_RAW_METHOD(f_init, void *, uint32_t);
-DEFINE_RAW_METHOD(f_compress, struct mkuz_blk *, void *, const struct mkuz_blk *);
+DEFINE_RAW_METHOD(f_compress_bound, size_t, size_t);
+DEFINE_RAW_METHOD(f_init, void *, int *);
+DEFINE_RAW_METHOD(f_compress, void, void *, const struct mkuz_blk *, struct mkuz_blk *);
struct mkuz_format {
+ const char *option;
const char *magic;
const char *default_sufx;
+ f_compress_bound_t f_compress_bound;
f_init_t f_init;
f_compress_t f_compress;
};
diff --git a/usr.bin/mkuzip/mkuz_lzma.c b/usr.bin/mkuzip/mkuz_lzma.c
index 8810d2ef0c56..bab2820f7c38 100644
--- a/usr.bin/mkuzip/mkuz_lzma.c
+++ b/usr.bin/mkuzip/mkuz_lzma.c
@@ -35,61 +35,55 @@ __FBSDID("$FreeBSD$");
#include <lzma.h>
#include "mkuzip.h"
-#include "mkuz_lzma.h"
#include "mkuz_blk.h"
-
-#define USED_BLOCKSIZE DEV_BSIZE
+#include "mkuz_lzma.h"
struct mkuz_lzma {
lzma_filter filters[2];
lzma_options_lzma opt_lzma;
lzma_stream strm;
- uint32_t blksz;
};
-static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT;
+size_t
+mkuz_lzma_cbound(size_t blksz)
+{
+ return (lzma_stream_buffer_bound(blksz));
+}
void *
-mkuz_lzma_init(uint32_t blksz)
+mkuz_lzma_init(int *comp_level)
{
struct mkuz_lzma *ulp;
- if (blksz % USED_BLOCKSIZE != 0) {
- errx(1, "cluster size should be multiple of %d",
- USED_BLOCKSIZE);
- /* Not reached */
- }
- if (blksz > MAXPHYS) {
- errx(1, "cluster size is too large");
+ if (*comp_level == USE_DEFAULT_LEVEL)
+ *comp_level = LZMA_PRESET_DEFAULT;
+ if (*comp_level < 0 || *comp_level > 9)
+ errx(1, "provided compression level %d is invalid",
+ *comp_level);
/* Not reached */
- }
+
ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma));
/* Init lzma encoder */
- ulp->strm = lzma_stream_init;
- if (lzma_lzma_preset(&ulp->opt_lzma, LZMA_PRESET_DEFAULT))
+ ulp->strm = (lzma_stream)LZMA_STREAM_INIT;
+ if (lzma_lzma_preset(&ulp->opt_lzma, *comp_level))
errx(1, "Error loading LZMA preset");
ulp->filters[0].id = LZMA_FILTER_LZMA2;
ulp->filters[0].options = &ulp->opt_lzma;
ulp->filters[1].id = LZMA_VLI_UNKNOWN;
- ulp->blksz = blksz;
-
return (void *)ulp;
}
-struct mkuz_blk *
-mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
+void
+mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{
lzma_ret ret;
- struct mkuz_blk *rval;
struct mkuz_lzma *ulp;
ulp = (struct mkuz_lzma *)p;
- rval = mkuz_blk_ctor(ulp->blksz * 2);
-
ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32);
if (ret != LZMA_OK) {
if (ret == LZMA_MEMLIMIT_ERROR)
@@ -99,23 +93,20 @@ mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk)
}
ulp->strm.next_in = iblk->data;
- ulp->strm.avail_in = ulp->blksz;
- ulp->strm.next_out = rval->data;
- ulp->strm.avail_out = rval->alen;
+ ulp->strm.avail_in = iblk->info.len;
+ ulp->strm.next_out = oblk->data;
+ ulp->strm.avail_out = oblk->alen;
ret = lzma_code(&ulp->strm, LZMA_FINISH);
- if (ret != LZMA_STREAM_END) {
- /* Error */
+ if (ret != LZMA_STREAM_END)
errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, "
- "out=%zd)", ret, (ulp->blksz - ulp->strm.avail_in),
- (ulp->blksz * 2 - ulp->strm.avail_out));
- }
+ "out=%zd)", ret, (iblk->info.len - ulp->strm.avail_in),
+ (oblk->alen - ulp->strm.avail_out));
#if 0
lzma_end(&ulp->strm);
#endif
- rval->info.len = rval->alen - ulp->strm.avail_out;
- return (rval);
+ oblk->info.len = oblk->alen - ulp->strm.avail_out;
}
diff --git a/usr.bin/mkuzip/mkuz_lzma.h b/usr.bin/mkuzip/mkuz_lzma.h
index bba45425343c..920acc67164d 100644
--- a/usr.bin/mkuzip/mkuz_lzma.h
+++ b/usr.bin/mkuzip/mkuz_lzma.h
@@ -38,5 +38,6 @@
"exit $?\n"
#define DEFAULT_SUFX_LZMA ".ulzma"
-void *mkuz_lzma_init(uint32_t);
-struct mkuz_blk *mkuz_lzma_compress(void *, const struct mkuz_blk *);
+size_t mkuz_lzma_cbound(size_t);
+void *mkuz_lzma_init(int *);
+void mkuz_lzma_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuz_zlib.c b/usr.bin/mkuzip/mkuz_zlib.c
index 4b191f945cda..fa2519ffb60d 100644
--- a/usr.bin/mkuzip/mkuz_zlib.c
+++ b/usr.bin/mkuzip/mkuz_zlib.c
@@ -34,54 +34,51 @@ __FBSDID("$FreeBSD$");
#include <zlib.h>
#include "mkuzip.h"
-#include "mkuz_zlib.h"
#include "mkuz_blk.h"
+#include "mkuz_zlib.h"
struct mkuz_zlib {
- uLongf oblen;
- uint32_t blksz;
+ int comp_level;
};
+size_t
+mkuz_zlib_cbound(size_t blksz)
+{
+ return (compressBound(blksz));
+}
+
void *
-mkuz_zlib_init(uint32_t blksz)
+mkuz_zlib_init(int *comp_level)
{
struct mkuz_zlib *zp;
- if (blksz % DEV_BSIZE != 0) {
- errx(1, "cluster size should be multiple of %d",
- DEV_BSIZE);
+ if (*comp_level == USE_DEFAULT_LEVEL)
+ *comp_level = Z_BEST_COMPRESSION;
+ if (*comp_level < Z_BEST_SPEED || *comp_level > Z_BEST_COMPRESSION)
+ errx(1, "provided compression level %d is invalid",
+ *comp_level);
/* Not reached */
- }
- if (compressBound(blksz) > MAXPHYS) {
- errx(1, "cluster size is too large");
- /* Not reached */
- }
+
zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib));
- zp->oblen = compressBound(blksz);
- zp->blksz = blksz;
+ zp->comp_level = *comp_level;
- return (void *)zp;
+ return (zp);
}
-struct mkuz_blk *
-mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk)
+void
+mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
{
uLongf destlen_z;
- struct mkuz_blk *rval;
struct mkuz_zlib *zp;
zp = (struct mkuz_zlib *)p;
- rval = mkuz_blk_ctor(zp->oblen);
-
- destlen_z = rval->alen;
- if (compress2(rval->data, &destlen_z, iblk->data, zp->blksz,
- Z_BEST_COMPRESSION) != Z_OK) {
- errx(1, "can't compress data: compress2() "
- "failed");
+ destlen_z = oblk->alen;
+ if (compress2(oblk->data, &destlen_z, iblk->data, iblk->info.len,
+ zp->comp_level) != Z_OK) {
+ errx(1, "can't compress data: compress2() failed");
/* Not reached */
}
- rval->info.len = (uint32_t)destlen_z;
- return (rval);
+ oblk->info.len = (uint32_t)destlen_z;
}
diff --git a/usr.bin/mkuzip/mkuz_zlib.h b/usr.bin/mkuzip/mkuz_zlib.h
index 55e57a610b4b..ad653b935f22 100644
--- a/usr.bin/mkuzip/mkuz_zlib.h
+++ b/usr.bin/mkuzip/mkuz_zlib.h
@@ -32,5 +32,6 @@
"(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
"mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
-void *mkuz_zlib_init(uint32_t);
-struct mkuz_blk *mkuz_zlib_compress(void *, const struct mkuz_blk *);
+size_t mkuz_zlib_cbound(size_t);
+void *mkuz_zlib_init(int *);
+void mkuz_zlib_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuz_zstd.c b/usr.bin/mkuzip/mkuz_zstd.c
new file mode 100644
index 000000000000..d59cb47917d2
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_zstd.c
@@ -0,0 +1,95 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <err.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <zstd.h>
+
+#include "mkuzip.h"
+#include "mkuz_blk.h"
+#include "mkuz_zstd.h"
+
+size_t
+mkuz_zstd_cbound(size_t blksz)
+{
+ return (ZSTD_compressBound(blksz));
+}
+
+void *
+mkuz_zstd_init(int *comp_level)
+{
+ ZSTD_CCtx *cctx;
+ size_t rc;
+
+ /* Default chosen for near-parity with mkuzip zlib default. */
+ if (*comp_level == USE_DEFAULT_LEVEL)
+ *comp_level = 9;
+ if (*comp_level < ZSTD_minCLevel() || *comp_level == 0 ||
+ *comp_level > ZSTD_maxCLevel())
+ errx(1, "provided compression level %d is invalid",
+ *comp_level);
+
+ cctx = ZSTD_createCCtx();
+ if (cctx == NULL)
+ errx(1, "could not allocate Zstd context");
+
+ rc = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel,
+ *comp_level);
+ if (ZSTD_isError(rc))
+ errx(1, "Could not set zstd compression level %d: %s",
+ *comp_level, ZSTD_getErrorName(rc));
+
+ rc = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
+ if (ZSTD_isError(rc))
+ errx(1, "Could not enable zstd checksum: %s",
+ ZSTD_getErrorName(rc));
+
+ return (cctx);
+}
+
+void
+mkuz_zstd_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk)
+{
+ ZSTD_CCtx *cctx;
+ size_t rc;
+
+ cctx = p;
+
+ rc = ZSTD_compress2(cctx, oblk->data, oblk->alen, iblk->data,
+ iblk->info.len);
+ if (ZSTD_isError(rc))
+ errx(1, "could not compress data: ZSTD_compress2: %s",
+ ZSTD_getErrorName(rc));
+
+ oblk->info.len = rc;
+}
diff --git a/usr.bin/mkuzip/mkuz_zstd.h b/usr.bin/mkuzip/mkuz_zstd.h
new file mode 100644
index 000000000000..874e2d82812c
--- /dev/null
+++ b/usr.bin/mkuzip/mkuz_zstd.h
@@ -0,0 +1,38 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define DEFAULT_SUFX_ZSTD ".uzst"
+
+#define CLOOP_MAGIC_ZSTD "#!/bin/sh\n#Z4.0 Format\n" \
+ "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \
+ "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"
+
+size_t mkuz_zstd_cbound(size_t);
+void *mkuz_zstd_init(int *);
+void mkuz_zstd_compress(void *, const struct mkuz_blk *, struct mkuz_blk *);
diff --git a/usr.bin/mkuzip/mkuzip.8 b/usr.bin/mkuzip/mkuzip.8
index 8a54aee7ba57..9bf4a0c3f893 100644
--- a/usr.bin/mkuzip/mkuzip.8
+++ b/usr.bin/mkuzip/mkuzip.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd February 19, 2019
+.Dd August 9, 2019
.Dt MKUZIP 8
.Os
.Sh NAME
@@ -35,7 +35,9 @@
class
.Sh SYNOPSIS
.Nm
-.Op Fl dLSsvZ
+.Op Fl dSvZ
+.Op Fl A Ar compression_algorithm
+.Op Fl C Ar compression_level
.Op Fl j Ar compression_jobs
.Op Fl o Ar outfile
.Op Fl s Ar cluster_size
@@ -57,17 +59,82 @@ works in two phases:
.It
An
.Ar infile
-image is split into clusters; each cluster is compressed using
-.Xr zlib 3
-or
-.Xr lzma 3 .
+image is split into clusters; each cluster is compressed.
.It
-The resulting set of compressed clusters along with headers that allow
-locating each individual cluster is written to the output file.
+The resulting set of compressed clusters is written to the output file.
+In addition, a
+.Dq table of contents
+header is written which allows for efficient seeking.
.El
.Pp
The options are:
.Bl -tag -width indent
+.It Fl A Op Ar lzma | Ar zlib | Ar zstd
+Select a specific compression algorithm.
+If this option is not provided, the default is
+.Ar zlib .
+.Pp
+The
+.Ar lzma
+algorithm provides noticeably better compression levels than zlib on the same
+data set.
+It has vastly slower compression speed and moderately slower decompression
+speed.
+.Pp
+The
+.Ar zstd
+algorithm provides better compression levels than zlib on the same data set.
+It also has faster compression and decompression speed than zlib.
+In the very high compression
+.Dq level
+settings, it does not offer quite as high a compression ratio as
+.Ar lzma .
+However, its decompression speed does not suffer at high compression
+.Dq levels .
+.It Fl C Ar compression_level
+Select the integer compression level used to parameterize the chosen
+compression algorithm.
+.Pp
+For any given algorithm, a lesser number selects a faster compression mode.
+A greater number selects a slower compression mode.
+Typically, for the same algorithm, a greater
+.Ar compression_level
+provides better final compression ratio.
+.Pp
+For
+.Ar lzma ,
+the range of valid compression levels is
+.Va 0-9 .
+The
+.Nm
+default for lzma is
+.Va 6 .
+.Pp
+For
+.Ar zlib ,
+the range of valid compression levels is
+.Va 1-9 .
+The
+.Nm
+default for zlib is
+.Va 9 .
+.Pp
+For
+.Ar zstd ,
+the range of valid compression levels is currently
+.Va 1-19 .
+The
+.Nm
+default for zstd is
+.Va 9 .
+.It Fl d
+Enable de-duplication.
+When the option is enabled
+.Nm
+detects identical blocks in the input and replaces each subsequent occurrence
+of such block with a pointer to the very first one in the output.
+Setting this option results in a moderate decrease of compressed image size,
+typically around 3-5% of the final size of the compressed image.
.It Fl j Ar compression_jobs
Specify the number of compression jobs that
.Nm
@@ -77,24 +144,9 @@ to the value of
.Va hw.ncpu
.Xr sysctl 8
variable.
-.It Fl d
-Enable de-duplication.
-When the option is enabled the
-.Nm
-detects identical blocks in the input and replaces each subsequent occurence
-of such block with pointer to the very first one in the output.
-Setting this option results is moderate decrease of compressed image size,
-typically around 3-5% of a final size of the compressed image.
-.It Fl L
-Use
-.Xr lzma 3
-compression algorithm instead of the default
-.Xr zlib 3 .
-The
-.Xr lzma 3
-provides noticeable better compression levels on the same data set
-at the expense of much slower compression speed (10-20x) and somewhat slower
-decompression (2-3x).
+.It Fl L
+Legacy flag that indicates the same thing as
+.Dq Fl A Ar lzma .
.It Fl o Ar outfile
Name of the output file
.Ar outfile .
@@ -119,33 +171,44 @@ should be a multiple of 512 bytes.
.It Fl v
Display verbose messages.
.It Fl Z
-Disable zero-blocks detection and elimination.
-When this option is set, the
+Disable zero-block detection and elimination.
+When this option is set,
.Nm
-would compress empty blocks (i.e. clusters that consist of only zero bytes)
-just as it would any other block.
-When the option is not set, the
+compresses blocks of zero bytes just as it would any other block.
+When the option is not set,
.Nm
-detects such blocks and skips them from the output.
+detects and compresses zero blocks in a space-efficient way.
Setting
.Fl Z
-results is slight increase of compressed image size, typically less than 0.1%
-of a final size of the compressed image.
+increases compressed image sizes slightly, typically less than 0.1%.
.El
-.Sh NOTES
-The compression ratio largely depends on the cluster size used.
-.\" The following two sentences are unclear: how can gzip(1) be
-.\" used in a comparable fashion, and wouldn't a gzip-compressed
-.\" image suffer from larger cluster sizes as well?
-For large cluster sizes (16K and higher), typical compression ratios
+.Sh IMPLEMENTATION NOTES
+The compression ratio largely depends on the compression algorithm, level, and
+cluster size used.
+For large cluster sizes (16kB and higher), typical overall image compression
+ratios with
+.Xr zlib 3
are only 1-2% less than those achieved with
-.Xr gzip 1 .
-However, it should be kept in mind that larger cluster
-sizes lead to higher overhead in the
+.Xr gzip 1
+over the entire image.
+However, it should be kept in mind that larger cluster sizes lead to higher
+overhead in the
.Xr geom_uzip 4
class, as the class has to decompress the whole cluster even if
only a few bytes from that cluster have to be read.
.Pp
+Additionally, the threshold at 16-32 kB where a larger cluster size does not
+benefit overall compression ratio is an artifact of the
+.Xr zlib 3
+algorithm in particular.
+.Ar Lzma
+and
+.Ar Zstd
+will continue to provide better compression ratios as cluster sizes
+are increased, at high enough compression levels.
+The same tradeoff continues to apply: reads in
+.Xr geom_uzip 4
+become more expensive the greater the cluster size.
+.Pp
The
.Nm
utility
@@ -169,12 +232,27 @@ specific feature and while it does not require any changes to on-disk
compressed image format, however it did require some matching changes to the
.Xr geom_uzip 4
to handle resulting images correctly.
+.Pp
+To make use of
+.Ar zstd
+.Nm
+images, the kernel must be configured with
+.Cd ZSTDIO .
+It is enabled by default in many
+.Cd GENERIC
+kernels provided as binary distributions by
+.Fx .
+The status on any particular system can be verified by checking
+.Xr sysctl 8
+.Dv kern.features.geom_uzip_zstd
+for
+.Dq 1 .
.Sh EXIT STATUS
.Ex -std
.Sh SEE ALSO
.Xr gzip 1 ,
.Xr xz 1 ,
-.Xr lzma 3 ,
+.Xr zstd 1 ,
.Xr zlib 3 ,
.Xr geom 4 ,
.Xr geom_uzip 4 ,
diff --git a/usr.bin/mkuzip/mkuzip.c b/usr.bin/mkuzip/mkuzip.c
index be0a9b23acb4..a2763e06440c 100644
--- a/usr.bin/mkuzip/mkuzip.c
+++ b/usr.bin/mkuzip/mkuzip.c
@@ -51,8 +51,9 @@ __FBSDID("$FreeBSD$");
#include "mkuzip.h"
#include "mkuz_cloop.h"
#include "mkuz_blockcache.h"
-#include "mkuz_zlib.h"
#include "mkuz_lzma.h"
+#include "mkuz_zlib.h"
+#include "mkuz_zstd.h"
#include "mkuz_blk.h"
#include "mkuz_cfg.h"
#include "mkuz_conveyor.h"
@@ -63,18 +64,38 @@ __FBSDID("$FreeBSD$");
#define DEFAULT_CLSTSIZE 16384
-static struct mkuz_format uzip_fmt = {
- .magic = CLOOP_MAGIC_ZLIB,
- .default_sufx = DEFAULT_SUFX_ZLIB,
- .f_init = &mkuz_zlib_init,
- .f_compress = &mkuz_zlib_compress
+enum UZ_ALGORITHM {
+ UZ_ZLIB = 0,
+ UZ_LZMA,
+ UZ_ZSTD,
+ UZ_INVALID
};
-static struct mkuz_format ulzma_fmt = {
- .magic = CLOOP_MAGIC_LZMA,
- .default_sufx = DEFAULT_SUFX_LZMA,
- .f_init = &mkuz_lzma_init,
- .f_compress = &mkuz_lzma_compress
+static const struct mkuz_format uzip_fmts[] = {
+ [UZ_ZLIB] = {
+ .option = "zlib",
+ .magic = CLOOP_MAGIC_ZLIB,
+ .default_sufx = DEFAULT_SUFX_ZLIB,
+ .f_compress_bound = mkuz_zlib_cbound,
+ .f_init = mkuz_zlib_init,
+ .f_compress = mkuz_zlib_compress,
+ },
+ [UZ_LZMA] = {
+ .option = "lzma",
+ .magic = CLOOP_MAGIC_LZMA,
+ .default_sufx = DEFAULT_SUFX_LZMA,
+ .f_compress_bound = mkuz_lzma_cbound,
+ .f_init = mkuz_lzma_init,
+ .f_compress = mkuz_lzma_compress,
+ },
+ [UZ_ZSTD] = {
+ .option = "zstd",
+ .magic = CLOOP_MAGIC_ZSTD,
+ .default_sufx = DEFAULT_SUFX_ZSTD,
+ .f_compress_bound = mkuz_zstd_cbound,
+ .f_init = mkuz_zstd_init,
+ .f_compress = mkuz_zstd_compress,
+ },
};
static struct mkuz_blk *readblock(int, u_int32_t);
@@ -111,6 +132,8 @@ int main(int argc, char **argv)
struct mkuz_blk_info *chit;
size_t ncpusz, ncpu, magiclen;
double st, et;
+ enum UZ_ALGORITHM comp_alg;
+ int comp_level;
st = getdtime();
@@ -129,12 +152,27 @@ int main(int argc, char **argv)
cfs.en_dedup = 0;
summary.en = 0;
summary.f = stderr;
- cfs.handler = &uzip_fmt;
+ comp_alg = UZ_ZLIB;
+ comp_level = USE_DEFAULT_LEVEL;
cfs.nworkers = ncpu;
struct mkuz_blk *iblk, *oblk;
- while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
+ while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
switch(opt) {
+ case 'A':
+ for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
+ if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
+ break;
+ }
+ if (tmp == UZ_INVALID)
+ errx(1, "invalid algorithm specified: %s",
+ optarg);
+ /* Not reached */
+ comp_alg = tmp;
+ break;
+ case 'C':
+ comp_level = atoi(optarg);
+ break;
case 'o':
oname = optarg;
break;
@@ -162,7 +200,7 @@ int main(int argc, char **argv)
break;
case 'L':
- cfs.handler = &ulzma_fmt;
+ comp_alg = UZ_LZMA;
break;
case 'S':
@@ -193,16 +231,32 @@ int main(int argc, char **argv)
/* Not reached */
}
+ cfs.handler = &uzip_fmts[comp_alg];
+
magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
assert(magiclen < sizeof(hdr.magic));
if (cfs.en_dedup != 0) {
- hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
+ /*
+ * Dedupe requires a version 3 format. Don't downgrade newer
+ * formats.
+ */
+ if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
+ hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
hdr.magic[CLOOP_OFS_COMPR] =
tolower(hdr.magic[CLOOP_OFS_COMPR]);
}
- c_ctx = cfs.handler->f_init(cfs.blksz);
+ if (cfs.blksz % DEV_BSIZE != 0)
+ errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
+
+ cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
+ if (cfs.cbound_blksz > MAXPHYS)
+ errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
+ cfs.cbound_blksz, (size_t)MAXPHYS);
+
+ c_ctx = cfs.handler->f_init(&comp_level);
+ cfs.comp_level = comp_level;
cfs.iname = argv[0];
if (oname == NULL) {
@@ -239,6 +293,14 @@ int main(int argc, char **argv)
}
toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
+ /*
+ * Give the last+1 entry a defined value instead of leaving it as
+ * uninitialized heap memory. If final padding is added later, this
+ * entry may or may not be overwritten with an offset representing the
+ * length of the final compressed block.
+ */
+ toc[hdr.nblocks] = 0;
+
cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
if (cfs.fdw < 0) {
diff --git a/usr.bin/mkuzip/mkuzip.h b/usr.bin/mkuzip/mkuzip.h
index f41507c86964..b4bec58525ee 100644
--- a/usr.bin/mkuzip/mkuzip.h
+++ b/usr.bin/mkuzip/mkuzip.h
@@ -28,6 +28,9 @@
#define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args)
+/* Use an algorithm-specific default level if no explicit level is selected. */
+#define USE_DEFAULT_LEVEL INT_MIN
+
void *mkuz_safe_malloc(size_t);
void *mkuz_safe_zmalloc(size_t);
int mkuz_memvcmp(const void *, unsigned char, size_t);